Skip to content

Commit dfd0a93

Browse files
committed
PDF metadata for multiple authors
1 parent 10c1896 commit dfd0a93

File tree

4 files changed

+51
-33
lines changed

4 files changed

+51
-33
lines changed

dvcurator/gui.py

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,7 @@ def make_github(self):
165165

166166
def rename(self):
167167
self.disable_buttons()
168-
prefix = dvcurator.rename.last_name_prefix(self.citation)
169-
t = threading.Thread(target=dvcurator.rename.basic_rename, args=(self.subfolder_path, prefix))
168+
t = threading.Thread(target=dvcurator.rename.basic_rename, args=(self.subfolder_path, self.citation))
170169
t.start()
171170
self.schedule_check(t)
172171

@@ -178,7 +177,7 @@ def convert(self):
178177

179178
def set_metadata(self):
180179
self.disable_buttons()
181-
t = threading.Thread(target=dvcurator.pdf_metadata.standard_metadata, args=(self.subfolder_path, self.citation['author'][0]['authorName']['value']))
180+
t = threading.Thread(target=dvcurator.pdf_metadata.standard_metadata, args=(self.subfolder_path, self.citation))
182181
t.start()
183182
self.schedule_check(t)
184183

@@ -201,21 +200,22 @@ def __init__(self, parent, *args, **kwargs):
201200
self.filemenu = tk.Menu(self.menubar, tearoff=False)
202201
self.filemenu.add_command(label="Open config", command=self.load_config)
203202
self.filemenu.add_command(label="Save current config", command=self.save_config)
204-
self.filemenu.add_command(label="Select project subfolder manually", command=self.set_subfolder)
205203
self.filemenu.add_command(label="Exit dvcurator", command=parent.destroy)
206204
self.menubar.add_cascade(label="File", menu=self.filemenu)
207205

208206
self.editmenu = tk.Menu(self.menubar, tearoff=False)
209-
self.editmenu.add_command(label="Open Dropbox subfolder", command=self.open_explorer)
210207
self.editmenu.add_command(label="Basic file rename", command=self.rename)
211208
self.editmenu.add_command(label="Convert docx to pdf", command=self.convert)
212209
self.editmenu.add_command(label="Set PDF metadata", command=self.set_metadata)
210+
self.editmenu.add_separator()
211+
self.editmenu.add_command(label="Open Dropbox subfolder", command=self.open_explorer)
212+
self.editmenu.add_command(label="Select project subfolder manually", command=self.set_subfolder)
213213
self.menubar.add_cascade(label="Edit", menu=self.editmenu)
214214
self.menubar.entryconfig("Edit", state=tk.DISABLED)
215215
parent.config(menu=self.menubar)
216216

217217
# Checklist of tickets included in the .md files
218-
checklist = tk.Frame(self)
218+
checklist = tk.LabelFrame(self, text="Project issues:")
219219
if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
220220
self.issues = os.listdir(os.path.join(sys._MEIPASS, "issues"))
221221
else:
@@ -230,17 +230,7 @@ def __init__(self, parent, *args, **kwargs):
230230

231231
# Settings
232232
settings = tk.Frame(self)
233-
#config_file=tk.StringVar()
234-
#config_label = tk.Label(settings, text="Config file: ")
235-
#config_options = tk.Frame(settings)
236-
#config_entry = tk.Button(config_options, text="Open", command=self.load_config)
237-
#config_save = tk.Button(config_options, text="Save", command=self.save_config)
238233

239-
#config_label.grid(column=1, row=1)
240-
#config_entry.grid(column=1, row=1)
241-
#config_save.grid(column=2, row=1)
242-
#config_options.grid(column=2, row=1)
243-
244234
self.doi=tk.StringVar()
245235
doi_label = tk.Label(settings, text="Persistent ID (DOI): ")
246236
self.doi_entry = tk.Entry(settings, textvariable=self.doi)
@@ -289,7 +279,7 @@ def __init__(self, parent, *args, **kwargs):
289279
self.reset_button.pack()
290280

291281
settings.grid(column=1, row=1)
292-
checklist.grid(column=2, row=2)
282+
checklist.grid(column=2, row=2, padx=10)
293283
process.grid(column=2, row=1)
294284

295285
from tkinter import scrolledtext
@@ -299,15 +289,10 @@ def __init__(self, parent, *args, **kwargs):
299289
sys.stderr = redir
300290
self.out.grid(column=1, row=2)
301291

302-
303-
304-
305292
def main():
    """Create the Tk root window, mount MainApp in it and run the event loop."""
    window = tk.Tk()
    # Window title carries the package version string
    window.title("dvcurator " + dvcurator.version.version)
    app = MainApp(window)
    app.pack(side="top", fill="both", expand=True)
    window.mainloop()
312297

313298
if __name__ == "__main__":

dvcurator/pdf_metadata.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,18 @@ def find_pdfs(path):
1111
pdfs += [os.path.join(root, name)]
1212

1313
return pdfs
14-
15-
def standard_metadata(folder, author):
14+
15+
# Generate the string for the author metadata field
16+
# separate with semicolons between full names for > 1 author
17+
def combine_author_names(citation):
    """Build the author string written to the PDF author metadata field.

    Reads ``citation['author']``, a sequence of entries that each hold a
    full name under ``['authorName']['value']``, and joins those names
    with ``"; "``.

    Returns:
        The single name when there is one author, the names separated by
        ``"; "`` when there are several, and an empty string when the
        author list is empty (rather than raising ``IndexError``).
    """
    return "; ".join(author['authorName']['value'] for author in citation['author'])
23+
24+
# This is the function run from the GUI
25+
def standard_metadata(folder, citation):
1626
import pikepdf, os, shutil
1727
import dvcurator.fs
1828

@@ -25,12 +35,14 @@ def standard_metadata(folder, author):
2535
print("Error: no PDFs detected in: " + edit_path)
2636
return None
2737

38+
author_string = combine_author_names(citation)
39+
2840
# Ideally, we would just edit the files in place
2941
# Some versions of pikepdf can't do this though
3042
# so we copy them to a separate folder, then save back to the orginal place
3143
old_path = os.path.join(edit_path, "originals")
3244
os.mkdir(old_path)
33-
45+
3446
for path in pdfs:
3547
original = os.path.join(old_path, os.path.basename(path))
3648
os.rename(path, original)
@@ -48,7 +60,7 @@ def standard_metadata(folder, author):
4860
print("!! Warning !! PDF is PDF/A")
4961
meta['dc:title'] = os.path.basename(path)
5062
#meta['dc:creator'] = author
51-
meta['pdf:Author'] = author
63+
meta['pdf:Author'] = author_string
5264
meta['dc:description'] = "QDR Data Project"
5365
meta['pdf:Subject'] = "QDR Data Project"
5466
meta['pdf:Keywords'] = "-"
@@ -57,8 +69,7 @@ def standard_metadata(folder, author):
5769
pdf.close()
5870
print("Metadata written to: %s" %os.path.basename(path))
5971

60-
#os.rmdir(old_path)
61-
#shutil.rmtree(old_path)
72+
shutil.rmtree(old_path)
6273
print("PDF metadata process complete!")
6374

6475
return edit_path

dvcurator/rename.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,10 @@ def remove_all_accents(folder):
5353
unicodedata.normalize('NFKD', f).encode('ascii', 'ignore').decode('ascii')))
5454

5555
# This is the function we call from the GUI, which calls all the above
56-
def basic_rename(folder, prefix):
56+
def basic_rename(folder, citation):
5757
import dvcurator.fs
5858
print("Renaming files", end="... ")
59+
prefix = last_name_prefix(citation)
5960
new_path = dvcurator.fs.copy_new_step(folder, "rename")
6061
if (not new_path):
6162
return None

test.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,23 @@ def test_check(self):
3434
def test_search(self):
3535
self.assertTrue(github.search_existing("Karcher - Anonymous Peer Review", "QualitativeDataRepository/testing-demos"))
3636

37+
class TestRename(unittest.TestCase):
    """Exercise rename.basic_rename against a throwaway project folder."""

    def test_rename(self):
        """A file under "QDR Prepared/1_extract" comes out as "<prefix>_<name>".

        The prefix is whatever rename.last_name_prefix returns for the
        citation fetched via dataverse.get_citation(host, doi).
        """
        fake_file = "foobar.txt"

        # The context manager removes the temporary tree on exit, including
        # when an assertion below fails.
        with tempfile.TemporaryDirectory() as tmp:
            first_folder = os.path.join(tmp, "QDR Prepared", "1_extract")
            os.makedirs(first_folder)

            # Empty placeholder file for the rename step to act on
            with open(os.path.join(first_folder, fake_file), 'w'):
                pass

            citation = dataverse.get_citation(host, doi)
            new_path = rename.basic_rename(tmp, citation)

            # basic_rename returns None when the new step folder could not be
            # created; os.listdir(None) would then list the current directory
            # and produce a misleading comparison, so fail explicitly first.
            self.assertIsNotNone(new_path)

            new_file = os.listdir(new_path)[0]
            self.assertEqual(rename.last_name_prefix(citation) + "_" + fake_file,
                             new_file)
53+
3754
class TestPDFMetadata(unittest.TestCase):
3855

3956
def test_makedir(self):
@@ -44,10 +61,14 @@ def test_makedir(self):
4461
f.cleanup()
4562

4663
def test_pdfmetadata(self):
47-
# This test is to make sure test_string gets written
64+
# This test is to make sure author string gets written
4865
# We read it back out from one of the files
4966
import pikepdf
50-
test_string = "Unit Test"
67+
68+
# Get author string from online citation
69+
citation = dataverse.get_citation(host, doi)
70+
author_string = pdf_metadata.combine_author_names(citation)
71+
5172
d = tempfile.TemporaryDirectory()
5273
temp_structure = os.path.normpath(os.path.join(d.name, "QDR Prepared/5_rename"))
5374
os.makedirs(temp_structure)
@@ -57,11 +78,11 @@ def test_pdfmetadata(self):
5778
for i in range(1, 11):
5879
empty_pdf.save(os.path.join(temp_structure, f'test{i}.pdf'))
5980

60-
edit_path = pdf_metadata.standard_metadata(d.name, test_string)
81+
edit_path = pdf_metadata.standard_metadata(d.name, citation)
6182
one_file = os.path.join(edit_path, os.listdir(edit_path)[4])
6283
example = pikepdf.open(one_file)
6384
meta = example.open_metadata()
64-
self.assertEqual(meta['pdf:Author'], test_string)
85+
self.assertEqual(meta['pdf:Author'], author_string)
6586
example.close()
6687

6788
d.cleanup()

0 commit comments

Comments
 (0)