This repository has been archived by the owner on Feb 12, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexportCollectionMetadataToCSV.py
114 lines (101 loc) · 4.12 KB
/
exportCollectionMetadataToCSV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import json
import requests
import secrets
import time
import csv
from collections import Counter
import urllib3
import argparse
# Ask which secrets module to load. A blank answer, or a name that cannot be
# imported, leaves the `secrets` module imported at the top of the file in use.
secretsVersion = input('To edit production server, enter the name of the secrets file: ')
environmentMessage = 'Editing Stage'
if secretsVersion:
    try:
        secrets = __import__(secretsVersion)
    except ImportError:
        pass
    else:
        environmentMessage = 'Editing Production'
print(environmentMessage)

# Connection and output settings are read from the selected secrets module.
baseURL = secrets.baseURL
email, password = secrets.email, secrets.password
filePath = secrets.filePath
verify = secrets.verify
skippedCollections = secrets.skippedCollections
# The collection handle comes from -i/--handle when supplied on the command
# line; otherwise the user is prompted for it.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-i',
    '--handle',
    help='handle of the collection to retreive. optional - if not provided, the script will ask for input',
)
args = parser.parse_args()
handle = args.handle if args.handle else input('Enter collection handle: ')

# Suppress urllib3's InsecureRequestWarning for the requests made below.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Authentication: log in to the REST API and keep the JSESSIONID cookie so it
# can be sent with every later request. startTime marks the beginning of the
# timed portion of the run.
startTime = time.time()
data = {'email':email,'password':password}
header = {'content-type':'application/json','accept':'application/json'}
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']
cookies = {'JSESSIONID': session}
headerFileUpload = {'accept':'application/json'}
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
userFullName = status['fullname']
print('authenticated')

# Resolve the collection handle to its record; the uuid is what the item
# listing endpoint needs.
endpoint = baseURL+'/rest/handle/'+handle
collection = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
collectionID = collection['uuid']
# Same record as `collection`; there is no need to request the endpoint twice.
collectionTitle = collection
# Walk the collection 200 items per request, recording uuid -> handle for each
# item, and stop after the first page that comes back empty.
itemList = {}
offset = 0
items = ''
while items != []:
    pageURL = baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset)
    items = requests.get(pageURL, headers=header, cookies=cookies, verify=verify)
    # Keep retrying the same page every 5 seconds until the status is 200.
    while items.status_code != 200:
        time.sleep(5)
        items = requests.get(pageURL, headers=header, cookies=cookies, verify=verify)
    items = items.json()
    for item in items:
        itemList[item['uuid']] = item['handle']
    offset += 200
    print(offset)
# First pass: collect every metadata key used by the items, in order of first
# appearance. The 'dc.description.provenance' key is excluded from the export.
keyList = []
for itemID in itemList:
    metadataURL = baseURL+'/rest/items/'+str(itemID)+'/metadata'
    print(metadataURL)
    metadata = requests.get(metadataURL, headers=header, cookies=cookies, verify=verify).json()
    for metadataElement in metadata:
        key = metadataElement['key']
        if key not in keyList and key != 'dc.description.provenance':
            keyList.append(key)
            print(itemID, key)

# The CSV header is the item identifier followed by one column per key.
keyListHeader = ['itemID'] + keyList
print(keyListHeader)

# Second pass: write one row per item. Repeated values of the same key are
# joined with '|'.
csvPath = filePath+handle.replace('/','-')+'Metadata.csv'
# newline='' is what the csv module requires of the file object (otherwise
# blank rows appear on Windows); utf-8 keeps non-ASCII metadata writable on
# every platform; the with-block guarantees the file is flushed and closed.
with open(csvPath, 'w', newline='', encoding='utf-8') as csvFile:
    f = csv.writer(csvFile)
    f.writerow(keyListHeader)
    for itemID in itemList:
        print(itemID)
        metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
        valuesByKey = {key: [] for key in keyList}
        for metadataElement in metadata:
            key = metadataElement['key']
            if key in valuesByKey:
                value = metadataElement.get('value')
                # A missing or non-string value still takes a slot, as an
                # empty string, so the number of '|'-separated entries matches
                # the number of metadata elements for that key.
                valuesByKey[key].append(value if isinstance(value, str) else '')
        # itemID is written untouched: it never carries a trailing separator,
        # so trimming it (as the value columns once were) would cut off its
        # last character.
        f.writerow([itemID] + ['|'.join(valuesByKey[key]) for key in keyList])
# Close the REST session, then report how long the run took as h:mm:ss,
# measured from startTime.
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)

elapsedTime = time.time() - startTime
totalMinutes, s = divmod(elapsedTime, 60)
h, m = divmod(totalMinutes, 60)
print('Total script run time: ','%d:%02d:%02d' % (h, m, s))