-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathattributions2json.py
executable file
·109 lines (82 loc) · 3.07 KB
/
attributions2json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
import base64
import json
import os
import shlex
import subprocess
import sys
import urllib.request, urllib.error, urllib.parse
class Attribution:
    """One attribution record: a project name plus its license text.

    The instance attributes are laid out so that ``__dict__`` is already the
    JSON shape: ``name`` is stored as given and ``license`` is a dict holding
    the license body under the ``'text'`` key.
    """

    def __init__(self, name, license):
        # Keep the assignment order (name, then license): export_json dumps
        # ``__dict__`` with sort_keys=False, so this order is the key order
        # of the emitted JSON objects.
        self.name = name
        self.license = dict(text=license)
def export_json(output, *attributions):
    """Write the given attribution objects to *output* as a JSON array.

    Every attribution is serialized through its instance ``__dict__``; keys
    keep their insertion order and the document is indented by four spaces.
    An ``IOError`` raised while opening or writing the file is reported on
    stdout and not re-raised.
    """
    records = [entry.__dict__ for entry in attributions]
    payload = json.dumps(records, indent=4, sort_keys=False)
    try:
        # The context manager closes the handle when the block exits.
        with open(output, 'w') as handle:
            handle.write(payload)
    except IOError as err:
        print('I/O Error: ({0}) : {1}'.format(err.errno, err.strerror))
def get_repos(input):
    """Read repository URLs from a text file, one URL per line.

    Args:
        input: Path of the text file to read.

    Returns:
        A list of the non-empty lines, in file order, with surrounding
        whitespace removed.

    Blank and whitespace-only lines are skipped. Previously they were
    returned as empty strings, which made ``get_name`` fail with an
    ``IndexError`` (for example on a trailing empty line).
    """
    with open(input) as source:
        candidates = [line.strip() for line in source]
    return [url for url in candidates if url]
def get_name(repo):
    """Return the repository name from a ``github.com/<owner>/<name>`` URL.

    The name is the second ``/``-separated segment after ``github.com/``.
    Raises ``IndexError`` when the URL has no ``github.com/`` part or no
    second segment.
    """
    after_host = repo.split('github.com/')[1]
    segments = after_host.split('/')
    return segments[1]
def get_owner_name(repo):
    """Return the ``<owner>/<name>`` slug from a GitHub repository URL.

    The slug is built from the first two ``/``-separated segments after
    ``github.com/``; if only one segment exists, that segment alone is
    returned. Raises ``IndexError`` when ``github.com/`` is absent.
    """
    after_host = repo.split('github.com/')[1]
    segments = after_host.split('/')
    return '/'.join(segments[:2])
def get_content_headers():
    """Build the HTTP headers for GitHub API requests.

    Returns a dict with an ``Authorization: token <value>`` entry when the
    ``GITHUB_ACCESS_TOKEN`` environment variable is set (even to an empty
    string), and an empty dict otherwise.
    """
    token = os.environ.get('GITHUB_ACCESS_TOKEN')
    if token is None:
        return {}
    return {'Authorization': 'token %s' % token}
def send_content_request(repo, path):
    """Fetch *path* of *repo* from the GitHub contents API and decode the JSON.

    Args:
        repo: Repository URL containing ``github.com/<owner>/<name>``.
        path: Path inside the repository. Leading slashes are ignored, so
            ``'/'`` requests the repository root.

    Returns:
        The decoded JSON body. Callers in this file treat the root response
        as a list of entries and a file response as a mapping.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on network or
            HTTP failures (``HTTPError`` is a subclass).
    """
    # Percent-encode the path so file names containing spaces or other
    # reserved characters still form a valid URL; '/' is left untouched.
    relative_path = urllib.parse.quote(path.lstrip('/'))
    url = 'https://api.github.com/repos/%s/contents/%s' % (
        get_owner_name(repo),
        relative_path,
    )
    request = urllib.request.Request(url, headers=get_content_headers())
    # Close the HTTP response deterministically instead of leaking the
    # connection until garbage collection.
    with urllib.request.urlopen(request) as response:
        return json.loads(response.read())
def find_license_path(repo):
    """Locate the license file in the root directory of *repo*.

    The root listing is searched for an entry whose path starts with
    ``LICENSE``, then ``LICENCE``, then ``COPYING``. Matching ignores case,
    so names such as ``License.md`` are found as well.

    Args:
        repo: Repository URL containing ``github.com/<owner>/<name>``.

    Returns:
        The path of the first matching entry, or ``None`` when nothing
        matches.
    """
    entries = send_content_request(repo, '/')
    # get_license_text expects a single-file response, so a directory such as
    # ``LICENSES/`` must never be selected. Assumes entries carry the GitHub
    # ``type`` field; entries without it are treated as files.
    candidates = [
        entry for entry in entries
        if entry.get('type') not in ('dir', 'submodule')
    ]
    # Prefixes are tried in priority order so a LICENSE file still wins over
    # COPYING when both exist.
    for prefix in ('LICENSE', 'LICENCE', 'COPYING'):
        for entry in candidates:
            if entry['path'].upper().startswith(prefix):
                return entry['path']
    return None
def get_license_text(repo):
    """Download and decode the license text of *repo*.

    Args:
        repo: Repository URL containing ``github.com/<owner>/<name>``.

    Returns:
        The license text as ``str``, or ``None`` when no license file is
        found in the repository root.

    Exits the process with status 1 when the API response is not
    base64-encoded.
    """
    path = find_license_path(repo)
    if not path:
        return None

    data = send_content_request(repo, path)
    if data['encoding'] != 'base64':
        print("Encoding of Github response was not base64")
        sys.exit(1)

    raw = base64.b64decode(data['content'])
    # License files are not always valid UTF-8 (e.g. a Latin-1 copyright
    # sign). Substitute undecodable bytes instead of aborting the whole run
    # with UnicodeDecodeError.
    return raw.decode('utf-8', errors='replace')
def validate_file(file, type):
    """Validate a command-line file argument, exiting with status 1 on failure.

    When *type* is ``'input'`` the path must be an existing regular file.
    For any other *type* the path is treated as the output file and must end
    with ``.json``. Returns ``None`` when the check passes.
    """
    if type != 'input':
        # Output file: only the extension is checked.
        if file.endswith('.json'):
            return
        print('Output file must be .json')
        sys.exit(1)

    # Input file: it has to exist on disk.
    if os.path.isfile(file):
        return
    print('File not found: {}'.format(file))
    sys.exit(1)
if __name__ == '__main__':
    # Usage: attributions2json <file listing repository URLs> <output .json>
    if len(sys.argv) != 3:
        print('Missing script argument: ./attributions2json [./Attribution File] [output_file.json]')
        sys.exit(1)

    _, input_file, output_file = sys.argv
    validate_file(input_file, 'input')
    validate_file(output_file, 'output')

    # Requests still work without a token; this only warns the user.
    if os.environ.get('GITHUB_ACCESS_TOKEN') is None:
        print("Warning: No GITHUB_ACCESS_TOKEN environment variable configured. Expect to run into API rate limits.")

    # Collect one attribution per repository that has a license file.
    attributions = []
    for url in get_repos(input_file):
        name = get_name(url)
        text = get_license_text(url)
        if not text:
            print('{}: license file not found'.format(name))
            continue
        attributions.append(Attribution(name, text))

    export_json(output_file, *attributions)