-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreposense.py
127 lines (100 loc) · 4.45 KB
/
reposense.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import argparse
import json
import os
import sys
# Path component used to split an authorship file's path when deriving
# the output location.
REPOSENSE_FOLDER_NAME = "reposense-report"
# File-name suffix that identifies the authorship data files to convert.
AUTHORSHIP_FILE = "authorship.json"
# Prefix of every output path the script writes markdown files to.
OUTPUT_FOLDER = "output"
def create_author_if_not_exist(db, author):
    """Register *author* in *db* with a markdown title line, if needed.

    The placeholder ids "-" and "" are never registered, and an author
    that already has an entry is left untouched.
    """
    is_placeholder = author in ("-", "")
    if is_placeholder or author in db:
        return
    db[author] = ["# " + author]
def add_file_header(db, author, filename):
    """Start a new file chunk for *author*.

    Appends a "###### <filename>" heading followed by an opening code
    fence tagged with the file extension (dots removed).  Does nothing
    when *author* has no entry in *db*.
    """
    if author not in db:
        return
    _, suffix = os.path.splitext(filename)
    language = suffix.replace('.', '')
    db[author].extend(["###### " + filename, "```" + language])
def close_file_header(db, author):
    """Append a closing code fence to *author*'s entry, if one exists."""
    if author not in db:
        return
    closing_fence = "```"
    db[author].append(closing_fence)
def add_line_to_author(db, author, line):
    """Append the 'content' value of *line* to *author*'s entry.

    Authors without an entry in *db* are skipped, in which case *line*
    is not inspected at all.
    """
    if author not in db:
        return
    db[author].append(line['content'])
def filter_low_contrib(db, min_count=3):
    """Remove chunks with fewer than *min_count* content lines, in place.

    Every author's list is scanned for chunks that begin at a line starting
    with "######" and end at a line ending with "```".  A chunk is kept only
    when it holds at least *min_count* lines besides its heading, opening
    fence and closing fence.

    Args:
        db: mapping of author id to that author's list of markdown lines;
            each value is replaced by the filtered list.
        min_count: minimum number of content lines a chunk needs to be kept.

    NOTE(review): lines that are not part of a kept chunk -- including the
    leading "# <author>" title -- are not carried over to the result.  This
    matches the previous behaviour; confirm whether the title should survive.
    """
    # Heading + opening fence + closing fence surround the content lines.
    required_span = min_count + 3
    for author in db:
        lines = db[author]
        kept = []
        start_index = 0
        for index, line in enumerate(lines):
            if line.startswith("######"):
                start_index = index
            # The span is inclusive on both ends, hence the +1; without it a
            # chunk with exactly `min_count` content lines was discarded.
            elif line.endswith("```") and index - start_index + 1 >= required_span:
                kept.extend(lines[start_index:index + 1])
        db[author] = kept
def get_authorship_files(directory):
    """Return the paths of all authorship files found below *directory*.

    The tree is walked with os.walk; a file qualifies when its name ends
    with AUTHORSHIP_FILE.  Paths are returned in traversal order.
    """
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(directory)
        for name in names
        if name.endswith(AUTHORSHIP_FILE)
    ]
def parse_args(argv=None):
    """Parse the command-line options of the converter.

    Args:
        argv: list of argument strings to parse.  When None (the default),
            the arguments are taken from sys.argv[1:].

    Returns:
        dict with the keys 'formats', 'directory', 'minlines', 'datadir'
        and 'subdir'.  'formats', 'datadir' and 'subdir' are None when the
        option is not given.
    """
    parser = argparse.ArgumentParser(
        description='Convert authorship.json generated by RepoSense.jar to markdown documents.')
    parser.add_argument('--formats', nargs='+',
                        help='File formats to be included. Default: all files will be included.')
    parser.add_argument('--directory', nargs='?', default='.',
                        help='Directory containing the reposense-report for conversion. '
                             'Default: current working directory.')
    # The default is given as an int so it matches type=int without relying
    # on argparse converting a string default.
    parser.add_argument('--minlines', nargs='?', type=int, default=3,
                        help='Minimum number of consecutive lines for acceptance. '
                             'Chunks that do not meet the requirement will not be extracted '
                             'to the markdown file. Default: 3.')
    parser.add_argument('--datadir', nargs='?', type=str,
                        help="Data files' directory to be included. Default: All data files will be included.")
    parser.add_argument('--subdir', nargs='?', type=str,
                        help="Create a sub-directory in the group folder to store the author files. "
                             "Default: author files will be placed directly in the group folder.")
    args = parser.parse_args(sys.argv[1:] if argv is None else argv)
    return vars(args)
if __name__ == "__main__":
    # Script entry point: for every authorship file found below the input
    # directory, group the attributed lines per author and write one
    # markdown file per author under OUTPUT_FOLDER.
    args = parse_args()
    inclusive_exts = args['formats']
    input_directory = args['directory']  # where to look for reposense-report folder
    min_line = args['minlines']
    data_dir = args['datadir'] if args['datadir'] is not None else ""
    sub_dir = "/" + args['subdir'] + "/" if args['subdir'] is not None else ""

    for file in get_authorship_files(input_directory):
        # Read with an explicit encoding so the result does not depend on
        # the platform's default locale.
        with open(file, 'r', encoding='utf-8') as f:
            data_files = json.load(f)

        db = {}
        for data in data_files:
            last_author = ""
            data_filename = data['path']
            if not data_filename.startswith(data_dir):
                continue
            ext = os.path.splitext(data_filename)[1].replace('.', '')
            # No --formats given means every extension is accepted.
            if inclusive_exts and ext not in inclusive_exts:
                continue
            for line in data['lines']:
                try:
                    author = line['author']['gitId']
                except (KeyError, TypeError):
                    # Missing or null author information: use the "-"
                    # placeholder, which is never registered in db, so the
                    # line is left out of every author's file.
                    author = "-"
                if author != last_author:
                    # Authorship changed: end the previous author's chunk
                    # and start a new one for the current author.
                    close_file_header(db, last_author)
                    create_author_if_not_exist(db, author)
                    add_file_header(db, author, data_filename)
                add_line_to_author(db, author, line)
                last_author = author
            close_file_header(db, last_author)

        # Use the part of the path after the report folder name; when the
        # folder name does not occur in the path, the whole path is used.
        split = file.split(REPOSENSE_FOLDER_NAME, 1)
        filename = split[0] if len(split) == 1 else split[1]
        output_path = OUTPUT_FOLDER + os.path.dirname(filename) + sub_dir
        os.makedirs(output_path, exist_ok=True)

        filter_low_contrib(db, min_line)
        for author in db:
            with open(os.path.join(output_path, author + ".md"), 'w', encoding='utf-8') as out:
                out.write('\n'.join(db[author]))