-
Notifications
You must be signed in to change notification settings - Fork 2
/
acc_features.py
executable file
·39 lines (34 loc) · 1.71 KB
/
acc_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python3
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from pickle import dump
from os import listdir, mkdir
from os.path import isdir, join
# Default input/output locations; also used as the command-line defaults.
in_directory = 'feature_vectors'
out_directory = 'gen'


def accumulate_features(in_dir=in_directory, out_dir=out_directory):
    """Collect the distinct lines of all input files and pickle them.

    Every file directly inside ``in_dir`` is read as text and split into
    lines; each distinct line is kept once, in first-seen order. The
    resulting list is written with ``pickle.dump`` to
    ``<out_dir>/features.p``.

    Args:
        in_dir: Directory containing the input files. Sub-directories are
            skipped.
        out_dir: Directory the ``features.p`` file is written to. It is
            created (including missing parents) when it does not exist.

    Raises:
        OSError: If ``in_dir`` cannot be listed, an input file cannot be
            read, or the output file cannot be written.
    """
    # Function-scope import: the file header only imports ``mkdir``, which
    # cannot create nested directories and fails if the directory appears
    # between the existence check and the call.
    from os import makedirs

    makedirs(out_dir, exist_ok=True)

    features = []
    # Companion set for O(1) membership tests; ``features`` keeps the order.
    seen = set()
    # ``listdir`` returns entries in arbitrary order; sorting makes the
    # feature order (and therefore the pickled output) reproducible.
    for name in sorted(listdir(in_dir)):
        path = join(in_dir, name)
        if isdir(path):
            # Only plain files hold observations; opening a directory raises.
            continue
        with open(path, 'r') as file:
            lines = file.read().splitlines()
        for line in lines:
            if line not in seen:
                seen.add(line)
                features.append(line)
        # NOTE(review): progress is reported once per input file -- confirm
        # this matches the intended verbosity.
        print('Accumulated feature', len(features))
    print(len(features))
    with open(join(out_dir, 'features.p'), 'wb') as file:
        dump(features, file)
if __name__ == '__main__':
    # Command-line entry point: read the input and output directories from
    # the arguments (falling back to the module defaults) and run the
    # accumulation.
    cli_parser = ArgumentParser(
        description='Accumulates all possible features of the observations in sparse string format ',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    cli_parser.add_argument(
        '-i', '--input',
        type=str,
        default=in_directory,
        help='The directory in which the input observations in their raw string format are located.',
    )
    cli_parser.add_argument(
        '-o', '--output',
        type=str,
        default=out_directory,
        help='The directory which the output file should be written to.',
    )
    options = cli_parser.parse_args()

    # Rebind the module-level names to the values chosen on the command line.
    in_directory, out_directory = options.input, options.output
    accumulate_features(in_directory, out_directory)