re-import.py
import itertools
import pathlib
import pickle
from concurrent.futures import ThreadPoolExecutor

import pandas as pd


def load_pickle_list(thisPickle):
    """Load one pickle of batched sign data and flatten it into a list of row dicts."""
    with open(str(thisPickle), 'rb') as handle:
        batch_signs = pickle.load(handle)
    flattened_data = []
    # Each pickle maps a letter to a list of dicts; keep only the fields we need.
    for letter, dicts in batch_signs.items():
        for dict_ in dicts:
            flattened_data.append({
                'Letter': letter,
                'Frame': dict_['Frame'],
                'Sequence': dict_['Sequence'],
            })
    return flattened_data


def main():
    # Collect every .pickle file under output/ (recursively).
    pickleFolder = pathlib.Path("output/")
    list_of_pickles = [str(p) for p in pickleFolder.rglob('*.pickle')]
    # Load the pickles in parallel, then flatten the per-file row lists into one list.
    with ThreadPoolExecutor() as executor:
        filesList = list(itertools.chain.from_iterable(executor.map(load_pickle_list, list_of_pickles)))
    # Combine everything into a single DataFrame and write it out as compressed Parquet.
    df = pd.DataFrame(filesList)
    df.to_parquet("output/combined.parquet", engine='pyarrow', compression='gzip')


if __name__ == "__main__":
    main()
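
# A minimal sketch of assumed downstream usage: reading the combined Parquet file
# back into a DataFrame for inspection. The path "output/combined.parquet" matches
# what main() writes above; pyarrow must be installed for pandas.read_parquet.
#
#     import pandas as pd
#     df = pd.read_parquet("output/combined.parquet", engine="pyarrow")
#     print(df.head())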