-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcombiner.py
55 lines (41 loc) · 1.49 KB
/
combiner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from os import listdir, path
import pandas as pd
# Directory that combine_vehicle_position_files_in_directory() scans for
# temporary files to merge; the trailing slash is part of the value.
temp_directory = 'data/temp/'
def write_permanent_vehicle_positions(file_path: str, df: pd.DataFrame):
    """Sort vehicle-position rows and write them to a Parquet file.

    The rows are ordered by the columns ``Transportas``, ``Marsrutas``,
    ``ReisoID``, ``MasinosNumeris``, ``MasinosTipas``, ``MatavimoLaikas``
    and ``Gauta`` (in that priority), written to ``file_path`` without the
    DataFrame index, and a summary of the written frame is printed to
    stdout.

    Args:
        file_path: Destination path of the Parquet file.
        df: Frame containing at least the sort columns listed above.

    Returns:
        None.
    """
    sort_columns = [
        'Transportas',
        'Marsrutas',
        'ReisoID',
        'MasinosNumeris',
        'MasinosTipas',
        'MatavimoLaikas',
        'Gauta',
    ]
    sorted_df = df.sort_values(by=sort_columns)
    sorted_df.to_parquet(file_path, index=False)
    # DataFrame.info() prints its summary to stdout itself and returns None;
    # wrapping it in print() would emit a stray "None" line after the summary.
    sorted_df.info()
def combine_vehicle_position_files(file_name: str, temporary_file_path: str):
    """Merge one temporary Parquet file into its permanent counterpart.

    The temporary file is read and, if
    ``data/vehicle_positions/vilnius/<file_name>`` already exists, its rows
    are placed in front of the new ones. Rows that repeat the same
    combination of ``Transportas``, ``Marsrutas``, ``ReisoID``,
    ``MasinosNumeris``, ``MasinosTipas`` and ``MatavimoLaikas`` are then
    dropped, keeping the first occurrence, and the result is handed to
    ``write_permanent_vehicle_positions``.

    Args:
        file_name: Name of the permanent file inside
            ``data/vehicle_positions/vilnius/``.
        temporary_file_path: Path of the temporary Parquet file to merge.

    Returns:
        None.
    """
    duplicate_key = [
        'Transportas',
        'Marsrutas',
        'ReisoID',
        'MasinosNumeris',
        'MasinosTipas',
        'MatavimoLaikas',
    ]

    new_df = pd.read_parquet(temporary_file_path)
    permanent_file_path = f'data/vehicle_positions/vilnius/{file_name}'

    if path.exists(permanent_file_path):
        previous_df = pd.read_parquet(permanent_file_path)
        # Previously stored rows come first so that keep='first' (the
        # drop_duplicates default) retains them over re-delivered rows.
        combined_df = pd.concat([previous_df, new_df], axis=0)
    else:
        combined_df = new_df

    # De-duplicate unconditionally: the very first write for a file name
    # must obey the same uniqueness rule as every later merge.
    combined_df = combined_df.drop_duplicates(subset=duplicate_key)

    write_permanent_vehicle_positions(permanent_file_path, combined_df)
def combine_vehicle_position_files_in_directory():
    """Merge every regular file found in ``temp_directory``.

    Each entry of ``temp_directory`` that is a file is passed, together with
    its bare name, to ``combine_vehicle_position_files``; entries that are
    not regular files are skipped.
    """
    for entry_name in listdir(temp_directory):
        candidate_path = path.join(temp_directory, entry_name)
        if not path.isfile(candidate_path):
            continue
        combine_vehicle_position_files(entry_name, candidate_path)
# Run the directory-wide merge only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == '__main__':
    combine_vehicle_position_files_in_directory()