-
Notifications
You must be signed in to change notification settings - Fork 1
/
filter_trajs.py
51 lines (43 loc) · 1.76 KB
/
filter_trajs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import pandas as pd
import numpy as np
import utils
import argparse
from filterclassic import FilterCstPosition, FilterCstSpeed, MyFilterDerivative, FilterCstLatLon,FilterIsolated
# unwrap wrong on 248803487 of 2022-01-03
from traffic.core import Traffic
import matplotlib.pyplot as plt
def nointerpolate(x):
    ''' no-op strategy: hand @x back untouched, without filling anything in '''
    return x
def read_trajectories(f, strategy):
    ''' read a trajectory file named @f, and filters points using a @strategy

    @f: path of a parquet file; it must provide at least the columns
        "flight_id" and "timestamp", plus the columns listed in the masking
        table below.
    @strategy: name of the filtering strategy; only "classic" is implemented.

    Returns the filtered pandas DataFrame.
    Raises ValueError when @strategy is not implemented.
    '''
    # Validate the strategy before any I/O so that a typo fails immediately
    # instead of after loading and sorting the whole file.
    if strategy != "classic":
        raise ValueError(f"strategy '{strategy}' not implemented")
    point_filter = (
        FilterCstLatLon()
        | FilterCstPosition()
        | FilterCstSpeed()
        | MyFilterDerivative()
        | FilterIsolated()
    )

    df = pd.read_parquet(f)
    df["flight_id"] = df["flight_id"].astype(np.int64)
    keys = ["flight_id", "timestamp"]
    # One row per (flight, timestamp), ordered in time within each flight.
    df = df.drop_duplicates(keys).sort_values(keys).reset_index(drop=True)  # .head(10_000)

    # nointerpolate is passed as the strategy so the filter output is returned
    # as-is; presumably rejected samples are left as NaN rather than being
    # filled in -- confirm against the traffic documentation.
    filtered = (
        Traffic(df)
        .filter(filter=point_filter, strategy=nointerpolate)
        .eval(max_workers=1)
        .data
    )

    # Reference column -> columns that are blanked wherever the reference
    # column is NaN.
    weather_columns = ["u_component_of_wind", "v_component_of_wind", "temperature"]
    columns_to_mask = {
        # "track": ["track_unwrapped"],  (currently disabled)
        "latitude": weather_columns,
        "altitude": weather_columns,
    }
    for reference, dependents in columns_to_mask.items():
        reference_missing = filtered[reference].isna()
        for column in dependents:
            filtered[column] = filtered[column].mask(reference_missing)
    return filtered
def main(argv=None):
    ''' command-line entry point: filter the trajectory file and write the result

    @argv: optional list of command-line arguments; when None, argparse reads
        them from sys.argv. Mainly useful to call the script programmatically.

    All three options are mandatory: argparse exits with a usage error when
    one is missing, instead of passing None down to the parquet reader/writer.
    '''
    parser = argparse.ArgumentParser(
        description='filter out measurements that are likely erroneous',
    )
    parser.add_argument("-t_in", required=True,
                        help="input parquet file containing the trajectories")
    parser.add_argument("-t_out", required=True,
                        help="output parquet file for the filtered trajectories")
    parser.add_argument("-strategy", required=True,
                        help="filtering strategy (only 'classic' is implemented)")
    args = parser.parse_args(argv)
    df = read_trajectories(args.t_in, args.strategy)
    df.to_parquet(args.t_out, index=False)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()