From a1b5e46046dfa80a994a7dfa6553a3586a5ba27a Mon Sep 17 00:00:00 2001 From: Christof Leutenegger <49416778+bifbof@users.noreply.github.com> Date: Sun, 30 Jun 2024 17:53:21 +0200 Subject: [PATCH] TST: fix ton of warnings (#627) --- tests/analysis/test_tracking_quality.py | 5 +++-- tests/data/geolife_long/sp_tpls.csv | 2 +- tests/data/trips/sp_tpls_gaps.csv | 2 +- tests/geogr/test_distances.py | 1 - tests/geogr/test_filter.py | 4 +--- tests/io/test_from_geopandas.py | 4 ++-- tests/io/test_postgis.py | 10 ++++----- tests/preprocessing/test_positionfixes.py | 9 ++++---- tests/preprocessing/test_staypoints.py | 2 +- tests/preprocessing/test_triplegs.py | 20 ++++++++--------- trackintel/analysis/metrics.py | 4 ++-- trackintel/analysis/tracking_quality.py | 26 +++++++++++++---------- trackintel/geogr/distances.py | 2 +- trackintel/geogr/filter.py | 6 +++--- trackintel/preprocessing/positionfixes.py | 8 +++---- trackintel/preprocessing/triplegs.py | 15 +++++++------ 16 files changed, 62 insertions(+), 58 deletions(-) diff --git a/tests/analysis/test_tracking_quality.py b/tests/analysis/test_tracking_quality.py index 323c31ed..90197ef6 100644 --- a/tests/analysis/test_tracking_quality.py +++ b/tests/analysis/test_tracking_quality.py @@ -188,11 +188,12 @@ def test_tracking_quality_user_error(self, testdata_sp_tpls_geolife_long): """Test if the an error is raised when passing unknown 'granularity' to _get_tracking_quality_user().""" sp_tpls = testdata_sp_tpls_geolife_long user_0 = sp_tpls.loc[sp_tpls["user_id"] == 0] + start_date = sp_tpls["started_at"].min().floor(freq="D") with pytest.raises(ValueError): - ti.analysis.tracking_quality._get_tracking_quality_user(user_0, granularity=12345) + ti.analysis.tracking_quality._get_tracking_quality_user(user_0, start_date, granularity=12345) with pytest.raises(ValueError): - ti.analysis.tracking_quality._get_tracking_quality_user(user_0, granularity="random") + ti.analysis.tracking_quality._get_tracking_quality_user(user_0, start_date, granularity="random") def test_staypoints_accessors(self, testdata_all_geolife_long): """Test tracking_quality calculation from staypoints accessor.""" diff --git a/tests/data/geolife_long/sp_tpls.csv b/tests/data/geolife_long/sp_tpls.csv index 4c5061a1..27867063 100644 --- a/tests/data/geolife_long/sp_tpls.csv +++ b/tests/data/geolife_long/sp_tpls.csv @@ -42,4 +42,4 @@ id,started_at,finished_at,user_id,type,is_activity,trip_id,prev_trip_id,next_tri 20,2008-10-24 04:12:55+00:00,2008-10-24 05:28:05+00:00,1,staypoint,True,,11,12,2008-10-24 05:28:05+00:00,False,False 20,2008-10-24 05:28:05+00:00,2008-10-24 05:39:50+00:00,1,tripleg,False,12,,,2008-10-24 05:39:53+00:00,True,False 21,2008-10-24 05:39:53+00:00,2008-10-24 06:08:42+00:00,1,staypoint,True,,12,13,2008-10-24 06:08:42+00:00,False,False -21,2008-10-24 06:08:42+00:00,2008-10-24 06:35:50+00:00,1,tripleg,False,13,,,,,False +21,2008-10-24 06:08:42+00:00,2008-10-24 06:35:50+00:00,1,tripleg,False,13,,,,False,False diff --git a/tests/data/trips/sp_tpls_gaps.csv b/tests/data/trips/sp_tpls_gaps.csv index 249b050c..3f5ccb12 100644 --- a/tests/data/trips/sp_tpls_gaps.csv +++ b/tests/data/trips/sp_tpls_gaps.csv @@ -29,4 +29,4 @@ id,started_at,finished_at,user_id,type,is_activity,trip_id,prev_trip_id,next_tri 70,2010-01-13 20:40:00,2010-01-14 00:44:00,1,staypoint,True,,8.0,,2010-01-15 20:39:00,False,True 126,2010-01-15 20:39:00,2010-01-15 20:40:00,1,tripleg,False,9.0,,,2010-01-15 20:44:00,False,False 127,2010-01-15 20:44:00,2010-01-15 20:50:00,1,tripleg,False,9.0,,,2010-01-17 20:39:00,False,True -128,2010-01-17 20:39:00,2010-01-17 20:40:00,1,tripleg,False,10.0,,,,,False +128,2010-01-17 20:39:00,2010-01-17 20:40:00,1,tripleg,False,10.0,,,,False,False diff --git a/tests/geogr/test_distances.py b/tests/geogr/test_distances.py index 2947b190..60271c74 100644 --- a/tests/geogr/test_distances.py +++ b/tests/geogr/test_distances.py @@ -242,7 +242,6 @@ def test_known_euclidean_distance(self, two_pfs): pfs0, euc00, pfs1, euc01 = two_pfs res00 = calculate_distance_matrix(X=pfs0, dist_metric="euclidean") res01 = calculate_distance_matrix(X=pfs0, Y=pfs1, dist_metric="euclidean") - print(res00) assert np.all(euc00 == res00) assert np.all(euc01 == res01) diff --git a/tests/geogr/test_filter.py b/tests/geogr/test_filter.py index 530e7dc1..00eb6bbb 100644 --- a/tests/geogr/test_filter.py +++ b/tests/geogr/test_filter.py @@ -18,8 +18,6 @@ def locs_from_geolife(): method="dbscan", epsilon=10, num_samples=1, distance_metric="haversine", agg_level="dataset" ) - # the projection needs to be defined: WGS84 - locs.crs = "epsg:4326" return locs @@ -79,7 +77,7 @@ def test_filter_triplegs(self): def test_filter_locations(self, locs_from_geolife): """Test if spatial_filter works for locations.""" locs = locs_from_geolife - extent = gpd.read_file(os.path.join("tests", "data", "area", "tsinghua.geojson"), crs="epsg:4326") + extent = gpd.read_file(os.path.join("tests", "data", "area", "tsinghua.geojson")) # filter locations with the area within_loc = locs.spatial_filter(areas=extent, method="within", re_project=True) diff --git a/tests/io/test_from_geopandas.py b/tests/io/test_from_geopandas.py index 317838a8..e961ab28 100644 --- a/tests/io/test_from_geopandas.py +++ b/tests/io/test_from_geopandas.py @@ -66,9 +66,9 @@ def test_setting_geometry(self, example_positionfixes): def test_set_crs(self, example_positionfixes): """Test if crs will be set.""" pfs = example_positionfixes.copy() - example_positionfixes.crs = "EPSG:2056" + example_positionfixes = example_positionfixes.set_crs("EPSG:2056", allow_override=True) # check if the crs is correctly set - pfs.crs = None + pfs = pfs.set_crs(None, allow_override=True) pfs = _trackintel_model(pfs, crs="EPSG:2056") assert_geodataframe_equal(example_positionfixes, pfs) diff --git a/tests/io/test_postgis.py b/tests/io/test_postgis.py index 9bbb81da..db7a7766 100644 --- a/tests/io/test_postgis.py +++ b/tests/io/test_postgis.py @@ -281,7 +281,7 @@ def test_no_crs(self, example_positionfixes, conn_postgis): table = "positionfixes" sql = f"SELECT * FROM {table}" geom_col = pfs.geometry.name - pfs.crs = None + pfs = pfs.set_crs(None, allow_override=True) no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS." try: with pytest.warns(UserWarning, match=no_crs_warning): @@ -375,7 +375,7 @@ def test_no_crs(self, example_triplegs, conn_postgis): table = "triplegs" sql = f"SELECT * FROM {table}" geom_col = tpls.geometry.name - tpls.crs = None + tpls = tpls.set_crs(None, allow_override=True) no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS." try: @@ -446,7 +446,7 @@ def test_no_crs(self, example_staypoints, conn_postgis): table = "staypoints" sql = f"SELECT * FROM {table}" geom_col = example_staypoints.geometry.name - sp.crs = None + sp = sp.set_crs(None, allow_override=True) no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS." try: @@ -500,7 +500,7 @@ def test_no_crs(self, example_locations, conn_postgis): table = "locations" sql = f"SELECT * FROM {table}" geom_col = locs.geometry.name - locs.crs = None + locs = locs.set_crs(None, allow_override=True) no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS." try: @@ -731,7 +731,7 @@ class TestGetSrid: def test_srid(self, example_positionfixes): """Test if `_get_srid` returns the correct srid.""" gdf = example_positionfixes.copy() - gdf.crs = None + gdf = gdf.set_crs(None, allow_override=True) assert _get_srid(gdf) == -1 srid = 3857 gdf.set_crs(f"epsg:{srid}", inplace=True) diff --git a/tests/preprocessing/test_positionfixes.py b/tests/preprocessing/test_positionfixes.py index a8d05dd8..7317c9aa 100644 --- a/tests/preprocessing/test_positionfixes.py +++ b/tests/preprocessing/test_positionfixes.py @@ -74,10 +74,11 @@ def example_positionfixes_isolated(): {"user_id": 2, "tracked_at": t2, "geometry": p2, "staypoint_id": pd.NA}, {"user_id": 2, "tracked_at": t4, "geometry": p3, "staypoint_id": 5}, ] - pfs = gpd.GeoDataFrame(data=list_dict, geometry="geometry", crs="EPSG:4326") + pfs = ti.Positionfixes(data=list_dict, geometry="geometry", crs="EPSG:4326") + pfs["staypoint_id"] = pfs["staypoint_id"].astype("Int64") pfs.index.name = "id" - return ti.Positionfixes(pfs) + return pfs class TestGenerate_staypoints: @@ -201,9 +202,7 @@ def test_include_last(self): """Test if the include_last arguement will include the last pfs as stp.""" pfs, _ = ti.io.dataset_reader.read_geolife(os.path.join("tests", "data", "geolife")) - pfs_wo, sp_wo = pfs.generate_staypoints( - method="sliding", dist_threshold=100, time_threshold=5.0, include_last=False - ) + pfs_wo, sp_wo = pfs.generate_staypoints() pfs_include, sp_include = pfs.generate_staypoints( method="sliding", dist_threshold=100, time_threshold=5.0, include_last=True ) diff --git a/tests/preprocessing/test_staypoints.py b/tests/preprocessing/test_staypoints.py index de2345c6..d0cb1311 100644 --- a/tests/preprocessing/test_staypoints.py +++ b/tests/preprocessing/test_staypoints.py @@ -178,7 +178,7 @@ def test_dbscan_hav_euc(self): method="dbscan", epsilon=100, num_samples=1, distance_metric="haversine", agg_level="dataset" ) # WGS_1984 - sp.crs = "epsg:4326" + sp = sp.set_crs("epsg:4326", allow_override=True) # WGS_1984_UTM_Zone_49N sp = sp.to_crs("epsg:32649") diff --git a/tests/preprocessing/test_triplegs.py b/tests/preprocessing/test_triplegs.py index e82f65e3..3c6da791 100644 --- a/tests/preprocessing/test_triplegs.py +++ b/tests/preprocessing/test_triplegs.py @@ -387,27 +387,27 @@ def test_crs(self, example_triplegs): """Test that the resulting GeoDataFrame has the correct crs or a warning or error is thrown if not set""" sp, tpls = example_triplegs # Case 1: sp crs None --> throw warning and set to tpls crs - sp.crs = None + sp = sp.set_crs(None, allow_override=True) with pytest.warns(UserWarning): _, _, trips = generate_trips(sp, tpls) assert trips.crs == tpls.crs # Case 2: Both crs None --> warn and set to None - tpls.crs = None + tpls = tpls.set_crs(None, allow_override=True) with pytest.warns(UserWarning): _, _, trips = generate_trips(sp, tpls) assert trips.crs is None # Case 3: tpls crs is None --> throw warning and set to sp crs - sp.crs = "EPSG:4326" + sp = sp.set_crs("EPSG:4326", allow_override=True) with pytest.warns(UserWarning): _, _, trips = generate_trips(sp, tpls) assert trips.crs == "EPSG:4326" # Case 4: Both crs set and correspond - tpls.crs = "EPSG:2056" - sp.crs = "EPSG:2056" + tpls = tpls.set_crs("EPSG:2056", allow_override=True) + sp = sp.set_crs("EPSG:2056", allow_override=True) _, _, trips = generate_trips(sp, tpls) assert trips.crs == "EPSG:2056" # Case 5: Both crs set but differ --> throw error - sp.crs = "EPSG:4326" + sp = sp.set_crs("EPSG:4326", allow_override=True) error_msg = "CRS of staypoints and triplegs differ. Geometry cannot be joined safely." with pytest.raises(AssertionError, match=error_msg): generate_trips(sp, tpls) @@ -432,10 +432,9 @@ def _create_debug_sp_tpls_data(sp, tpls, gap_threshold): sp_tpls["is_activity"] = sp_tpls["is_activity"].__eq__(True) sp_tpls.sort_values(by=["user_id", "started_at"], inplace=True) sp_tpls["started_at_next"] = sp_tpls["started_at"].shift(-1) - sp_tpls["activity_next"] = sp_tpls["is_activity"].shift(-1) + sp_tpls["activity_next"] = sp_tpls["is_activity"].shift(-1, fill_value=False) sp_tpls["gap"] = (sp_tpls["started_at_next"] - sp_tpls["finished_at"]).dt.seconds / 60 > gap_threshold - return sp_tpls @@ -519,16 +518,17 @@ def _generate_trips_old(sp_input, tpls_input, gap_threshold=15, print_progress=F sp_tpls["started_at_next"] = sp_tpls["started_at"].shift(-1) sp_tpls["is_activity_next"] = sp_tpls["is_activity"].shift(-1) + cols = ["started_at", "finished_at", "user_id", "type", "is_activity", "id", "started_at_next", "is_activity_next"] if print_progress: tqdm.pandas(desc="User trip generation") trips = ( - sp_tpls.groupby(["user_id"], group_keys=False, as_index=False) + sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)[cols] .progress_apply(_generate_trips_user, gap_threshold=gap_threshold) .reset_index(drop=True) ) else: trips = ( - sp_tpls.groupby(["user_id"], group_keys=False, as_index=False) + sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)[cols] .apply(_generate_trips_user, gap_threshold=gap_threshold) .reset_index(drop=True) ) diff --git a/trackintel/analysis/metrics.py b/trackintel/analysis/metrics.py index 6683b920..64f2258b 100644 --- a/trackintel/analysis/metrics.py +++ b/trackintel/analysis/metrics.py @@ -38,9 +38,9 @@ def radius_gyration(sp, method="count", print_progress=False): if print_progress: tqdm.pandas(desc="User radius of gyration calculation") - s = sp.groupby("user_id").progress_apply(_radius_gyration_user, method=method) + s = sp.groupby("user_id").progress_apply(_radius_gyration_user, method=method, include_groups=False) else: - s = sp.groupby("user_id").apply(_radius_gyration_user, method=method) + s = sp.groupby("user_id").apply(_radius_gyration_user, method=method, include_groups=False) s = s.rename("radius_gyration") return s diff --git a/trackintel/analysis/tracking_quality.py b/trackintel/analysis/tracking_quality.py index 06d4400a..96a48e48 100644 --- a/trackintel/analysis/tracking_quality.py +++ b/trackintel/analysis/tracking_quality.py @@ -76,7 +76,9 @@ def temporal_tracking_quality(source, granularity="all"): return None if granularity == "all": - quality = df.groupby("user_id", as_index=False).apply(_get_tracking_quality_user, granularity) + quality = df.groupby("user_id", as_index=False).apply( + _get_tracking_quality_user, granularity, include_groups=False + ) return quality # split records that span several days @@ -90,19 +92,11 @@ def temporal_tracking_quality(source, granularity="all"): column_name = "week_monday" elif granularity == "weekday": - # get the tracked week relative to the first day - start_date = df["started_at"].min().floor(freq="D") - df["week"] = ((df["started_at"] - start_date)).dt.days // 7 - grouper = df["started_at"].dt.weekday column_name = "weekday" elif granularity == "hour": df = _split_overlaps(df, granularity="hour") - # get the tracked day relative to the first day - start_date = df["started_at"].min().floor(freq="D") - df["day"] = (df["started_at"] - start_date).dt.days - grouper = df["started_at"].dt.hour column_name = "hour" @@ -111,8 +105,13 @@ def temporal_tracking_quality(source, granularity="all"): f"granularity unknown. We only support ['all', 'day', 'week', 'weekday', 'hour']. You passed {granularity}" ) + start_date = df["started_at"].min().floor(freq="D") # calculate per-user per-grouper tracking quality - quality = df.groupby(["user_id", grouper]).apply(_get_tracking_quality_user, granularity).reset_index() + quality = ( + df.groupby(["user_id", grouper])[["started_at", "finished_at"]] + .apply(_get_tracking_quality_user, start_date, granularity) + .reset_index() + ) # rename and reorder quality.rename(columns={"started_at": column_name}, inplace=True) @@ -121,7 +120,7 @@ def temporal_tracking_quality(source, granularity="all"): return quality -def _get_tracking_quality_user(df, granularity="all"): +def _get_tracking_quality_user(df, start_date, granularity="all"): """ Tracking quality per-user per-granularity. @@ -130,6 +129,9 @@ def _get_tracking_quality_user(df, granularity="all"): df : Trackintel class The source dataframe + start_date: pd.Timestamp + When measurement started, used to calculate in which weekday or week the measurement lies. + granularity : {"all", "day", "weekday", "week", "hour"}, default "all" Determines the extent of the tracking. "all" the entire tracking period, "day" and "weekday" a whole day, "week" a whole week, and "hour" a whole hour. @@ -149,6 +151,7 @@ def _get_tracking_quality_user(df, granularity="all"): elif granularity == "weekday": # total seconds in an day * number of tracked weeks # (entries from multiple weeks may be grouped together) + df["week"] = ((df["started_at"] - start_date)).dt.days // 7 extent = 60 * 60 * 24 * (df["week"].max() - df["week"].min() + 1) elif granularity == "week": # total seconds in a week @@ -156,6 +159,7 @@ def _get_tracking_quality_user(df, granularity="all"): elif granularity == "hour": # total seconds in an hour * number of tracked days # (entries from multiple days may be grouped together) + df["day"] = (df["started_at"] - start_date).dt.days extent = (60 * 60) * (df["day"].max() - df["day"].min() + 1) else: raise ValueError( diff --git a/trackintel/geogr/distances.py b/trackintel/geogr/distances.py index 42470368..5602defe 100644 --- a/trackintel/geogr/distances.py +++ b/trackintel/geogr/distances.py @@ -358,7 +358,7 @@ def get_speed_triplegs(triplegs, positionfixes=None, method="tpls_speed"): if "tripleg_id" not in positionfixes: raise AttributeError('Positionfixes must include column "tripleg_id".') # group positionfixes by triplegs and compute average speed for each collection of positionfixes - grouped_pfs = positionfixes.groupby("tripleg_id").apply(_single_tripleg_mean_speed) + grouped_pfs = positionfixes.groupby("tripleg_id").apply(_single_tripleg_mean_speed, include_groups=False) # add the speed values to the triplegs column tpls = pd.merge(triplegs, grouped_pfs.rename("speed"), how="left", left_index=True, right_index=True) tpls.index = tpls.index.astype("int64") diff --git a/trackintel/geogr/filter.py b/trackintel/geogr/filter.py index 19bcafc8..c4fbbc65 100644 --- a/trackintel/geogr/filter.py +++ b/trackintel/geogr/filter.py @@ -55,11 +55,11 @@ def spatial_filter(source, areas, method="within", re_project=False): # get final result if method == "within": - ret_gdf = possible_matches.loc[possible_matches.within(areas.unary_union)] + ret_gdf = possible_matches.loc[possible_matches.within(areas.union_all())] elif method == "intersects": - ret_gdf = possible_matches.loc[possible_matches.intersects(areas.unary_union)] + ret_gdf = possible_matches.loc[possible_matches.intersects(areas.union_all())] elif method == "crosses": - ret_gdf = possible_matches.loc[possible_matches.crosses(areas.unary_union)] + ret_gdf = possible_matches.loc[possible_matches.crosses(areas.union_all())] else: raise ValueError("method unknown. We only support ['within', 'intersects', 'crosses']. " f"You passed {method}") diff --git a/trackintel/preprocessing/positionfixes.py b/trackintel/preprocessing/positionfixes.py index 817158ac..ebe0d51e 100644 --- a/trackintel/preprocessing/positionfixes.py +++ b/trackintel/preprocessing/positionfixes.py @@ -254,7 +254,7 @@ def generate_triplegs( # initialize the index list of pfs where a tpl will begin insert_index_ls = [] - pfs["staypoint_id"] = pd.NA + pfs["staypoint_id"] = pd.Series(dtype="Int64") for user_id_this in pfs["user_id"].unique(): sp_user = staypoints[staypoints["user_id"] == user_id_this] @@ -282,7 +282,7 @@ def generate_triplegs( # initialize tripleg_id with pd.NA and fill all pfs that belong to staypoints with -1 # pd.NA will be replaced later with tripleg ids - pfs["tripleg_id"] = pd.NA + pfs["tripleg_id"] = pd.Series(dtype="Int64") pfs.loc[~pd.isna(pfs["staypoint_id"]), "tripleg_id"] = -1 # get all conditions that trigger a new tripleg. @@ -437,7 +437,7 @@ def _generate_triplegs_overlap_staypoints(cond_temporal_gap, pfs, staypoints): # spatial overlap: overlap tripleg with the location of previous and next staypoint # geometry: tpl's share common start and end pfs with sp - cond_overlap_end = cond_overlap & ~cond_temporal_gap.shift(-1).fillna(False) & pd.isna(pfs["tripleg_id"]) + cond_overlap_end = cond_overlap & ~cond_temporal_gap.shift(-1, fill_value=False) & pd.isna(pfs["tripleg_id"]) pfs.loc[cond_overlap_end, "tripleg_id"] = between_tpls_ids.shift(-1)[cond_overlap_end] cond_empty = pd.isna(pfs["tripleg_id"]) pfs.loc[cond_empty, "tripleg_id"] = between_tpls_ids[cond_empty] @@ -524,7 +524,7 @@ def __create_new_staypoints(start, end, pfs, elevation_flag, geo_col, last_flag= # if end is the last pfs, we want to include the info from it as well if last_flag: end = len(pfs) - points = pfs[geo_col].iloc[start:end].unary_union + points = pfs[geo_col].iloc[start:end].union_all() if check_gdf_planar(pfs): new_sp[geo_col] = points.centroid else: diff --git a/trackintel/preprocessing/triplegs.py b/trackintel/preprocessing/triplegs.py index fd3d96fc..23b8eff3 100644 --- a/trackintel/preprocessing/triplegs.py +++ b/trackintel/preprocessing/triplegs.py @@ -133,7 +133,13 @@ def _seperate_ids(row): user_change[["type", "is_activity"]] = ["user_change", True] # nicer for debugging # merge trips with (filler) activities - trips.drop(columns=["type", "sp_tpls_id"], inplace=True) # make space so no overlap with activity "sp_tpls_id" + + # make space so no overlap with activity "sp_tpls_id" + trips.drop(columns=["type", "sp_tpls_id"], inplace=True) + + # trips are no activity (with this we don't have to fillna later) + trips["is_activity"] = False + # Inserting `gaps` and `user_change` into the dataframe creates buffers that catch shifted # "staypoint_id" and "trip_id" from corrupting staypoints/trips. trips_with_act = pd.concat((trips, sp_tpls_only_act, gaps, user_change), axis=0, ignore_index=True) @@ -153,8 +159,6 @@ def _seperate_ids(row): trips_with_act["prev_trip_id"] = trips_with_act["trip_id"].shift(1) trips_with_act["next_trip_id"] = trips_with_act["trip_id"].shift(-1) - # transform column to binary - trips_with_act["is_activity"] = trips_with_act["is_activity"].fillna(False) # delete activities trips = trips_with_act[~trips_with_act["is_activity"]].copy() @@ -268,9 +272,8 @@ def _concat_staypoints_triplegs(staypoints, triplegs, add_geometry): sp["type"] = "staypoint" # create table with relevant information from triplegs and staypoints. - sp_cols = ["started_at", "finished_at", "user_id", "type", "is_activity"] - tpls_cols = ["started_at", "finished_at", "user_id", "type"] - sp_tpls = pd.concat([sp[sp_cols], tpls[tpls_cols]]) + cols = ["started_at", "finished_at", "user_id", "type", "is_activity"] + sp_tpls = pd.concat([sp[cols], tpls[cols]]) sp_tpls["is_activity"] = sp_tpls["is_activity"].fillna(False) sp_tpls["sp_tpls_id"] = sp_tpls.index # store id for later reassignment if add_geometry: