TST: fix ton of warnings #627

Merged
merged 1 commit into from
Jun 30, 2024
5 changes: 3 additions & 2 deletions tests/analysis/test_tracking_quality.py
@@ -188,11 +188,12 @@ def test_tracking_quality_user_error(self, testdata_sp_tpls_geolife_long):
"""Test if the an error is raised when passing unknown 'granularity' to _get_tracking_quality_user()."""
sp_tpls = testdata_sp_tpls_geolife_long
user_0 = sp_tpls.loc[sp_tpls["user_id"] == 0]
start_date = sp_tpls["started_at"].min().floor(freq="D")

with pytest.raises(ValueError):
ti.analysis.tracking_quality._get_tracking_quality_user(user_0, granularity=12345)
ti.analysis.tracking_quality._get_tracking_quality_user(user_0, start_date, granularity=12345)
with pytest.raises(ValueError):
ti.analysis.tracking_quality._get_tracking_quality_user(user_0, granularity="random")
ti.analysis.tracking_quality._get_tracking_quality_user(user_0, start_date, granularity="random")

def test_staypoints_accessors(self, testdata_all_geolife_long):
"""Test tracking_quality calculation from staypoints accessor."""
2 changes: 1 addition & 1 deletion tests/data/geolife_long/sp_tpls.csv
@@ -42,4 +42,4 @@ id,started_at,finished_at,user_id,type,is_activity,trip_id,prev_trip_id,next_tri
20,2008-10-24 04:12:55+00:00,2008-10-24 05:28:05+00:00,1,staypoint,True,,11,12,2008-10-24 05:28:05+00:00,False,False
20,2008-10-24 05:28:05+00:00,2008-10-24 05:39:50+00:00,1,tripleg,False,12,,,2008-10-24 05:39:53+00:00,True,False
21,2008-10-24 05:39:53+00:00,2008-10-24 06:08:42+00:00,1,staypoint,True,,12,13,2008-10-24 06:08:42+00:00,False,False
21,2008-10-24 06:08:42+00:00,2008-10-24 06:35:50+00:00,1,tripleg,False,13,,,,,False
21,2008-10-24 06:08:42+00:00,2008-10-24 06:35:50+00:00,1,tripleg,False,13,,,,False,False
2 changes: 1 addition & 1 deletion tests/data/trips/sp_tpls_gaps.csv
@@ -29,4 +29,4 @@ id,started_at,finished_at,user_id,type,is_activity,trip_id,prev_trip_id,next_tri
70,2010-01-13 20:40:00,2010-01-14 00:44:00,1,staypoint,True,,8.0,,2010-01-15 20:39:00,False,True
126,2010-01-15 20:39:00,2010-01-15 20:40:00,1,tripleg,False,9.0,,,2010-01-15 20:44:00,False,False
127,2010-01-15 20:44:00,2010-01-15 20:50:00,1,tripleg,False,9.0,,,2010-01-17 20:39:00,False,True
128,2010-01-17 20:39:00,2010-01-17 20:40:00,1,tripleg,False,10.0,,,,,False
128,2010-01-17 20:39:00,2010-01-17 20:40:00,1,tripleg,False,10.0,,,,False,False
1 change: 0 additions & 1 deletion tests/geogr/test_distances.py
@@ -242,7 +242,6 @@ def test_known_euclidean_distance(self, two_pfs):
pfs0, euc00, pfs1, euc01 = two_pfs
res00 = calculate_distance_matrix(X=pfs0, dist_metric="euclidean")
res01 = calculate_distance_matrix(X=pfs0, Y=pfs1, dist_metric="euclidean")
print(res00)
assert np.all(euc00 == res00)
assert np.all(euc01 == res01)

4 changes: 1 addition & 3 deletions tests/geogr/test_filter.py
@@ -18,8 +18,6 @@ def locs_from_geolife():
method="dbscan", epsilon=10, num_samples=1, distance_metric="haversine", agg_level="dataset"
)

# the projection needs to be defined: WGS84
locs.crs = "epsg:4326"
return locs


@@ -79,7 +77,7 @@ def test_filter_triplegs(self):
def test_filter_locations(self, locs_from_geolife):
"""Test if spatial_filter works for locations."""
locs = locs_from_geolife
extent = gpd.read_file(os.path.join("tests", "data", "area", "tsinghua.geojson"), crs="epsg:4326")
extent = gpd.read_file(os.path.join("tests", "data", "area", "tsinghua.geojson"))

# filter locations with the area
within_loc = locs.spatial_filter(areas=extent, method="within", re_project=True)
4 changes: 2 additions & 2 deletions tests/io/test_from_geopandas.py
@@ -66,9 +66,9 @@ def test_setting_geometry(self, example_positionfixes):
def test_set_crs(self, example_positionfixes):
"""Test if crs will be set."""
pfs = example_positionfixes.copy()
example_positionfixes.crs = "EPSG:2056"
example_positionfixes = example_positionfixes.set_crs("EPSG:2056", allow_override=True)
# check if the crs is correctly set
pfs.crs = None
pfs = pfs.set_crs(None, allow_override=True)
pfs = _trackintel_model(pfs, crs="EPSG:2056")
assert_geodataframe_equal(example_positionfixes, pfs)

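Note on the CRS changes in this file and the ones below: the replaced direct `.crs` assignments evidently triggered warnings in the GeoPandas version used by CI, and `set_crs(..., allow_override=True)`, which returns a new object, is the supported way to re-label (not re-project) or unset a CRS. A minimal standalone sketch — the toy point and EPSG codes are illustrative, not from this PR:

```python
import geopandas as gpd
from shapely.geometry import Point

# toy GeoDataFrame with a CRS already defined
gdf = gpd.GeoDataFrame({"id": [0]}, geometry=[Point(8.5, 47.4)], crs="EPSG:4326")

# re-label the coordinates (no re-projection); allow_override=True is
# required because a CRS is already set
gdf = gdf.set_crs("EPSG:2056", allow_override=True)

# unset the CRS entirely, as the "no CRS" tests in this PR do
gdf = gdf.set_crs(None, allow_override=True)
assert gdf.crs is None
```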
10 changes: 5 additions & 5 deletions tests/io/test_postgis.py
@@ -281,7 +281,7 @@ def test_no_crs(self, example_positionfixes, conn_postgis):
table = "positionfixes"
sql = f"SELECT * FROM {table}"
geom_col = pfs.geometry.name
pfs.crs = None
pfs = pfs.set_crs(None, allow_override=True)
no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS."
try:
with pytest.warns(UserWarning, match=no_crs_warning):
@@ -375,7 +375,7 @@ def test_no_crs(self, example_triplegs, conn_postgis):
table = "triplegs"
sql = f"SELECT * FROM {table}"
geom_col = tpls.geometry.name
tpls.crs = None
tpls = tpls.set_crs(None, allow_override=True)

no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS."
try:
@@ -446,7 +446,7 @@ def test_no_crs(self, example_staypoints, conn_postgis):
table = "staypoints"
sql = f"SELECT * FROM {table}"
geom_col = example_staypoints.geometry.name
sp.crs = None
sp = sp.set_crs(None, allow_override=True)

no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS."
try:
@@ -500,7 +500,7 @@ def test_no_crs(self, example_locations, conn_postgis):
table = "locations"
sql = f"SELECT * FROM {table}"
geom_col = locs.geometry.name
locs.crs = None
locs = locs.set_crs(None, allow_override=True)

no_crs_warning = "Could not parse CRS from the GeoDataFrame. Inserting data without defined CRS."
try:
@@ -731,7 +731,7 @@ class TestGetSrid:
def test_srid(self, example_positionfixes):
"""Test if `_get_srid` returns the correct srid."""
gdf = example_positionfixes.copy()
gdf.crs = None
gdf = gdf.set_crs(None, allow_override=True)
assert _get_srid(gdf) == -1
srid = 3857
gdf.set_crs(f"epsg:{srid}", inplace=True)
9 changes: 4 additions & 5 deletions tests/preprocessing/test_positionfixes.py
@@ -74,10 +74,11 @@ def example_positionfixes_isolated():
{"user_id": 2, "tracked_at": t2, "geometry": p2, "staypoint_id": pd.NA},
{"user_id": 2, "tracked_at": t4, "geometry": p3, "staypoint_id": 5},
]
pfs = gpd.GeoDataFrame(data=list_dict, geometry="geometry", crs="EPSG:4326")
pfs = ti.Positionfixes(data=list_dict, geometry="geometry", crs="EPSG:4326")
pfs["staypoint_id"] = pfs["staypoint_id"].astype("Int64")
pfs.index.name = "id"

return ti.Positionfixes(pfs)
return pfs


class TestGenerate_staypoints:
@@ -201,9 +202,7 @@ def test_include_last(self):
"""Test if the include_last arguement will include the last pfs as stp."""
pfs, _ = ti.io.dataset_reader.read_geolife(os.path.join("tests", "data", "geolife"))

pfs_wo, sp_wo = pfs.generate_staypoints(
method="sliding", dist_threshold=100, time_threshold=5.0, include_last=False
)
pfs_wo, sp_wo = pfs.generate_staypoints()
pfs_include, sp_include = pfs.generate_staypoints(
method="sliding", dist_threshold=100, time_threshold=5.0, include_last=True
)
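The fixture change above constructs `ti.Positionfixes` directly instead of building a `gpd.GeoDataFrame` and wrapping it afterwards. A rough sketch of the pattern, with invented coordinates and timestamps (trackintel expects tz-aware `tracked_at` values):

```python
import pandas as pd
from shapely.geometry import Point
import trackintel as ti

records = [
    {"user_id": 0, "tracked_at": pd.Timestamp("2024-06-30 10:00", tz="utc"), "geometry": Point(8.5, 47.4)},
    {"user_id": 0, "tracked_at": pd.Timestamp("2024-06-30 10:05", tz="utc"), "geometry": Point(8.6, 47.4)},
]

# Positionfixes passes GeoDataFrame-style keyword arguments through, so the
# geometry column and CRS can be set at construction time
pfs = ti.Positionfixes(data=records, geometry="geometry", crs="EPSG:4326")
pfs.index.name = "id"
```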
2 changes: 1 addition & 1 deletion tests/preprocessing/test_staypoints.py
@@ -178,7 +178,7 @@ def test_dbscan_hav_euc(self):
method="dbscan", epsilon=100, num_samples=1, distance_metric="haversine", agg_level="dataset"
)
# WGS_1984
sp.crs = "epsg:4326"
sp = sp.set_crs("epsg:4326", allow_override=True)
# WGS_1984_UTM_Zone_49N
sp = sp.to_crs("epsg:32649")

20 changes: 10 additions & 10 deletions tests/preprocessing/test_triplegs.py
@@ -387,27 +387,27 @@ def test_crs(self, example_triplegs):
"""Test that the resulting GeoDataFrame has the correct crs or a warning or error is thrown if not set"""
sp, tpls = example_triplegs
# Case 1: sp crs None --> throw warning and set to tpls crs
sp.crs = None
sp = sp.set_crs(None, allow_override=True)
with pytest.warns(UserWarning):
_, _, trips = generate_trips(sp, tpls)
assert trips.crs == tpls.crs
# Case 2: Both crs None --> warn and set to None
tpls.crs = None
tpls = tpls.set_crs(None, allow_override=True)
with pytest.warns(UserWarning):
_, _, trips = generate_trips(sp, tpls)
assert trips.crs is None
# Case 3: tpls crs is None --> throw warning and set to sp crs
sp.crs = "EPSG:4326"
sp = sp.set_crs("EPSG:4326", allow_override=True)
with pytest.warns(UserWarning):
_, _, trips = generate_trips(sp, tpls)
assert trips.crs == "EPSG:4326"
# Case 4: Both crs set and correspond
tpls.crs = "EPSG:2056"
sp.crs = "EPSG:2056"
tpls = tpls.set_crs("EPSG:2056", allow_override=True)
sp = sp.set_crs("EPSG:2056", allow_override=True)
_, _, trips = generate_trips(sp, tpls)
assert trips.crs == "EPSG:2056"
# Case 5: Both crs set but differ --> throw error
sp.crs = "EPSG:4326"
sp = sp.set_crs("EPSG:4326", allow_override=True)
error_msg = "CRS of staypoints and triplegs differ. Geometry cannot be joined safely."
with pytest.raises(AssertionError, match=error_msg):
generate_trips(sp, tpls)
@@ -432,10 +432,9 @@ def _create_debug_sp_tpls_data(sp, tpls, gap_threshold):
sp_tpls["is_activity"] = sp_tpls["is_activity"].__eq__(True)
sp_tpls.sort_values(by=["user_id", "started_at"], inplace=True)
sp_tpls["started_at_next"] = sp_tpls["started_at"].shift(-1)
sp_tpls["activity_next"] = sp_tpls["is_activity"].shift(-1)
sp_tpls["activity_next"] = sp_tpls["is_activity"].shift(-1, fill_value=False)

sp_tpls["gap"] = (sp_tpls["started_at_next"] - sp_tpls["finished_at"]).dt.seconds / 60 > gap_threshold

return sp_tpls


@@ -519,16 +518,17 @@ def _generate_trips_old(sp_input, tpls_input, gap_threshold=15, print_progress=F
sp_tpls["started_at_next"] = sp_tpls["started_at"].shift(-1)
sp_tpls["is_activity_next"] = sp_tpls["is_activity"].shift(-1)

cols = ["started_at", "finished_at", "user_id", "type", "is_activity", "id", "started_at_next", "is_activity_next"]
if print_progress:
tqdm.pandas(desc="User trip generation")
trips = (
sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)
sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)[cols]
.progress_apply(_generate_trips_user, gap_threshold=gap_threshold)
.reset_index(drop=True)
)
else:
trips = (
sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)
sp_tpls.groupby(["user_id"], group_keys=False, as_index=False)[cols]
.apply(_generate_trips_user, gap_threshold=gap_threshold)
.reset_index(drop=True)
)
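The `[cols]` selection added to the two `groupby(...).apply` calls above is one way to address the pandas ≥2.2 DeprecationWarning about `apply` operating on the grouping columns: selecting the needed columns first keeps the group keys out of the frame passed to the function. A toy sketch with made-up column names:

```python
import pandas as pd

df = pd.DataFrame({
    "user_id": [0, 0, 1],
    "duration": [1.0, 2.0, 3.0],
    "started_at": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-01"]),
})

cols = ["started_at", "duration"]

# the frame handed to the function contains only `cols`, not "user_id",
# so pandas has nothing to warn about
res = df.groupby("user_id")[cols].apply(lambda g: g["duration"].sum())
# res is a Series indexed by user_id: 0 -> 3.0, 1 -> 3.0
```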
4 changes: 2 additions & 2 deletions trackintel/analysis/metrics.py
@@ -38,9 +38,9 @@ def radius_gyration(sp, method="count", print_progress=False):

if print_progress:
tqdm.pandas(desc="User radius of gyration calculation")
s = sp.groupby("user_id").progress_apply(_radius_gyration_user, method=method)
s = sp.groupby("user_id").progress_apply(_radius_gyration_user, method=method, include_groups=False)
else:
s = sp.groupby("user_id").apply(_radius_gyration_user, method=method)
s = sp.groupby("user_id").apply(_radius_gyration_user, method=method, include_groups=False)

s = s.rename("radius_gyration")
return s
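`include_groups=False` is the second recipe this PR uses for the same pandas ≥2.2 deprecation: rather than pre-selecting columns, the keyword tells `apply` to drop the grouping columns from each group's frame (which is also the future default behavior). A minimal sketch with invented data:

```python
import pandas as pd

sp = pd.DataFrame({"user_id": [0, 0, 1], "x": [1.0, 3.0, 5.0]})

def spread(group):
    # group contains only "x"; "user_id" was excluded by include_groups=False
    return group["x"].max() - group["x"].min()

s = sp.groupby("user_id").apply(spread, include_groups=False)
# s is a Series indexed by user_id: 0 -> 2.0, 1 -> 0.0
```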
26 changes: 15 additions & 11 deletions trackintel/analysis/tracking_quality.py
@@ -76,7 +76,9 @@ def temporal_tracking_quality(source, granularity="all"):
return None

if granularity == "all":
quality = df.groupby("user_id", as_index=False).apply(_get_tracking_quality_user, granularity)
quality = df.groupby("user_id", as_index=False).apply(
_get_tracking_quality_user, granularity, include_groups=False
)
return quality

# split records that span several days
@@ -90,19 +92,11 @@ def temporal_tracking_quality(source, granularity="all"):
column_name = "week_monday"

elif granularity == "weekday":
# get the tracked week relative to the first day
start_date = df["started_at"].min().floor(freq="D")
df["week"] = ((df["started_at"] - start_date)).dt.days // 7

grouper = df["started_at"].dt.weekday
column_name = "weekday"

elif granularity == "hour":
df = _split_overlaps(df, granularity="hour")
# get the tracked day relative to the first day
start_date = df["started_at"].min().floor(freq="D")
df["day"] = (df["started_at"] - start_date).dt.days

grouper = df["started_at"].dt.hour
column_name = "hour"

@@ -111,8 +105,13 @@
f"granularity unknown. We only support ['all', 'day', 'week', 'weekday', 'hour']. You passed {granularity}"
)

start_date = df["started_at"].min().floor(freq="D")
# calculate per-user per-grouper tracking quality
quality = df.groupby(["user_id", grouper]).apply(_get_tracking_quality_user, granularity).reset_index()
quality = (
df.groupby(["user_id", grouper])[["started_at", "finished_at"]]
.apply(_get_tracking_quality_user, start_date, granularity)
.reset_index()
)

# rename and reorder
quality.rename(columns={"started_at": column_name}, inplace=True)
@@ -121,7 +120,7 @@
return quality


def _get_tracking_quality_user(df, granularity="all"):
def _get_tracking_quality_user(df, start_date, granularity="all"):
"""
Tracking quality per-user per-granularity.

@@ -130,6 +129,9 @@ def _get_tracking_quality_user(df, granularity="all"):
df : Trackintel class
The source dataframe

start_date: pd.Timestamp
When measurement started, used to calculate in which weekday or week the measurement lies.

granularity : {"all", "day", "weekday", "week", "hour"}, default "all"
Determines the extent of the tracking. "all" the entire tracking period,
"day" and "weekday" a whole day, "week" a whole week, and "hour" a whole hour.
@@ -149,13 +151,15 @@
elif granularity == "weekday":
# total seconds in an day * number of tracked weeks
# (entries from multiple weeks may be grouped together)
df["week"] = ((df["started_at"] - start_date)).dt.days // 7
extent = 60 * 60 * 24 * (df["week"].max() - df["week"].min() + 1)
elif granularity == "week":
# total seconds in a week
extent = 60 * 60 * 24 * 7
elif granularity == "hour":
# total seconds in an hour * number of tracked days
# (entries from multiple days may be grouped together)
df["day"] = (df["started_at"] - start_date).dt.days
extent = (60 * 60) * (df["day"].max() - df["day"].min() + 1)
else:
raise ValueError(
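For readers unfamiliar with the function being refactored: tracking quality is tracked time divided by the extent of the chosen period, and the relocated `start_date` is only needed by the "weekday" and "hour" branches to count how many weeks or days a group spans. A simplified illustration of the "day" case (toy records, not the library code itself):

```python
import pandas as pd

# two records covering 4 + 2 = 6 of the 24 hours of one day
df = pd.DataFrame({
    "started_at": pd.to_datetime(["2024-06-30 08:00", "2024-06-30 20:00"]),
    "finished_at": pd.to_datetime(["2024-06-30 12:00", "2024-06-30 22:00"]),
})

tracked = (df["finished_at"] - df["started_at"]).dt.total_seconds().sum()
extent = 60 * 60 * 24  # seconds in a day, as in the "day" branch above
quality = tracked / extent  # 0.25
```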
2 changes: 1 addition & 1 deletion trackintel/geogr/distances.py
@@ -358,7 +358,7 @@ def get_speed_triplegs(triplegs, positionfixes=None, method="tpls_speed"):
if "tripleg_id" not in positionfixes:
raise AttributeError('Positionfixes must include column "tripleg_id".')
# group positionfixes by triplegs and compute average speed for each collection of positionfixes
grouped_pfs = positionfixes.groupby("tripleg_id").apply(_single_tripleg_mean_speed)
grouped_pfs = positionfixes.groupby("tripleg_id").apply(_single_tripleg_mean_speed, include_groups=False)
# add the speed values to the triplegs column
tpls = pd.merge(triplegs, grouped_pfs.rename("speed"), how="left", left_index=True, right_index=True)
tpls.index = tpls.index.astype("int64")
6 changes: 3 additions & 3 deletions trackintel/geogr/filter.py
@@ -55,11 +55,11 @@ def spatial_filter(source, areas, method="within", re_project=False):

# get final result
if method == "within":
ret_gdf = possible_matches.loc[possible_matches.within(areas.unary_union)]
ret_gdf = possible_matches.loc[possible_matches.within(areas.union_all())]
elif method == "intersects":
ret_gdf = possible_matches.loc[possible_matches.intersects(areas.unary_union)]
ret_gdf = possible_matches.loc[possible_matches.intersects(areas.union_all())]
elif method == "crosses":
ret_gdf = possible_matches.loc[possible_matches.crosses(areas.unary_union)]
ret_gdf = possible_matches.loc[possible_matches.crosses(areas.union_all())]
else:
raise ValueError("method unknown. We only support ['within', 'intersects', 'crosses']. " f"You passed {method}")

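`GeoSeries.union_all()` is the replacement for the deprecated `unary_union` attribute in recent GeoPandas releases; both dissolve all geometries in the series into a single geometry. A small standalone sketch:

```python
import geopandas as gpd
from shapely.geometry import Polygon

areas = gpd.GeoSeries([
    Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
    Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
])

# method call instead of the deprecated attribute access;
# the result is one (Multi)Polygon covering both squares
merged = areas.union_all()
assert merged.area == 7.0  # 4 + 4 minus the 1x1 overlap
```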
8 changes: 4 additions & 4 deletions trackintel/preprocessing/positionfixes.py
@@ -254,7 +254,7 @@ def generate_triplegs(

# initialize the index list of pfs where a tpl will begin
insert_index_ls = []
pfs["staypoint_id"] = pd.NA
pfs["staypoint_id"] = pd.Series(dtype="Int64")

for user_id_this in pfs["user_id"].unique():
sp_user = staypoints[staypoints["user_id"] == user_id_this]
@@ -282,7 +282,7 @@

# initialize tripleg_id with pd.NA and fill all pfs that belong to staypoints with -1
# pd.NA will be replaced later with tripleg ids
pfs["tripleg_id"] = pd.NA
pfs["tripleg_id"] = pd.Series(dtype="Int64")
pfs.loc[~pd.isna(pfs["staypoint_id"]), "tripleg_id"] = -1

# get all conditions that trigger a new tripleg.
@@ -437,7 +437,7 @@ def _generate_triplegs_overlap_staypoints(cond_temporal_gap, pfs, staypoints):

# spatial overlap: overlap tripleg with the location of previous and next staypoint
# geometry: tpl's share common start and end pfs with sp
cond_overlap_end = cond_overlap & ~cond_temporal_gap.shift(-1).fillna(False) & pd.isna(pfs["tripleg_id"])
cond_overlap_end = cond_overlap & ~cond_temporal_gap.shift(-1, fill_value=False) & pd.isna(pfs["tripleg_id"])
pfs.loc[cond_overlap_end, "tripleg_id"] = between_tpls_ids.shift(-1)[cond_overlap_end]
cond_empty = pd.isna(pfs["tripleg_id"])
pfs.loc[cond_empty, "tripleg_id"] = between_tpls_ids[cond_empty]
@@ -524,7 +524,7 @@ def __create_new_staypoints(start, end, pfs, elevation_flag, geo_col, last_flag=
# if end is the last pfs, we want to include the info from it as well
if last_flag:
end = len(pfs)
points = pfs[geo_col].iloc[start:end].unary_union
points = pfs[geo_col].iloc[start:end].union_all()
if check_gdf_planar(pfs):
new_sp[geo_col] = points.centroid
else:
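Two warning sources are removed in this file: creating a column by assigning `pd.NA` yields object dtype (and later downcasting complaints), whereas assigning an empty `pd.Series(dtype="Int64")` creates an all-`<NA>` nullable-integer column directly; and `shift(-1, fill_value=False)` keeps a boolean mask boolean instead of introducing NaN. A toy demonstration:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# the empty Int64 Series aligns on the index and fills with <NA>,
# giving a nullable integer column instead of object dtype
df["tripleg_id"] = pd.Series(dtype="Int64")
assert df["tripleg_id"].dtype == "Int64"

# fill_value avoids NaN in the vacated slot, so the mask stays bool
mask = pd.Series([True, False, True])
assert mask.shift(-1, fill_value=False).dtype == bool
```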
15 changes: 9 additions & 6 deletions trackintel/preprocessing/triplegs.py
@@ -133,7 +133,13 @@ def _seperate_ids(row):
user_change[["type", "is_activity"]] = ["user_change", True] # nicer for debugging

# merge trips with (filler) activities
trips.drop(columns=["type", "sp_tpls_id"], inplace=True) # make space so no overlap with activity "sp_tpls_id"

# make space so no overlap with activity "sp_tpls_id"
trips.drop(columns=["type", "sp_tpls_id"], inplace=True)

# trips are no activity (with this we don't have to fillna later)
trips["is_activity"] = False

# Inserting `gaps` and `user_change` into the dataframe creates buffers that catch shifted
# "staypoint_id" and "trip_id" from corrupting staypoints/trips.
trips_with_act = pd.concat((trips, sp_tpls_only_act, gaps, user_change), axis=0, ignore_index=True)
@@ -153,8 +159,6 @@ def _seperate_ids(row):
trips_with_act["prev_trip_id"] = trips_with_act["trip_id"].shift(1)
trips_with_act["next_trip_id"] = trips_with_act["trip_id"].shift(-1)

# transform column to binary
trips_with_act["is_activity"] = trips_with_act["is_activity"].fillna(False)
# delete activities
trips = trips_with_act[~trips_with_act["is_activity"]].copy()

@@ -268,9 +272,8 @@ def _concat_staypoints_triplegs(staypoints, triplegs, add_geometry):
sp["type"] = "staypoint"

# create table with relevant information from triplegs and staypoints.
sp_cols = ["started_at", "finished_at", "user_id", "type", "is_activity"]
tpls_cols = ["started_at", "finished_at", "user_id", "type"]
sp_tpls = pd.concat([sp[sp_cols], tpls[tpls_cols]])
cols = ["started_at", "finished_at", "user_id", "type", "is_activity"]
sp_tpls = pd.concat([sp[cols], tpls[cols]])
sp_tpls["is_activity"] = sp_tpls["is_activity"].fillna(False)
sp_tpls["sp_tpls_id"] = sp_tpls.index # store id for later reassignment
if add_geometry:
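Setting `trips["is_activity"] = False` before the concat means the boolean column has no gaps for the trips rows, so the later `fillna(False)` — a pattern pandas 2.x flags with downcasting FutureWarnings — can be dropped. A toy illustration of why the old pattern warned:

```python
import pandas as pd

sp = pd.DataFrame({"is_activity": [True, False]})
trips = pd.DataFrame({"other": [1]})  # no "is_activity" column yet

# old pattern: concat introduces NaN, making the column object dtype,
# and the subsequent fillna(False) has to downcast it back to bool
merged = pd.concat([sp, trips], ignore_index=True)
print(merged["is_activity"].dtype)  # object

# fixed pattern: complete the column before concatenating
trips["is_activity"] = False
merged = pd.concat([sp, trips], ignore_index=True)
print(merged["is_activity"].dtype)  # bool
```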