Skip to content

Commit

Permalink
Prioritize using longitude and latitude columns in building files when present.
Browse files Browse the repository at this point in the history

This saves time over parsing building polygon WKTs and deriving centroids from them.

Also, drop duplicate buildings during example generation.

PiperOrigin-RevId: 686532902
  • Loading branch information
jzxu authored and copybara-github committed Oct 25, 2024
1 parent b6037a3 commit 6e3c1cb
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions src/skai/buildings.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,25 @@ def _read_buildings_csv(path: str) -> gpd.GeoDataFrame:
"""
with tf.io.gfile.GFile(path, 'r') as csv_file:
df = pd.read_csv(csv_file)
if 'geometry' in df.columns:
if 'longitude' in df.columns and 'latitude' in df.columns:
geometries = gpd.GeoSeries(
gpd.points_from_xy(df['longitude'], df['latitude'])
)
df.drop(columns=['longitude', 'latitude'], inplace=True)
elif 'geometry' in df.columns:
logging.info('Parsing %d WKT strings. This could take a while.', len(df))
geometries = gpd.GeoSeries.from_wkt(df['geometry'])
df.drop(columns=['geometry'], inplace=True)
elif 'wkt' in df.columns:
logging.info('Parsing %d WKT strings. This could take a while.', len(df))
geometries = gpd.GeoSeries.from_wkt(df['wkt'])
df.drop(columns=['wkt'], inplace=True)
elif 'longitude' in df.columns and 'latitude' in df.columns:
geometries = gpd.points_from_xy(df['longitude'], df['latitude'])
df.drop(columns=['longitude', 'latitude'], inplace=True)
else:
raise ValueError(f'No geometry information found in file "{path}"')

return gpd.GeoDataFrame(df, geometry=geometries, crs=4326)
geometries = geometries.normalize()
gdf = gpd.GeoDataFrame(df, geometry=geometries, crs=4326)
return gdf.drop_duplicates()


def convert_buildings_file(
Expand Down Expand Up @@ -177,4 +181,3 @@ def read_building_coordinates(path: str) -> pd.DataFrame:
if df['latitude'].dtype != float:
raise TypeError(f'latitude column in file {path} is not type float')
return df

0 comments on commit 6e3c1cb

Please sign in to comment.