Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions to automatically search for images covering buildings. #79

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions src/generate_examples_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from absl import logging
import shapely.geometry
from skai import buildings
from skai import earth_engine
from skai import generate_examples
from skai import read_raster
import tensorflow as tf
Expand Down Expand Up @@ -200,12 +201,6 @@ def main(args):
)

gdal_env = generate_examples.parse_gdal_env(config.gdal_env)
if config.before_image_patterns:
before_image_patterns = config.before_image_patterns
elif config.before_image_config:
before_image_patterns = _read_image_config(config.before_image_config)
else:
before_image_patterns = []

if config.after_image_patterns:
after_image_patterns = config.after_image_patterns
Expand Down Expand Up @@ -248,6 +243,27 @@ def main(args):
else:
labeled_coordinates = []

if config.before_image_patterns:
before_image_patterns = config.before_image_patterns
elif config.before_image_config:
before_image_patterns = _read_image_config(config.before_image_config)
elif config.auto_select_before_images:
# TODO(jzxu): Remove potential multiple Earth Engine initialization calls.
earth_engine.initialize(config.earth_engine_service_account,
config.earth_engine_private_key)
image_ids = earth_engine.find_covering_images(
building_centroids,
config.before_image_collection_id,
config.before_image_project_ids,
config.before_image_start_date,
config.before_image_end_date,
config.max_before_images,
)
before_image_patterns = [f'EEDAI:{i}' for i in image_ids]
logging.info('Chosen before images: %s', before_image_patterns)
else:
before_image_patterns = []

generate_examples.generate_examples_pipeline(
before_image_patterns,
after_image_patterns,
Expand Down
172 changes: 167 additions & 5 deletions src/skai/earth_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,18 @@
from absl import logging
import ee # pytype: disable=import-error # mapping-is-not-sequence
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely.geometry
import tensorflow as tf

ShapelyGeometry = shapely.geometry.base.BaseGeometry

# When auto-selecting before images, this is the maximum number of images to
# choose from. If there are more images available in the date range and AOI
# specified, only the most recent images will be considered.
_MAX_BEFORE_IMAGE_POOL = 100


def _shapely_to_ee_feature(shapely_geometry: ShapelyGeometry) -> ee.Feature:
"""Converts shapely geometry into Earth Engine Feature."""
Expand All @@ -43,6 +49,24 @@ def _get_open_building_feature_centroid(feature: ee.Feature) -> ee.Feature:


def _download_feature_collection(
collection: ee.FeatureCollection) -> gpd.GeoDataFrame:
"""Downloads a FeatureCollection from Earth Engine as a GeoDataFrame.

Args:
collection: EE FeatureCollection to download.

Returns:
FeatureCollection data as a GeoDataFrame.
"""
url = collection.getDownloadURL('CSV')
with urllib.request.urlopen(url) as url_file:
df = pd.read_csv(url_file)
geometry = df['.geo'].apply(json.loads).apply(shapely.geometry.shape)
properties = df.drop(columns=['.geo'])
return gpd.GeoDataFrame(properties, geometry=geometry, crs=4326)


def _download_feature_collection_to_file(
collection: ee.FeatureCollection, properties: List[str],
output_path: str) -> gpd.GeoDataFrame:
"""Downloads a FeatureCollection from Earth Engine as a GeoDataFrame.
Expand All @@ -55,7 +79,7 @@ def _download_feature_collection(
Returns:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Google mapa

FeatureCollection data as a GeoDataFrame.
"""
url = collection.getDownloadURL('csv', properties)
url = collection.getDownloadURL('CSV', properties)
with urllib.request.urlopen(url) as url_file, tf.io.gfile.GFile(
output_path, 'wb') as output:
shutil.copyfileobj(url_file, output)
Expand All @@ -75,7 +99,65 @@ def _download_feature_collection(
geometry = None
properties = None

return gpd.GeoDataFrame(properties, geometry=geometry)
return gpd.GeoDataFrame(properties, geometry=geometry, crs=4326)


def _image_to_feature(image_object: ee.ComputedObject) -> ee.Feature:
"""Converts an image into a feature.

The feature's geometry will be an approximate outline of the non-blank pixels
of the image. The feature will have important properties of the image.

Args:
image_object: The image.

Returns:
A feature representing the image.
"""
image = ee.Image(image_object)
feature = image.select(0).multiply(0).reduceToVectors(scale=100).first()
feature = feature.set({
'image_id': image.get('system:id'),
'resolution': image.projection().nominalScale(),
'time': image.get('system:time_start'),
'provider': image.get('provider_id'),
'name': image.get('name'),
'label': None,
'count': None
})
return feature


def _get_image_collection_gdf(
image_collection: ee.ImageCollection,
max_images: int) -> gpd.GeoDataFrame:
"""Returns a GeoDataFrame for the images in a collection.

Each row of the GeoDataFrame corresponds to one image in the collection.
The row's geometry will be the image's geometry. The GeoDataFrame will have
the following properties:

image_id - EE id of the image.
resolution - Resolution of the image in meters.
time - Timestamp for when the image was taken.
provider - Image provider code.
name - The image name in EE.

Args:
image_collection: The image collection.
max_images: Maximum number of rows to return.

Returns:
GeoDataFrame.
"""
fc = ee.FeatureCollection(
image_collection.limit(max_images, 'system:time_start', False)
.toList(max_images)
.map(_image_to_feature)
)
gdf = _download_feature_collection(fc)
gdf['time'] = pd.to_datetime(gdf['time'], unit='ms')
return gdf


def get_open_buildings(regions: List[ShapelyGeometry],
Expand All @@ -99,10 +181,13 @@ def get_open_buildings(regions: List[ShapelyGeometry],
aoi_buildings = open_buildings.filterBounds(bounds)
if as_centroids:
centroids = aoi_buildings.map(_get_open_building_feature_centroid)
return _download_feature_collection(centroids, ['longitude', 'latitude'],
output_path)
return _download_feature_collection_to_file(
centroids, ['longitude', 'latitude'], output_path
)
else:
return _download_feature_collection(aoi_buildings, ['.geo'], output_path)
return _download_feature_collection_to_file(
aoi_buildings, ['.geo'], output_path
)


def get_open_buildings_centroids(
Expand Down Expand Up @@ -149,3 +234,80 @@ def initialize(service_account: str, private_key: Optional[str]) -> bool:
logging.error('Error initializing Earth Engine: %s', e)
return False
return True


def find_covering_images(
building_centroids: List[Tuple[float, float]],
image_collection_id: str,
project_ids: List[int],
start_date: str,
end_date: str,
max_images: int) -> List[str]:
"""Finds a set of images that best covers a set of points.

Args:
building_centroids: Buildings to cover.
image_collection_id: EE image collection to search from.
project_ids: Project ids for images.
start_date: Starting date, e.g. 2021-01-01.
end_date: Ending date, e.g. 2023-12-31.
max_images: Maximum number of images to return.

Returns:
A list of image ids that best covers the points.
"""
points = gpd.GeoDataFrame(
geometry=[shapely.geometry.Point(c) for c in building_centroids],
crs=4326
)
aoi = _shapely_to_ee_feature(points.unary_union.convex_hull).geometry()
image_collection = ee.ImageCollection(image_collection_id)
if project_ids:
project_filters = [
ee.Filter.listContains('project_membership', pid) for pid in project_ids
]
image_collection = image_collection.filter(ee.Filter.Or(*project_filters))

images = image_collection.filterBounds(aoi).filterDate(start_date, end_date)
images_gdf = _get_image_collection_gdf(images, _MAX_BEFORE_IMAGE_POOL)

# Create a MxN matrix where M is the number of points and N is the number of
# images. The element (i, j) is True if and only if point i intersects image
# j.
images_gdf_for_join = gpd.GeoDataFrame(geometry=images_gdf.geometry, crs=4326)
joined = gpd.sjoin(images_gdf_for_join, points)
intersections = np.zeros((len(points), len(images_gdf)), dtype=bool)
intersections[joined.index_right.values, joined.index.values] = True

# Use a greedy algorithm to choose images. Always choose the image that covers
# the most number of remaining points.
uncovered_points = np.ones(len(points), dtype=bool)
unused_images = np.arange(len(images_gdf))
chosen_image_ids = []
while (
np.any(uncovered_points)
and len(unused_images)
and len(chosen_image_ids) < max_images
):
# num_points_covered is a K-length array, where K is the number of unused
# images. num_points_covered[i] contains the number of remaining points
# covered by the ith unused image.
num_points_covered = np.sum(
intersections[np.ix_(uncovered_points, unused_images)], axis=0)
assert len(num_points_covered) == len(unused_images)
best_index = np.argmax(num_points_covered)
if num_points_covered[best_index] == 0:
# Remaining images don't cover any more points, so terminate.
break
best_image = unused_images[best_index]
chosen_image_ids.append(images_gdf.iloc[best_image]['image_id'])
unused_images = np.delete(unused_images, [best_index])
uncovered_points &= ~intersections[:, best_image]
num_points_left = np.sum(uncovered_points)
print(
f'Chose image {best_image}, '
f'covers {num_points_covered[best_index]} additional points, '
f'{num_points_left} points left.'
)

return chosen_image_ids
10 changes: 9 additions & 1 deletion src/skai/generate_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ class ExamplesGenerationConfig:
num_keep_labeled_examples: int = 1000
configuration_path: Optional[str] = None

# Parameters for auto-selecting before images.
auto_select_before_images: bool = False
before_image_collection_id: str = ''
before_image_project_ids: List[int] = dataclasses.field(default_factory=list)
before_image_start_date: str = ''
before_image_end_date: str = ''
max_before_images: int = 100

# TODO(mohammedelfatihsalah): Add a type for flagvalues argument in init_from_flags.
@staticmethod
def init_from_flags(flagvalues):
Expand Down Expand Up @@ -162,7 +170,7 @@ def init_from_json_path(json_path: str):
' will be it %s will be used.'
),
field.name,
val
field.default
)
return config

Expand Down