From c949a131eaabf23dfe5952be448b27fed4683d58 Mon Sep 17 00:00:00 2001 From: maany Date: Thu, 21 Nov 2024 15:54:30 +0100 Subject: [PATCH] complete overhaul for major release --- .vscode/launch.json | 131 ++++++ app/config.py | 64 +++ app/scraper.py | 382 ++++++------------ app/sdk/file_repository.py | 21 + app/sdk/kernel_plackster_gateway.py | 50 ++- app/sdk/scraped_data_repository.py | 37 +- app/time_travel/models.py | 31 ++ .../sentinel5p_metadata_generator.py | 180 +++++++++ augmentations/climate_augmentations.py | 208 ++++++---- augmentations/wildfire_augmentations.py | 122 ++++-- climate_augmentation.py | 242 +++++++++++ sentinel_5p_time_travel.py | 226 +++++++++++ sentinel_scraper.py | 72 ++-- utils.py | 103 ++++- 14 files changed, 1415 insertions(+), 454 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 app/config.py create mode 100644 app/time_travel/models.py create mode 100644 app/time_travel/sentinel5p_metadata_generator.py create mode 100644 climate_augmentation.py create mode 100644 sentinel_5p_time_travel.py diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..ab01fd0 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,131 @@ +{ + "configurations": [ + { + "name": "Disaster Tracking", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/sentinel_scraper.py", + "console": "integratedTerminal", + "args": [ + "--case-study-name=disaster", + "--tracer-id=potato", + "--job-id=2", + "--start_date=2024-09-15T09:00", + "--end_date=2024-09-15T12:00", + "--interval=60", + "--long_left=-156.708984", + "--lat_up=20.759645", + "--long_right=-156.299744", + "--lat_down=20.955027", + "--log-level=INFO", + "--kp_auth_token=test123", "--kp_host=localhost", "--kp_port=8000", "--kp_scheme=http", + "--sentinel_client_id=e2397fbb-0fb6-4742-9cba-c142ad44f617", "--sentinel_client_secret=DD6XKTPeDz4juiLWtItNGPofztG9frc3", + "--evalscript_bands_path=https://gist.githubusercontent.com/Rahul-7131/b02d5614401ba654904ff509039def15/raw/3867e78b12bf7d7dff44810c548ed20797b367ea/wildfire_evalscript.js", + "--evalscript_truecolor_path=https://gist.githubusercontent.com/Rahul-7131/b02d5614401ba654904ff509039def15/raw/3867e78b12bf7d7dff44810c548ed20797b367ea/truecolor_wildfire.js", + ] + }, + { + "name": "Climate Monitoring", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/sentinel_scraper.py", + "console": "integratedTerminal", + "args": [ + "--case-study-name=climate", + "--tracer-id=potato", + "--job-id=4", + "--start_date=2024-09-15T09:00", + "--end_date=2024-09-15T12:00", + "--interval=60", + "--long_left=-156.708984", + "--datasets-evalscripts={\"SENTINEL5P\": [\"climate-bands\", \"climate-mask\"], \"SENTINEL2-L1C\": [\"true-color\"]}", + "--lat_up=20.759645", + "--long_right=-156.299744", + "--lat_down=20.955027", + "--log-level=INFO", + "--kp_auth_token=test123", "--kp_host=localhost", "--kp_port=8000", "--kp_scheme=http", + "--sentinel_client_id=e2397fbb-0fb6-4742-9cba-c142ad44f617", "--sentinel_client_secret=DD6XKTPeDz4juiLWtItNGPofztG9frc3", + ] + }, + { + "name": "Climate Augmentation", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/climate_augmentation.py", + "console": "integratedTerminal", + "args": [ + "--case-study-name=climate", + "--tracer-id=potato", + "--job-id=4", + "--long_left=-156.708984", + "--datasets-evalscripts={\"SENTINEL5P\": [\"climate-bands\", \"climate-mask\"], \"SENTINEL2-L1C\": [\"true-color\"]}", + "--lat_up=20.759645", + 
"--long_right=-156.299744", + "--lat_down=20.955027", + "--log-level=INFO", + "--kp_auth_token=test123", "--kp_host=localhost", "--kp_port=8000", "--kp_scheme=http", + ] + }, + { + "name": "Sentinel-5P Scraping", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/sentinel_scraper.py", + "console": "integratedTerminal", + "args": [ + "--case-study-name=sentinel-5p", + "--tracer-id=potato", + "--job-id=3", + "--start_date=2024-09-15T09:00", + "--end_date=2024-09-15T12:00", + "--interval=60", + "--long_left=-156.708984", + "--datasets-evalscripts={\"SENTINEL5P\": [\"climate-bands\", \"climate-mask\"], \"SENTINEL2-L1C\": [\"true-color\"]}", + "--lat_up=20.759645", + "--long_right=-156.299744", + "--lat_down=20.955027", + "--log-level=INFO", + "--kp_auth_token=test123", "--kp_host=localhost", "--kp_port=8000", "--kp_scheme=http", + "--sentinel_client_id=e2397fbb-0fb6-4742-9cba-c142ad44f617", "--sentinel_client_secret=DD6XKTPeDz4juiLWtItNGPofztG9frc3", + ] + }, + { + "name": "Sentinel-5P Augmentation", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/climate_augmentation.py", + "console": "integratedTerminal", + "args": [ + "--case-study-name=sentinel-5p", + "--tracer-id=potato", + "--job-id=3", + "--long_left=-156.708984", + "--datasets-evalscripts={\"SENTINEL5P\": [\"climate-bands\", \"climate-mask\"], \"SENTINEL2-L1C\": [\"true-color\"]}", + "--lat_up=20.759645", + "--long_right=-156.299744", + "--lat_down=20.955027", + "--log-level=INFO", + "--kp_auth_token=test123", "--kp_host=localhost", "--kp_port=8000", "--kp_scheme=http", + ] + }, + { + "name": "Sentinel-5P Time Travel", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/sentinel_5p_time_travel.py", + "console": "integratedTerminal", + "args": [ + "--tracer-id=potato", + "--job-id=3", + "--long_left=-156.708984", + "--datasets-evalscripts={\"SENTINEL5P\": [\"climate-bands\", \"climate-mask\"], \"SENTINEL2-L1C\": [\"true-color\"]}", + "--lat_up=20.759645", + "--long_right=-156.299744", + "--lat_down=20.955027", + "--log-level=INFO", + "--kp_auth_token=test123", "--kp_host=localhost", "--kp_port=8000", "--kp_scheme=http", + ] + }, + + ] +} diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..8cd74c2 --- /dev/null +++ b/app/config.py @@ -0,0 +1,64 @@ +from sentinelhub import DataCollection +SUPPORTED_DATASET_EVALSCRIPTS = { + "SENTINEL2-L1C": { + "sentinel_sdk_obj": DataCollection.SENTINEL2_L1C, + "supported_evalscripts": [ + { + "name": "true-color", + "path": "https://gist.githubusercontent.com/Rahul-7131/b02d5614401ba654904ff509039def15/raw/3867e78b12bf7d7dff44810c548ed20797b367ea/truecolor_wildfire.js", + "scaling_factor": 1.5, + "clip_range": { + "min": 0, + "max": 1 + }, + "description": "A Sentinel-2 image highlighting areas of interest based on water, vegetation, and spectral thresholds in true color. Bands: B04, B03, B02, B08, B11, B12" + } + ] + }, + "SENTINEL5P": { + "sentinel_sdk_obj": DataCollection.SENTINEL5P, + "supported_evalscripts": [ + { + "name": "climate-bands", + "path": "https://gist.githubusercontent.com/Rahul-7131/b02d5614401ba654904ff509039def15/raw/5c8894fe017e42c594a2fb755d10d57602049ec5/climate_evalscript.js", + "scaling_factor": 1.5, + "clip_range": { + "min": 0, + "max": 1 + }, + "description": "Carbon monoxide (CO) concentrations using a color ramp from low (blue) to high (red) and processes the image into a grid to determine dominant CO concentrations per grid cell." 
+ }, + { + "name": "climate-mask", + "path": "https://gist.githubusercontent.com/Rahul-7131/b02d5614401ba654904ff509039def15/raw/5c8894fe017e42c594a2fb755d10d57602049ec5/climate_evalscript.js", + "scaling_factor": 255, + "clip_range": { + "min": 0, + "max": 1 + }, + "description": "A mask of the carbon monoxide (CO) concentrations in the image. The mask is created by thresholding the CO concentrations in the image." + }, + { + "name": "fire-bands", + "path": "https://gist.githubusercontent.com/Rahul-7131/b02d5614401ba654904ff509039def15/raw/3867e78b12bf7d7dff44810c548ed20797b367ea/wildfire_evalscript.js", + "scaling_factor": 1.5, + "clip_range": { + "min": 0, + "max": 1 + }, + "description": "Sentinel-2 image focussed on detection of wildfires, highlighting areas of interest based on vegetation (NDVI), water content (NDWI), and spectral thresholds in enhanced true color" + }, + { + "name": "fire-mask", + "path": "https://gist.githubusercontent.com/Rahul-7131/b02d5614401ba654904ff509039def15/raw/5c8894fe017e42c594a2fb755d10d57602049ec5/climate_evalscript.js", + "scaling_factor": 255, + "clip_range": { + "min": 0, + "max": 1 + }, + "description": "A mask of the wildfire areas in the image. The mask is created by thresholding the NDVI and NDWI values in the image." + }, + ] + }, + # Add more datasets as needed +} diff --git a/app/scraper.py b/app/scraper.py index 9b26439..dc3e66e 100644 --- a/app/scraper.py +++ b/app/scraper.py @@ -1,213 +1,22 @@ -from oauthlib.oauth2.rfc6749.errors import InvalidClientError -from logging import Logger -from datetime import datetime,timedelta +from datetime import timedelta import logging -from typing import List -from sentinelhub import SHConfig, BBox, CRS, DataCollection, SentinelHubRequest, bbox_to_dimensions, MimeType -from app.sdk.models import KernelPlancksterSourceData, BaseJobState, JobOutput, ProtocolEnum -from app.sdk.scraped_data_repository import ScrapedDataRepository, KernelPlancksterSourceData +from typing import Any, List +from sentinelhub import ( + SHConfig, +) +from app.sdk.models import ( + KernelPlancksterSourceData, + BaseJobState, + JobOutput, +) +from app.sdk.scraped_data_repository import ( + ScrapedDataRepository, + KernelPlancksterSourceData, +) from app.setup import datetime_parser -from augmentations.climate_augmentations import augment_climate_images,get_image_hash -from augmentations.wildfire_augmentations import augment_wildfire_images, sanitize_filename -import time -import os -from utils import date_range, save_image -import numpy as np -import shutil -import requests -def load_evalscript(source: str) -> str: - if source.startswith("http://") or source.startswith("https://"): - # Load from URL - response = requests.get(source) - response.raise_for_status() # Raise an exception if the request failed - return response.text - else: - # Load from file - with open(source, 'r') as file: - return file.read() - - -def get_images(logger: Logger, case_study_name: str, job_id: int, tracer_id: str, scraped_data_repository: ScrapedDataRepository, - output_data_list: list[KernelPlancksterSourceData], protocol: ProtocolEnum, - coords_wgs84: tuple[float, float, float, float], evalscript_bands_config: str, evalscript_truecolor: str, config: SHConfig, start_date: str, end_date: str, - interval:int, - resolution: int, image_dir: str, dataset_name: str): - """ - Retrieves images for each set of coordinates within the specified date range. - - Args: - logger (Logger): Logger for logging information. - job_id (int): Job identifier. 
- tracer_id (str): Tracer identifier. - scraped_data_repository (ScrapedDataRepository): Repository for scraped data. - output_data_list (list): List to store output data. - protocol (ProtocolEnum): Protocol type for data. - coords_wgs84 (tuple): Coordinates in WGS84 format. - evalscript_bands_config (str): Evalscript for specific band configuration. - evalscript_truecolor (str): Evalscript for true color imagery. - config (SHConfig): Sentinel Hub configuration object. - start_date (str): Start date for image retrieval. - end_date (str): End date for image retrieval. - interval (int): Interval for data scraping - resolution (int): Resolution for image retrieval. - image_dir (str): Directory to save images. - dataset_name (str): dataset applied. - - Returns: - list: List of retrieved and processed images. - """ - images = [] - coords_bbox = BBox(bbox=coords_wgs84, crs=CRS.WGS84) - coords_size = bbox_to_dimensions(coords_bbox, resolution=resolution) - current_date = datetime_parser(start_date) - last_date = datetime_parser(end_date) - time_delta = timedelta(minutes=interval) #interval essentially - dataset = None - dataset_map = { - "SENTINEL2_L1C": DataCollection.SENTINEL2_L1C, - "SENTINEL5P": DataCollection.SENTINEL5P, - # Add more datasets as needed - } - dataset = dataset_map.get(dataset_name) - if not dataset: - logger.error(f"Dataset {dataset_name} not supported. Use one of {dataset_map.keys()}") - return output_data_list - while current_date <= last_date : - try: - request_bands_config = SentinelHubRequest( - evalscript=evalscript_bands_config, - input_data=[SentinelHubRequest.input_data(data_collection=dataset, time_interval=(current_date.isoformat(), (current_date + time_delta).isoformat()))], - responses=[SentinelHubRequest.output_response("default", MimeType.PNG)], - bbox=coords_bbox, size=coords_size, config=config - ) - data = request_bands_config.get_data() - except InvalidClientError as e: - logger.error(f"Sentinel Hub client error: {e}") - raise e - - except Exception as e: - logger.warning(e) - continue - - if data: - - image = data[0] - if np.mean(image) == 0.0: - logger.warning("Image is blank, can't be registered, retrying for next interval") - if not evalscript_truecolor : - current_date += time_delta - else: # if image is not entirely blank - image_hash = get_image_hash(image) - file_extension = "png" - try: - file_extension = image.format.lower() - except: - pass - - image_filename = f"{dataset_name}_banded_config_{image_hash}.{file_extension}" - image_path = os.path.join(image_dir, "banded_config", image_filename) - os.makedirs(os.path.dirname(image_path), exist_ok=True) - save_image(image, image_path, factor=1.5/255, clip_range=(0, 1)) - logger.info(f"Configured Bands Image saved to: {image_path}") - data_name = sanitize_filename(f"{dataset_name}_banded_config_{image_hash}") - unix_timestamp = int(current_date.timestamp()) - if not evalscript_truecolor : - current_date += time_delta - relative_path = f"{case_study_name}/{tracer_id}/{job_id}/{unix_timestamp}/sentinel/banded_config/{data_name}.{file_extension}" - media_data = KernelPlancksterSourceData( - name=data_name, - protocol=protocol, - relative_path=relative_path, - ) - - try: - scraped_data_repository.register_scraped_photo( - job_id=job_id, - source_data=media_data, - local_file_name=image_path, - ) - except Exception as e: - logger.warning(f"Could not register file: {e}") - - output_data_list.append(media_data) - - image_filename = f"{dataset_name}_masked_{image_hash}.{file_extension}" - image_path = 
os.path.join(image_dir, "masked", image_filename) - os.makedirs(os.path.dirname(image_path), exist_ok=True) - save_image(image, image_path, factor=255/255, clip_range=(0, 1)) - logger.info(f"Masked Image saved to: {image_path}") - - data_name = sanitize_filename(f"{dataset_name}_masked_{image_hash}") - relative_path = f"{case_study_name}/{tracer_id}/{job_id}/{unix_timestamp}/sentinel/masked/{data_name}.{file_extension}" - - media_data = KernelPlancksterSourceData( - name=data_name, - protocol=protocol, - relative_path=relative_path, - ) - - try: - scraped_data_repository.register_scraped_photo( - job_id=job_id, - source_data=media_data, - local_file_name=image_path, - ) - except Exception as e: - logger.warning(f"Could not register file: {e}") - - output_data_list.append(media_data) - - if evalscript_truecolor: - try: - request_truecolor = SentinelHubRequest( - evalscript=evalscript_truecolor, - input_data=[SentinelHubRequest.input_data(data_collection=dataset, time_interval=(current_date.isoformat(), (current_date + time_delta).isoformat()))], - responses=[SentinelHubRequest.output_response("default", MimeType.PNG)], - bbox=coords_bbox, size=coords_size, config=config - ) - truecolor = request_truecolor.get_data() - except Exception as e: - logger.warning(e) - continue - - - image_true_color = truecolor[0] - if np.mean(image_true_color) == 0.0: - logger.warning("Image is blank, can't be registered, retrying for next interval") - current_date += time_delta - else: # if image is not entirely blank - image_hash = get_image_hash(image_true_color) - file_extension = "png" - try: - file_extension = image_true_color.format.lower() - except: - pass - image_filename = f"{interval}_{dataset_name}_true_color_{image_hash}.{file_extension}" - image_path = os.path.join(image_dir, "true_color", image_filename) - os.makedirs(os.path.dirname(image_path), exist_ok=True) - save_image(image_true_color, image_path, factor=1.5/255, clip_range=(0, 1)) - logger.info(f"True Color Image saved to: {image_path}") - current_date += time_delta - data_name = sanitize_filename(f"{interval}_{dataset_name}_true_color_{image_hash}") - relative_path = f"{case_study_name}/{tracer_id}/{job_id}/{unix_timestamp}/sentinel/true_color/{data_name}.{file_extension}" - - media_data = KernelPlancksterSourceData( - name=data_name, - protocol=protocol, - relative_path=relative_path, - ) - - try: - scraped_data_repository.register_scraped_photo( - job_id=job_id, - source_data=media_data, - local_file_name=image_path, - ) - - except Exception as e: - logger.warning(f"Could not register file: {e}") - return output_data_list +from utils import download_image, generate_relative_path, get_image_hash, load_evalscript, save_image +import tempfile def scrape( @@ -223,18 +32,16 @@ def scrape( sentinel_config: SHConfig, start_date: str, end_date: str, - interval:str, - image_dir: str, - evalscript_bands_path: str, - evalscript_truecolor_path:str, - dataset_name: str, - resolution: int + interval: int, + dataset_evalscripts: dict[str, dict[str, Any]], + resolution: int, ) -> JobOutput: - try: logger = logging.getLogger(__name__) - logging.basicConfig(level=log_level, format='%(asctime)s - %(levelname)s - %(message)s') + logging.basicConfig( + level=log_level, format="%(asctime)s - %(levelname)s - %(message)s" + ) job_state = BaseJobState.CREATED current_data: KernelPlancksterSourceData | None = None @@ -243,68 +50,109 @@ def scrape( protocol = scraped_data_repository.protocol output_data_list: List[KernelPlancksterSourceData] = [] - if 
isinstance(sentinel_config, SHConfig): # for typing - config = sentinel_config - # Set the job state to running - logger.info(f"{job_id}: Starting Job") - job_state = BaseJobState.RUNNING - - start_time = time.time() # Record start time for response time measurement - try: - # Create an instance of SentinelHubPipelineElement with the request data - coords_wgs84 = (long_left,lat_down,long_right, lat_up) - evalscript_bands_config = load_evalscript(evalscript_bands_path) - evalscript_truecolor = load_evalscript(evalscript_truecolor_path) if evalscript_truecolor_path else False - logger.info(f"starting with dataset: {dataset_name}") - output_data_list = get_images(logger, case_study_name, job_id, tracer_id, scraped_data_repository, output_data_list, protocol, coords_wgs84, evalscript_bands_config, evalscript_truecolor ,config, start_date, end_date, interval, resolution, image_dir, dataset_name) - output_data_list = augment_wildfire_images(case_study_name, job_id, tracer_id, image_dir, coords_wgs84, logger, protocol, scraped_data_repository,output_data_list) if dataset_name == "SENTINEL2_L1C" else augment_climate_images(case_study_name, job_id, tracer_id, image_dir, coords_wgs84, logger, protocol, scraped_data_repository,output_data_list) + if not isinstance(sentinel_config, SHConfig): + return JobOutput( + job_state=BaseJobState.FAILED, + tracer_id=tracer_id, + source_data_list=[], + ) - # Calculate response time - response_time = time.time() - start_time - response_data = { - "message": f"Pipeline processing completed", - "response_time": f"{response_time:.2f} seconds" - } + # Set the job state to running + logger.info(f"{job_id}: Starting Job") + job_state = BaseJobState.RUNNING - job_state = BaseJobState.FINISHED - logger.info(f"{job_id}: Job finished") - - except Exception as e: - logger.error(f"Error in processing pipeline: {e}") - job_state = BaseJobState.FAILED - logger.error( - f"{job_id}: Unable to scrape data. Error: {e}\nJob with tracer_id {tracer_id} failed." 
- ) - logger.error( - f"Last successful data: {last_successful_data} -- Current data: \"{current_data}\" -- job_state: \"{job_state}\"" + try: + # Create an instance of SentinelHubPipelineElement with the request data + coords_wgs84 = (long_left, lat_down, long_right, lat_up) + parsed_start_date = datetime_parser(start_date) + parsed_end_date = datetime_parser(end_date) + parsed_interval = timedelta(minutes=interval) + current_datetime = parsed_start_date + current_iteration = 1 + total_iterations = int( + (parsed_end_date - parsed_start_date) / parsed_interval + ) + 1 + while current_datetime <= parsed_end_date: + logger.info( + f"Processing iteration {current_iteration} of {total_iterations}" ) + datasets = dataset_evalscripts.keys() + for dataset in datasets: + sentinel_dataset = dataset_evalscripts[dataset]["sentinel_sdk_obj"] + evalscripts = dataset_evalscripts[dataset]["evalscripts"] + for evalscript_config in evalscripts: + logger.info(f"{current_iteration}/{total_iterations}: Processing evalscript {evalscript_config['name']}") + evalscript_name = evalscript_config["name"] + evalscript_path = evalscript_config["path"] + evalscript = load_evalscript(evalscript_path) + logger.info(f"{current_iteration}/{total_iterations} Downloading image for evalscript from {evalscript_config['name']}") + image = download_image( + logger, + coords_wgs84, + sentinel_dataset, + evalscript, + sentinel_config, + resolution, + ) + if not image or len(image) == 0: + logger.warning(f"{current_iteration}/{total_iterations} No image found!") + continue + image = image[0] + image_hash = get_image_hash(image) + file_extension = "png" + clip_range = evalscript_config.get("clip_range", (0, 1)) + scaling_factor = evalscript_config.get("scaling_factor", 1.5) / 255 + with tempfile.NamedTemporaryFile(suffix='.png', delete=True) as fp: + logger.info(f"{current_iteration}/{total_iterations} Saving image for evalscript from {evalscript_config['name']}") + save_image(image=image, filename=fp.name, factor=scaling_factor, clip_range=(clip_range['min'], clip_range['max'])) + file_name = f"{dataset}_{evalscript_name}_{image_hash}" + relative_path = generate_relative_path(case_study_name=case_study_name, tracer_id=tracer_id, job_id=job_id, timestamp=int(current_datetime.timestamp()), dataset=dataset, evalscript_name=evalscript_name, image_hash=image_hash, file_extension=file_extension) + source_data = KernelPlancksterSourceData( + name=file_name, + protocol=protocol, + relative_path=relative_path, + ) + try: + logger.info(f"{current_iteration}/{total_iterations} Registering image for evalscript from {evalscript_config['name']}") + scraped_data_repository.register_scraped_photo( + source_data=source_data, + job_id=job_id, + local_file_name=fp.name, + ) + output_data_list.append(source_data) + except Exception as e: + logger.warning(f"{current_iteration}/{total_iterations} Could not register file {source_data}: {e}") + continue + current_datetime += parsed_interval + current_iteration += 1 + + job_state = BaseJobState.FINISHED + logger.info(f"{job_id}: Job finished") - finally: - try: - shutil.rmtree(image_dir) - except Exception as e: - logger.warning(f"Could not delete tmp directory, exiting: {e}") - - return JobOutput( - job_state=job_state, - tracer_id=tracer_id, - source_data_list=output_data_list, - ) + except Exception as e: + logger.error(f"Error in processing pipeline: {e}") + job_state = BaseJobState.FAILED + logger.error( + f"{job_id}: Unable to scrape data. Error: {e}\nJob with tracer_id {tracer_id} failed." 
+ ) + logger.error( + f'Last successful data: {last_successful_data} -- Current data: "{current_data}" -- job_state: "{job_state}"' + ) + finally: + return JobOutput( + job_state=job_state, + tracer_id=tracer_id, + source_data_list=output_data_list, + ) except Exception as error: - logger.error(f"{job_id}: Unable to scrape data. Job with tracer_id {tracer_id} failed. Error:\n{error}") + logger.error( + f"{job_id}: Unable to scrape data. Job with tracer_id {tracer_id} failed. Error:\n{error}" + ) job_state = BaseJobState.FAILED - try: - logger.warning("deleting tmp directory") - shutil.rmtree(image_dir) - except Exception as e: - logger.warning(f"Could not delete tmp directory, exiting: {e}") - return JobOutput( job_state=job_state, tracer_id=tracer_id, source_data_list=[], ) - - \ No newline at end of file diff --git a/app/sdk/file_repository.py b/app/sdk/file_repository.py index 5cf6163..c5335b8 100644 --- a/app/sdk/file_repository.py +++ b/app/sdk/file_repository.py @@ -77,4 +77,25 @@ def public_upload(self, signed_url: str, file_path: str) -> None: if upload_res.status_code != 200: raise ValueError(f"Failed to upload file to signed url: {upload_res.text}") + + + def public_download(self, signed_url: str, file_path: str) -> None: + """ + Download a file from a signed url. + + :param signed_url: The signed url to download from. + :param file_path: The path to save the downloaded file. + """ + + download_res = requests.get(signed_url, verify=False) + + self.logger.info(f"Downloaded file from signed url: {signed_url}") + self.logger.info(f"Download status code: {download_res.status_code}") + self.logger.info(f"Download headers: {download_res.headers}") + + with open(file_path, "wb") as f: + f.write(download_res.content) + + if download_res.status_code != 200: + raise ValueError(f"Failed to download file from signed url: {download_res.text}") diff --git a/app/sdk/kernel_plackster_gateway.py b/app/sdk/kernel_plackster_gateway.py index 01097d9..558995e 100644 --- a/app/sdk/kernel_plackster_gateway.py +++ b/app/sdk/kernel_plackster_gateway.py @@ -32,14 +32,15 @@ def ping(self) -> bool: self.logger.info(f"Ping response: {res.text}") return res.status_code == 200 - def generate_signed_url(self, source_data: KernelPlancksterSourceData) -> str: + def generate_signed_url(self, source_data: KernelPlancksterSourceData, is_download_request: bool = False) -> str: if not self.ping(): self.logger.error(f"Failed to ping Kernel Plankster Gateway at {self.url}") raise Exception("Failed to ping Kernel Plankster Gateway") self.logger.info(f"Generating signed url for {source_data.relative_path}") - endpoint = f"{self.url}/client/{self._client_id}/upload-credentials" + endpoint_root = f"{self.url}/client/{self._client_id}" + endpoint = f"{endpoint_root}/download-credentials" if is_download_request else f"{endpoint_root}/upload-credentials" params = { "protocol": source_data.protocol.value, @@ -125,3 +126,48 @@ def register_new_source_data(self, source_data: KernelPlancksterSourceData) -> d assert res_name == source_data.name return kp_source_data + + def list_source_data(self, relative_path_root: str) -> list[KernelPlancksterSourceData]: + if not self.ping(): + self.logger.error(f"Failed to ping Kernel Plankster Gateway at {self.url}") + raise Exception("Failed to ping Kernel Plankster Gateway") + + self.logger.info(f"Listing source with Kernel Plankster Gateway at {self.url}") + + + endpoint = f"{self.url}/client/{self._client_id}/source" + + headers = { + "Content-Type": "application/json", + "x-auth-token": 
self._auth_token, + } + + res = httpx.get( + url=endpoint, + headers=headers, + ) + + self.logger.info(f"List source data response: {res.text}") + if res.status_code != 200: + raise ValueError( + f"Failed to list source data with Kernel Plankster Gateway: {res.text}" + ) + + kp_source_data = res.json() + + if not kp_source_data: + raise ValueError(f"Failed to list source data. Source Data not returned. Dumping raw response:\n{res.json()}") + + if not kp_source_data['status']: + raise ValueError(f"Failed to list source data. Source Data status not returned. Dumping raw response:\n{res.json()}") + + kp_source_data = kp_source_data.get("source_data_list") + output: list[KernelPlancksterSourceData] = [ + KernelPlancksterSourceData( + name=x.get("name"), + protocol=x.get("protocol"), + relative_path=x.get("relative_path"), + ) for x in kp_source_data + if x.get("relative_path").startswith(relative_path_root) + ] + return output diff --git a/app/sdk/scraped_data_repository.py b/app/sdk/scraped_data_repository.py index 73543dc..677d976 100644 --- a/app/sdk/scraped_data_repository.py +++ b/app/sdk/scraped_data_repository.py @@ -17,32 +17,21 @@ def __init__( self.file_repository = file_repository self._logger = logging.getLogger(__name__) - @property - def log_level(self) -> str: - return self._log_level @property def logger(self) -> logging.Logger: - return self._logger def register_scraped_photo(self, source_data: KernelPlancksterSourceData, job_id: int, local_file_name: str) -> KernelPlancksterSourceData: - match self.protocol: - case ProtocolEnum.S3: - signed_url = self.kernel_planckster.generate_signed_url(source_data=source_data) - self.logger.info(f"{job_id}: Uploading photo to object store") - self.file_repository.public_upload(signed_url, local_file_name) - self.logger.info( f"{job_id}: Uploaded photo to {signed_url}" ) - self.kernel_planckster.register_new_source_data(source_data=source_data) @@ -113,4 +102,28 @@ def register_scraped_json(self, source_data: KernelPlancksterSourceData, job_id: file_type="json", ) - return source_data \ No newline at end of file + return source_data + + def download_data(self, source_data: KernelPlancksterSourceData, local_file: str): + """ + Download data from Kernel Plankster Gateway. 
+ + Args: + - source_data: KernelPlancksterSourceData + - local_file: str + """ + match self.protocol: + case ProtocolEnum.S3: + signed_url = self.kernel_planckster.generate_signed_url(source_data=source_data, is_download_request=True) + self.logger.info(f"Downloading data from {signed_url}") + self.file_repository.public_download(signed_url, local_file) + self.logger.info(f"Downloaded data to {local_file}") + case ProtocolEnum.LOCAL: + # If local, then we don't use kernel planckster at all + # NOTE: local is deprecated + self.file_repository.save_file_locally( + file_to_save=local_file, + source_data=source_data, + file_type="data", + ) + return local_file diff --git a/app/time_travel/models.py b/app/time_travel/models.py new file mode 100644 index 0000000..3889087 --- /dev/null +++ b/app/time_travel/models.py @@ -0,0 +1,31 @@ +from pydantic import BaseModel +from typing import List, Literal, Union + +class SentinelRowSchema(BaseModel): + timestamp: str + latitude: float + longitude: float + CarbonMonoxideLevel: str + + +class Error(BaseModel): + errorName: str + errorMessage: str + +class Image(BaseModel): + kind: str + relativePath: str + description: str + + +class KeyFrame(BaseModel): + timestamp: str + images: List[Union[Image, Error]] + data: List[Union[SentinelRowSchema, Error]] + dataDescription: str + +class Metadata(BaseModel): + caseStudy: Literal["sentinel-5p"] + relativePathsForAgent: List[str] + keyframes: List[KeyFrame] + imageKinds: List[str] diff --git a/app/time_travel/sentinel5p_metadata_generator.py b/app/time_travel/sentinel5p_metadata_generator.py new file mode 100644 index 0000000..ec06564 --- /dev/null +++ b/app/time_travel/sentinel5p_metadata_generator.py @@ -0,0 +1,180 @@ +import json +import logging +import tempfile +from typing import List, Union + +from app.config import SUPPORTED_DATASET_EVALSCRIPTS +from app.sdk.models import BaseJobState, JobOutput, KernelPlancksterSourceData, ProtocolEnum +from app.sdk.scraped_data_repository import ScrapedDataRepository +from app.time_travel.models import Error, Image, KeyFrame, Metadata, SentinelRowSchema +from utils import parse_relative_path + + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") +handler.setFormatter(formatter) +logger.addHandler(handler) +logger.setLevel(logging.INFO) + +def __filter_paths_by_timestamp(timestamp: str, relative_paths: List[KernelPlancksterSourceData]) -> List[str]: + return [path.relative_path for path in relative_paths if timestamp in path.relative_path] + +def generate_time_travel_metadata( + job_id: int, + tracer_id: str, + long_left: float, + lat_down: float, + long_right: float, + lat_up: float, + scraped_data_repository: ScrapedDataRepository, + relevant_source_data: list[KernelPlancksterSourceData], + protocol: ProtocolEnum, +) -> JobOutput: + case_study_name = "sentinel-5p" + failed = False + timestamps: List[str] = [] + relative_paths_for_agent: List[str] = [] + for source_data in relevant_source_data: + relative_path = source_data.relative_path + ( + _, + _, + _, + timestamp, + _, + _, + _, + file_extension, + ) = parse_relative_path(relative_path=relative_path) + timestamps.append(timestamp) + if file_extension in ["json", "csv", "txt"]: + relative_paths_for_agent.append(relative_path) + + timestamps = list(set(timestamps)) + metadata: Metadata = Metadata( + caseStudy="sentinel-5p", + imageKinds=[] , + relativePathsForAgent=relative_paths_for_agent, + keyframes=[], + ) + for 
timestamp in timestamps: + keyframe = KeyFrame( + timestamp=timestamp, + images=[], + data=[], + dataDescription=f"This data is a collection of Carbon Monoxide levels for the given timestamp for the given coordinates", + ) + + timestamp_relative_paths = __filter_paths_by_timestamp(timestamp, relevant_source_data) + images_paths = [path for path in timestamp_relative_paths if path.endswith((".png", ".jpg", ".jpeg"))] + augmented_coordinates_path = [path for path in timestamp_relative_paths if path.endswith("augmented.json")] + + for image_path in images_paths: + ( + _, + _, + _, + timestamp, + dataset, + evalscript_name, + _, + file_extension, + ) = parse_relative_path(relative_path=image_path) + if dataset not in SUPPORTED_DATASET_EVALSCRIPTS: + keyframe.images.append(Error( + errorMessage=f"Dataset {dataset} is not supported", + errorName="UnsupportedDataset", + )) + continue + supported_eval_scripts = [x['name'] for x in SUPPORTED_DATASET_EVALSCRIPTS[dataset]["supported_evalscripts"]] + if evalscript_name not in supported_eval_scripts: + keyframe.images.append(Error( + errorMessage=f"Evalscript {evalscript_name} is not supported for {dataset}.", + errorName="UnsupportedEvalscript", + )) + continue + if evalscript_name not in metadata.imageKinds: + metadata.imageKinds.append(evalscript_name) + evalscript = next((x for x in SUPPORTED_DATASET_EVALSCRIPTS[dataset]["supported_evalscripts"] if x['name'] == evalscript_name), None) + if not evalscript: + keyframe.images.append(Error( + errorMessage=f"Evalscript {evalscript_name} not found for {dataset}.", + errorName="MissingEvalscript", + )) + continue + img_to_append = Image( + relativePath=image_path, + kind=evalscript_name, + description=f"dataset: {dataset} | coords_wgs84: {long_left, lat_down, long_right, lat_up} | details: {evalscript['description']}", + ) + keyframe.images.append(img_to_append) + + if len(augmented_coordinates_path) != 1: + keyframe.data.append(Error( + errorName="AugmentedCoordinatesError", + errorMessage="Augmented data are missing or more than 1 dataset was found for this timestamp", + )) + metadata.keyframes.append(keyframe) + continue + + try: + with tempfile.NamedTemporaryFile(suffix=".json", delete=True) as fp: + scraped_data_repository.download_data( + source_data=KernelPlancksterSourceData( + name=augmented_coordinates_path[0].split("/")[-1], + protocol=protocol, + relative_path=augmented_coordinates_path[0], + ), + local_file=fp.name, + ) + with open(fp.name, "r") as f: + augmented_coordinates: dict = json.load(f) + for _, augmented_coordinate in augmented_coordinates.items(): + keyframe.data.append(SentinelRowSchema( + timestamp=timestamp, + latitude=augmented_coordinate["latitude"], + longitude=augmented_coordinate["longitude"], + CarbonMonoxideLevel=augmented_coordinate["CO_level"], + )) + metadata.keyframes.append(keyframe) + except Exception as e: + keyframe.data.append(Error( + errorName="AugmentedCoordinatesError", + errorMessage=f"Error while processing augmented coordinates: {e}", + )) + metadata.keyframes.append(keyframe) + + + + with tempfile.NamedTemporaryFile(suffix=".json", delete=True) as out: + with open(out.name, "w") as f: + f.write(metadata.model_dump_json(indent=2)) + relative_path = f"{case_study_name}/{tracer_id}/{job_id}/sentinel/sentinel5p_time_travel_metadata.json" + out_source_data = KernelPlancksterSourceData( + name="time_travel_metadata.json", + protocol=protocol, + relative_path=relative_path, + ) + try: + scraped_data_repository.register_scraped_json( + job_id=job_id, + 
source_data=out_source_data, + local_file_name=out.name, + ) + except Exception as e: + logger.error(f"Failed to upload time travel metadata: {e}") + failed = True + + if failed: + return JobOutput( + job_state=BaseJobState.FAILED, + tracer_id=tracer_id, + source_data_list=[], + ) + return JobOutput( + job_state=BaseJobState.FINISHED, + tracer_id=tracer_id, + source_data_list=[out_source_data], + ) + diff --git a/augmentations/climate_augmentations.py b/augmentations/climate_augmentations.py index 781f0ac..9bdcf0c 100644 --- a/augmentations/climate_augmentations.py +++ b/augmentations/climate_augmentations.py @@ -1,101 +1,151 @@ -from logging import Logger -import time -from app.sdk.models import KernelPlancksterSourceData, ProtocolEnum -from app.sdk.scraped_data_repository import ScrapedDataRepository, KernelPlancksterSourceData +import logging +import tempfile +from app.sdk.models import ( + BaseJobState, + JobOutput, + KernelPlancksterSourceData, + ProtocolEnum, +) +from app.sdk.scraped_data_repository import ( + ScrapedDataRepository, + KernelPlancksterSourceData, +) import numpy as np import pandas as pd import cv2 -import os, re from collections import Counter -import hashlib -def sanitize_filename(filename): #helper function - return re.sub(r'[^\w./]', '_', filename) +from utils import generate_relative_path, parse_relative_path, sanitize_filename -def dominant_color(pixels): #helper function + +def dominant_color(pixels): # helper function if len(pixels) == 0: return [0, 0, 0] color_counts = Counter(map(tuple, pixels)) return max(color_counts, key=color_counts.get) -def get_image_hash(image): - """ - Computes a hash for the given image. - """ - hasher = hashlib.md5() - hasher.update(image.tobytes()) - return hasher.hexdigest() -def augment_climate_images(case_study_name: str, job_id: str, tracer_id: str, image_dir: str, coords_wgs84: tuple[float, float, float, float], logger: Logger, protocol: ProtocolEnum, scraped_data_repository: ScrapedDataRepository, output_data_list: list[KernelPlancksterSourceData]): - latitudes = [coords_wgs84[1], coords_wgs84[3]] - longitudes = [coords_wgs84[0], coords_wgs84[2]] +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") +handler.setFormatter(formatter) +logger.addHandler(handler) +logger.setLevel(logging.INFO) - os.makedirs(os.path.join(image_dir, "masked"), exist_ok=True) - for image_path in os.listdir(os.path.join(image_dir, "masked")): - interval = "_".join(image_path.split("_")[:-1]) - image_hash = image_path.split("_")[-1].split(".")[0] - full_path = os.path.join(image_dir, "masked", image_path) - image = cv2.imread(full_path) - height, width, _ = image.shape - grid_size = 5 - grid_height = height // grid_size - grid_width = width // grid_size +def augment_climate_images( + case_study_name: str, + job_id: int, + tracer_id: str, + long_left: float, + lat_down: float, + long_right: float, + lat_up: float, + scraped_data_repository: ScrapedDataRepository, + relevant_source_data: list[KernelPlancksterSourceData], + protocol: ProtocolEnum, +) -> JobOutput: + failed = False + latitudes = [lat_down, lat_up] + longitudes = [long_left, long_right] + for source_data in relevant_source_data: + relative_path = source_data.relative_path + ( + case_study_name, + tracer_id, + job_id, + timestamp, + dataset, + evalscript_name, + image_hash, + _, + ) = parse_relative_path(relative_path=relative_path) + with tempfile.NamedTemporaryFile(suffix=".png", delete=True) 
as fp: + scraped_data_repository.download_data( + source_data=source_data, local_file=fp.name + ) + image = cv2.imread(fp.name) + height, width, _ = image.shape - data = [] - for grid_row in range(grid_size): - for grid_col in range(grid_size): - cell_pixels = image[grid_row * grid_height: (grid_row + 1) * grid_height, - grid_col * grid_width: (grid_col + 1) * grid_width] - cell_pixels = cell_pixels.reshape(-1, 3) - dominant_pixel = dominant_color(cell_pixels) - latitude = latitudes[0] + ((grid_row + 0.5) / grid_size) * (latitudes[1] - latitudes[0]) - longitude = longitudes[0] + ((grid_col + 0.5) / grid_size) * (longitudes[1] - longitudes[0]) + grid_size = 5 + grid_height = height // grid_size + grid_width = width // grid_size + data = [] + for grid_row in range(grid_size): + for grid_col in range(grid_size): + cell_pixels = image[ + grid_row * grid_height : (grid_row + 1) * grid_height, + grid_col * grid_width : (grid_col + 1) * grid_width, + ] + cell_pixels = cell_pixels.reshape(-1, 3) + dominant_pixel = dominant_color(cell_pixels) + latitude = latitudes[0] + ((grid_row + 0.5) / grid_size) * ( + latitudes[1] - latitudes[0] + ) + longitude = longitudes[0] + ((grid_col + 0.5) / grid_size) * ( + longitudes[1] - longitudes[0] + ) - if np.array_equal(dominant_pixel, [127, 0, 0]): # dark blue - data.append([latitude, longitude, "lowest-CO"]) - elif np.array_equal(dominant_pixel, [255, 0, 0]): # blue - data.append([latitude, longitude, "low-CO"]) - elif np.array_equal(dominant_pixel, [255, 255, 0]): # cyan - data.append([latitude, longitude, "moderately low-CO"]) - elif np.array_equal(dominant_pixel, [255, 255, 255]): # yellow - data.append([latitude, longitude, "moderately high-CO"]) - elif np.array_equal(dominant_pixel, [0, 0, 255]): # red - data.append([latitude, longitude, "high-CO"]) - elif np.array_equal(dominant_pixel, [0, 0, 127]): # dark red - data.append([latitude, longitude, "highest-CO"]) - else: - data.append([latitude, longitude, "unknown"]) + if np.array_equal(dominant_pixel, [127, 0, 0]): # dark blue + data.append([latitude, longitude, "lowest-CO"]) + elif np.array_equal(dominant_pixel, [255, 0, 0]): # blue + data.append([latitude, longitude, "low-CO"]) + elif np.array_equal(dominant_pixel, [255, 255, 0]): # cyan + data.append([latitude, longitude, "moderately low-CO"]) + elif np.array_equal(dominant_pixel, [255, 255, 255]): # yellow + data.append([latitude, longitude, "moderately high-CO"]) + elif np.array_equal(dominant_pixel, [0, 0, 255]): # red + data.append([latitude, longitude, "high-CO"]) + elif np.array_equal(dominant_pixel, [0, 0, 127]): # dark red + data.append([latitude, longitude, "highest-CO"]) + else: + data.append([latitude, longitude, "unknown"]) - if data: + if len(data) == 0: + logger.error(f"No data found for image {fp.name}") + continue df = pd.DataFrame(data, columns=["latitude", "longitude", "CO_level"]) - jsonpath = os.path.join(image_dir, "augmented_coordinates", interval) - os.makedirs(os.path.dirname(jsonpath), exist_ok=True) - df.to_json(jsonpath, orient="index") - logger.info(f"Augmented Climate Data saved to: {jsonpath}") - - # Sanitize the interval to create a valid filename - sanitized_interval = sanitize_filename(interval) - unix_timestamp = int(time.time()) # TODO: calculate a deterministic timestamp that can match those of the other scrapers given the same start_date, end_date, and interval - - data_name = f"{sanitized_interval}_climate_{image_hash}" - relative_path = 
f"{case_study_name}/{tracer_id}/{job_id}/{unix_timestamp}/sentinel/augmented-coordinates/{data_name}.json" - - media_data = KernelPlancksterSourceData( - name=data_name, - protocol=protocol, - relative_path=relative_path, - ) - - try: - scraped_data_repository.register_scraped_json( + with tempfile.NamedTemporaryFile(suffix=".json", delete=True) as out: + df.to_json(out.name, orient="index") + logger.info( + f"Augmented Climate Data locally saved to temporary file: {out.name}" + ) + + relative_path = generate_relative_path( + case_study_name=case_study_name, + tracer_id=tracer_id, job_id=job_id, - source_data=media_data, - local_file_name=jsonpath, + timestamp=timestamp, + dataset=dataset, + evalscript_name=evalscript_name, + image_hash=image_hash + "-augmented", + file_extension="json") + + media_data = KernelPlancksterSourceData( + name="augmented-coordinates.json", + protocol=protocol, + relative_path=relative_path, ) - except Exception as e: - logger.warning(f"Could not register file: {e}") - output_data_list.append(media_data) + try: + scraped_data_repository.register_scraped_json( + job_id=job_id, + source_data=media_data, + local_file_name=out.name, + ) + except Exception as e: + logger.error(f"Could not register file: {e}") + failed = True + continue - return output_data_list \ No newline at end of file + if failed: + return JobOutput( + job_state=BaseJobState.FAILED, + tracer_id=tracer_id, + source_data_list=[], + ) + return JobOutput( + job_state=BaseJobState.FINISHED, + tracer_id=tracer_id, + source_data_list=[media_data], + ) \ No newline at end of file diff --git a/augmentations/wildfire_augmentations.py b/augmentations/wildfire_augmentations.py index ad4f5ab..09fee4a 100644 --- a/augmentations/wildfire_augmentations.py +++ b/augmentations/wildfire_augmentations.py @@ -1,35 +1,53 @@ -from logging import Logger -import time -from app.sdk.models import KernelPlancksterSourceData, ProtocolEnum +import logging +import tempfile +from app.sdk.models import BaseJobState, JobOutput, KernelPlancksterSourceData, ProtocolEnum from app.sdk.scraped_data_repository import ScrapedDataRepository, KernelPlancksterSourceData import pandas as pd import cv2 -import os, re -import hashlib -def sanitize_filename(filename): - # Replace disallowed characters with underscores - return re.sub(r'[^\w./]', '_', filename) +from utils import generate_relative_path, parse_relative_path -def get_image_hash(image): - """ - Computes a hash for the given image. 
- """ - hasher = hashlib.md5() - hasher.update(image.tobytes()) - return hasher.hexdigest() -def augment_wildfire_images(case_study_name: str, job_id: str, tracer_id: str, image_dir: str, coords_wgs84: tuple[float, float, float, float], logger: Logger, protocol: ProtocolEnum, scraped_data_repository: ScrapedDataRepository, output_data_list: list[KernelPlancksterSourceData]): - latitudes = [coords_wgs84[1], coords_wgs84[3]] - longitudes = [coords_wgs84[0], coords_wgs84[2]] +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") +handler.setFormatter(formatter) +logger.addHandler(handler) +logger.setLevel(logging.INFO) - os.makedirs(os.path.join(image_dir, "masked"), exist_ok=True) - for image_path in os.listdir(os.path.join(image_dir, "masked")): - interval = "_".join(image_path.split("_")[:-1]) - image_hash = image_path.split("_")[-1].split(".")[0] - full_path = os.path.join(image_dir, "masked", image_path) - image = cv2.imread(full_path) - height, width, _ = image.shape +def augment_wildfire( + case_study_name: str, + job_id: int, + tracer_id: str, + long_left: float, + lat_down: float, + long_right: float, + lat_up: float, + scraped_data_repository: ScrapedDataRepository, + relevant_source_data: list[KernelPlancksterSourceData], + protocol: ProtocolEnum, +) -> JobOutput: + failed = False + latitudes = [lat_down, lat_up] + longitudes = [long_left, long_right] + for source_data in relevant_source_data: + relative_path = source_data.relative_path + ( + case_study_name, + tracer_id, + job_id, + timestamp, + dataset, + evalscript_name, + image_hash, + file_extension, + ) = parse_relative_path(relative_path=relative_path) + with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as fp: + scraped_data_repository.download_data( + source_data=source_data, local_file=fp.name + ) + image = cv2.imread(fp.name) + height, width, _ = image.shape data = [] for i in range(height): @@ -40,21 +58,30 @@ def augment_wildfire_images(case_study_name: str, job_id: str, tracer_id: str, i longitude = longitudes[0] + (j / width) * (longitudes[1] - longitudes[0]) data.append([latitude, longitude, "forestfire"]) - if data: - df = pd.DataFrame(data, columns=['latitude', 'longitude', 'status']) - jsonpath = os.path.join(image_dir, "augmented_coordinates", interval) - os.makedirs(os.path.dirname(jsonpath), exist_ok=True) - df.to_json(jsonpath, orient="index") - logger.info(f"Augmented JSON saved to: {jsonpath}") + if len(data) == 0: + logger.error(f"No data found for image {fp.name}") + continue - # Sanitize the interval to create a valid filename - sanitized_interval = sanitize_filename(interval) - unix_timestamp = int(time.time()) # TODO: calculate a deterministic timestamp that can match those of the other scrapers given the same start_date, end_date, and interval - data_name = sanitize_filename(f"{sanitized_interval}_wildfire_{image_hash}") - relative_path = f"{case_study_name}/{tracer_id}/{job_id}/{unix_timestamp}/sentinel/augmented-coordinates/{data_name}.json" + df = pd.DataFrame(data, columns=['latitude', 'longitude', 'status']) + with tempfile.NamedTemporaryFile(suffix=".json", delete=True) as out: + df.to_json(out.name, orient="index") + logger.info( + f"Augmented Data locally saved to temporary file: {out.name}" + ) + + relative_path = generate_relative_path( + case_study_name=case_study_name, + tracer_id=tracer_id, + job_id=job_id, + timestamp=timestamp, + dataset=dataset, + evalscript_name=evalscript_name, + 
image_hash=image_hash + "-augmented", + file_extension="json" + ) media_data = KernelPlancksterSourceData( - name=data_name, + name="augmented-coordinates.json", protocol=protocol, relative_path=relative_path, ) @@ -63,12 +90,21 @@ def augment_wildfire_images(case_study_name: str, job_id: str, tracer_id: str, i scraped_data_repository.register_scraped_json( job_id=job_id, source_data=media_data, - local_file_name=jsonpath, + local_file_name=out.name, ) except Exception as e: - logger.warning(f"Could not register file: {e}") - - output_data_list.append(media_data) - - return output_data_list + logger.error(f"Could not register file: {e}") + failed = True + continue + if failed: + return JobOutput( + job_state=BaseJobState.FAILED, + tracer_id=tracer_id, + source_data_list=[], + ) + return JobOutput( + job_state=BaseJobState.FINISHED, + tracer_id=tracer_id, + source_data_list=[media_data], + ) \ No newline at end of file diff --git a/climate_augmentation.py b/climate_augmentation.py new file mode 100644 index 0000000..c49afe7 --- /dev/null +++ b/climate_augmentation.py @@ -0,0 +1,242 @@ +import logging +import sys +import json +from app.config import SUPPORTED_DATASET_EVALSCRIPTS +from app.scraper import scrape +from app.sdk.scraped_data_repository import ScrapedDataRepository +from app.setup import setup, string_validator + + +from augmentations.climate_augmentations import augment_climate_images + + +def main( + case_study_name: str, + job_id: int, + tracer_id: str, + long_left: float, + lat_down: float, + long_right: float, + lat_up: float, + dataset_evalscripts: dict[str,list[str]], + kp_host: str, + kp_port: int, + kp_auth_token: str, + kp_scheme: str, + log_level: str = "WARNING", +) -> None: + + try: + logger = logging.getLogger(__name__) + logging.basicConfig(level=log_level, format='%(asctime)s - %(levelname)s - %(message)s') + + + if not all([case_study_name, job_id, tracer_id, long_left, lat_down, long_right, lat_up]): + raise ValueError(f"case_study_name, job_id, tracer_id, coordinates, and date range must all be set.") + + string_variables = { + "case_study_name": case_study_name, + "job_id": job_id, + "tracer_id": tracer_id, + } + + logger.info(f"Validating string variables: {string_variables}") + + for name, value in string_variables.items(): + string_validator(f"{value}", name) + + logger.info(f"String variables validated successfully!") + + final_dataset_evalscripts = {} + dataset_names = dataset_evalscripts.keys() + for dataset_name in dataset_names: + if dataset_name not in SUPPORTED_DATASET_EVALSCRIPTS.keys(): + logger.error( + f"Dataset {dataset_name} not supported. Use one of {SUPPORTED_DATASET_EVALSCRIPTS.keys()}" + ) + sys.exit(1) + requested_evalscripts = dataset_evalscripts[dataset_name] + supported_evalscripts = [x['name'] for x in SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]["supported_evalscripts"]] + for evalscript in requested_evalscripts: + if evalscript not in supported_evalscripts: + logger.error( + f"Evalscript {evalscript} not supported. Use one of {SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]['supported_evalscripts']}" + ) + raise ValueError( + f"Evalscript {evalscript} not supported for {dataset_name}. 
Use one of {SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]['supported_evalscripts']}" + ) + final_dataset_evalscripts[dataset_name] = SUPPORTED_DATASET_EVALSCRIPTS[dataset_name] + final_dataset_evalscripts[dataset_name]["evalscripts"] = [x for x in SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]["supported_evalscripts"] if x["name"] in requested_evalscripts] + + logger.info(f"Setting up climate augmentation for case study: {case_study_name}") + + kernel_planckster, protocol, file_repository = setup( + job_id=job_id, + logger=logger, + kp_auth_token=kp_auth_token, + kp_host=kp_host, + kp_port=kp_port, + kp_scheme=kp_scheme, + ) + + scraped_data_repository = ScrapedDataRepository( + protocol=protocol, + kernel_planckster=kernel_planckster, + file_repository=file_repository, + ) + + root_relative_path = f"{case_study_name}/{tracer_id}/{job_id}" + scraped_files = kernel_planckster.list_source_data(root_relative_path) + sentinel_evalscript = next((x for x in SUPPORTED_DATASET_EVALSCRIPTS["SENTINEL5P"]["supported_evalscripts"] if x["name"] == "climate-mask"), None) + if not sentinel_evalscript: + logger.error("Climate mask evalscript not found. Please check the configuration of the Sentinel scraper.") + sys.exit(1) + + relevant_files = [file for file in scraped_files if sentinel_evalscript["name"] in file.relative_path] + if not relevant_files or len(relevant_files) == 0: + logger.error(f"No relevant files found in {root_relative_path}. Did you scrape sentinel data with the {sentinel_evalscript} evalscript? Please start a new scraping job with the correct evalscript.") + sys.exit(1) + + print(relevant_files) + except Exception as error: + logger.error(f"Unable to setup the climate augmentation stage. Error: {error}") + sys.exit(1) + + + job_output = augment_climate_images( + case_study_name=case_study_name, + job_id=job_id, + protocol=protocol, + tracer_id=tracer_id, + scraped_data_repository=scraped_data_repository, + long_left=long_left, + lat_down=lat_down, + long_right=long_right, + lat_up=lat_up, + relevant_source_data=relevant_files, + ) + + logger.info(f"{job_id}: Scraper finished with state: {job_output.job_state.value}") + + if job_output.job_state.value == "failed": + sys.exit(1) + + +if __name__ == "__main__": + + import argparse + + parser = argparse.ArgumentParser(description="Scrape data from Sentinel datacollection.") + + parser.add_argument( + "--case-study-name", + type=str, + help="The name of the case study", + required=True, + ) + + parser.add_argument( + "--job-id", + type=str, + help="The job id", + required=True, + ) + + parser.add_argument( + "--tracer-id", + type=str, + help="The tracer id", + required=True, + ) + + parser.add_argument( + "--log-level", + type=str, + default="WARNING", + help="The log level to use when running the scraper. Possible values are DEBUG, INFO, WARNING, ERROR, CRITICAL. 
Set to WARNING by default.", + ) + + parser.add_argument( + "--long_left", + type=float, + default="0", + help="leftmost longtude ~ left edge of bbox ", + ) + + parser.add_argument( + "--lat_down", + type=float, + default="0", + help="bottommost lattitude ~ bottom edge of bbox ", + ) + + parser.add_argument( + "--long_right", + type=float, + default="0.1", + help="rightmost longtude ~ right edge of bbox ", + ) + + parser.add_argument( + "--lat_up", + type=float, + default="0.1", + help="topmost lattitude ~ top edge of bbox ", + + ) + parser.add_argument( + "--datasets-evalscripts", + type=json.loads, + required=True, + help="dictionary in the format {\"dataset_name\": [evalscript_path1, evalscript_path2, ...]}", + ) + + parser.add_argument( + "--kp_host", + type=str, + help="kp host", + required=True, + ) + + parser.add_argument( + "--kp_port", + type=int, + help="kp port", + required=True, + ) + + parser.add_argument( + "--kp_auth_token", + type=str, + help="kp auth token", + required=True, + ) + + parser.add_argument( + "--kp_scheme", + type=str, + help="kp scheme", + required=True, + ) + + + args = parser.parse_args() + + + main( + case_study_name=args.case_study_name, + job_id=args.job_id, + tracer_id=args.tracer_id, + log_level=args.log_level, + long_left=args.long_left, + lat_down=args.lat_down, + long_right=args.long_right, + lat_up=args.lat_up, + dataset_evalscripts=args.datasets_evalscripts, + kp_host=args.kp_host, + kp_port=args.kp_port, + kp_auth_token=args.kp_auth_token, + kp_scheme=args.kp_scheme + ) + + diff --git a/sentinel_5p_time_travel.py b/sentinel_5p_time_travel.py new file mode 100644 index 0000000..7a84ba9 --- /dev/null +++ b/sentinel_5p_time_travel.py @@ -0,0 +1,226 @@ +import logging +import sys +import json +from app.config import SUPPORTED_DATASET_EVALSCRIPTS +from app.scraper import scrape +from app.sdk.scraped_data_repository import ScrapedDataRepository +from app.setup import setup, string_validator +from pydantic import BaseModel + +from app.time_travel.sentinel5p_metadata_generator import generate_time_travel_metadata + + +def main( + job_id: int, + tracer_id: str, + long_left: float, + lat_down: float, + long_right: float, + lat_up: float, + dataset_evalscripts: dict[str,list[str]], + kp_host: str, + kp_port: int, + kp_auth_token: str, + kp_scheme: str, + log_level: str = "WARNING", +) -> None: + + try: + logger = logging.getLogger(__name__) + logging.basicConfig(level=log_level, format='%(asctime)s - %(levelname)s - %(message)s') + + case_study_name = "sentinel-5p" + if not all([job_id, tracer_id, long_left, lat_down, long_right, lat_up]): + raise ValueError(f"job_id, tracer_id, coordinates must all be set.") + + string_variables = { + "case_study_name": case_study_name, + "job_id": job_id, + "tracer_id": tracer_id, + } + + logger.info(f"Validating string variables: {string_variables}") + + for name, value in string_variables.items(): + string_validator(f"{value}", name) + + logger.info(f"String variables validated successfully!") + + final_dataset_evalscripts = {} + dataset_names = dataset_evalscripts.keys() + for dataset_name in dataset_names: + if dataset_name not in SUPPORTED_DATASET_EVALSCRIPTS.keys(): + logger.error( + f"Dataset {dataset_name} not supported. 
Use one of {SUPPORTED_DATASET_EVALSCRIPTS.keys()}" + ) + sys.exit(1) + requested_evalscripts = dataset_evalscripts[dataset_name] + supported_evalscripts = [x['name'] for x in SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]["supported_evalscripts"]] + for evalscript in requested_evalscripts: + if evalscript not in supported_evalscripts: + logger.error( + f"Evalscript {evalscript} not supported. Use one of {SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]['supported_evalscripts']}" + ) + raise ValueError( + f"Evalscript {evalscript} not supported for {dataset_name}. Use one of {SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]['supported_evalscripts']}" + ) + final_dataset_evalscripts[dataset_name] = SUPPORTED_DATASET_EVALSCRIPTS[dataset_name] + final_dataset_evalscripts[dataset_name]["evalscripts"] = [x for x in SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]["supported_evalscripts"] if x["name"] in requested_evalscripts] + + logger.info(f"Setting up time trave for case study: {case_study_name}") + + kernel_planckster, protocol, file_repository = setup( + job_id=job_id, + logger=logger, + kp_auth_token=kp_auth_token, + kp_host=kp_host, + kp_port=kp_port, + kp_scheme=kp_scheme, + ) + + scraped_data_repository = ScrapedDataRepository( + protocol=protocol, + kernel_planckster=kernel_planckster, + file_repository=file_repository, + ) + + root_relative_path = f"{case_study_name}/{tracer_id}/{job_id}" + relevant_files = kernel_planckster.list_source_data(root_relative_path) + + + if not relevant_files or len(relevant_files) == 0: + logger.error(f"No relevant files found in {root_relative_path}.") + sys.exit(1) + + print(relevant_files) + except Exception as error: + logger.error(f"Unable to setup the climate augmentation stage. Error: {error}") + sys.exit(1) + + + job_output = generate_time_travel_metadata( + job_id=job_id, + protocol=protocol, + tracer_id=tracer_id, + scraped_data_repository=scraped_data_repository, + long_left=long_left, + lat_down=lat_down, + long_right=long_right, + lat_up=lat_up, + relevant_source_data=relevant_files, + ) + + logger.info(f"{job_id}: Scraper finished with state: {job_output.job_state.value}") + + if job_output.job_state.value == "failed": + sys.exit(1) + + +if __name__ == "__main__": + + import argparse + + parser = argparse.ArgumentParser(description="Scrape data from Sentinel datacollection.") + + parser.add_argument( + "--job-id", + type=str, + help="The job id", + required=True, + ) + + parser.add_argument( + "--tracer-id", + type=str, + help="The tracer id", + required=True, + ) + + parser.add_argument( + "--log-level", + type=str, + default="WARNING", + help="The log level to use when running the scraper. Possible values are DEBUG, INFO, WARNING, ERROR, CRITICAL. 
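The entry point only inspects job_output.job_state.value and exits non-zero when it equals "failed". A minimal sketch of that contract, assuming a state enum and output model along these lines (the real classes live elsewhere in the app package and may differ):

```python
from dataclasses import dataclass
from enum import Enum


class JobState(Enum):
    RUNNING = "running"
    FINISHED = "finished"
    FAILED = "failed"  # the only value the entry point treats specially


@dataclass
class JobOutput:
    job_state: JobState


def exit_code_for(job_output: JobOutput) -> int:
    # Mirrors the check at the end of main(): fail the process only on "failed".
    return 1 if job_output.job_state.value == "failed" else 0


assert exit_code_for(JobOutput(JobState.FINISHED)) == 0
assert exit_code_for(JobOutput(JobState.FAILED)) == 1
```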
Set to WARNING by default.", + ) + + parser.add_argument( + "--long_left", + type=float, + default="0", + help="leftmost longtude ~ left edge of bbox ", + ) + + parser.add_argument( + "--lat_down", + type=float, + default="0", + help="bottommost lattitude ~ bottom edge of bbox ", + ) + + parser.add_argument( + "--long_right", + type=float, + default="0.1", + help="rightmost longtude ~ right edge of bbox ", + ) + + parser.add_argument( + "--lat_up", + type=float, + default="0.1", + help="topmost lattitude ~ top edge of bbox ", + + ) + parser.add_argument( + "--datasets-evalscripts", + type=json.loads, + required=True, + help="dictionary in the format {\"dataset_name\": [evalscript_path1, evalscript_path2, ...]}", + ) + + parser.add_argument( + "--kp_host", + type=str, + help="kp host", + required=True, + ) + + parser.add_argument( + "--kp_port", + type=int, + help="kp port", + required=True, + ) + + parser.add_argument( + "--kp_auth_token", + type=str, + help="kp auth token", + required=True, + ) + + parser.add_argument( + "--kp_scheme", + type=str, + help="kp scheme", + required=True, + ) + + + args = parser.parse_args() + + + main( + job_id=args.job_id, + tracer_id=args.tracer_id, + log_level=args.log_level, + long_left=args.long_left, + lat_down=args.lat_down, + long_right=args.long_right, + lat_up=args.lat_up, + dataset_evalscripts=args.datasets_evalscripts, + kp_host=args.kp_host, + kp_port=args.kp_port, + kp_auth_token=args.kp_auth_token, + kp_scheme=args.kp_scheme + ) diff --git a/sentinel_scraper.py b/sentinel_scraper.py index 02637cb..96dac13 100644 --- a/sentinel_scraper.py +++ b/sentinel_scraper.py @@ -1,8 +1,10 @@ import logging import sys +import json +from app.config import SUPPORTED_DATASET_EVALSCRIPTS from app.scraper import scrape from app.sdk.scraped_data_repository import ScrapedDataRepository -from app.setup import datetime_parser,setup, string_validator +from app.setup import setup, string_validator from app.setup_scraping_client import get_scraping_config @@ -19,15 +21,12 @@ def main( start_date: str, end_date: str, interval: int, - image_dir: str, - dataset_name:str, + dataset_evalscripts: dict[str,list[str]], resolution: int, - evalscript_bands_path:str, - evalscript_truecolor_path:str, sentinel_client_id: str, sentinel_client_secret: str, kp_host: str, - kp_port: str, + kp_port: int, kp_auth_token: str, kp_scheme: str, log_level: str = "WARNING", @@ -45,7 +44,6 @@ def main( "case_study_name": case_study_name, "job_id": job_id, "tracer_id": tracer_id, - "dataset_name": dataset_name, } logger.info(f"Validating string variables: {string_variables}") @@ -55,7 +53,27 @@ def main( logger.info(f"String variables validated successfully!") - + final_datasaet_evalscripts = {} + dataset_names = dataset_evalscripts.keys() + for dataset_name in dataset_names: + if dataset_name not in SUPPORTED_DATASET_EVALSCRIPTS.keys(): + logger.error( + f"Dataset {dataset_name} not supported. Use one of {SUPPORTED_DATASET_EVALSCRIPTS.keys()}" + ) + sys.exit(1) + requested_evalscripts = dataset_evalscripts[dataset_name] + supported_evalscripts = [x['name'] for x in SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]["supported_evalscripts"]] + for evalscript in requested_evalscripts: + if evalscript not in supported_evalscripts: + logger.error( + f"Evalscript {evalscript} not supported. Use one of {SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]['supported_evalscripts']}" + ) + raise ValueError( + f"Evalscript {evalscript} not supported for {dataset_name}. 
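The four coordinate flags describe a WGS84 bounding box. A short sketch of how they are typically assembled for Sentinel Hub, assuming the (long_left, lat_down, long_right, lat_up) ordering that sentinelhub's BBox expects for (min lon, min lat, max lon, max lat); the coordinate values below are illustrative, not taken from any job.

```python
from sentinelhub import BBox, CRS, bbox_to_dimensions

# Illustrative values only; in the scripts these come from the CLI flags.
long_left, lat_down, long_right, lat_up = 12.44, 41.87, 12.54, 41.92

# BBox takes (min lon, min lat, max lon, max lat) in WGS84, which is how the
# four flags are expected to map onto the coords_wgs84 tuple used downstream.
coords_wgs84 = (long_left, lat_down, long_right, lat_up)
bbox = BBox(bbox=coords_wgs84, crs=CRS.WGS84)

# Pixel dimensions of the request at a given resolution (metres per pixel).
print(bbox_to_dimensions(bbox, resolution=100))
```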
Use one of {SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]['supported_evalscripts']}" + ) + final_datasaet_evalscripts[dataset_name] = SUPPORTED_DATASET_EVALSCRIPTS[dataset_name] + final_datasaet_evalscripts[dataset_name]["evalscripts"] = [x for x in SUPPORTED_DATASET_EVALSCRIPTS[dataset_name]["supported_evalscripts"] if x["name"] in requested_evalscripts] + logger.info(f"Setting up scraper for case study: {case_study_name}") kernel_planckster, protocol, file_repository = setup( @@ -99,10 +117,7 @@ def main( start_date=start_date, end_date=end_date, interval=interval, - image_dir=image_dir, - evalscript_bands_path=evalscript_bands_path, - evalscript_truecolor_path=evalscript_truecolor_path, - dataset_name=dataset_name, + dataset_evalscripts=final_datasaet_evalscripts, resolution=resolution ) @@ -112,7 +127,6 @@ def main( sys.exit(1) - if __name__ == "__main__": import argparse @@ -198,31 +212,10 @@ def main( ) parser.add_argument( - "--image_dir", - type=str, - default="./.tmp", - help="image dir", - ) - - parser.add_argument( - "--evalscript_bands_path", - type=str, - required=True, - help="Path to Evalscript for Bands Configuration" - ) - - parser.add_argument( - "--evalscript_truecolor_path", - type=str, - required=False, - help="Path to truecolor Evalscript file for augmentation" - ) - - parser.add_argument( - "--dataset_name", - type=str, + "--datasets-evalscripts", + type=json.loads, required=True, - help="dataset configuration", + help="dictionary in the format {\"dataset_name\": [evalscript_path1, evalscript_path2, ...]}", ) parser.add_argument( @@ -290,11 +283,8 @@ def main( start_date=args.start_date, end_date=args.end_date, interval=args.interval, - image_dir=args.image_dir, - dataset_name=args.dataset_name, + dataset_evalscripts=args.datasets_evalscripts, resolution=args.resolution, - evalscript_bands_path=args.evalscript_bands_path, - evalscript_truecolor_path=args.evalscript_truecolor_path, sentinel_client_id=args.sentinel_client_id, sentinel_client_secret=args.sentinel_client_secret, kp_host=args.kp_host, diff --git a/utils.py b/utils.py index 0038f98..106db31 100644 --- a/utils.py +++ b/utils.py @@ -1,20 +1,26 @@ -"""utils - -Automatically generated by Colaboratory. 
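After validation, scrape() no longer receives image_dir, dataset_name, or the two evalscript path flags; it gets the merged mapping built by the loop above. A short sketch of the shape of that mapping for a single dataset; the requested list is illustrative and the values come from app.config:

```python
from app.config import SUPPORTED_DATASET_EVALSCRIPTS

# Shape of the per-dataset entry handed to scrape() via dataset_evalscripts:
# the original config entry plus an "evalscripts" key holding only the requested scripts.
requested = ["climate-mask"]  # illustrative request
entry = dict(SUPPORTED_DATASET_EVALSCRIPTS["SENTINEL5P"])
entry["evalscripts"] = [
    script for script in entry["supported_evalscripts"] if script["name"] in requested
]
dataset_evalscripts_for_scrape = {"SENTINEL5P": entry}

print([s["name"] for s in dataset_evalscripts_for_scrape["SENTINEL5P"]["evalscripts"]])
```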
- -Original file is located at - https://colab.research.google.com/drive/1aguoynCs-cJ81RbK8ycpbPz8pakIUvc- -""" - -from typing import Any, Optional, Tuple +from oauthlib.oauth2.rfc6749.errors import InvalidClientError +from typing import Any, NamedTuple, Optional, Tuple +import requests # Matplotlib is not thread safe, so we need to set the backend before importing pyplot import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import numpy as np from datetime import datetime, timedelta +from logging import Logger +import hashlib +import re +from sentinelhub import ( + SHConfig, + BBox, + CRS, + DataCollection, + SentinelHubRequest, + bbox_to_dimensions, + MimeType, +) def plot_image( image: np.ndarray, factor: float = 1.0, clip_range: Optional[Tuple[float, float]] = None, **kwargs: Any ) -> None: @@ -29,6 +35,38 @@ def plot_image( plt.gca().set_axis_off() ax.set_frame_on(False) +def download_image( + logger: Logger, + coords_wgs84: tuple[float, float, float, float], + dataset: DataCollection, + evalscript: str, + config: SHConfig, + resolution: int, +): + coords_bbox = BBox(bbox=coords_wgs84, crs=CRS.WGS84) + coords_size = bbox_to_dimensions(coords_bbox, resolution=resolution) + try: + sentinel_request = SentinelHubRequest( + evalscript=evalscript, + input_data=[ + SentinelHubRequest.input_data( + data_collection=dataset, + ) + ], + responses=[SentinelHubRequest.output_response("default", MimeType.PNG)], + bbox=coords_bbox, + size=coords_size, + config=config, + ) + data = sentinel_request.get_data() + return data + except InvalidClientError as e: + logger.error(f"Sentinel Hub client error: {e}") + raise e + + except Exception as e: + logger.warning(e) + return str(e) def save_image( image: np.ndarray, filename: str, factor: float = 1.0, clip_range: Optional[Tuple[float, float]] = None, **kwargs: Any @@ -46,7 +84,13 @@ def save_image( plt.savefig(filename, bbox_inches="tight",pad_inches = 0) plt.close() - +def get_image_hash(image): + """ + Computes a hash for the given image. 
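A usage sketch for the new download_image helper, combined with get_image_hash and save_image from the same module. The credentials, evalscript URL, coordinates, and the factor/clip values are placeholders; load_evalscript is defined further down in utils.py. Note that download_image returns the list produced by SentinelHubRequest.get_data() on success and the error string on most failures.

```python
import logging

import numpy as np
from sentinelhub import DataCollection, SHConfig

from utils import download_image, get_image_hash, load_evalscript, save_image

logger = logging.getLogger(__name__)

config = SHConfig()
config.sh_client_id = "<sentinel-client-id>"          # placeholder
config.sh_client_secret = "<sentinel-client-secret>"  # placeholder

evalscript = load_evalscript("https://example.com/climate_evalscript.js")  # placeholder URL
coords_wgs84 = (12.44, 41.87, 12.54, 41.92)  # (long_left, lat_down, long_right, lat_up), illustrative

data = download_image(
    logger=logger,
    coords_wgs84=coords_wgs84,
    dataset=DataCollection.SENTINEL5P,
    evalscript=evalscript,
    config=config,
    resolution=100,
)

# On success get_data() yields a list whose first element is the rendered image array.
if isinstance(data, list) and data and isinstance(data[0], np.ndarray):
    image = data[0]
    save_image(image, f"./.tmp/s5p_{get_image_hash(image)}.png", factor=1.5, clip_range=(0, 1))
else:
    logger.warning("Download failed: %s", data)
```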
+ """ + hasher = hashlib.md5() + hasher.update(image.tobytes()) + return hasher.hexdigest() def date_range(start_date, end_date): @@ -58,3 +102,42 @@ def date_range(start_date, end_date): for x in range((end_date-start_date).days + 1)] return date_list + +def load_evalscript(source: str) -> str: + if source.startswith("http://") or source.startswith("https://"): + # Load from URL + response = requests.get(source) + response.raise_for_status() # Raise an exception if the request failed + return response.text + else: + # Load from file + with open(source, "r") as file: + return file.read() + + +def sanitize_filename(filename): #helper function + return re.sub(r'[^\w./]', '_', filename) + + +class KernelPlancksterRelativePath(NamedTuple): + case_study_name: str + tracer_id: str + job_id: str + timestamp: str + dataset: str + evalscript_name: str + image_hash: str + file_extension: str + +def generate_relative_path(case_study_name, tracer_id, job_id, timestamp, dataset, evalscript_name, image_hash, file_extension): + return f"{case_study_name}/{tracer_id}/{job_id}/{timestamp}/sentinel/{dataset}_{evalscript_name}_{image_hash}.{file_extension}" + +def parse_relative_path(relative_path) -> KernelPlancksterRelativePath: + parts = relative_path.split("/") + case_study_name = parts[0] + tracer_id = parts[1] + job_id = parts[2] + timestamp = parts[3] + dataset, evalscript_name, image_hash_extension = parts[5].split("_") + image_hash, file_extension = image_hash_extension.split(".") + return KernelPlancksterRelativePath(case_study_name, tracer_id, job_id, timestamp, dataset, evalscript_name, image_hash, file_extension) \ No newline at end of file