diff --git a/CHANGELOG.md b/CHANGELOG.md index e1c276fa..4cb3d2dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/) and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.21.0] +### Added +* Logger is now configured in `process.main()` so paths to reference/secondary scenes are logged. +### Changed +* Fetch Sentinel-2 scenes from AWS S3 (if present); otherwise continue to fetch from Google Cloud Storage. + ## [0.20.0] ### Changed * The M11/M12 variables produced by the hyp3_autorift and s1_correction workflows will be written as `float32` instead of the previous compressed `int16` variables that did not take advantage of the full dynamic range and thus lost a significant amount of precision. diff --git a/src/hyp3_autorift/process.py b/src/hyp3_autorift/process.py index 0ff7b60d..840efb9f 100644 --- a/src/hyp3_autorift/process.py +++ b/src/hyp3_autorift/process.py @@ -108,7 +108,13 @@ def get_s2_manifest(scene_name): return response.text -def get_s2_path(manifest_text: str, scene_name: str) -> str: +def get_s2_path(scene_name: str) -> str: + bucket = 'its-live-project' + key = f's2-cache/{scene_name}_B08.jp2' + if s3_object_is_accessible(bucket, key): + return f'/vsis3/{bucket}/{key}' + + manifest_text = get_s2_manifest(scene_name) root = ET.fromstring(manifest_text) elements = root.findall(".//fileLocation[@locatorType='URL'][@href]") hrefs = [element.attrib['href'] for element in elements if @@ -140,8 +146,7 @@ def get_raster_bbox(path: str): def get_s2_metadata(scene_name): - manifest = get_s2_manifest(scene_name) - path = get_s2_path(manifest, scene_name) + path = get_s2_path(scene_name) bbox = get_raster_bbox(path) acquisition_start = datetime.strptime(scene_name.split('_')[2], '%Y%m%dT%H%M%S') @@ -485,6 +490,9 @@ def main(): if len(args.granules) != 2: 
parser.error('Must provide exactly two granules') + logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO) + g1, g2 = sorted(args.granules, key=get_datetime) product_file, browse_file, thumbnail_file = process( diff --git a/tests/test_process.py b/tests/test_process.py index e6801354..9d5d5e52 100644 --- a/tests/test_process.py +++ b/tests/test_process.py @@ -129,21 +129,10 @@ def test_get_lc2_path(): assert process.get_lc2_path(metadata) == 'foo' -@responses.activate -def test_get_s2_metadata_not_found(): - url = 'https://storage.googleapis.com/gcp-public-data-sentinel-2/tiles////foo.SAFE/manifest.safe' - responses.add(responses.GET, url, status=404) - with pytest.raises(requests.exceptions.HTTPError) as http_error: - process.get_s2_metadata('foo') - assert http_error.value.response.status_code == 404 - - @responses.activate @patch('hyp3_autorift.process.get_raster_bbox') @patch('hyp3_autorift.process.get_s2_path') -@patch('hyp3_autorift.process.get_s2_manifest') -def test_get_s2_metadata(mock_get_s2_manifest: MagicMock, mock_get_s2_path: MagicMock, mock_get_raster_bbox: MagicMock): - mock_get_s2_manifest.return_value = 'manifest content' +def test_get_s2_metadata(mock_get_s2_path: MagicMock, mock_get_raster_bbox: MagicMock): mock_get_s2_path.return_value = 's2 path' mock_get_raster_bbox.return_value = [0, 0, 1, 1] @@ -157,9 +146,7 @@ def test_get_s2_metadata(mock_get_s2_manifest: MagicMock, mock_get_s2_path: Magi } assert process.get_s2_metadata('S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500') == expected - mock_get_s2_manifest.assert_called_once_with('S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500') - mock_get_s2_path.assert_called_once_with('manifest content', - 'S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500') + mock_get_s2_path.assert_called_once_with('S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500') 
mock_get_raster_bbox.assert_called_once_with('s2 path') @@ -182,24 +169,67 @@ def test_get_s2_manifest(): assert process.get_s2_manifest('S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500') == 'foo' -def test_get_s2_path(test_data_directory): - scene_name = 'S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500' - with open(f'{test_data_directory}/{scene_name}.manifest.safe', 'r') as f: - manifest_text = f.read() - path = process.get_s2_path(manifest_text, scene_name) +@patch('hyp3_autorift.process.s3_object_is_accessible') +def test_get_s2_path_aws(mock_s3_object_is_accessible: MagicMock): + mock_s3_object_is_accessible.return_value = True + assert process.get_s2_path('foo') == '/vsis3/its-live-project/s2-cache/foo_B08.jp2' + + mock_s3_object_is_accessible.assert_called_once_with('its-live-project', 's2-cache/foo_B08.jp2') + + +@patch('hyp3_autorift.process.s3_object_is_accessible') +@patch('hyp3_autorift.process.get_s2_manifest') +def test_get_s2_path_google_old_manifest( + mock_get_s2_manifest: MagicMock, mock_s3_object_is_accessible: MagicMock, test_data_directory: Path, +): + manifest = test_data_directory / 'S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500.manifest.safe' + mock_get_s2_manifest.return_value = manifest.read_text() + mock_s3_object_is_accessible.return_value = False + + path = process.get_s2_path('S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500') assert path == '/vsicurl/https://storage.googleapis.com/gcp-public-data-sentinel-2/tiles/29/Q/KF/' \ 'S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500.SAFE/./GRANULE' \ '/S2A_OPER_MSI_L1C_TL_SGS__20160616T181414_A005139_T29QKF_N02.04/IMG_DATA' \ '/S2A_OPER_MSI_L1C_TL_SGS__20160616T181414_A005139_T29QKF_B08.jp2' - scene_name = 'S2B_MSIL1C_20200419T060719_N0209_R105_T38EMQ_20200419T091056' - with open(f'{test_data_directory}/{scene_name}.manifest.safe', 'r') as f: - manifest_text = f.read() - path = process.get_s2_path(manifest_text, scene_name) + 
mock_get_s2_manifest.assert_called_once_with('S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500') + mock_s3_object_is_accessible.assert_called_once_with( + 'its-live-project', 's2-cache/S2A_MSIL1C_20160616T112217_N0204_R137_T29QKF_20160617T193500_B08.jp2', + ) + + +@patch('hyp3_autorift.process.s3_object_is_accessible') +@patch('hyp3_autorift.process.get_s2_manifest') +def test_get_s2_path_google_new_manifest( + mock_get_s2_manifest: MagicMock, mock_s3_object_is_accessible: MagicMock, test_data_directory, +): + manifest = test_data_directory / 'S2B_MSIL1C_20200419T060719_N0209_R105_T38EMQ_20200419T091056.manifest.safe' + mock_get_s2_manifest.return_value = manifest.read_text() + mock_s3_object_is_accessible.return_value = False + + path = process.get_s2_path('S2B_MSIL1C_20200419T060719_N0209_R105_T38EMQ_20200419T091056') assert path == '/vsicurl/https://storage.googleapis.com/gcp-public-data-sentinel-2/tiles/38/E/MQ/' \ 'S2B_MSIL1C_20200419T060719_N0209_R105_T38EMQ_20200419T091056.SAFE/./GRANULE' \ '/L1C_T38EMQ_A016290_20200419T060719/IMG_DATA/T38EMQ_20200419T060719_B08.jp2' + mock_get_s2_manifest.assert_called_once_with('S2B_MSIL1C_20200419T060719_N0209_R105_T38EMQ_20200419T091056') + mock_s3_object_is_accessible.assert_called_once_with( + 'its-live-project', 's2-cache/S2B_MSIL1C_20200419T060719_N0209_R105_T38EMQ_20200419T091056_B08.jp2', + ) + + +@responses.activate +@patch('hyp3_autorift.process.s3_object_is_accessible') +def test_get_s2_path_not_found(mock_s3_object_is_accessible: MagicMock): + mock_s3_object_is_accessible.return_value = False + + url = 'https://storage.googleapis.com/gcp-public-data-sentinel-2/tiles////foo.SAFE/manifest.safe' + responses.add(responses.GET, url, status=404) + + with pytest.raises(requests.exceptions.HTTPError) as http_error: + process.get_s2_path('foo') + assert http_error.value.response.status_code == 404 + def test_get_raster_bbox(test_data_directory): bbox = process.get_raster_bbox(str(test_data_directory / 
'T60CWU_20160414T200612_B08.jp2'))