Skip to content

Commit

Permalink
Merge pull request #322 from asfadmin/master
Browse files Browse the repository at this point in the history
V8.1.0 Release
  • Loading branch information
artisticlight authored Jan 13, 2025
2 parents 5f5f39d + 26589ff commit 3c5a301
Show file tree
Hide file tree
Showing 11 changed files with 150 additions and 37 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/run-pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- uses: actions/setup-python@v5
with:
python-version: '3.9'
- name: Install Dependencies
run: |
python3 -m pip install --upgrade pip
Expand Down
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,21 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-
-->
------
## [v8.1.0](https://github.com/asfadmin/Discovery-asf_search/compare/v8.0.1...v8.1.0)
### Added
- Adds `ASFSearchResults.find_urls()` and `ASFProduct.find_urls()` to gather urls/uris from results by extension and/or regex pattern
### Changed
- Changed log level from warning to debug/info for search timing log messages
- Raised minimum Python version to 3.9 from 3.8, which reached EOL last year (see the official [Status of Python versions](https://devguide.python.org/versions/) for the Python version release cycle)

------
## [v8.0.1](https://github.com/asfadmin/Discovery-asf_search/compare/v8.0.0...v8.0.1)
### Fixed
- Fixed setting end date timezone when translating search opts to CMR opts

------
## [v8.0.0](https://github.com/asfadmin/Discovery-asf_search/compare/v7.1.0...v7.2.0)
## [v8.0.0](https://github.com/asfadmin/Discovery-asf_search/compare/v7.1.0...v8.0.0)
### Added
- Added `asf.ASFSearchOptions(circle=[lat, long, radius])` search param. Takes list of exactly 3 numbers.
- Exposed `asf.validator_map`, which given an ops search param, can be used to look up which method we're going to validate it against.
Expand Down
59 changes: 44 additions & 15 deletions asf_search/ASFProduct.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import warnings
from shapely.geometry import shape, Point, Polygon, mapping
import json
import re

from urllib import parse

Expand Down Expand Up @@ -121,14 +122,10 @@ def get_classname(cls):
- `path`: the expected path in the CMR UMM json granule response as a list
- `cast`: (optional): the optional type casting method
<<<<<<< HEAD
Defining `_base_properties` in subclasses allows for
defining custom properties or overriding existing ones.
See `S1Product.get_property_paths()` on how subclasses are expected to
combine `ASFProduct._base_properties` with their own separately defined `_base_properties`
=======
Defining `_properties_paths` in subclasses allows for defining custom properties or overiding existing ones.
>>>>>>> master
"""

def __init__(self, args: Dict = {}, session: ASFSession = ASFSession()):
Expand Down Expand Up @@ -268,7 +265,8 @@ def get_stack_opts(self, opts: ASFSearchOptions = None) -> ASFSearchOptions:
return None

def _get_access_urls(
self, url_types: List[str] = ['GET DATA', 'EXTENDED METADATA']
self,
url_types: List[str] = ['GET DATA', 'EXTENDED METADATA']
) -> List[str]:
accessUrls = []

Expand All @@ -278,23 +276,54 @@ def _get_access_urls(

return sorted(list(set(accessUrls)))

def _get_urls(self) -> List[str]:
    """Collect every UMM access url that is not an S3 (`s3://`) uri.

    Asks `_get_access_urls()` for all of the related-url types listed below
    and drops the entries that start with `s3://`, keeping the remaining urls
    in the order they were returned.
    """
    umm_url_types = [
        'GET DATA',
        'EXTENDED METADATA',
        'GET DATA VIA DIRECT ACCESS',
        'GET RELATED VISUALIZATION',
        'VIEW RELATED INFORMATION',
    ]

    non_s3_urls = []
    for candidate in self._get_access_urls(umm_url_types):
        if candidate.startswith('s3://'):
            continue
        non_s3_urls.append(candidate)

    return non_s3_urls

def _get_s3_uris(self) -> List[str]:
    """Collect every UMM S3 direct-access uri (entries starting with `s3://`).

    Asks `_get_access_urls()` for all of the related-url types listed below
    and keeps only the `s3://` entries, in the order they were returned.
    """
    umm_url_types = [
        'GET DATA',
        'EXTENDED METADATA',
        'GET DATA VIA DIRECT ACCESS',
        'GET RELATED VISUALIZATION',
        'VIEW RELATED INFORMATION',
    ]
    all_access_urls = self._get_access_urls(umm_url_types)

    return list(filter(lambda uri: uri.startswith('s3://'), all_access_urls))

def _get_additional_urls(self) -> List[str]:
accessUrls = self._get_access_urls(['GET DATA', 'EXTENDED METADATA'])
"""Finds and returns all non-md5/image urls and filters out the existing `url` property"""
access_urls = self._get_urls()
return [
url
for url in accessUrls
url for url in access_urls
if not url.endswith('.md5')
and not url.startswith('s3://')
and 's3credentials' not in url
and not url.endswith('.png')
and url != self.properties['url']
and 's3credentials' not in url
]

def _get_s3_urls(self) -> List[str]:
s3_urls = self._get_access_urls(
['GET DATA', 'EXTENDED METADATA', 'GET DATA VIA DIRECT ACCESS']
)
return [url for url in s3_urls if url.startswith('s3://')]
def find_urls(self, extension: str = None, pattern: str = r'.*', directAccess: bool = False) -> List[str]:
    """
    Searches for all urls matching a given extension and/or pattern

    param extension: the file extension to search for, compared exactly
        (case-sensitive) against the extension of each url's path, so query
        strings are ignored. A missing leading dot is added, so `'tif'` and
        `'.tif'` are equivalent. (Defaults to `None`, meaning no extension filtering)
        - Example: '.tiff'
    param pattern: A regex pattern to search each url for. (Defaults to `r'.*'`,
        which matches every url)
        - Example: `r'(QA_)+'` to find urls with 'QA_' at least once
    param directAccess: should search in s3 bucket urls (Defaults to `False`)

    returns: the matching urls (or s3 uris when `directAccess` is set), sorted
    """
    candidates = self._get_s3_uris() if directAccess else self._get_urls()

    if extension is not None:
        # os.path.splitext() reports extensions with their leading dot, so a
        # bare 'tif' could never match. The empty string is left untouched:
        # it selects urls whose path has no extension at all.
        if extension and not extension.startswith('.'):
            extension = f'.{extension}'

        candidates = [
            url
            for url in candidates
            if os.path.splitext(parse.urlparse(url).path)[-1] == extension
        ]

    regexp = re.compile(pattern)

    return sorted(url for url in candidates if regexp.search(url) is not None)

def centroid(self) -> Point:
"""
Expand Down
16 changes: 16 additions & 0 deletions asf_search/ASFSearchResults.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections import UserList
from multiprocessing import Pool
import json
from typing import List
from asf_search import ASFSession, ASFSearchOptions
from asf_search.download.file_download_type import FileDownloadType
from asf_search.exceptions import ASFSearchError
Expand Down Expand Up @@ -42,6 +43,21 @@ def jsonlite(self):
def jsonlite2(self):
"""Return the output of `results_to_jsonlite2` applied to this result set."""
return results_to_jsonlite2(self)

def find_urls(self, extension: str = None, pattern: str = r'.*', directAccess: bool = False) -> List[str]:
    """Returns a flat, de-duplicated, sorted list of all https or s3 urls from all results
    matching an extension and/or regex pattern

    param extension: the file extension to search for. (Defaults to `None`, meaning no extension filtering)
        - Example: '.tiff'
    param pattern: A regex pattern to search each url for. (Defaults to `r'.*'`, which matches every url)
        - Example: `r'(QA_)+'` to find urls with 'QA_' at least once
    param directAccess: should search in s3 bucket urls (Defaults to `False`)
    """
    # Filtering is delegated to each product's own find_urls(); a set removes
    # urls that are shared between products before the final sort.
    unique_urls = {
        url
        for product in self
        for url in product.find_urls(extension=extension, pattern=pattern, directAccess=directAccess)
    }

    return sorted(unique_urls)

def __str__(self):
    """Render the results' GeoJSON as text, indented by 2 spaces with sorted keys."""
    geojson_data = self.geojson()
    return json.dumps(geojson_data, indent=2, sort_keys=True)

Expand Down
2 changes: 1 addition & 1 deletion asf_search/Products/NISARProduct.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(self, args: Dict = {}, session: ASFSession = ASFSession()):
super().__init__(args, session)

self.properties['additionalUrls'] = self._get_additional_urls()
self.properties['s3Urls'] = self._get_s3_urls()
self.properties['s3Urls'] = self._get_s3_uris()

if self.properties.get('groupID') is None:
self.properties['groupID'] = self.properties['sceneName']
Expand Down
2 changes: 1 addition & 1 deletion asf_search/Products/S1Product.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class S1Product(ASFStackableProduct):
def __init__(self, args: Dict = {}, session: ASFSession = ASFSession()):
super().__init__(args, session)

self.properties['s3Urls'] = self._get_s3_urls()
self.properties['s3Urls'] = self._get_s3_uris()

if self.has_baseline():
self.baseline = self.get_baseline_calc_properties()
Expand Down
2 changes: 1 addition & 1 deletion asf_search/search/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def search(
# The last page will be marked as complete if results successful
perf = time.time()
for page in search_generator(opts=opts):
ASF_LOGGER.warning(f'Page Time Elapsed {time.time() - perf}')
ASF_LOGGER.debug(f'Page Time Elapsed {time.time() - perf}')
results.extend(page)
results.searchComplete = page.searchComplete
results.searchOptions = page.searchOptions
Expand Down
6 changes: 3 additions & 3 deletions asf_search/search/search_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def search_generator(
last_page = process_page(
items, maxResults, subquery_max_results, total, subquery_count, opts
)
ASF_LOGGER.warning(f'Page Processing Time {time.time() - perf}')
ASF_LOGGER.info(f'Page Processing Time {time.time() - perf}')
subquery_count += len(last_page)
total += len(last_page)
last_page.searchComplete = subquery_count == subquery_max_results or total == maxResults
Expand Down Expand Up @@ -296,7 +296,7 @@ def query_cmr(

perf = time.time()
items = [as_ASFProduct(f, session=session) for f in response.json()['items']]
ASF_LOGGER.warning(f'Product Subclassing Time {time.time() - perf}')
ASF_LOGGER.debug(f'Product Subclassing Time {time.time() - perf}')
hits: int = response.json()['hits'] # total count of products given search opts
# 9-10 per process
# 3.9-5 per process
Expand Down Expand Up @@ -354,7 +354,7 @@ def get_page(session: ASFSession, url: str, translated_opts: List) -> Response:
f'Connection Error (Timeout): CMR took too long to respond. Set asf constant "asf_search.constants.INTERNAL.CMR_TIMEOUT" to increase. ({url=}, timeout={CMR_TIMEOUT})'
) from exc

ASF_LOGGER.warning(f'Query Time Elapsed {time.time() - perf}')
ASF_LOGGER.info(f'Query Time Elapsed {time.time() - perf}')
return response


Expand Down
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
'pytest-cov',
'pytest-xdist',
'coverage',
'requests-mock',
'requests-mock==1.11.0',
'nbformat',
'nbconvert',
'ipykernel',
Expand Down Expand Up @@ -47,7 +47,7 @@
packages=find_packages(exclude=['tests.*', 'tests', 'examples.*', 'examples']),
package_dir={'asf_search': 'asf_search'},
include_package_data=True,
python_requires='>=3.8',
python_requires='>=3.9',
install_requires=requirements,
extras_require={'test': test_requirements, 'extras': extra_requirements},
license='BSD',
Expand All @@ -60,7 +60,6 @@
'Intended Audience :: Science/Research',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
Expand Down
13 changes: 9 additions & 4 deletions tests/ASFSearchResults/test_ASFSearchResults.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from shapely.geometry.base import BaseGeometry
from asf_search.CMR.translate import try_parse_date
from asf_search.constants import PLATFORM
from asf_search import ASF_LOGGER
import re

from asf_search.exceptions import ASFSearchError
Expand Down Expand Up @@ -211,15 +212,19 @@ def run_test_ASFSearchResults_intersection(wkt: str):
def overlap_check(s1: BaseGeometry, s2: BaseGeometry):
# Truthy when the two geometries overlap or touch, or when `s2.distance(s1)`
# is at most 0.005.
# NOTE(review): 0.005 is presumably a tolerance in coordinate degrees — confirm.
return s1.overlaps(s2) or s1.touches(s2) or s2.distance(s1) <= 0.005

asf.constants.INTERNAL.CMR_TIMEOUT = 60
for platform in platforms:
asf.constants.INTERNAL.CMR_TIMEOUT = 120
try:
results = asf.geo_search(intersectsWith=wkt, platform=platform, maxResults=250)
except ASFSearchError as exc:
asf.constants.INTERNAL.CMR_TIMEOUT = 30
raise BaseException(
f'Failed to perform intersection test with wkt: {wkt}\nplatform: {platform}.\nOriginal exception: {exc}'
)
if str(exc).startswith("Connection Error (Timeout):"):
ASF_LOGGER.warning('CMR timeout while running intersection test')
continue
else:
raise BaseException(
f'Failed to perform intersection test with wkt: {wkt}\nplatform: {platform}.\nOriginal exception: {exc}'
)

asf.constants.INTERNAL.CMR_TIMEOUT = 30
for product in results:
Expand Down
68 changes: 61 additions & 7 deletions tests/pytest-managers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from typing import Dict, List
from asf_search import (
ASFSearchOptions,
ASFSession,
FileDownloadType,
)
from asf_search import ASFSearchOptions, ASFSession, FileDownloadType, search

from asf_search.exceptions import ASFAuthenticationError

from ASFProduct.test_ASFProduct import (
Expand Down Expand Up @@ -481,8 +478,8 @@ def test_validator_map_validate(**args) -> None:
run_test_validator_map_validate(key, value, output)


def test_ASFSearchOptions_validator(**args) -> None:
test_info = args['test_info']
def test_ASFSearchOptions_validator(**kargs) -> None:
test_info = kargs['test_info']
validator_name = get_resource(test_info['validator'])
param = safe_load_tuple(get_resource(test_info['input']))
output = safe_load_tuple(get_resource(test_info['output']))
Expand All @@ -494,6 +491,63 @@ def test_ASFSearchOptions(**kwargs) -> None:
run_test_ASFSearchOptions(**kwargs)


def test_ASFSearchResults_get_urls() -> None:
"""Checks `find_urls()` on the search results for a single OPERA RTC-S1 granule.

Covers: no filter, extension filter, regex pattern filter, direct access
(s3) uris, and extension combined with a pattern.

NOTE(review): this calls `search()` directly rather than using a fixture, so
it presumably needs network access to CMR — confirm.
NOTE(review): the test is named `get_urls` but exercises `find_urls`.
"""
response = search(
granule_list=[
'OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0'
]
)
# No arguments: the expected list holds only https urls (data, browse,
# their .md5 companions and the s3credentials endpoint), already sorted.
actual_urls = response.find_urls()

expected_urls = [
'https://cumulus.asf.alaska.edu/s3credentials',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE.png',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE.png.md5',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_low-res.png',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_low-res.png.md5',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_thumbnail.png',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_thumbnail.png.md5',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0.h5',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0.h5.md5',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0.iso.xml',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0.iso.xml.md5',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_VH.tif',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_VH.tif.md5',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_VV.tif',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_VV.tif.md5',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_mask.tif',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_mask.tif.md5',
]
assert actual_urls == expected_urls
# Extension filter: only urls whose extension is exactly '.tif' (the
# '.tif.md5' companions are excluded).
assert response.find_urls('.tif') == [
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_VH.tif',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_VV.tif',
'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_mask.tif',
]
# Pattern filter only.
assert response.find_urls(pattern='.*s3credentials') == [
'https://cumulus.asf.alaska.edu/s3credentials'
]
# directAccess=True switches the search to the s3:// uris.
assert response.find_urls('.tif', directAccess=True) == [
's3://asf-cumulus-prod-opera-products/OPERA_L2_RTC-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_VH.tif',
's3://asf-cumulus-prod-opera-products/OPERA_L2_RTC-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_VV.tif',
's3://asf-cumulus-prod-opera-products/OPERA_L2_RTC-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_mask.tif',
]

# Pattern matching 'BROWSE' anywhere in the url, with and without an
# additional '.png' extension filter.
assert response.find_urls(pattern=r'.*BROWSE.*') == [
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE.png',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE.png.md5',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_low-res.png',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_low-res.png.md5',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_thumbnail.png',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_thumbnail.png.md5',
]
assert response.find_urls('.png', pattern=r'.*BROWSE.*') == [
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE.png',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_low-res.png',
'https://datapool.asf.alaska.edu/BROWSE/OPERA-S1/OPERA_L2_RTC-S1_T131-279916-IW1_20231202T162856Z_20231202T232622Z_S1A_30_v1.0_BROWSE_thumbnail.png',
]


def test_ASFSearchResults_intersection(**kwargs) -> None:
wkt = get_resource(kwargs['test_info']['wkt'])
run_test_ASFSearchResults_intersection(wkt)
Expand Down

0 comments on commit 3c5a301

Please sign in to comment.