Skip to content

Commit

Permalink
Merge pull request #95 from WorldCereal/develop
Browse files Browse the repository at this point in the history
Merge branch 'develop' into 'main'
  • Loading branch information
mbattude-cs authored Apr 21, 2022
2 parents 0c094aa + c917d02 commit 0fe1592
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 10 deletions.
10 changes: 8 additions & 2 deletions src/ewoc_dag/bucket/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ def _download_s2_prd(
product_date = s2_prd_info.datatake_sensing_start_time.date().strftime(
"%Y%m%d"
)
product_name = self._find_product(products_path, product_date)
folder_list = self._list_folders(products_path, request_payer=False)
product_name = self._find_product(folder_list, product_date)
(out_dirpath / product_name).mkdir(exist_ok=True)
prefix_components.append(product_name)
prd_prefix = "/".join(prefix_components) + "/"
Expand Down Expand Up @@ -201,7 +202,12 @@ def _download_s2_prd(
prefix_components.append(
str(s2_prd_info.datatake_sensing_start_time.date().day)
)
prefix_components.append("0")

products_path = "/".join(prefix_components) + "/"
folder_list = self._list_folders(products_path, request_payer=True)
folder_number = self._find_aws_folder_number(folder_list)
prefix_components.append(folder_number)

tile_prefix = "/".join(prefix_components) + "/"
logger.info("tile_prefix: %s", tile_prefix)

Expand Down
59 changes: 51 additions & 8 deletions src/ewoc_dag/bucket/eobucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,23 +176,45 @@ def _s3_basepath(self) -> str:
"""
return f"s3://{self._bucket_name}"

def _find_product(self, prd_path: str, prd_date: str) -> str:
"""Find the product name in the bucket
def _list_folders(self, prd_path: str, request_payer: bool = False) -> list:
"""Find the list of folders in the bucket
Args:
prd_path (str): prd path
prd_date (str): prd date
request_payer(bool): request payer
Returns:
str: product name
"""

def list_folders(s3_client, bucket_name, prefix):
response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=prefix, Delimiter='/')
for content in response.get('CommonPrefixes', []):
yield content.get('Prefix')
if request_payer is True:
response = self._s3_client.list_objects_v2(Bucket=self._bucket_name,
Prefix=prd_path,
Delimiter='/',
RequestPayer="requester")
else:
response = self._s3_client.list_objects_v2(Bucket=self._bucket_name,
Prefix=prd_path,
Delimiter='/')

folder_list = []
for content in response.get('CommonPrefixes', []):
folder_list.append(content.get('Prefix'))

return folder_list


def _find_product(self, folder_list: list, prd_date: str) -> str:
"""Find the product name in the bucket from the list of folders
Args:
folder_list (list): list of folders in the bucket
prd_date (str): prd date
Returns:
str: product name
"""

folder_list = list_folders(self._s3_client, self._bucket_name, prefix=prd_path)
for folder in folder_list:
logger.debug('Folder found: %s', folder.split('/')[-2])

Expand All @@ -202,6 +224,27 @@ def list_folders(s3_client, bucket_name, prefix):

return prd_name

def _find_aws_folder_number(self, folder_list: list, folder_number: str = "0") -> str:
    """Pick the highest-numbered folder among a list of bucket prefixes

    The folder name is taken as the second-to-last '/'-separated component
    of each prefix (i.e. the last directory of a prefix ending with '/').
    Names are compared by their integer value, and the name is returned
    exactly as it appears in the prefix.

    Args:
        folder_list (list): folder prefixes found in the bucket
        folder_number (str): starting value; it is returned unchanged when
            no folder has a strictly greater number
    Returns:
        str: name of the folder with the greatest number
    """
    highest = folder_number

    for prefix in folder_list:
        # Extract the trailing directory name once and reuse it below.
        candidate = prefix.split('/')[-2]
        logger.debug('Folder found: %s', candidate)

        # Strict comparison: on equal numbers the earlier value is kept.
        if int(candidate) > int(highest):
            highest = candidate

    logger.debug('Folder number: %s', highest)

    return highest

def _download_prd(
self,
prd_prefix: str,
Expand Down

0 comments on commit 0fe1592

Please sign in to comment.