From f2fb50d428be81b897109f4bce980aec33443645 Mon Sep 17 00:00:00 2001 From: mbattude-cs Date: Mon, 11 Apr 2022 14:28:02 +0200 Subject: [PATCH 1/5] add function du check s2 product in the bucket --- src/ewoc_dag/bucket/eobucket.py | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/ewoc_dag/bucket/eobucket.py b/src/ewoc_dag/bucket/eobucket.py index fefcb44..a2f4c51 100644 --- a/src/ewoc_dag/bucket/eobucket.py +++ b/src/ewoc_dag/bucket/eobucket.py @@ -112,6 +112,41 @@ def _check_bucket(self) -> bool: return True + def _check_product(self, prefix) -> bool: + """Check if the product is usable + + Returns: + bool: return True if the product is accessible and False otherwise + """ + # # !! Only works to check a file (eg. B12.tif), not working for a folder !! + # try: + # self._s3_client.head_object(Bucket=self._bucket_name, Key=prefix) + # except ClientError as err: + # error_code = err.response["Error"]["Code"] + # if error_code == "404": + # logger.critical("Path %s/%s does not exist!", self._bucket_name, prefix) + # return False + + # return True + + s3_result = self._s3_client.list_objects_v2(Bucket=self._bucket_name, Prefix=prefix, Delimiter = "/") + + if 'Contents' not in s3_result: + print("Path %s/%s does not exist!" % (self._bucket_name, prefix)) + return False + else: + + list_product_files = [] + for obj in s3_result.get('Contents'): + list_product_files.append(obj.get('Key')) + + if len(list_product_files)>15: + print("Path %s/%s is full with %s files \n" % (self._bucket_name, prefix, len(list_product_files))) + return True + else: + print("Path %s/%s is partial with %s files \n" % (self._bucket_name, prefix, len(list_product_files))) + return False + def _s3_basepath(self) -> str: """Compute the basepath of the bucket s3://bucket_name From ce3c8fd0a2222de83d12476b528475021a2b5d41 Mon Sep 17 00:00:00 2001 From: mbattude-cs Date: Tue, 12 Apr 2022 10:59:01 +0200 Subject: [PATCH 2/5] Add check_product_file function --- src/ewoc_dag/bucket/eobucket.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/ewoc_dag/bucket/eobucket.py b/src/ewoc_dag/bucket/eobucket.py index a2f4c51..26cc745 100644 --- a/src/ewoc_dag/bucket/eobucket.py +++ b/src/ewoc_dag/bucket/eobucket.py @@ -112,23 +112,30 @@ def _check_bucket(self) -> bool: return True + def _check_product_file(self, prefix) -> bool: + """Check if the product contains a given file + + Returns: + bool: return True if the product file is accessible and False otherwise + """ + try: + self._s3_client.head_object(Bucket=self._bucket_name, Key=prefix) + except ClientError as err: + error_code = err.response["Error"]["Code"] + if error_code == "404": + logger.critical("Path %s/%s does not exist!", self._bucket_name, prefix) + elif error_code == "403": + logger.critical("Acces forbidden to %s/%s path!", self._bucket_name, prefix) + return False + + return True + def _check_product(self, prefix) -> bool: """Check if the product is usable Returns: bool: return True if the product is accessible and False otherwise """ - # # !! Only works to check a file (eg. B12.tif), not working for a folder !! - # try: - # self._s3_client.head_object(Bucket=self._bucket_name, Key=prefix) - # except ClientError as err: - # error_code = err.response["Error"]["Code"] - # if error_code == "404": - # logger.critical("Path %s/%s does not exist!", self._bucket_name, prefix) - # return False - - # return True - s3_result = self._s3_client.list_objects_v2(Bucket=self._bucket_name, Prefix=prefix, Delimiter = "/") if 'Contents' not in s3_result: From 38082e150b7bb998d4ed1aae783d6d13289b2a35 Mon Sep 17 00:00:00 2001 From: mbattude-cs Date: Tue, 12 Apr 2022 11:19:57 +0200 Subject: [PATCH 3/5] modification print/logger --- src/ewoc_dag/bucket/eobucket.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ewoc_dag/bucket/eobucket.py b/src/ewoc_dag/bucket/eobucket.py index 26cc745..ee1756a 100644 --- a/src/ewoc_dag/bucket/eobucket.py +++ b/src/ewoc_dag/bucket/eobucket.py @@ -139,7 +139,7 @@ def _check_product(self, prefix) -> bool: s3_result = self._s3_client.list_objects_v2(Bucket=self._bucket_name, Prefix=prefix, Delimiter = "/") if 'Contents' not in s3_result: - print("Path %s/%s does not exist!" % (self._bucket_name, prefix)) + logger.critical("Path %s/%s does not exist!" % (self._bucket_name, prefix)) return False else: @@ -148,10 +148,10 @@ def _check_product(self, prefix) -> bool: list_product_files.append(obj.get('Key')) if len(list_product_files)>15: - print("Path %s/%s is full with %s files \n" % (self._bucket_name, prefix, len(list_product_files))) + logger.debug("Path %s/%s is full with %s files \n" % (self._bucket_name, prefix, len(list_product_files))) return True else: - print("Path %s/%s is partial with %s files \n" % (self._bucket_name, prefix, len(list_product_files))) + logger.debug("Path %s/%s is partial with %s files \n" % (self._bucket_name, prefix, len(list_product_files))) return False def _s3_basepath(self) -> str: From 3658356baad868b60eb4706479c264ff7adc7baa Mon Sep 17 00:00:00 2001 From: mbattude-cs Date: Tue, 12 Apr 2022 15:51:46 +0200 Subject: [PATCH 4/5] update check_product function for aws --- src/ewoc_dag/bucket/eobucket.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/ewoc_dag/bucket/eobucket.py b/src/ewoc_dag/bucket/eobucket.py index ee1756a..ae23d47 100644 --- a/src/ewoc_dag/bucket/eobucket.py +++ b/src/ewoc_dag/bucket/eobucket.py @@ -130,14 +130,26 @@ def _check_product_file(self, prefix) -> bool: return True - def _check_product(self, prefix) -> bool: + def _check_product(self, prefix, threshold, request_payer: bool = False) -> bool: """Check if the product is usable Returns: bool: return True if the product is accessible and False otherwise """ - s3_result = self._s3_client.list_objects_v2(Bucket=self._bucket_name, Prefix=prefix, Delimiter = "/") - + if request_payer is True: + s3_result = self._s3_client.list_objects_v2( + Bucket=self._bucket_name, + Prefix=prefix, + Delimiter = "/", + RequestPayer="requester" + ) + else: + s3_result = self._s3_client.list_objects_v2( + Bucket=self._bucket_name, + Prefix=prefix, + Delimiter = "/" + ) + if 'Contents' not in s3_result: logger.critical("Path %s/%s does not exist!" % (self._bucket_name, prefix)) return False @@ -147,7 +159,7 @@ def _check_product(self, prefix) -> bool: for obj in s3_result.get('Contents'): list_product_files.append(obj.get('Key')) - if len(list_product_files)>15: + if len(list_product_files)>threshold: logger.debug("Path %s/%s is full with %s files \n" % (self._bucket_name, prefix, len(list_product_files))) return True else: From 99e079eb10997fd7db0ab4782511c449d4674e0a Mon Sep 17 00:00:00 2001 From: mbattude-cs Date: Tue, 12 Apr 2022 15:55:15 +0200 Subject: [PATCH 5/5] fix pylint issues --- src/ewoc_dag/bucket/eobucket.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/ewoc_dag/bucket/eobucket.py b/src/ewoc_dag/bucket/eobucket.py index ae23d47..ef40202 100644 --- a/src/ewoc_dag/bucket/eobucket.py +++ b/src/ewoc_dag/bucket/eobucket.py @@ -151,19 +151,21 @@ def _check_product(self, prefix, threshold, request_payer: bool = False) -> bool ) if 'Contents' not in s3_result: - logger.critical("Path %s/%s does not exist!" % (self._bucket_name, prefix)) + logger.critical("Path %s/%s does not exist!", self._bucket_name, prefix) return False else: list_product_files = [] - for obj in s3_result.get('Contents'): + for obj in s3_result.get('Contents'): list_product_files.append(obj.get('Key')) if len(list_product_files)>threshold: - logger.debug("Path %s/%s is full with %s files \n" % (self._bucket_name, prefix, len(list_product_files))) + logger.debug("Path %s/%s is full with %s files \n", \ + self._bucket_name, prefix, len(list_product_files)) return True else: - logger.debug("Path %s/%s is partial with %s files \n" % (self._bucket_name, prefix, len(list_product_files))) + logger.debug("Path %s/%s is partial with %s files \n", \ + self._bucket_name, prefix, len(list_product_files)) return False def _s3_basepath(self) -> str: @@ -180,7 +182,7 @@ def _find_product(self, prd_path: str, prd_date: str) -> str: Args: prd_path (str): prd path prd_date (str): prd date - + Returns: str: product name """ @@ -193,7 +195,7 @@ def list_folders(s3_client, bucket_name, prefix): folder_list = list_folders(self._s3_client, self._bucket_name, prefix=prd_path) for folder in folder_list: logger.debug('Folder found: %s', folder.split('/')[-2]) - + if prd_date in folder.split('/')[-2]: prd_name = folder.split('/')[-2] logger.debug('Product name: %s', prd_name)