From 17b5c0d7d68fadca8271f4f5273df08221b81571 Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Thu, 29 Aug 2024 11:10:39 +0200 Subject: [PATCH 1/4] feat: add S3Path --- pydantic_extra_types/s3.py | 40 ++++++++++ tests/test_s3.py | 152 +++++++++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 pydantic_extra_types/s3.py create mode 100644 tests/test_s3.py diff --git a/pydantic_extra_types/s3.py b/pydantic_extra_types/s3.py new file mode 100644 index 0000000..55e6069 --- /dev/null +++ b/pydantic_extra_types/s3.py @@ -0,0 +1,40 @@ +""" +The `pydantic_extra_types.s3` module provides the +[`S3Path`][pydantic_extra_types.s3.S3Path] data type. + +A simple AWS S3 URL parser. +It also provides the `Bucket`, `Key` component. +""" + +import re +from typing import Any, ClassVar, Type + +from pydantic import GetCoreSchemaHandler +from pydantic_core import core_schema + + +class S3Path(str): + patt: ClassVar[str] = r'^s3://([^/]+)/(.*?([^/]+)/?)$' + + def __init__(self, value: str) -> None: + self.value = value + self.bucket, self.key, self.last_key = re.match(self.patt, self.value).groups() + + def __str__(self) -> str: # pragma: no cover + return self.value + + def __repr__(self) -> str: # pragma: no cover + return f'{self.__class__.__name__}({self.value!r})' + + @classmethod + def _validate(cls, __input_value: str, _: core_schema.ValidationInfo) -> 'S3Path': + return cls(__input_value) + + @classmethod + def __get_pydantic_core_schema__(cls, source: Type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema: + _, _ = source, handler + return core_schema.with_info_after_validator_function( + cls._validate, + core_schema.str_schema(pattern=cls.patt), + field_name=cls.__name__, + ) diff --git a/tests/test_s3.py b/tests/test_s3.py new file mode 100644 index 0000000..abfb1e4 --- /dev/null +++ b/tests/test_s3.py @@ -0,0 +1,152 @@ +import pytest +from pydantic import BaseModel, ValidationError + +from pydantic_extra_types.s3 
import S3Path + + +class S3Check(BaseModel): + path: S3Path + + +@pytest.mark.parametrize( + 'raw,bucket,key,last_key', + [ + ( + 's3://my-data-bucket/2023/08/29/sales-report.csv', + 'my-data-bucket', + '2023/08/29/sales-report.csv', + 'sales-report.csv', + ), + ( + 's3://logs-bucket/app-logs/production/2024/07/01/application-log.txt', + 'logs-bucket', + 'app-logs/production/2024/07/01/application-log.txt', + 'application-log.txt', + ), + ( + 's3://backup-storage/user_data/john_doe/photos/photo-2024-08-15.jpg', + 'backup-storage', + 'user_data/john_doe/photos/photo-2024-08-15.jpg', + 'photo-2024-08-15.jpg', + ), + ( + 's3://analytics-bucket/weekly-reports/Q3/2023/week-35-summary.pdf', + 'analytics-bucket', + 'weekly-reports/Q3/2023/week-35-summary.pdf', + 'week-35-summary.pdf', + ), + ( + 's3://project-data/docs/presentations/quarterly_review.pptx', + 'project-data', + 'docs/presentations/quarterly_review.pptx', + 'quarterly_review.pptx', + ), + ( + 's3://my-music-archive/genres/rock/2024/favorite-songs.mp3', + 'my-music-archive', + 'genres/rock/2024/favorite-songs.mp3', + 'favorite-songs.mp3', + ), + ( + 's3://video-uploads/movies/2024/03/action/thriller/movie-trailer.mp4', + 'video-uploads', + 'movies/2024/03/action/thriller/movie-trailer.mp4', + 'movie-trailer.mp4', + ), + ( + 's3://company-files/legal/contracts/contract-2023-09-01.pdf', + 'company-files', + 'legal/contracts/contract-2023-09-01.pdf', + 'contract-2023-09-01.pdf', + ), + ( + 's3://dev-environment/source-code/release_v1.0.2.zip', + 'dev-environment', + 'source-code/release_v1.0.2.zip', + 'release_v1.0.2.zip', + ), + ( + 's3://public-bucket/open-data/geojson/maps/city_boundaries.geojson', + 'public-bucket', + 'open-data/geojson/maps/city_boundaries.geojson', + 'city_boundaries.geojson', + ), + ( + 's3://image-storage/2024/portfolio/shoots/wedding/couple_photo_12.jpg', + 'image-storage', + '2024/portfolio/shoots/wedding/couple_photo_12.jpg', + 'couple_photo_12.jpg', + ), + ( + 
's3://finance-data/reports/2024/Q2/income_statement.xlsx', + 'finance-data', + 'reports/2024/Q2/income_statement.xlsx', + 'income_statement.xlsx', + ), + ( + 's3://training-data/nlp/corpora/english/2023/text_corpus.txt', + 'training-data', + 'nlp/corpora/english/2023/text_corpus.txt', + 'text_corpus.txt', + ), + ( + 's3://ecommerce-backup/2024/transactions/august/orders_2024_08_28.csv', + 'ecommerce-backup', + '2024/transactions/august/orders_2024_08_28.csv', + 'orders_2024_08_28.csv', + ), + ( + 's3://gaming-assets/3d_models/characters/hero/model_v5.obj', + 'gaming-assets', + '3d_models/characters/hero/model_v5.obj', + 'model_v5.obj', + ), + ( + 's3://iot-sensor-data/2024/temperature_sensors/sensor_42_readings.csv', + 'iot-sensor-data', + '2024/temperature_sensors/sensor_42_readings.csv', + 'sensor_42_readings.csv', + ), + ( + 's3://user-uploads/avatars/user123/avatar_2024_08_29.png', + 'user-uploads', + 'avatars/user123/avatar_2024_08_29.png', + 'avatar_2024_08_29.png', + ), + ( + 's3://media-library/podcasts/2023/episode_45.mp3', + 'media-library', + 'podcasts/2023/episode_45.mp3', + 'episode_45.mp3', + ), + ( + 's3://logs-bucket/security/firewall-logs/2024/08/failed_attempts.log', + 'logs-bucket', + 'security/firewall-logs/2024/08/failed_attempts.log', + 'failed_attempts.log', + ), + ( + 's3://data-warehouse/financials/quarterly/2024/Q1/profit_loss.csv', + 'data-warehouse', + 'financials/quarterly/2024/Q1/profit_loss.csv', + 'profit_loss.csv', + ), + ( + 's3://data-warehouse/financials/quarterly/2024/Q1', + 'data-warehouse', + 'financials/quarterly/2024/Q1', + 'Q1', + ), + ], +) +def test_s3(raw: str, bucket: str, key: str, last_key: str): + model = S3Check(path=raw) + assert model.path == S3Path(raw) + assert model.path.bucket == bucket + assert model.path.key == key + assert model.path.last_key == last_key + + +def test_wrong_s3(): + with pytest.raises(ValidationError): + S3Check(path='s3/ok') From 134ee608a2bca38008b820ca9cedb7c91db1ece6 Mon Sep 17 00:00:00 
2001 From: Luciano Scarpulla Date: Thu, 29 Aug 2024 11:19:52 +0200 Subject: [PATCH 2/4] fix: mypy --- pydantic_extra_types/s3.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pydantic_extra_types/s3.py b/pydantic_extra_types/s3.py index 55e6069..a57c5dc 100644 --- a/pydantic_extra_types/s3.py +++ b/pydantic_extra_types/s3.py @@ -6,6 +6,8 @@ It also provides the `Bucket`, `Key` component. """ +from __future__ import annotations + import re from typing import Any, ClassVar, Type @@ -18,7 +20,10 @@ class S3Path(str): def __init__(self, value: str) -> None: self.value = value - self.bucket, self.key, self.last_key = re.match(self.patt, self.value).groups() + groups: tuple[str, str, str] = re.match(self.patt, self.value).groups() # type: ignore + self.bucket: str = groups[0] + self.key: str = groups[1] + self.last_key: str = groups[2] def __str__(self) -> str: # pragma: no cover return self.value From aed6ab5c3b9d322eba742074589b5e31f0c5987c Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Thu, 29 Aug 2024 11:27:28 +0200 Subject: [PATCH 3/4] fix: remove single quotes due to __future__.annotations --- pydantic_extra_types/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydantic_extra_types/s3.py b/pydantic_extra_types/s3.py index a57c5dc..dd9f846 100644 --- a/pydantic_extra_types/s3.py +++ b/pydantic_extra_types/s3.py @@ -32,7 +32,7 @@ def __repr__(self) -> str: # pragma: no cover return f'{self.__class__.__name__}({self.value!r})' @classmethod - def _validate(cls, __input_value: str, _: core_schema.ValidationInfo) -> 'S3Path': + def _validate(cls, __input_value: str, _: core_schema.ValidationInfo) -> S3Path: return cls(__input_value) @classmethod From ea31b6132a097123ec29435e075c8bb7c01183f8 Mon Sep 17 00:00:00 2001 From: Luciano Scarpulla Date: Tue, 3 Sep 2024 19:10:12 +0200 Subject: [PATCH 4/4] add docstring and test --- pydantic_extra_types/s3.py | 25 +++++++++++++++++++++++++ tests/test_json_schema.py | 18 
++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/pydantic_extra_types/s3.py b/pydantic_extra_types/s3.py index dd9f846..c28ffee 100644 --- a/pydantic_extra_types/s3.py +++ b/pydantic_extra_types/s3.py @@ -16,6 +16,31 @@ class S3Path(str): + """ + An object representing a valid S3 path. + This type also allows you to access the `bucket` and `key` components of the S3 path. + It also contains the `last_key` which represents the last part of the path (typically a file). + + ```python + from pydantic import BaseModel + from pydantic_extra_types.s3 import S3Path + + class TestModel(BaseModel): + path: S3Path + + p = 's3://my-data-bucket/2023/08/29/sales-report.csv' + model = TestModel(path=p) + model + + #> TestModel(path=S3Path('s3://my-data-bucket/2023/08/29/sales-report.csv')) + + model.path.bucket + + #> 'my-data-bucket' + + ``` + """ + patt: ClassVar[str] = r'^s3://([^/]+)/(.*?([^/]+)/?)$' def __init__(self, value: str) -> None: diff --git a/tests/test_json_schema.py b/tests/test_json_schema.py index 84fced2..86bdc57 100644 --- a/tests/test_json_schema.py +++ b/tests/test_json_schema.py @@ -31,6 +31,7 @@ from pydantic_extra_types.payment import PaymentCardNumber from pydantic_extra_types.pendulum_dt import DateTime from pydantic_extra_types.phone_numbers import PhoneNumber, PhoneNumberValidator +from pydantic_extra_types.s3 import S3Path from pydantic_extra_types.script_code import ISO_15924 from pydantic_extra_types.semantic_version import SemanticVersion from pydantic_extra_types.semver import _VersionPydanticAnnotation @@ -433,6 +434,23 @@ 'required': ['x'], }, ), + ( + S3Path, + { + 'title': 'Model', + 'type': 'object', + 'properties': { + 'x': { + 'pattern': '^s3://([^/]+)/(.*?([^/]+)/?)$', + 'title': 'X', + 'type': 'string', + }, + }, + 'required': [ + 'x', + ], + }, + ), ], ) def test_json_schema(cls, expected):