diff --git a/pydantic_extra_types/s3.py b/pydantic_extra_types/s3.py new file mode 100644 index 0000000..c28ffee --- /dev/null +++ b/pydantic_extra_types/s3.py @@ -0,0 +1,70 @@ +""" +The `pydantic_extra_types.s3` module provides the +[`S3Path`][pydantic_extra_types.s3.S3Path] data type. + +A simpleAWS S3 URLs parser. +It also provides the `Bucket`, `Key` component. +""" + +from __future__ import annotations + +import re +from typing import Any, ClassVar, Type + +from pydantic import GetCoreSchemaHandler +from pydantic_core import core_schema + + +class S3Path(str): + """ + An object representing a valid S3 path. + This type also allows you to access the `bucket` and `key` component of the S3 path. + It also contains the `last_key` which represents the last part of the path (tipically a file). + + ```python + from pydantic import BaseModel + from pydantic_extra_types.s3 import S3Path + + class TestModel(BaseModel): + path: S3Path + + p = 's3://my-data-bucket/2023/08/29/sales-report.csv' + model = TestModel(path=p) + model + + #> TestModel(path=S3Path('s3://my-data-bucket/2023/08/29/sales-report.csv')) + + model.path.bucket + + #> 'my-data-bucket' + + ``` + """ + + patt: ClassVar[str] = r'^s3://([^/]+)/(.*?([^/]+)/?)$' + + def __init__(self, value: str) -> None: + self.value = value + groups: tuple[str, str, str] = re.match(self.patt, self.value).groups() # type: ignore + self.bucket: str = groups[0] + self.key: str = groups[1] + self.last_key: str = groups[2] + + def __str__(self) -> str: # pragma: no cover + return self.value + + def __repr__(self) -> str: # pragma: no cover + return f'{self.__class__.__name__}({self.value!r})' + + @classmethod + def _validate(cls, __input_value: str, _: core_schema.ValidationInfo) -> S3Path: + return cls(__input_value) + + @classmethod + def __get_pydantic_core_schema__(cls, source: Type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema: + _, _ = source, handler + return core_schema.with_info_after_validator_function( + cls._validate, + core_schema.str_schema(pattern=cls.patt), + field_name=cls.__class__.__name__, + ) diff --git a/tests/test_json_schema.py b/tests/test_json_schema.py index 84fced2..86bdc57 100644 --- a/tests/test_json_schema.py +++ b/tests/test_json_schema.py @@ -31,6 +31,7 @@ from pydantic_extra_types.payment import PaymentCardNumber from pydantic_extra_types.pendulum_dt import DateTime from pydantic_extra_types.phone_numbers import PhoneNumber, PhoneNumberValidator +from pydantic_extra_types.s3 import S3Path from pydantic_extra_types.script_code import ISO_15924 from pydantic_extra_types.semantic_version import SemanticVersion from pydantic_extra_types.semver import _VersionPydanticAnnotation @@ -433,6 +434,23 @@ 'required': ['x'], }, ), + ( + S3Path, + { + 'title': 'Model', + 'type': 'object', + 'properties': { + 'x': { + 'pattern': '^s3://([^/]+)/(.*?([^/]+)/?)$', + 'title': 'X', + 'type': 'string', + }, + }, + 'required': [ + 'x', + ], + }, + ), ], ) def test_json_schema(cls, expected): diff --git a/tests/test_s3.py b/tests/test_s3.py new file mode 100644 index 0000000..abfb1e4 --- /dev/null +++ b/tests/test_s3.py @@ -0,0 +1,152 @@ +import pytest +from pydantic import BaseModel, ValidationError + +from pydantic_extra_types.s3 import S3Path + + +class S3Check(BaseModel): + path: S3Path + + +@pytest.mark.parametrize( + 'raw,bucket,key,last_key', + [ + ( + 's3://my-data-bucket/2023/08/29/sales-report.csv', + 'my-data-bucket', + '2023/08/29/sales-report.csv', + 'sales-report.csv', + ), + ( + 's3://logs-bucket/app-logs/production/2024/07/01/application-log.txt', + 'logs-bucket', + 'app-logs/production/2024/07/01/application-log.txt', + 'application-log.txt', + ), + ( + 's3://backup-storage/user_data/john_doe/photos/photo-2024-08-15.jpg', + 'backup-storage', + 'user_data/john_doe/photos/photo-2024-08-15.jpg', + 'photo-2024-08-15.jpg', + ), + ( + 's3://analytics-bucket/weekly-reports/Q3/2023/week-35-summary.pdf', + 'analytics-bucket', + 'weekly-reports/Q3/2023/week-35-summary.pdf', + 'week-35-summary.pdf', + ), + ( + 's3://project-data/docs/presentations/quarterly_review.pptx', + 'project-data', + 'docs/presentations/quarterly_review.pptx', + 'quarterly_review.pptx', + ), + ( + 's3://my-music-archive/genres/rock/2024/favorite-songs.mp3', + 'my-music-archive', + 'genres/rock/2024/favorite-songs.mp3', + 'favorite-songs.mp3', + ), + ( + 's3://video-uploads/movies/2024/03/action/thriller/movie-trailer.mp4', + 'video-uploads', + 'movies/2024/03/action/thriller/movie-trailer.mp4', + 'movie-trailer.mp4', + ), + ( + 's3://company-files/legal/contracts/contract-2023-09-01.pdf', + 'company-files', + 'legal/contracts/contract-2023-09-01.pdf', + 'contract-2023-09-01.pdf', + ), + ( + 's3://dev-environment/source-code/release_v1.0.2.zip', + 'dev-environment', + 'source-code/release_v1.0.2.zip', + 'release_v1.0.2.zip', + ), + ( + 's3://public-bucket/open-data/geojson/maps/city_boundaries.geojson', + 'public-bucket', + 'open-data/geojson/maps/city_boundaries.geojson', + 'city_boundaries.geojson', + ), + ( + 's3://image-storage/2024/portfolio/shoots/wedding/couple_photo_12.jpg', + 'image-storage', + '2024/portfolio/shoots/wedding/couple_photo_12.jpg', + 'couple_photo_12.jpg', + ), + ( + 's3://finance-data/reports/2024/Q2/income_statement.xlsx', + 'finance-data', + 'reports/2024/Q2/income_statement.xlsx', + 'income_statement.xlsx', + ), + ( + 's3://training-data/nlp/corpora/english/2023/text_corpus.txt', + 'training-data', + 'nlp/corpora/english/2023/text_corpus.txt', + 'text_corpus.txt', + ), + ( + 's3://ecommerce-backup/2024/transactions/august/orders_2024_08_28.csv', + 'ecommerce-backup', + '2024/transactions/august/orders_2024_08_28.csv', + 'orders_2024_08_28.csv', + ), + ( + 's3://gaming-assets/3d_models/characters/hero/model_v5.obj', + 'gaming-assets', + '3d_models/characters/hero/model_v5.obj', + 'model_v5.obj', + ), + ( + 's3://iot-sensor-data/2024/temperature_sensors/sensor_42_readings.csv', + 'iot-sensor-data', + '2024/temperature_sensors/sensor_42_readings.csv', + 'sensor_42_readings.csv', + ), + ( + 's3://user-uploads/avatars/user123/avatar_2024_08_29.png', + 'user-uploads', + 'avatars/user123/avatar_2024_08_29.png', + 'avatar_2024_08_29.png', + ), + ( + 's3://media-library/podcasts/2023/episode_45.mp3', + 'media-library', + 'podcasts/2023/episode_45.mp3', + 'episode_45.mp3', + ), + ( + 's3://logs-bucket/security/firewall-logs/2024/08/failed_attempts.log', + 'logs-bucket', + 'security/firewall-logs/2024/08/failed_attempts.log', + 'failed_attempts.log', + ), + ( + 's3://data-warehouse/financials/quarterly/2024/Q1/profit_loss.csv', + 'data-warehouse', + 'financials/quarterly/2024/Q1/profit_loss.csv', + 'profit_loss.csv', + ), + ( + 's3://data-warehouse/financials/quarterly/2024/Q1', + 'data-warehouse', + 'financials/quarterly/2024/Q1', + 'Q1', + ), + ], +) +def test_s3(raw: str, bucket: str, key: str, last_key: str): + model = S3Check(path=raw) + assert model.path == S3Path(raw) + assert model.path.bucket == bucket + assert model.path.key == key + assert model.path.last_key == last_key + + +def test_wrong_s3(): + with pytest.raises(ValidationError): + S3Check(path='s3/ok')