Skip to content

Commit

Permalink
✨ Add Support for S3 Buckets (#206)
Browse files Browse the repository at this point in the history
* feat: add S3Path

* fix: mypy

* fix: remove single quotes due to __future__.annotations

* add docstring and test
  • Loading branch information
lucianosrp authored Sep 3, 2024
1 parent 12f3f61 commit c7db9d7
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 0 deletions.
70 changes: 70 additions & 0 deletions pydantic_extra_types/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""
The `pydantic_extra_types.s3` module provides the
[`S3Path`][pydantic_extra_types.s3.S3Path] data type.
A simple AWS S3 URL parser.
It also provides the `Bucket`, `Key` component.
"""

from __future__ import annotations

import re
from typing import Any, ClassVar, Type

from pydantic import GetCoreSchemaHandler
from pydantic_core import core_schema


class S3Path(str):
    """
    An object representing a valid S3 path.

    This type also allows you to access the `bucket` and `key` components of
    the S3 path. It also contains the `last_key`, which represents the last
    part of the path (typically a file name).

    ```python
    from pydantic import BaseModel
    from pydantic_extra_types.s3 import S3Path

    class TestModel(BaseModel):
        path: S3Path

    p = 's3://my-data-bucket/2023/08/29/sales-report.csv'
    model = TestModel(path=p)
    model
    #> TestModel(path=S3Path('s3://my-data-bucket/2023/08/29/sales-report.csv'))
    model.path.bucket
    #> 'my-data-bucket'
    ```
    """

    # Group 1: bucket name, group 2: full object key, group 3: last path segment.
    patt: ClassVar[str] = r'^s3://([^/]+)/(.*?([^/]+)/?)$'

    def __init__(self, value: str) -> None:
        """Parse `value` and expose its `bucket`, `key` and `last_key` parts.

        Raises:
            ValueError: If `value` does not match the `s3://bucket/key` shape.
        """
        self.value = value
        match = re.match(self.patt, self.value)
        if match is None:
            # Raise a clear error instead of an AttributeError on `.groups()`
            # when the class is instantiated directly with an invalid path.
            raise ValueError(f'Invalid S3 path: {value!r}')
        groups: tuple[str, str, str] = match.groups()  # type: ignore[assignment]
        self.bucket: str = groups[0]
        self.key: str = groups[1]
        self.last_key: str = groups[2]

    def __str__(self) -> str:  # pragma: no cover
        return self.value

    def __repr__(self) -> str:  # pragma: no cover
        return f'{self.__class__.__name__}({self.value!r})'

    @classmethod
    def _validate(cls, __input_value: str, _: core_schema.ValidationInfo) -> S3Path:
        """Pydantic validation hook: wrap an already pattern-checked string."""
        return cls(__input_value)

    @classmethod
    def __get_pydantic_core_schema__(cls, source: Type[Any], handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
        _, _ = source, handler
        return core_schema.with_info_after_validator_function(
            cls._validate,
            core_schema.str_schema(pattern=cls.patt),
            # Bug fix: `cls.__class__.__name__` is the metaclass name ('type');
            # the intended field name is this class's own name.
            field_name=cls.__name__,
        )
18 changes: 18 additions & 0 deletions tests/test_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from pydantic_extra_types.payment import PaymentCardNumber
from pydantic_extra_types.pendulum_dt import DateTime
from pydantic_extra_types.phone_numbers import PhoneNumber, PhoneNumberValidator
from pydantic_extra_types.s3 import S3Path
from pydantic_extra_types.script_code import ISO_15924
from pydantic_extra_types.semantic_version import SemanticVersion
from pydantic_extra_types.semver import _VersionPydanticAnnotation
Expand Down Expand Up @@ -433,6 +434,23 @@
'required': ['x'],
},
),
(
S3Path,
{
'title': 'Model',
'type': 'object',
'properties': {
'x': {
'pattern': '^s3://([^/]+)/(.*?([^/]+)/?)$',
'title': 'X',
'type': 'string',
},
},
'required': [
'x',
],
},
),
],
)
def test_json_schema(cls, expected):
Expand Down
152 changes: 152 additions & 0 deletions tests/test_s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import pytest
from pydantic import BaseModel, ValidationError

from pydantic_extra_types.s3 import S3Path


class S3Check(BaseModel):
    """Minimal model used to exercise `S3Path` validation through pydantic."""

    path: S3Path


@pytest.mark.parametrize(
    'raw,bucket,key,last_key',
    [
        (
            's3://my-data-bucket/2023/08/29/sales-report.csv',
            'my-data-bucket',
            '2023/08/29/sales-report.csv',
            'sales-report.csv',
        ),
        (
            's3://logs-bucket/app-logs/production/2024/07/01/application-log.txt',
            'logs-bucket',
            'app-logs/production/2024/07/01/application-log.txt',
            'application-log.txt',
        ),
        (
            's3://backup-storage/user_data/john_doe/photos/photo-2024-08-15.jpg',
            'backup-storage',
            'user_data/john_doe/photos/photo-2024-08-15.jpg',
            'photo-2024-08-15.jpg',
        ),
        (
            's3://analytics-bucket/weekly-reports/Q3/2023/week-35-summary.pdf',
            'analytics-bucket',
            'weekly-reports/Q3/2023/week-35-summary.pdf',
            'week-35-summary.pdf',
        ),
        (
            's3://project-data/docs/presentations/quarterly_review.pptx',
            'project-data',
            'docs/presentations/quarterly_review.pptx',
            'quarterly_review.pptx',
        ),
        (
            's3://my-music-archive/genres/rock/2024/favorite-songs.mp3',
            'my-music-archive',
            'genres/rock/2024/favorite-songs.mp3',
            'favorite-songs.mp3',
        ),
        (
            's3://video-uploads/movies/2024/03/action/thriller/movie-trailer.mp4',
            'video-uploads',
            'movies/2024/03/action/thriller/movie-trailer.mp4',
            'movie-trailer.mp4',
        ),
        (
            's3://company-files/legal/contracts/contract-2023-09-01.pdf',
            'company-files',
            'legal/contracts/contract-2023-09-01.pdf',
            'contract-2023-09-01.pdf',
        ),
        (
            's3://dev-environment/source-code/release_v1.0.2.zip',
            'dev-environment',
            'source-code/release_v1.0.2.zip',
            'release_v1.0.2.zip',
        ),
        (
            's3://public-bucket/open-data/geojson/maps/city_boundaries.geojson',
            'public-bucket',
            'open-data/geojson/maps/city_boundaries.geojson',
            'city_boundaries.geojson',
        ),
        (
            's3://image-storage/2024/portfolio/shoots/wedding/couple_photo_12.jpg',
            'image-storage',
            '2024/portfolio/shoots/wedding/couple_photo_12.jpg',
            'couple_photo_12.jpg',
        ),
        (
            's3://finance-data/reports/2024/Q2/income_statement.xlsx',
            'finance-data',
            'reports/2024/Q2/income_statement.xlsx',
            'income_statement.xlsx',
        ),
        (
            's3://training-data/nlp/corpora/english/2023/text_corpus.txt',
            'training-data',
            'nlp/corpora/english/2023/text_corpus.txt',
            'text_corpus.txt',
        ),
        (
            's3://ecommerce-backup/2024/transactions/august/orders_2024_08_28.csv',
            'ecommerce-backup',
            '2024/transactions/august/orders_2024_08_28.csv',
            'orders_2024_08_28.csv',
        ),
        (
            's3://gaming-assets/3d_models/characters/hero/model_v5.obj',
            'gaming-assets',
            '3d_models/characters/hero/model_v5.obj',
            'model_v5.obj',
        ),
        (
            's3://iot-sensor-data/2024/temperature_sensors/sensor_42_readings.csv',
            'iot-sensor-data',
            '2024/temperature_sensors/sensor_42_readings.csv',
            'sensor_42_readings.csv',
        ),
        (
            's3://user-uploads/avatars/user123/avatar_2024_08_29.png',
            'user-uploads',
            'avatars/user123/avatar_2024_08_29.png',
            'avatar_2024_08_29.png',
        ),
        (
            's3://media-library/podcasts/2023/episode_45.mp3',
            'media-library',
            'podcasts/2023/episode_45.mp3',
            'episode_45.mp3',
        ),
        (
            's3://logs-bucket/security/firewall-logs/2024/08/failed_attempts.log',
            'logs-bucket',
            'security/firewall-logs/2024/08/failed_attempts.log',
            'failed_attempts.log',
        ),
        (
            's3://data-warehouse/financials/quarterly/2024/Q1/profit_loss.csv',
            'data-warehouse',
            'financials/quarterly/2024/Q1/profit_loss.csv',
            'profit_loss.csv',
        ),
        # A key with no trailing file: the last path segment is still extracted.
        (
            's3://data-warehouse/financials/quarterly/2024/Q1',
            'data-warehouse',
            'financials/quarterly/2024/Q1',
            'Q1',
        ),
    ],
)
def test_s3(raw: str, bucket: str, key: str, last_key: str):
    """Each valid S3 URL parses into the expected bucket/key/last_key parts."""
    model = S3Check(path=raw)
    assert model.path == S3Path(raw)
    assert model.path.bucket == bucket
    assert model.path.key == key
    assert model.path.last_key == last_key


def test_wrong_s3():
    """A string that lacks the `s3://` scheme must be rejected by validation."""
    bad_path = 's3/ok'
    with pytest.raises(ValidationError):
        S3Check(path=bad_path)

0 comments on commit c7db9d7

Please sign in to comment.