Skip to content

Commit

Permalink
Merge pull request #390 from projectcaluma/feat-upload-file-to-s3storage
Browse files Browse the repository at this point in the history
Use Django storage backend to handle file-upload to s3 storage
  • Loading branch information
Yelinz authored Jan 17, 2024
2 parents 8467b90 + 4f39f90 commit 13b51f1
Show file tree
Hide file tree
Showing 33 changed files with 1,307 additions and 935 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repos:
stages: [commit]
name: isort
language: system
entry: isort -y
entry: isort
types: [python]
- id: flake8
stages: [commit]
Expand Down
31 changes: 30 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ Different environment variable types are explained at [django-environ](https://g

Additional authorization and validation of the models is handled by [DGAP](https://github.com/adfinis/django-generic-api-permissions/?tab=readme-ov-file#usage---for-people-deploying-a-dgap-equipped-app).


#### Common

A list of configuration options which you need
Expand All @@ -76,12 +75,42 @@ A list of configuration options which you need
- Data validation configuration
- `ALEXANDRIA_VALIDATION_CLASSES`: Comma-separated list of [DGAP](https://github.com/adfinis/django-generic-api-permissions/?tab=readme-ov-file#data-validation) classes that define custom validations
- Thumbnail configuration (optional)

- `ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION`: Set to `false` to disable thumbnail generation
- Check the docker-compose file for an example on how to set up generation with s3 hooks
- `ALEXANDRIA_THUMBNAIL_WIDTH`: Width of generated thumbnails
- `ALEXANDRIA_THUMBNAIL_HEIGHT`: Height of generated thumbnails
- `ALEXANDRIA_ENABLE_CHECKSUM`: Set to `false` to disable file checksums. Checksums are calculated after upload to allow later verification (not implemented in Alexandria)

- Storage configuration

Storage backends are configured globally. The storable object bears information on the encryption status allowing the ORM appropriate handling of the data.

- `FILE_STORAGE_BACKEND`: Set the backend for file uploads. `django-storages` is available (default: `django.core.files.storage.FileSystemStorage`)

Encryption:

- `ALEXANDRIA_ENABLE_AT_REST_ENCRYPTION`: Set to `true` to enable at-rest encryption of files (enabling this causes an error unless `ALEXANDRIA_ENCRYPTION_METHOD` is set to a supported method)
- `ALEXANDRIA_ENCRYPTION_METHOD`: Define encryption method that is applied to uploaded objects. Available values depend on storage backend's capabilities (default: `None`)
- available methods
- None: no at-rest encryption
- `ssec-global`: encrypt all files with the same key (requires: `FILE_STORAGE_BACKEND`: `alexandria.storages.s3.S3Storage`)

Supported backends:

- `FileSystemStorage`: files are stored to the `MEDIA_ROOT` directory
- `S3Storage`: files are uploaded to the S3 object storage configured accordingly

Required configurations:

- `AWS_S3_ACCESS_KEY_ID`: identity
- `AWS_S3_SECRET_ACCESS_KEY`: password to authorize identity
- `AWS_S3_ENDPOINT_URL`: the url of the service
- `AWS_STORAGE_BUCKET_NAME`: the bucket name of the storage to access objects in path notation (not subdomain)

The development setup features a minio service, implementing the S3 protocol.
To use SSE-C in development make sure to generate a certificate for the minio container and set `AWS_S3_VERIFY` to `false`.

For development, you can also set the following environment variables to help you:

- `ALEXANDRIA_DEV_AUTH_BACKEND`: Set this to "true" to enable a fake auth backend that simulates an authenticated user. Requires `DEBUG` to be set to `True` as well.
Expand Down
79 changes: 17 additions & 62 deletions alexandria/conftest.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,17 @@
import importlib
import inspect
import shutil
import sys
import time
from io import BytesIO
from pathlib import Path

import pytest
from django.apps import apps
from django.core.cache import cache
from factory.base import FactoryMetaClass
from minio import Minio
from minio.datatypes import Object as MinioStatObject
from minio.helpers import ObjectWriteResult
from pytest_factoryboy import register
from pytest_factoryboy.fixture import Box
from rest_framework.test import APIClient
from urllib3 import HTTPResponse

from alexandria.core.storage_clients import Minio as MinioStorageClient
from alexandria.core.tests import file_data
from alexandria.oidc_auth.models import OIDCUser


Expand All @@ -35,6 +29,21 @@ def register_module(module):
register_module(importlib.import_module(".core.factories", "alexandria"))


@pytest.fixture(autouse=True)
def _default_file_storage_backend(settings):
    """Reset the storage-related settings to plain defaults for every test.

    Applied automatically (``autouse``): at-rest encryption is switched off
    and the default file storage is set to Django's ``FileSystemStorage``.
    """
    settings.ALEXANDRIA_ENABLE_AT_REST_ENCRYPTION = False
    settings.DEFAULT_FILE_STORAGE = "django.core.files.storage.FileSystemStorage"


@pytest.fixture(autouse=True)
def _make_clean_media_dir(settings):
    """Give every test its own ``test`` media directory and remove it afterwards.

    Setup creates ``<MEDIA_ROOT>/test`` (including missing parents) and
    repoints ``settings.MEDIA_ROOT`` at it. Teardown deletes that directory
    tree again so files written by one test do not leak into the next.
    """
    test_media_root = Path(settings.MEDIA_ROOT) / "test"
    test_media_root.mkdir(parents=True, exist_ok=True)
    settings.MEDIA_ROOT = str(test_media_root)
    # Hand control to the test here; everything below runs as teardown.
    yield
    # A test may already have removed the directory itself; cleanup must not
    # turn that into a teardown error.
    shutil.rmtree(test_media_root, ignore_errors=True)


@pytest.fixture
def admin_groups():
    """Return the list of admin group names (a single ``admin`` entry)."""
    groups = ["admin"]
    return groups
Expand Down Expand Up @@ -89,57 +98,3 @@ def reset_config_classes(settings):
# First, set config to original value
core_config = apps.get_app_config("generic_permissions")
core_config.ready()


@pytest.fixture
def minio_mock(mocker, settings):
    """Patch the ``Minio`` client methods with mocks and canned responses.

    Returns the (patched) ``Minio`` class so tests can inspect the mocks.
    """
    bucket_name = settings.ALEXANDRIA_MINIO_STORAGE_MEDIA_BUCKET_NAME

    # Parameter names are kept as-is: the mocks forward positional *and*
    # keyword arguments to these side effects.
    def fake_presigned_get_object(bucket, object_name, expires):
        return f"http://minio/download-url/{object_name}"

    def fake_get_object(bucket, object_name):
        # The payload is whatever follows the first underscore of the object
        # name, unless the name marks an unsupported file type.
        payload = object_name.split("_", 1)[1].encode()
        if object_name.endswith(".unsupported"):
            payload = file_data.unsupported
        return HTTPResponse(body=BytesIO(payload), preload_content=False)

    stat_response = MinioStatObject(
        bucket_name,
        "some-file.pdf",
        time.struct_time((2019, 4, 5, 7, 0, 49, 4, 95, 0)),
        "0c81da684e6aaef48e8f3113e5b8769b",
        8200,
        content_type="application/pdf",
        metadata={"X-Amz-Meta-Testtag": "super_file"},
    )
    write_result = ObjectWriteResult(
        bucket_name=bucket_name,
        object_name="some-file.pdf",
        version_id="",
        etag="af1421c17294eed533ec99eb82b468fb",
        http_headers="",
    )

    patched_methods = (
        "presigned_get_object",
        "presigned_put_object",
        "stat_object",
        "bucket_exists",
        "make_bucket",
        "remove_object",
        "copy_object",
        "get_object",
        "put_object",
    )
    for method_name in patched_methods:
        mocker.patch.object(Minio, method_name)

    Minio.get_object.side_effect = fake_get_object
    Minio.presigned_get_object.side_effect = fake_presigned_get_object
    Minio.put_object.return_value = write_result
    Minio.presigned_put_object.return_value = "http://minio/upload-url"
    Minio.stat_object.return_value = stat_response
    Minio.bucket_exists.return_value = True
    return Minio


@pytest.fixture
def mock_s3storage(minio_mock, requests_mock):
    """Register a mocked HTTP ``PUT`` (status 201) for the upload URL of ``the-object``.

    Returns the matcher created by ``requests_mock`` so tests can inspect it.
    """
    upload_url = MinioStorageClient().upload_url("the-object")
    return requests_mock.put(upload_url, status_code=201)
42 changes: 41 additions & 1 deletion alexandria/core/factories.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import factory
from factory import Faker, SubFactory, post_generation
from factory.django import DjangoModelFactory

from alexandria.core.tests import file_data

from . import models


Expand Down Expand Up @@ -89,8 +92,45 @@ def marks(self, create, extracted, **kwargs): # pragma: todo cover


class FileFactory(BaseFactory):
name = Faker("name")
"""Factory for File.
Usage:
If you want a nice set of Document, File with thumbnails:
`thumb = FileFactory(variant=File.Variant.THUMBNAIL)`
for multiple versions:
`thumb_v2 = FileFactory(variant=File.Variant.THUMBNAIL, document=thumb.document)`
"""

name = factory.Maybe(
factory.LazyAttribute(lambda o: o.variant == models.File.Variant.ORIGINAL),
yes_declaration=Faker("name"),
no_declaration=factory.LazyAttribute(
lambda o: f"{o.original.name}_preview.jpg"
),
)
document = SubFactory(DocumentFactory)
variant = models.File.Variant.ORIGINAL
content = factory.Maybe(
factory.LazyAttribute(lambda o: o.variant == models.File.Variant.THUMBNAIL),
yes_declaration=factory.django.ImageField(
filename="thumb_preview.jpg", width=256, height=256, format="JPEG"
),
no_declaration=factory.django.FileField(
filename="the_file.png", data=file_data.png
),
)
original = factory.Maybe(
factory.LazyAttribute(lambda o: o.variant == models.File.Variant.THUMBNAIL),
SubFactory(
"alexandria.core.factories.FileFactory",
variant=models.File.Variant.ORIGINAL,
document=factory.SelfAttribute("..document"),
),
)

class Meta:
model = models.File
Expand Down
Empty file.
68 changes: 68 additions & 0 deletions alexandria/core/management/commands/encrypt_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from django.conf import settings
from django.core.files.storage import get_storage_class
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Q
from tqdm import tqdm

from alexandria.core.models import File
from alexandria.storages.backends.s3 import SsecGlobalS3Storage

# This is needed to disable the warning about not verifying the SSL certificate.
# It spams the output otherwise.
# Runs at import time of this module and only when certificate verification
# has been switched off via the ``AWS_S3_VERIFY`` setting.
if not settings.AWS_S3_VERIFY:
    import urllib3

    urllib3.disable_warnings()


class Command(BaseCommand):
    """Management command that re-saves unencrypted files via the encrypted storage."""

    help = "Swaps plain text file content to encrypted content"

    def add_arguments(self, parser):
        """Register the ``--dry`` flag (read files only, roll back at the end)."""
        parser.add_argument("--dry", dest="dry", action="store_true", default=False)

    @transaction.atomic
    def handle(self, *args, **options):
        """Encrypt the content of every file that is not yet marked as encrypted.

        Does nothing (apart from printing a warning) when at-rest encryption
        is disabled or the configured method is "not encrypted". Otherwise
        each file whose ``encryption_status`` is unset or ``NOT_ENCRYPTED`` is
        read through the default storage and, unless ``--dry`` is given,
        written back through ``SsecGlobalS3Storage`` and marked with the
        configured encryption method.
        """
        if (
            not settings.ALEXANDRIA_ENABLE_AT_REST_ENCRYPTION
            or settings.ALEXANDRIA_ENCRYPTION_METHOD
            == File.EncryptionStatus.NOT_ENCRYPTED.value
        ):
            return self.stdout.write(
                self.style.WARNING(
                    "Encryption is not enabled. Skipping encryption of files."
                )
            )
        # disable checksums to prevent errors
        checksum = settings.ALEXANDRIA_ENABLE_CHECKSUM
        settings.ALEXANDRIA_ENABLE_CHECKSUM = False

        sid = transaction.savepoint()

        try:
            # flip between default and encrypted storage to have the correct
            # parameters in the requests
            DefaultStorage = get_storage_class()
            for file in tqdm(
                File.objects.filter(
                    Q(encryption_status=File.EncryptionStatus.NOT_ENCRYPTED)
                    | Q(encryption_status__isnull=True)
                ),
            ):
                # get original file content
                file.content.storage = DefaultStorage()
                content = file.content.open()
                try:
                    if not options["dry"]:
                        # overwrite with encrypted content
                        file.content.storage = SsecGlobalS3Storage()
                        file.content.save(file.content.name, content)

                        # set encryption status
                        file.encryption_status = settings.ALEXANDRIA_ENCRYPTION_METHOD
                        file.save()
                finally:
                    # Release the handle opened above (also in dry runs) so
                    # that a large table does not accumulate open files.
                    content.close()
        finally:
            # Always restore the global setting, even if a file failed:
            # the surrounding transaction rolls back the database, but it
            # cannot undo this in-process settings change.
            settings.ALEXANDRIA_ENABLE_CHECKSUM = checksum

        if options["dry"]:  # pragma: no cover
            transaction.savepoint_rollback(sid)
        else:
            transaction.savepoint_commit(sid)
59 changes: 59 additions & 0 deletions alexandria/core/migrations/0013_file_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Generated by Django 3.2.23 on 2023-12-13 18:00

from django.db import migrations, models

import alexandria.core.models
import alexandria.storages.fields


def migrate_file_references(apps, schema_editor):
    """Switch files from download-URL based access to storage backend access.

    According to the original migration notes, the object storage used the
    file's name as object name. Assigning the name computed by
    ``upload_file_content_to`` to the ``content`` field therefore makes the
    field refer to the same stored object, provided it still exists.

    Make sure the object storage client points at the right endpoint and
    bucket before the files are accessed.
    """
    file_model = apps.get_model("alexandria_core", "File")
    for record in file_model.objects.iterator():
        record.content.name = alexandria.core.models.upload_file_content_to(
            record, None
        )
        record.save()


class Migration(migrations.Migration):
    # Replaces the ``upload_status`` field of ``File`` with a storage-backed
    # ``content`` field plus an ``encryption_status`` marker, then fills in
    # the ``content`` name of existing rows.
    dependencies = [
        ("alexandria_core", "0012_tag_uuid_schema"),
    ]

    operations = [
        migrations.RemoveField(
            model_name="file",
            name="upload_status",
        ),
        # The empty-string default is only used to populate existing rows
        # (``preserve_default=False`` drops it from the final field state).
        migrations.AddField(
            model_name="file",
            name="content",
            field=alexandria.storages.fields.DynamicStorageFileField(
                default="", upload_to=alexandria.core.models.upload_file_content_to
            ),
            preserve_default=False,
        ),
        # Nullable so that existing rows start out with "status not set".
        migrations.AddField(
            model_name="file",
            name="encryption_status",
            field=models.CharField(
                blank=True,
                choices=[
                    (None, "Encryption status not set"),
                    ("none", "No at-rest enryption"),
                    ("ssec-global", "SSE-C global key encryption (AES256)"),
                    ("ssec-object", "SSE-C per object encryption (AES256)"),
                ],
                default=None,
                max_length=12,
                null=True,
            ),
        ),
        # Data migration; the reverse direction is a no-op.
        migrations.RunPython(migrate_file_references, migrations.RunPython.noop),
    ]
Loading

0 comments on commit 13b51f1

Please sign in to comment.