From 2ffeb18ab9e3e51e0a87bf72931bb367dcf19547 Mon Sep 17 00:00:00 2001 From: Davis Raymond Muro Date: Wed, 13 Jul 2022 12:50:57 +0300 Subject: [PATCH 1/3] Ensure the SAS Token is used when retrieving images from AzureStorage --- onadata/apps/api/viewsets/media_viewset.py | 5 +- onadata/libs/utils/image_tools.py | 136 +++++++++++++++---- onadata/libs/utils/presigned_download_url.py | 75 ---------- 3 files changed, 111 insertions(+), 105 deletions(-) delete mode 100644 onadata/libs/utils/presigned_download_url.py diff --git a/onadata/apps/api/viewsets/media_viewset.py b/onadata/apps/api/viewsets/media_viewset.py index 7acecf1e81..a902b37e04 100644 --- a/onadata/apps/api/viewsets/media_viewset.py +++ b/onadata/apps/api/viewsets/media_viewset.py @@ -13,9 +13,8 @@ AuthenticateHeaderMixin from onadata.libs.mixins.cache_control_mixin import CacheControlMixin from onadata.libs.mixins.etags_mixin import ETagsMixin -from onadata.libs.utils.presigned_download_url import \ - generate_media_download_url -from onadata.libs.utils.image_tools import image_url +from onadata.libs.utils.image_tools import \ + image_url, generate_media_download_url from onadata.apps.api.tools import get_baseviewset_class BaseViewset = get_baseviewset_class() diff --git a/onadata/libs/utils/image_tools.py b/onadata/libs/utils/image_tools.py index 75bf8038b9..1123c971d6 100644 --- a/onadata/libs/utils/image_tools.py +++ b/onadata/libs/utils/image_tools.py @@ -1,22 +1,96 @@ +import boto3 +import urllib +import logging +from datetime import datetime, timedelta from tempfile import NamedTemporaryFile from PIL import Image from django.conf import settings from django.core.files.base import ContentFile from django.core.files.storage import get_storage_class +from django.http import HttpResponse, HttpResponseRedirect +from botocore.exceptions import ClientError +from wsgiref.util import FileWrapper from onadata.libs.utils.viewer_tools import get_path def flat(*nums): - '''Build a tuple of ints from float or integer arguments. + """Build a tuple of ints from float or integer arguments. Useful because PIL crop and resize require integer points. source: https://gist.github.com/16a01455 - ''' + """ return tuple(int(round(n)) for n in nums) +def generate_media_download_url(obj, expiration: int = 3600): + file_path = obj.media_file.name + default_storage = get_storage_class()() + filename = file_path.split("/")[-1] + s3 = None + azure = None + + try: + s3 = get_storage_class("storages.backends.s3boto3.S3Boto3Storage")() + except ModuleNotFoundError: + pass + + try: + azure = get_storage_class("storages.backends.azure_storage.AzureStorage")() + except ModuleNotFoundError: + if s3 is None: + return HttpResponseRedirect(obj.media_file.url) + + content_disposition = urllib.parse.quote(f"attachment; filename={filename}") + if isinstance(default_storage, type(s3)): + try: + bucket_name = s3.bucket.name + s3_client = boto3.client("s3") + + # Generate a presigned URL for the S3 object + response = s3_client.generate_presigned_url( + "get_object", + Params={ + "Bucket": bucket_name, + "Key": file_path, + "ResponseContentDisposition": content_disposition, + "ResponseContentType": "application/octet-stream", + }, + ExpiresIn=expiration, + ) + except ClientError as e: + logging.error(e) + return None + + return HttpResponseRedirect(response) + elif isinstance(default_storage, type(azure)): + media_url = generate_media_url_with_sas(file_path) + return HttpResponseRedirect(media_url) + else: + file_obj = open(settings.MEDIA_ROOT + file_path, "rb") + response = HttpResponse(FileWrapper(file_obj), content_type=obj.mimetype) + response["Content-Disposition"] = content_disposition + + return response + + +def generate_media_url_with_sas(file_path, expiration: int = 3600): + from azure.storage.blob import generate_blob_sas, AccountSasPermissions + account_name = getattr(settings, "AZURE_ACCOUNT_NAME", "") + container_name = getattr(settings, "AZURE_CONTAINER", "") + media_url = f"https://{account_name}.blob.core.windows.net/{container_name}/{file_path}" # noqa + sas_token = generate_blob_sas( + account_name=account_name, + account_key=getattr(settings, "AZURE_ACCOUNT_KEY", ""), + container_name=container_name, + blob_name=file_path, + permission=AccountSasPermissions(read=True), + expiry=datetime.utcnow() + timedelta(seconds=expiration), + ) + return f"{media_url}?{sas_token}" + + def get_dimensions(size, longest_side): width, height = size @@ -34,24 +108,22 @@ def get_dimensions(size, longest_side): def _save_thumbnails(image, path, size, suffix, extension): - nm = NamedTemporaryFile(suffix='.%s' % extension) + nm = NamedTemporaryFile(suffix=".%s" % extension) default_storage = get_storage_class()() try: # Ensure conversion to float in operations - image.thumbnail( - get_dimensions(image.size, float(size)), Image.ANTIALIAS) + image.thumbnail(get_dimensions(image.size, float(size)), Image.ANTIALIAS) except ZeroDivisionError: pass image.save(nm.name) - default_storage.save( - get_path(path, suffix), ContentFile(nm.read())) + default_storage.save(get_path(path, suffix), ContentFile(nm.read())) nm.close() def resize(filename, extension): - if extension == 'non': + if extension == "non": extension = settings.DEFAULT_IMG_FILE_TYPE default_storage = get_storage_class()() @@ -62,54 +134,64 @@ def resize(filename, extension): for key in settings.THUMB_ORDER: _save_thumbnails( - image, filename, - conf[key]['size'], - conf[key]['suffix'], - extension) + image, filename, conf[key]["size"], conf[key]["suffix"], extension + ) except IOError: raise Exception("The image file couldn't be identified") def resize_local_env(filename, extension): - if extension == 'non': + if extension == "non": extension = settings.DEFAULT_IMG_FILE_TYPE default_storage = get_storage_class()() path = default_storage.path(filename) image = Image.open(path) conf = settings.THUMB_CONF - [_save_thumbnails( - image, path, conf[key]['size'], - conf[key]['suffix'], extension) for key in settings.THUMB_ORDER] + [ + _save_thumbnails(image, path, conf[key]["size"], conf[key]["suffix"], extension) + for key in settings.THUMB_ORDER + ] def image_url(attachment, suffix): - '''Return url of an image given size(@param suffix) + """Return url of an image given size(@param suffix) e.g large, medium, small, or generate required thumbnail - ''' + """ url = attachment.media_file.url + azure = None + + try: + azure = get_storage_class("storages.backends.azure_storage.AzureStorage")() + except ModuleNotFoundError: + pass - if suffix == 'original': + if suffix == "original": return url else: default_storage = get_storage_class()() - fs = get_storage_class('django.core.files.storage.FileSystemStorage')() + fs = get_storage_class("django.core.files.storage.FileSystemStorage")() if suffix in settings.THUMB_CONF: - size = settings.THUMB_CONF[suffix]['suffix'] + size = settings.THUMB_CONF[suffix]["suffix"] filename = attachment.media_file.name if default_storage.exists(filename): - if default_storage.exists(get_path(filename, size)) and\ - default_storage.size(get_path(filename, size)) > 0: - url = default_storage.url( - get_path(filename, size)) + if ( + default_storage.exists(get_path(filename, size)) + and default_storage.size(get_path(filename, size)) > 0 + ): + file_path = get_path(filename, size) + url = ( + generate_media_url_with_sas(file_path) + if isinstance(default_storage, type(azure)) + else default_storage.url(file_path) + ) else: if default_storage.__class__ != fs.__class__: resize(filename, extension=attachment.extension) else: - resize_local_env(filename, - extension=attachment.extension) + resize_local_env(filename, extension=attachment.extension) return image_url(attachment, suffix) else: diff --git a/onadata/libs/utils/presigned_download_url.py b/onadata/libs/utils/presigned_download_url.py deleted file mode 100644 index 8c118f28bf..0000000000 --- a/onadata/libs/utils/presigned_download_url.py +++ /dev/null @@ -1,75 +0,0 @@ -import boto3 -import urllib -import logging -from datetime import datetime, timedelta - -from django.conf import settings -from django.core.files.storage import get_storage_class -from django.http import HttpResponse, HttpResponseRedirect -from botocore.exceptions import ClientError - -from wsgiref.util import FileWrapper - - -def generate_media_download_url(obj, expiration: int = 3600): - file_path = obj.media_file.name - default_storage = get_storage_class()() - filename = file_path.split("/")[-1] - s3 = None - azure = None - - try: - s3 = get_storage_class('storages.backends.s3boto3.S3Boto3Storage')() - except ModuleNotFoundError: - pass - - try: - azure = get_storage_class( - 'storages.backends.azure_storage.AzureStorage')() - except ModuleNotFoundError: - if s3 is None: - return HttpResponseRedirect(obj.media_file.url) - - content_disposition = urllib.parse.quote( - f'attachment; filename={filename}' - ) - if isinstance(default_storage, type(s3)): - try: - bucket_name = s3.bucket.name - s3_client = boto3.client('s3') - - # Generate a presigned URL for the S3 object - response = s3_client.generate_presigned_url( - 'get_object', - Params={ - 'Bucket': bucket_name, - 'Key': file_path, - 'ResponseContentDisposition': content_disposition, - 'ResponseContentType': 'application/octet-stream', }, - ExpiresIn=expiration) - except ClientError as e: - logging.error(e) - return None - - return HttpResponseRedirect(response) - elif isinstance(default_storage, type(azure)): - from azure.storage.blob import generate_blob_sas, AccountSasPermissions - account_name = getattr(settings, "AZURE_ACCOUNT_NAME", "") - container_name = getattr(settings, "AZURE_CONTAINER", "") - media_url = f"https://{account_name}.blob.core.windows.net/{container_name}/{file_path}" # noqa - sas_token = generate_blob_sas( - account_name=account_name, - account_key=getattr(settings, "AZURE_ACCOUNT_KEY", ""), - container_name=container_name, - blob_name=file_path, - permission=AccountSasPermissions(read=True), - expiry=datetime.utcnow() + timedelta(seconds=expiration) - ) - return HttpResponseRedirect(f"{media_url}?{sas_token}") - else: - file_obj = open(settings.MEDIA_ROOT + file_path, 'rb') - response = HttpResponse(FileWrapper(file_obj), - content_type=obj.mimetype) - response['Content-Disposition'] = content_disposition - - return response From ab7751b1820f5802622b61012791ade178640d9b Mon Sep 17 00:00:00 2001 From: Davis Raymond Muro Date: Wed, 13 Jul 2022 13:49:33 +0300 Subject: [PATCH 2/3] Add `generate_aws_media_url` function --- .../api/tests/viewsets/test_media_viewset.py | 4 +- onadata/libs/utils/image_tools.py | 42 +++++++++++-------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/onadata/apps/api/tests/viewsets/test_media_viewset.py b/onadata/apps/api/tests/viewsets/test_media_viewset.py index 0c1953878b..edc94e5987 100644 --- a/onadata/apps/api/tests/viewsets/test_media_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_media_viewset.py @@ -35,8 +35,8 @@ def test_retrieve_view(self): self.assertEqual(response.status_code, 302) self.assertEqual(type(response.content), bytes) - @patch('onadata.libs.utils.presigned_download_url.get_storage_class') - @patch('onadata.libs.utils.presigned_download_url.boto3.client') + @patch('onadata.libs.utils.image_tools.get_storage_class') + @patch('onadata.libs.utils.image_tools.boto3.client') def test_retrieve_view_from_s3( self, mock_presigned_urls, mock_get_storage_class): diff --git a/onadata/libs/utils/image_tools.py b/onadata/libs/utils/image_tools.py index 1123c971d6..4aa2631d8c 100644 --- a/onadata/libs/utils/image_tools.py +++ b/onadata/libs/utils/image_tools.py @@ -45,25 +45,12 @@ def generate_media_download_url(obj, expiration: int = 3600): content_disposition = urllib.parse.quote(f"attachment; filename={filename}") if isinstance(default_storage, type(s3)): try: - bucket_name = s3.bucket.name - s3_client = boto3.client("s3") - - # Generate a presigned URL for the S3 object - response = s3_client.generate_presigned_url( - "get_object", - Params={ - "Bucket": bucket_name, - "Key": file_path, - "ResponseContentDisposition": content_disposition, - "ResponseContentType": "application/octet-stream", - }, - ExpiresIn=expiration, - ) + url = generate_aws_media_url(file_path, content_disposition, expiration) except ClientError as e: logging.error(e) return None - - return HttpResponseRedirect(response) + else: + return HttpResponseRedirect(url) elif isinstance(default_storage, type(azure)): media_url = generate_media_url_with_sas(file_path) return HttpResponseRedirect(media_url) @@ -75,8 +62,29 @@ def generate_media_download_url(obj, expiration: int = 3600): return response -def generate_media_url_with_sas(file_path, expiration: int = 3600): +def generate_aws_media_url( + file_path: str, content_disposition: str, expiration: int = 3600 +): + s3 = get_storage_class("storage.backends.s3boto3.S3Boto3Storage")() + bucket_name = s3.bucket.name + s3_client = boto3.client("s3") + + # Generate a presigned URL for the S3 object + return s3_client.generate_presigned_url( + "get_object", + Params={ + "Bucket": bucket_name, + "Key": file_path, + "ResponseContentDisposition": content_disposition, + "ResponseContentType": "application/octet-stream", + }, + ExpiresIn=expiration, + ) + + +def generate_media_url_with_sas(file_path: str, expiration: int = 3600): from azure.storage.blob import generate_blob_sas, AccountSasPermissions + account_name = getattr(settings, "AZURE_ACCOUNT_NAME", "") container_name = getattr(settings, "AZURE_CONTAINER", "") media_url = f"https://{account_name}.blob.core.windows.net/{container_name}/{file_path}" # noqa From 313dc738c0f22b72cf5361ae1756169413b39aae Mon Sep 17 00:00:00 2001 From: Davis Raymond Muro Date: Wed, 13 Jul 2022 13:51:20 +0300 Subject: [PATCH 3/3] Ensure `expiration` arguement is correctly set --- onadata/libs/utils/image_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onadata/libs/utils/image_tools.py b/onadata/libs/utils/image_tools.py index 4aa2631d8c..1dfe6318c5 100644 --- a/onadata/libs/utils/image_tools.py +++ b/onadata/libs/utils/image_tools.py @@ -52,7 +52,7 @@ def generate_media_download_url(obj, expiration: int = 3600): else: return HttpResponseRedirect(url) elif isinstance(default_storage, type(azure)): - media_url = generate_media_url_with_sas(file_path) + media_url = generate_media_url_with_sas(file_path, expiration) return HttpResponseRedirect(media_url) else: file_obj = open(settings.MEDIA_ROOT + file_path, "rb")