diff --git a/requirements.txt b/requirements.txt
index 64b09716..d8ae0808 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 coveralls==1.8.2  # code coverage reportin in travis
 datacite==1.0.1  # BSD-license. convert datasets to datacite xml. datacite metadata store api wrappers
 python-dateutil==2.8.0
-Django==2.1.11  # BSD-license
+Django==2.2.10  # BSD-license
 elasticsearch<6.0.0
 hiredis==1.0.0  # Used by redis (redis-py) for parser
 djangorestframework==3.9.4  # BSD-license
diff --git a/src/metax_api/api/rest/base/api_schemas/catalogrecord.json b/src/metax_api/api/rest/base/api_schemas/catalogrecord.json
index 705f2236..0a87458e 100644
--- a/src/metax_api/api/rest/base/api_schemas/catalogrecord.json
+++ b/src/metax_api/api/rest/base/api_schemas/catalogrecord.json
@@ -12,6 +12,12 @@
     "title":"Catalog Record",
     "description":"A record in a data catalog, describing a single dataset.",
     "properties":{
+        "state":{
+            "title":"state",
+            "description":"Publishing state (published / draft) of the dataset.",
+            "type":"string",
+            "readonly": true
+        },
         "identifier":{
             "title":"Identifier",
             "description":"Internal identifier of the record. Required by API write operations.",
diff --git a/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py b/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py
index 21ad20ac..5886c805 100644
--- a/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py
+++ b/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py
@@ -76,6 +76,7 @@ class Meta:
             'next_dataset_version',
             'previous_dataset_version',
             'mets_object_identifier',
+            'state',
             'editor',
             'cumulative_state',
             'date_cumulation_started',
@@ -117,6 +118,7 @@ def is_valid(self, raise_exception=False):
         self.initial_data.pop('previous_dataset_version', None)
         self.initial_data.pop('deprecated', None)
         self.initial_data.pop('date_deprecated', None)
+        self.initial_data.pop('state', None)
         self.initial_data.pop('preservation_identifier', None)
         self.initial_data.pop('preservation_dataset_version', None)
         self.initial_data.pop('preservation_dataset_origin_version', None)
diff --git a/src/metax_api/api/rest/base/views/common_view.py b/src/metax_api/api/rest/base/views/common_view.py
index 1f602fca..0801573e 100644
--- a/src/metax_api/api/rest/base/views/common_view.py
+++ b/src/metax_api/api/rest/base/views/common_view.py
@@ -128,6 +128,13 @@ def handle_exception(self, exc):
     def paginate_queryset(self, queryset):
        if CS.get_boolean_query_param(self.request, 'no_pagination'):
            return None
+
+        if self.request.query_params.get('ordering'):
+            # pagination slices the queryset before the 'ordering' query param has been applied,
+            # so ensure the queryset is explicitly ordered here.
+            ordering = self.request.query_params.get('ordering').split(',')
+            queryset = queryset.order_by(*ordering)
+
        return super(CommonViewSet, self).paginate_queryset(queryset)

     def get_queryset(self):
diff --git a/src/metax_api/api/rpc/base/views/dataset_rpc.py b/src/metax_api/api/rpc/base/views/dataset_rpc.py
index 4477dfd8..c49ed8cb 100644
--- a/src/metax_api/api/rpc/base/views/dataset_rpc.py
+++ b/src/metax_api/api/rpc/base/views/dataset_rpc.py
@@ -119,6 +119,7 @@ def change_cumulative_state(self, request):
     def refresh_directory_content(self, request):
         cr_identifier = request.query_params.get('cr_identifier', False)
         dir_identifier = request.query_params.get('dir_identifier', False)
+
         if not cr_identifier:
             raise Http400('Query param \'cr_identifier\' missing.')
         if not dir_identifier:
@@ -134,14 +135,14 @@ def refresh_directory_content(self, request):

         cr.request = request

-        if cr.refresh_directory_content(dir_identifier):
-            return_status = status.HTTP_200_OK
-            data = { 'new_version_created': self.get_serializer(cr).data['new_version_created'] }
-        else:
-            return_status = status.HTTP_204_NO_CONTENT
-            data = None
+        new_version, n_added_files = cr.refresh_directory_content(dir_identifier)

-        return Response(data=data, status=return_status)
+        data = { 'number_of_files_added': n_added_files }
+
+        if new_version:
+            data['new_version_created'] = self.get_serializer(cr).data['new_version_created']
+
+        return Response(data=data, status=status.HTTP_200_OK)

     @list_route(methods=['post'], url_path="fix_deprecated")
     def fix_deprecated(self, request):
diff --git a/src/metax_api/migrations/0013_catalogrecord__access_granter.py b/src/metax_api/migrations/0013_catalogrecord__access_granter.py
new file mode 100644
index 00000000..f8cafd00
--- /dev/null
+++ b/src/metax_api/migrations/0013_catalogrecord__access_granter.py
@@ -0,0 +1,19 @@
+# Generated by Django 2.1.11 on 2019-12-16 12:49
+
+import django.contrib.postgres.fields.jsonb
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('metax_api', '0012_auto_20190823_1055'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='catalogrecord',
+            name='_access_granter',
+            field=django.contrib.postgres.fields.jsonb.JSONField(default=None, help_text='Stores data of REMS user who is currently granting access to this dataset', null=True),
+        ),
+    ]
diff --git a/src/metax_api/migrations/0014_catalogrecord_state.py b/src/metax_api/migrations/0014_catalogrecord_state.py
new file mode 100644
index 00000000..79e7ec75
--- /dev/null
+++ b/src/metax_api/migrations/0014_catalogrecord_state.py
@@ -0,0 +1,18 @@
+# Generated by Django 2.1.11 on 2020-02-14 11:41
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('metax_api', '0013_catalogrecord__access_granter'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='catalogrecord',
+            name='state',
+            field=models.CharField(choices=[('published', 'published'), ('draft', 'draft')], default='published', help_text='Publishing state (published / draft) of the dataset.', max_length=200),
+        ),
+    ]
diff --git a/src/metax_api/models/catalog_record.py b/src/metax_api/models/catalog_record.py
index 1deda97d..f9564c53 100644
--- a/src/metax_api/models/catalog_record.py
+++ b/src/metax_api/models/catalog_record.py
@@ -5,8 +5,10 @@
 # :author: CSC - IT Center for Science Ltd., Espoo Finland
 # :license: MIT

+from base64 import urlsafe_b64decode
 from collections import defaultdict
 from copy import deepcopy
+import json
 import logging

 from django.conf import settings
@@ -82,16 +84,22 @@ def get_listing(self):
         Return a list of record preferred_identifiers that belong in the same dataset version chain. Latest first.
         """
-        return [
+
+        versions = [
             {
                 'identifier': r.identifier,
                 'preferred_identifier': r.preferred_identifier,
                 'removed': r.removed,
-                'date_created': r.date_created.astimezone().isoformat()
+                'date_created': r.date_created.astimezone().isoformat(),
+                'date_removed': r.date_removed.astimezone().isoformat() if r.date_removed else None
             }
             for r in self.records(manager='objects_unfiltered').all().order_by('-date_created')
         ]

+        versions = [{key: value for (key, value) in i.items() if value is not None} for i in versions]
+
+        return versions
+
     def print_records(self):  # pragma: no cover
         for r in self.records.all():
             print(r.__repr__())
@@ -213,6 +221,14 @@ class CatalogRecord(Common):
         (CUMULATIVE_STATE_CLOSED, 'closed')
     )

+    STATE_PUBLISHED = 'published'
+    STATE_DRAFT = 'draft'
+
+    STATE_CHOICES = (
+        (STATE_PUBLISHED, 'published'),
+        (STATE_DRAFT, 'draft')
+    )
+
     # MODEL FIELD DEFINITIONS #

     alternate_record_set = models.ForeignKey(
@@ -223,6 +239,13 @@ class CatalogRecord(Common):

     data_catalog = models.ForeignKey(DataCatalog, on_delete=models.DO_NOTHING, related_name='records')

+    state = models.CharField(
+        choices=STATE_CHOICES,
+        default=STATE_PUBLISHED,
+        max_length=200,
+        help_text='Publishing state (published / draft) of the dataset.'
+    )
+
     dataset_group_edit = models.CharField(
         max_length=200, blank=True, null=True,
         help_text='Group which is allowed to edit the dataset in this catalog record.')
@@ -287,6 +310,9 @@ class CatalogRecord(Common):
     date_last_cumulative_addition = models.DateTimeField(null=True, default=None,
         help_text='Date of last file addition while actively cumulative.')

+    _access_granter = JSONField(null=True, default=None,
+        help_text='Stores data of REMS user who is currently granting access to this dataset')
+
     # END OF MODEL FIELD DEFINITIONS #

     """
@@ -415,6 +441,14 @@ def _access_type_is_embargo(self):
         from metax_api.services import CatalogRecordService as CRS
         return CRS.get_research_dataset_access_type(self.research_dataset) == ACCESS_TYPES['embargo']

+    def _access_type_is_permit(self):
+        from metax_api.services import CatalogRecordService as CRS
+        return CRS.get_research_dataset_access_type(self.research_dataset) == ACCESS_TYPES['permit']
+
+    def _access_type_was_permit(self):
+        from metax_api.services import CatalogRecordService as CRS
+        return CRS.get_research_dataset_access_type(self._initial_data['research_dataset']) == ACCESS_TYPES['permit']
+
     def _embargo_is_available(self):
         if not self.research_dataset.get('access_rights', {}).get('available', False):
             return False
@@ -832,6 +866,10 @@ def delete(self, *args, **kwargs):
             if get_identifier_type(self.preferred_identifier) == IdentifierType.DOI:
                 self.add_post_request_callable(DataciteDOIUpdate(self, self.research_dataset['preferred_identifier'], 'delete'))
+
+            if self._dataset_has_rems_managed_access() and settings.REMS['ENABLED']:
+                self.add_post_request_callable(REMSUpdate(self, 'close', reason='deletion'))
+
             self.add_post_request_callable(RabbitMQPublishRecord(self, 'delete'))

         log_args = {
@@ -856,6 +894,10 @@ def delete(self, *args, **kwargs):
     def deprecate(self, timestamp=None):
         self.deprecated = True
         self.date_deprecated = self.date_modified = timestamp or get_tz_aware_now_without_micros()
+
+        if self._dataset_has_rems_managed_access() and settings.REMS['ENABLED']:
+            self.add_post_request_callable(REMSUpdate(self, 'close', reason='deprecation'))
+
         super().save(update_fields=['deprecated', 'date_deprecated', 'date_modified'])
         self.add_post_request_callable(DelayedLog(
             event='dataset_deprecated',
@@ -930,6 +972,39 @@ def get_metadata_version_listing(self):
                 entries[-1]['stored_to_pas'] = entry.stored_to_pas
         return entries

+    def _get_user_info_for_rems(self):
+        """
+        Parses the access_granter query parameter (service users) or the auth token (end users) to collect the user information needed by REMS.
+        """
+        if self.request.user.is_service:
+            b64_access_granter = self.request.query_params.get('access_granter')
+            user_info = json.loads(urlsafe_b64decode(f'{b64_access_granter}===').decode('utf-8'))
+        else:
+            # end user api
+            user_info = {
+                'userid': self.request.user.token.get('CSCUserName'),
+                'name': self.request.user.token.get('displayName'),
+                'email': self.request.user.token.get('email')
+            }
+
+        if any([v is None for v in user_info.values()]):
+            raise Http400('Could not find the needed user information for REMS')
+
+        if not all([isinstance(v, str) for v in user_info.values()]):
+            raise Http400('user information fields must be strings')
+
+        return user_info
+
+    def _validate_for_rems(self):
+        """
+        Ensures that all information necessary for REMS access is present.
+        """
+        if self._access_type_is_permit() and not self.research_dataset['access_rights'].get('license', False):
+            raise Http400('You must define license for dataset in order to make it REMS manageable')
+
+        if self.request.user.is_service and not self.request.query_params.get('access_granter', False):
+            raise Http400('Missing query parameter access_granter')
+
     def _pre_create_operations(self, pid_type=None):

         if not self._check_catalog_permissions(self.data_catalog.catalog_record_group_create):
@@ -1021,8 +1096,11 @@ def _post_create_operations(self):
             self.add_post_request_callable(DataciteDOIUpdate(self, self.research_dataset['preferred_identifier'], 'create'))

-        if self._dataset_is_access_restricted():
-            self.add_post_request_callable(REMSUpdate(self), 'create')
+        if self._dataset_has_rems_managed_access() and settings.REMS['ENABLED']:
+            self._validate_for_rems()
+            user_info = self._get_user_info_for_rems()
+            self._access_granter = user_info
+            self.add_post_request_callable(REMSUpdate(self, 'create', user_info=user_info))

         self.add_post_request_callable(RabbitMQPublishRecord(self, 'create'))
@@ -1112,9 +1190,15 @@ def _pre_update_operations(self):
             # read-only after creating
             self.metadata_provider_user = self._initial_data['metadata_provider_user']

-        if self._dataset_restricted_access_changed():
-            # todo check if restriction_grounds and access_type changed
-            pass
+        if self._dataset_rems_access_changed() and settings.REMS['ENABLED']:
+            if self._dataset_has_rems_managed_access():
+                self._validate_for_rems()
+                user_info = self._get_user_info_for_rems()
+                self._access_granter = user_info
+                self.add_post_request_callable(REMSUpdate(self, 'create', user_info=user_info))
+
+            else:
+                self.add_post_request_callable(REMSUpdate(self, 'close', reason='access type change'))

         if self.field_changed('research_dataset'):
             if self.preservation_state in (
@@ -1322,17 +1406,17 @@ def _files_added_for_first_time(self):
             # creating a new dataset version already occurred once
             return not metadata_versions_with_files_exist

-    def _dataset_is_access_restricted(self):
+    def _dataset_has_rems_managed_access(self):
         """
-        Check using logic x and y if dataset uses REMS for managing access.
+        Check if dataset uses REMS for managing access.
         """
-        return False
+        return self.catalog_is_ida() and self._access_type_is_permit()

-    def _dataset_restricted_access_changed(self):
+    def _dataset_rems_access_changed(self):
         """
-        Check using logic x and y if dataset uses REMS for managing access.
+        Check if dataset is updated so that REMS needs to be updated.
         """
-        return False
+        return self.catalog_is_ida() and self._access_type_is_permit() != self._access_type_was_permit()

     def _calculate_total_files_byte_size(self):
         rd = self.research_dataset
@@ -1930,6 +2014,11 @@ def _create_pas_version(self, origin_version):
         pas_version.request = origin_version.request
         pas_version.save(pid_type=IdentifierType.DOI)

+        # ensure the pas dataset contains exactly the same files as the origin dataset. clear the result
+        # that was achieved by calling save(), which processed research_dataset.files and research_dataset.directories
+        pas_version.files.clear()
+        pas_version.files.add(*origin_version.files.filter().values_list('id', flat=True))
+
         # link origin_version and pas copy
         origin_version.preservation_dataset_version = pas_version
         origin_version.new_dataset_version_created = pas_version.identifiers_dict
@@ -2147,7 +2236,7 @@ def refresh_directory_content(self, dir_identifier):

         if not added_file_ids:
             _logger.info('no change in directory content')
-            return False
+            return (False, 0)

         _logger.info(f'refreshing directory adds {len(added_file_ids)} files to dataset')
         self.date_modified = get_tz_aware_now_without_micros()
@@ -2172,7 +2261,7 @@ def refresh_directory_content(self, dir_identifier):
         super().save()
         self.add_post_request_callable(RabbitMQPublishRecord(self, 'update'))

-        return True if self.cumulative_state != self.CUMULATIVE_STATE_YES else False
+        return (self.cumulative_state != self.CUMULATIVE_STATE_YES, len(added_file_ids))

     def _find_new_files_added_to_dir(self, dir):
         sql_insert_newly_frozen_files_by_dir_path = '''
@@ -2309,10 +2398,15 @@ class REMSUpdate():
     Handles managing REMS resources when creating, updating and deleting datasets.
     """

-    def __init__(self, cr, action):
-        assert action in ('create', 'update', 'delete'), 'invalid value for action'
+    def __init__(self, cr, action, user_info=None, reason=''):
+        # user_info is used on creation, reason on close
+        from metax_api.services.rems_service import REMSService
+        assert action in ('close', 'create', 'update'), 'invalid value for action'
         self.cr = cr
+        self.user_info = user_info or {}
+        self.reason = reason
         self.action = action
+        self.rems = REMSService()

     def __call__(self):
         """
@@ -2324,12 +2418,15 @@ def __call__(self):
         )

         try:
-            # todo do_stuff()
-            pass
-        except:
-            _logger.exception('REMS interaction failed')
+            if self.action == 'create':
+                self.rems.create_rems_entity(self.cr, self.user_info)
+            if self.action == 'close':
+                self.rems.close_rems_entity(self.cr, self.reason)
+
+        except Exception as e:
+            _logger.exception(e)
             raise Http503({ 'detail': [
                 'failed to publish updates to rems. request is aborted.'
             ]})
diff --git a/src/metax_api/services/rabbitmq_service.py b/src/metax_api/services/rabbitmq_service.py
index 118e047c..35f0d04b 100644
--- a/src/metax_api/services/rabbitmq_service.py
+++ b/src/metax_api/services/rabbitmq_service.py
@@ -12,6 +12,7 @@
 import pika
 from django.conf import settings as django_settings
+from django.core.serializers.json import DjangoJSONEncoder

 from metax_api.utils.utils import executing_test_case, executing_travis

@@ -84,7 +85,9 @@ def publish(self, body, routing_key='', exchange=None, persistent=True):
         try:
             for message in messages:
                 if isinstance(message, dict):
-                    message = json_dumps(message)
+                    message = json_dumps(
+                        message,
+                        cls=DjangoJSONEncoder)
                 self._channel.basic_publish(body=message, routing_key=routing_key, exchange=exchange, **additional_args)
         except Exception as e:
             _logger.error(e)
diff --git a/src/metax_api/services/rems_service.py b/src/metax_api/services/rems_service.py
new file mode 100644
index 00000000..60213108
--- /dev/null
+++ b/src/metax_api/services/rems_service.py
@@ -0,0 +1,260 @@
+# This file is part of the Metax API service
+#
+# Copyright 2019 Ministry of Education and Culture, Finland
+#
+# :author: CSC - IT Center for Science Ltd., Espoo Finland
+# :license: MIT
+import logging
+import requests
+
+from django.conf import settings as django_settings
+
+
+_logger = logging.getLogger(__name__)
+
+HANDLER_CLOSEABLE_APPLICATIONS = [
+    'application.state/approved',
+    'application.state/returned',
+    'application.state/submitted'
+]
+
+APPLICANT_CLOSEABLE_APPLICATIONS = [
+    'application.state/draft'
+]
+
+class REMSException(Exception):
+    pass
+
+class REMSService():
+
+    def __init__(self):
+        if not hasattr(django_settings, 'REMS'):
+            raise Exception('Missing configuration from settings.py: REMS')
+
+        settings = django_settings.REMS
+
+        # only reporter_user is privileged to get all applications from REMS
+        self.api_key = settings['API_KEY']
+        self.base_url = settings['BASE_URL']
+        self.etsin_url = settings['ETSIN_URL_TEMPLATE']
+        self.metax_user = settings['METAX_USER']
+        self.reporter_user = settings['REPORTER_USER']
+        self.auto_approver = settings['AUTO_APPROVER']
+        self.form_id = settings['FORM_ID']
+
+        self.headers = {
+            "x-rems-api-key": self.api_key,
+            "x-rems-user-id": self.metax_user,
+            "Content-Type": "application/json"
+        }
+
+        try:
+            response = requests.get(f"{self.base_url}/health", headers=self.headers)
+        except Exception as e:
+            raise Exception(f'Cannot connect to REMS while checking its health. Error: {e}')
+
+        if response.json()['healthy'] is not True:
+            raise REMSException('REMS is not healthy, request is aborted')
+
+    def create_rems_entity(self, cr, user_info):
+        """
+        Creates all the entities necessary to create a catalogue item for the dataset in REMS
+        """
+        self.cr = cr
+
+        # create user. Successful even if userid is already taken
+        self._post_rems('user', user_info)
+
+        wf_id = self._create_workflow(user_info['userid'])
+        license_id = self._create_license()
+        res_id = self._create_resource(license_id)
+
+        self._create_catalogue_item(res_id, wf_id)
+
+    def close_rems_entity(self, cr, reason):
+        """
+        Closes all applications, and archives and disables all related entities
+        """
+        pref_id = cr.research_dataset['preferred_identifier']
+        title = cr.research_dataset['title'].get('en') or cr.research_dataset['title'].get('fi')
+
+        rems_ci = self._get_rems(
+            'catalogue-item',
+            f'resource={pref_id}&archived=true&disabled=true'
+        )
+
+        if len(rems_ci) < 1:
+            # this should not happen but do not block the metax dataset removal
+            _logger.error(f'Could not find catalogue-item for {cr.identifier} in REMS.')
+            return
+
+        self._close_applications(title, pref_id, reason)
+
+        self._close_entity('catalogue-item', rems_ci[0]['id'])
+        self._close_entity('workflow', rems_ci[0]['wfid'])
+        self._close_entity('resource', rems_ci[0]['resource-id'])
+
+    def _close_applications(self, title, pref_id, reason):
+        """
+        Get all applications that are related to the dataset and close them.
+        Application state determines which user (applicant or handler) can close the application.
+        Furthermore, closed, rejected or revoked applications cannot be closed.
+        """
+        # REMS only allows reporter_user to get all applications
+        self.headers['x-rems-user-id'] = self.reporter_user
+
+        applications = self._get_rems('application', f'query=resource:\"{pref_id}\"')
+
+        for application in applications:
+            if application['application/state'] in HANDLER_CLOSEABLE_APPLICATIONS:
+                closing_user = application['application/workflow']['workflow.dynamic/handlers'][0]['userid']
+            elif application['application/state'] in APPLICANT_CLOSEABLE_APPLICATIONS:
+                closing_user = application['application/applicant']['userid']
+            else:
+                continue
+
+            self.headers['x-rems-user-id'] = closing_user
+
+            body = {"application-id": application['application/id'], "comment": f"Closed due to dataset {reason}"}
+
+            self._post_rems('application', body, 'close')
+
+        self.headers['x-rems-user-id'] = self.metax_user
+
+    def _close_entity(self, entity, id):
+        body_ar = {'id': id, 'archived': True}
+        body_en = {'id': id, 'enabled': False}
+
+        self._put_rems(entity, 'archived', body_ar)
+        self._put_rems(entity, 'enabled', body_en)
+
+    def _create_workflow(self, user_id):
+        body = {
+            "organization": self.cr.metadata_owner_org,
+            "title": self.cr.research_dataset['preferred_identifier'],
+            "type": 'workflow/default',
+            "handlers": [user_id]
+        }
+
+        response = self._post_rems('workflow', body)
+
+        return response['id']
+
+    def _create_license(self):
+        """
+        Checks if the license is already found in REMS before creating a new one
+        """
+        license = self.cr.research_dataset['access_rights']['license'][0]
+        license_url = license.get('identifier') or license['license']
+
+        # no search parameter provided for license so have to check by hand
+        rems_licenses = self._get_rems('license', 'disabled=true&archived=true')
+        for lic in rems_licenses:
+            if any(v['textcontent'] == license_url for v in lic['localizations'].values()):
+                return lic['id']
+
+        body = {
+            "licensetype": 'link',
+            "localizations": {}
+        }
+
+        for lang in list(license['title'].keys()):
+            body['localizations'].update({
+                lang: {
+                    "title": license['title'][lang],
+                    "textcontent": license_url
+                }
+            })
+
+        response = self._post_rems('license', body)
+
+        return response['id']
+
+    def _create_resource(self, license_id):
+        body = {
+            "resid": self.cr.research_dataset['preferred_identifier'],
+            "organization": self.cr.metadata_owner_org,
+            "licenses": [license_id]
+        }
+
+        response = self._post_rems('resource', body)
+
+        return response['id']
+
+    def _create_catalogue_item(self, res_id, wf_id):
+        rd_title = self.cr.research_dataset['title']
+
+        body = {
+            "form": self.form_id,
+            "resid": res_id,
+            "wfid": wf_id,
+            "localizations": {},
+            "enabled": True
+        }
+
+        for lang in list(rd_title.keys()):
+            body['localizations'].update({
+                lang: {
+                    "title": rd_title[lang],
+                    "infourl": self.etsin_url % self.cr.identifier
+                }
+            })
+
+        response = self._post_rems('catalogue-item', body)
+
+        return response['id']
+
+    def _post_rems(self, entity, body, action='create'):
+        """
+        Send post to REMS. Action is needed as a parameter because applications are closed with post.
+        """
+        try:
+            response = requests.post(f"{self.base_url}/{entity}s/{action}", json=body, headers=self.headers)
+
+        except Exception as e:
+            raise Exception(f'Connection to REMS failed while creating {entity}. Error: {e}')
+
+        if response.status_code != 200:
+            raise REMSException(f'REMS returned bad status while creating {entity}. Error: {response.text}')
+
+        # operation status is in body
+        resp = response.json()
+
+        if not resp['success']:
+            raise REMSException(f'Could not {action} {entity} to REMS. Error: {resp["errors"]}')
+
+        return resp
+
+    def _put_rems(self, entity, action, body):
+        """
+        Edit a REMS entity. Possible actions: [edit, archived, enabled].
+        """
+        try:
+            response = requests.put(f"{self.base_url}/{entity}s/{action}", json=body, headers=self.headers)
+
+        except Exception as e:
+            raise Exception(f'Connection to REMS failed while updating {entity}. Error: {e}')
+
+        if response.status_code != 200:
+            raise REMSException(f'REMS returned bad status while updating {entity}. Error: {response.text}')
+
+        # operation status is in body
+        resp = response.json()
+
+        if not resp['success']:
+            raise REMSException(f'Could not update {entity} to REMS. Error: {resp["errors"]}')
+
+        return resp
+
+    def _get_rems(self, entity, params=''):
+        try:
+            response = requests.get(f"{self.base_url}/{entity}s?{params}", headers=self.headers)
+
+        except Exception as e:
+            raise Exception(f'Connection to REMS failed while getting {entity}. Error: {e}')
+
+        if response.status_code != 200:
+            raise REMSException(f'REMS returned bad status while getting {entity}. Error: {response.text}')
+
+        # operation should be successful if status code 200
+        return response.json()
diff --git a/src/metax_api/settings.py b/src/metax_api/settings.py
index 508746ec..beca2a99 100755
--- a/src/metax_api/settings.py
+++ b/src/metax_api/settings.py
@@ -502,7 +502,7 @@
 if executing_in_travis:
     ELASTICSEARCH = {
-        'HOSTS': ['metax-test.csc.fi/es'],
+        'HOSTS': ['metax.fairdata.fi/es'],
         'USE_SSL': True,
         'ALWAYS_RELOAD_REFERENCE_DATA_ON_RESTART': True,
     }
@@ -579,3 +579,26 @@
     'PREFIX': app_config_dict['DATACITE']['PREFIX'],
     'URL': app_config_dict['DATACITE']['URL'],
 }
+
+if executing_in_travis:
+    REMS = {
+        'ENABLED': True,
+        'API_KEY': 'key',
+        'BASE_URL': 'https://rems.instance.fi',
+        'ETSIN_URL_TEMPLATE': 'https://etsin.something.fi/dataset/%s',
+        'METAX_USER': 'muser',
+        'REPORTER_USER': 'ruser',
+        'AUTO_APPROVER': 'aappr',
+        'FORM_ID': 0,
+    }
+else:
+    REMS = {
+        'ENABLED': app_config_dict.get('REMS', {}).get('ENABLED'),
+        'API_KEY': app_config_dict.get('REMS', {}).get('API_KEY'),
+        'BASE_URL': app_config_dict.get('REMS', {}).get('BASE_URL'),
+        'ETSIN_URL_TEMPLATE': app_config_dict.get('REMS', {}).get('ETSIN_URL_TEMPLATE'),
+        'METAX_USER': app_config_dict.get('REMS', {}).get('METAX_USER'),
+        'REPORTER_USER': app_config_dict.get('REMS', {}).get('REPORTER_USER'),
+        'AUTO_APPROVER': app_config_dict.get('REMS', {}).get('AUTO_APPROVER'),
+        'FORM_ID': int(app_config_dict.get('REMS', {}).get('FORM_ID') or 0),
+    }
diff --git a/src/metax_api/tests/api/rest/base/views/common/read.py b/src/metax_api/tests/api/rest/base/views/common/read.py
index 5dc94f0a..c5fbac70 100644
--- a/src/metax_api/tests/api/rest/base/views/common/read.py
+++ b/src/metax_api/tests/api/rest/base/views/common/read.py
@@ -71,6 +71,7 @@ def test_removed_parameter_gets_correct_amount_of_objects(self):

 class ApiReadPaginationTests(CatalogRecordApiReadCommon):
+
     """ pagination """

@@ -93,6 +94,28 @@ def test_disable_pagination(self):
         self.assertEqual('next' not in response.data, True)
         self.assertEqual('results' not in response.data, True)

+    def test_pagination_ordering(self):
+        limit = 5
+
+        for order in ('preservation_state', '-preservation_state'):
+
+            # vary offset from 0 to 20, in increments of 5
+            for offset in range(0, 20, 5):
+
+                response = self.client.get(f'/rest/datasets?limit={limit}&offset={offset}&ordering={order}')
+                self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+                from_api = [cr['preservation_state'] for cr in response.data['results']]
+
+                from_db = [
+                    r for r in CatalogRecord.objects
+                    .filter()
+                    .order_by(order)
+                    .values_list('preservation_state', flat=True)[offset:offset + limit]
+                ]
+
+                self.assertEqual(from_api, from_db)
+

 class ApiReadHTTPHeaderTests(CatalogRecordApiReadCommon):
     #
diff --git a/src/metax_api/tests/api/rest/base/views/datasets/write.py b/src/metax_api/tests/api/rest/base/views/datasets/write.py
index ab93bbdc..5ab9c78a 100644
--- a/src/metax_api/tests/api/rest/base/views/datasets/write.py
+++ b/src/metax_api/tests/api/rest/base/views/datasets/write.py
@@ -5,9 +5,12 @@
 # :author: CSC - IT Center for Science Ltd., Espoo Finland
 # :license: MIT

+from base64 import urlsafe_b64encode
 from copy import deepcopy
 from datetime import datetime, timedelta
+import json
 from time import sleep
+import unittest

 import responses
 from django.conf import settings as django_settings
@@ -151,6 +154,30 @@ def _get_new_full_test_cr_data(self, cr_from_test_data, dc_from_test_data):
         return cr_from_test_data


+class CatalogRecordDraftTests(CatalogRecordApiWriteCommon):
+    """
+    Tests related to draft dataset creation:
+    - when requesting data through API, field 'state' is returned
+    - the value of field 'state' can't be modified through API
+    """
+
+    def test_field_exists(self):
+        """Try fetching any dataset, field 'state' should be returned"""
+
+        cr = self.client.get('/rest/datasets/13').data
+        self.assertEqual('state' in cr, True)
+
+    def test_change_state_field_through_API(self):
+        """Fetch a dataset and change its state.
+        Value should remain 'published'."""
+
+        cr = self.client.get('/rest/datasets/1').data
+        cr['state'] = 'changed value'
+        response = self.client.put('/rest/datasets/1', cr, format="json")
+
+        self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+        self.assertEqual(response.data['state'], 'published')
+

 class CatalogRecordApiWriteCreateTests(CatalogRecordApiWriteCommon):
     #
     #
@@ -1147,6 +1174,49 @@ def test_dataset_files_can_not_be_changed_in_pas_catalog(self):
         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.data)
         self.assertEqual('Cannot change files in' in response.data['detail'], True)

+    def test_pas_dataset_files_equal_origin_dataset(self):
+        """
+        Ensure set of files in original and pas datasets match exactly, even if more files have
+        been frozen in between.
+        """
+        test_file = self._get_object_from_test_data('file', requested_index=0)
+
+        response = self.client.get(
+            '/rest/directories/files?project=%s&path=/' % test_file['project_identifier'], format="json")
+
+        dir_identifier = response.data['directories'][0]['identifier']
+
+        # create dataset where directory along with all of its files are included
+        cr_data = self.client.get('/rest/datasets/1', format="json").data
+        cr_data['research_dataset']['directories'] = [{
+            'identifier': dir_identifier,
+            'use_category': { 'identifier': 'documentation' }
+        }]
+
+        response = self.client.put('/rest/datasets/1', cr_data, format="json")
+        self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+        cr_id = response.data['next_dataset_version']['id']
+
+        # now freeze more files into same directory
+        test_file.update({
+            'file_name': '%s_new' % test_file['file_name'],
+            'file_path': '%s_new' % test_file['file_path'],
+            'identifier': '%s_new' % test_file['identifier'],
+        })
+        response = self.client.post('/rest/files', test_file, format="json")
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        # more files have been frozen in the directory, but pas dataset should not have the new frozen file,
+        # since it is not part of the origin dataset either.
+        self._create_pas_dataset_from_id(cr_id)
+
+        cr = CatalogRecord.objects.get(pk=cr_id)
+        cr_files = cr.files.filter().order_by('id').values_list('id', flat=True)
+        cr_pas_files = cr.preservation_dataset_version.files.filter().order_by('id').values_list('id', flat=True)
+
+        # note: trying to assert querysets will result in failure. must evaluate the querysets first by iterating them
+        self.assertEqual([f for f in cr_files], [f for f in cr_pas_files])
+
     def test_unfreezing_files_does_not_deprecate_pas_dataset(self):
         """
         Even if the origin dataset is deprecated as a result of unfreezing its files,
@@ -1941,6 +2011,24 @@ def test_dataset_version_lists_removed_records(self):
         self.assertEqual(response.data['dataset_version_set'][0].get('removed', None), True,
             response.data['dataset_version_set'])

+    def test_dataset_version_lists_date_removed(self):
+        # get catalog record
+        cr = self.client.get('/rest/datasets/1').data
+        # create version2
+        cr['research_dataset']['files'].pop(0)
+        response = self.client.put('/rest/datasets/1', cr, format="json")
+
+        # delete version2
+        version2 = response.data['next_dataset_version']
+        response = self.client.delete('/rest/datasets/%d' % version2['id'], format="json")
+
+        # check date_removed is listed and not None in deleted version
+        response = self.client.get('/rest/datasets/1', format="json")
+
+        self.assertTrue(response.data['dataset_version_set'][0].get('date_removed'))
+        self.assertTrue(response.data['dataset_version_set'][0].get('date_removed') is not None)
+        self.assertFalse(response.data['dataset_version_set'][1].get('date_removed'))
+
     def test_new_dataset_version_pref_id_type_stays_same_as_previous_dataset_version_pref_id_type(self):
         # Create ida data catalog
         dc = self._get_object_from_test_data('datacatalog', requested_index=0)
@@ -3809,3 +3897,528 @@ def test_user_projects_are_checked_when_writing_into_protected_data_catalog(self):
         # try editing again - should be ok
         response = self.client.put('/rest/datasets/%d' % cr_data['id'], cr_data, format="json")
         self.assertEqual(response.status_code, status.HTTP_200_OK, response.content)
+
+
+@unittest.skipIf(django_settings.REMS['ENABLED'] is not True, 'Only run if REMS is enabled')
+class CatalogRecordApiWriteREMS(CatalogRecordApiWriteCommon):
+
+    rf = RDM.get_reference_data(cache)
+    # get by code to prevent failures if list ordering changes
+    access_permit = [type for type in rf['reference_data']['access_type'] if type['code'] == 'permit'][0]
+    access_open = [type for type in rf['reference_data']['access_type'] if type['code'] == 'open'][0]
+
+    permit_rights = {
+        # license type does not matter
+        "license": [
+            {
+                "title": rf['reference_data']['license'][0]['label'],
+                "identifier": rf['reference_data']['license'][0]['uri']
+            }
+        ],
+        "access_type": {
+            "in_scheme": access_permit['scheme'],
+            "identifier": access_permit['uri'],
+            "pref_label": access_permit['label']
+        }
+    }
+
+    open_rights = {
+        "access_type": {
+            "in_scheme": access_open['scheme'],
+            "identifier": access_open['uri'],
+            "pref_label": access_open['label']
+        }
+    }
+
+    def setUp(self):
+        super().setUp()
+        # Create ida data catalog
+        dc = self._get_object_from_test_data('datacatalog', requested_index=0)
+        dc_id = IDA_CATALOG
+        dc['catalog_json']['identifier'] = dc_id
+        self.client.post('/rest/datacatalogs', dc, format="json")
+
+        # token for end user access
+        self.token = get_test_oidc_token(new_proxy=True)
+
+        # mock successful rems access for creation, add fails later if needed.
+        # Not using regex to allow individual access failures
+        for entity in ['user', 'workflow', 'license', 'resource', 'catalogue-item']:
+            self._mock_rems_write_access_succeeds('POST', entity, 'create')
+
+        self._mock_rems_read_access_succeeds('license')
+
+        # mock successful rems access for deletion. Add fails later
+        for entity in ['catalogue-item', 'workflow', 'resource']:
+            self._mock_rems_write_access_succeeds(method='PUT', entity=entity, action='archived')
+            self._mock_rems_write_access_succeeds(method='PUT', entity=entity, action='enabled')
+
+        self._mock_rems_read_access_succeeds('catalogue-item')
+        self._mock_rems_read_access_succeeds('application')
+        self._mock_rems_write_access_succeeds(method='POST', entity='application', action='close')
+
+        responses.add(
+            responses.GET,
+            f"{django_settings.REMS['BASE_URL']}/health",
+            json={'healthy': True},
+            status=200
+        )
+
+    def _get_access_granter(self, malformed=False):
+        """
+        Returns encoded user information
+        """
+        access_granter = {
+            "userid": "testcaseuser" if not malformed else 1234,
+            "name": "Test User",
+            "email": "testcase@user.com"
+        }
+
+        ag_bytes = json.dumps(access_granter).encode('utf-8')
+
+        return urlsafe_b64encode(ag_bytes).decode('utf-8')
+
+    def _mock_rems_write_access_succeeds(self, method, entity, action):
+        """
+        method: HTTP method to be mocked [PUT, POST]
+        entity: REMS entity [application, catalogue-item, license, resource, user, workflow]
+        action: Action taken to entity [archived, close, create, edit, enabled]
+        """
+        req_type = responses.POST if method == 'POST' else responses.PUT
+
+        body = {"success": True}
+
+        if method == 'POST' and action != 'close':
+            # action condition needed because applications are closed with POST method
+            body['id'] = 6
+
+        responses.add(
+            req_type,
+            f"{django_settings.REMS['BASE_URL']}/{entity}s/{action}",
+            json=body,
+            status=200
+        )
+
+    def _mock_rems_read_access_succeeds(self, entity):
+        if entity == 'license':
+            resp = [
+                {
+                    "id": 7,
+                    "licensetype": "link",
+                    "enabled": True,
+                    "archived": False,
+                    "localizations": {
+                        "fi": {
+                            "title": self.rf['reference_data']['license'][0]['label']['fi'],
+                            "textcontent": self.rf['reference_data']['license'][0]['uri']
+                        },
+                        "und": {
+                            "title": self.rf['reference_data']['license'][0]['label']['und'],
+                            "textcontent": self.rf['reference_data']['license'][0]['uri']
+                        }
+                    }
+                },
+                {
+                    "id": 8,
+                    "licensetype": "link",
+                    "enabled": True,
+                    "archived": False,
+                    "localizations": {
+                        "en": {
+                            "title": self.rf['reference_data']['license'][1]['label']['en'],
+                            "textcontent": self.rf['reference_data']['license'][1]['uri']
+                        }
+                    }
+                }
+            ]
+
+        elif entity == 'catalogue-item':
+            resp = [
+                {
+                    "archived": False,
+                    "localizations": {
+                        "en": {
+                            "id": 18,
+                            "langcode": "en",
+                            "title": "Removal test",
+                            "infourl": "https://url.to.etsin.fi"
+                        }
+                    },
+                    "resource-id": 19,
+                    "start": "2020-01-02T14:06:13.496Z",
+                    "wfid": 15,
+                    "resid": "preferred identifier",
+                    "formid": 3,
+                    "id": 18,
+                    "expired": False,
+                    "end": None,
+                    "enabled": True
+                }
+            ]
+
+        elif entity == 'application':
+            # only mock relevant data
+            resp = [
+                {
+                    'application/workflow': {
+                        'workflow.dynamic/handlers': [
+                            {
+                                'userid': 'somehandler'
+                            }
+                        ]
+                    },
+                    "application/id": 3,
+                    'application/applicant': {
+                        'userid': 'someapplicant'
+                    },
+                    "application/resources": [
+                        {
+                            "catalogue-item/title": {
+                                "en": "Removal test"
+                            },
+                            "resource/ext-id": "some:pref:id",
+                            "catalogue-item/id": 5
+                        }
+                    ],
+                    "application/state": 'application.state/draft'
+                },
+                {
+                    'application/workflow': {
+                        'workflow.dynamic/handlers': [
+                            {
+                                'userid': 'someid'
+                            }
+                        ]
+                    },
+                    "application/id": 2,
+                    'application/applicant': {
+                        'userid': 'someotherapplicant'
+                    },
+                    "application/resources": [
+                        {
+                            "catalogue-item/title": {
+                                "en": "Removal test"
+                            },
+                            "resource/ext-id": "some:pref:id",
+                            "catalogue-item/id": 5
+                        }
+                    ],
+                    "application/state": 'application.state/approved'
+                },
+                {
+                    'application/workflow': {
+                        'workflow.dynamic/handlers': [
+                            {
+                                'userid': 'remsuid'
+                            }
+                        ]
+                    },
+                    "application/id": 1,
+                    'application/applicant': {
+                        'userid': 'someapplicant'
+                    },
+                    "application/resources": [
+                        {
+                            "catalogue-item/title": {
+                                "en": "Removal test"
+                            },
+                            "resource/ext-id": 'Same:title:with:different:catalogue:item',
+                            "catalogue-item/id": 18
+                        }
+                    ],
+                    "application/state": 'application.state/draft'
+                }
+            ]
+
+        responses.add(
+            responses.GET,
+            f"{django_settings.REMS['BASE_URL']}/{entity}s",
+            json=resp,
+            status=200
+        )
+
+    def _mock_rems_access_return_403(self, method, entity, action=''):
+        """
+        Works also for GET method since failure responses from REMS are identical for write and read operations
+        """
+        req_type = responses.POST if method == 'POST' else responses.PUT if method == 'PUT' else responses.GET
+
+        responses.replace(
+            req_type,
+            f"{django_settings.REMS['BASE_URL']}/{entity}s/{action}",
+            status=403  # anything else than 200 is a fail
+        )
+
+    def _mock_rems_access_return_error(self, method, entity, action=''):
+        """
+        Operation status is defined in the body, so a 200 response can also be a failure.
+        """
+        req_type = responses.POST if method == 'POST' else responses.PUT if method == 'PUT' else responses.GET
+
+        errors = [
+            {
+                "type": "some kind of identifier of this error",
+                "somedetail": "entity identifier the error is concerning"
+            }
+        ]
+
+        responses.replace(
+            req_type,
+            f"{django_settings.REMS['BASE_URL']}/{entity}s/{action}",
+            json={"success": False, "errors": errors},
+            status=200
+        )
+
+    def _mock_rems_access_crashes(self, method, entity, action=''):
+        """
+        Crash happens for example if there is a network error. Can be used for GET also
+        """
+        req_type = responses.POST if method == 'POST' else responses.PUT if method == 'PUT' else responses.GET
+
+        responses.replace(
+            req_type,
+            f"{django_settings.REMS['BASE_URL']}/{entity}s/{action}",
+            body=Exception('REMS_service should catch this one also')
+        )
+
+    def _create_new_rems_dataset(self):
+        """
+        Modifies catalog record to be REMS managed and posts it to Metax
+        """
+        self.cr_test_data['research_dataset']['access_rights'] = self.permit_rights
+        self.cr_test_data['data_catalog'] = IDA_CATALOG
+
+        granter = self._get_access_granter()
+
+        response = self.client.post(f'/rest/datasets?access_granter={granter}', self.cr_test_data, format="json")
+
+        return response
+
+    @responses.activate
+    def test_creating_permit_dataset_creates_catalogue_item_service_succeeds(self):
+        """
+        Tests that catalogue item in REMS is created correctly on permit dataset creation
+        """
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+    @responses.activate
+    def test_creating_permit_dataset_creates_catalogue_item_service_fails_1(self):
+        """
+        Test unsuccessful rems access
+        """
+        self._mock_rems_access_return_403('POST', 'workflow', 'create')
+
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE, response.data)
+        self.assertTrue('failed to publish updates' in response.data['detail'][0], response.data)
+
+    @responses.activate
+    def test_creating_permit_dataset_creates_catalogue_item_service_fails_2(self):
+        """
+        Test unsuccessful rems access
+        """
+        self._mock_rems_access_return_error('POST', 'catalogue-item', 'create')
+
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE, response.data)
+
+    @responses.activate
+    def test_creating_permit_dataset_creates_catalogue_item_service_fails_3(self):
+        """
+        Test unsuccessful rems access
+        """
+        self._mock_rems_access_crashes('POST', 'resource', 'create')
+
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE, response.data)
+        self.assertTrue('failed to publish updates' in response.data['detail'][0], response.data)
+
+    @responses.activate
+    def test_changing_dataset_to_permit_creates_new_catalogue_item_succeeds(self):
+        """
+        Test that changing access type to permit invokes the REMS update
+        """
+
+        # create dataset without rems managed access
+        self.cr_test_data['research_dataset']['access_rights'] = self.open_rights
+        self.cr_test_data['data_catalog'] = IDA_CATALOG
+
+        response = self.client.post('/rest/datasets', self.cr_test_data, format="json")
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        # change to rems managed
+        cr = response.data
+        cr['research_dataset']['access_rights'] = self.permit_rights
+
+        granter = self._get_access_granter()
+
+        response = self.client.put(f'/rest/datasets/{cr["id"]}?access_granter={granter}', cr, format="json")
+        self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+
+    @responses.activate
+    def test_changing_dataset_to_permit_creates_new_catalogue_item_fails(self):
+        """
+        Test error handling on metax update operation
+        """
+        self._mock_rems_access_return_error('POST', 'user', 'create')
+
+        # create dataset without rems managed access
+        self.cr_test_data['research_dataset']['access_rights'] = self.open_rights
+        self.cr_test_data['data_catalog'] = IDA_CATALOG
+
+        response = self.client.post('/rest/datasets', self.cr_test_data, format="json")
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        # change to rems managed
+        cr = response.data
+        cr['research_dataset']['access_rights'] = self.permit_rights
+
+        granter = self._get_access_granter()
+
+        response = self.client.put(f'/rest/datasets/{cr["id"]}?access_granter={granter}', cr, format="json")
+        self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE, response.data)
+
+    @responses.activate
+    def test_changing_access_type_to_other_closes_rems_entities_succeeds(self):
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        cr = response.data
+        cr['research_dataset']['access_rights'] = self.open_rights
+
+        response = self.client.put(f'/rest/datasets/{cr["id"]}', cr, format="json")
+        self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+
+    @responses.activate
+    def test_changing_access_type_to_other_closes_rems_entities_fails(self):
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        self._mock_rems_access_return_error('POST', 'application', 'close')
+
+        cr = response.data
+        cr['research_dataset']['access_rights'] = self.open_rights
+
+        response = self.client.put(f'/rest/datasets/{cr["id"]}', cr, format="json")
+        self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE, response.data)
+
+    @responses.activate
+    def test_creating_permit_dataset_creates_catalogue_item_end_user(self):
+        """
+        Tests that catalogue item in REMS is created correctly on permit dataset creation.
+        User information is fetched from the token.
+        """
+        self._set_http_authorization('owner')
+
+        # modify catalog record
+        self.cr_test_data['user_created'] = self.token['CSCUserName']
+        self.cr_test_data['metadata_provider_user'] = self.token['CSCUserName']
+        self.cr_test_data['metadata_provider_org'] = self.token['schacHomeOrganization']
+        self.cr_test_data['metadata_owner_org'] = self.token['schacHomeOrganization']
+        self.cr_test_data['research_dataset']['access_rights'] = self.permit_rights
+        self.cr_test_data['data_catalog'] = IDA_CATALOG
+
+        # end user doesn't have permissions to the files and they are also not needed in this test
+        del self.cr_test_data['research_dataset']['files']
+
+        response = self.client.post('/rest/datasets', self.cr_test_data, format="json")
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+    @responses.activate
+    def test_deleting_permit_dataset_removes_catalogue_item_succeeds(self):
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        # delete dataset
+        response = self.client.delete(f'/rest/datasets/{response.data["id"]}')
+        self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT, response.data)
+
+    @responses.activate
+    def test_deleting_permit_dataset_removes_catalogue_item_fails(self):
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        # delete dataset
+        self._mock_rems_access_return_error('PUT', 'catalogue-item', 'enabled')
+
+        response = self.client.delete(f'/rest/datasets/{response.data["id"]}')
+        self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE, response.data)
+
+    @responses.activate
+    def test_deprecating_permit_dataset_removes_catalogue_item_succeeds(self):
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        # deprecate dataset
+        response = self.client.delete(f"/rest/files/{response.data['research_dataset']['files'][0]['identifier']}")
+        self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+
+    @responses.activate
+    def test_deprecating_permit_dataset_removes_catalogue_item_fails(self):
+        response = self._create_new_rems_dataset()
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        # deprecate dataset
+        self._mock_rems_access_crashes('PUT', 'workflow', 'archived')
+
+        response = self.client.delete(f"/rest/files/{response.data['research_dataset']['files'][0]['identifier']}")
+        self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE, response.data)
+        self.assertTrue('failed to publish' in response.data['detail'][0], response.data)
+
+    def test_missing_access_granter_parameter(self):
+        """
+        The access_granter query parameter is required when the user is a service
+        """
+
+        # test on create
+        self.cr_test_data['research_dataset']['access_rights'] = self.permit_rights
+        self.cr_test_data['data_catalog'] = IDA_CATALOG
+
+        response = self.client.post('/rest/datasets', self.cr_test_data, format="json")
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.data)
+        self.assertTrue('access_granter' in response.data['detail'], response.data)
+
+        # test on update
+        self.cr_test_data['research_dataset']['access_rights'] = self.open_rights
+        response = self.client.post('/rest/datasets', self.cr_test_data, format="json")
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)
+
+        cr = response.data
+        cr['research_dataset']['access_rights'] = self.permit_rights
+        response = self.client.put(f'/rest/datasets/{cr["id"]}', cr, format="json")
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.data)
+        self.assertTrue('access_granter' in response.data['detail'], response.data)
+
+    def test_bad_access_granter_parameter(self):
+        """
+        Access_granter values must be strings
+        """
+        self.cr_test_data['research_dataset']['access_rights'] = self.permit_rights
+        self.cr_test_data['data_catalog'] = IDA_CATALOG
+
+        granter = self._get_access_granter(malformed=True)
+
+        response = self.client.post(
+            f'/rest/datasets?access_granter={granter}',
+            self.cr_test_data,
+            format="json"
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.data)
+        self.assertTrue('must be string' in response.data['detail'], response.data)
+
+    def test_missing_license_in_dataset(self):
+        """
+        License is required when dataset is REMS managed
+        """
+        self.cr_test_data['research_dataset']['access_rights'] = deepcopy(self.permit_rights)
+        del self.cr_test_data['research_dataset']['access_rights']['license']
+        self.cr_test_data['data_catalog'] = IDA_CATALOG
+
+        response = self.client.post(
+            f'/rest/datasets?access_granter={self._get_access_granter()}',
+            self.cr_test_data,
+            format="json"
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.data)
+        self.assertTrue('must define license' in response.data['detail'], response.data)
diff --git a/src/metax_api/tests/api/rpc/base/views/dataset_rpc.py b/src/metax_api/tests/api/rpc/base/views/dataset_rpc.py
index 5af59d7c..6c839710 100644
--- a/src/metax_api/tests/api/rpc/base/views/dataset_rpc.py
+++ b/src/metax_api/tests/api/rpc/base/views/dataset_rpc.py
@@ -248,6 +248,7 @@ def test_refresh_adds_new_files(self):
         self._freeze_files_to_root()
         response = self.client.post(self.url % (cr_id, dir_id), format="json")
         self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+        self.assertEqual(response.data['number_of_files_added'], 2)

         new_version = CatalogRecord.objects.get(id=response.data['new_version_created']['id'])
         file_size_after = new_version.research_dataset['total_files_byte_size']
@@ -258,6 +259,7 @@ def test_refresh_adds_new_files(self):
         self._freeze_new_files()
         response = self.client.post(self.url % (new_version.identifier, dir_id), format="json")
         self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+        self.assertEqual(response.data['number_of_files_added'], 2)

         new_version = CatalogRecord.objects.get(id=response.data['new_version_created']['id'])
         self.assertEqual(new_version.files.count(), new_version.previous_dataset_version.files.count() + 2)
@@ -276,6 +278,7 @@ def test_adding_parent_dir_allows_refreshes_to_child_dirs(self):

         response = self.client.post(self.url % (cr_id, frozen_dir.identifier), format="json")
         self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+        self.assertEqual(response.data['number_of_files_added'], 2)

         new_version = CatalogRecord.objects.get(id=response.data['new_version_created']['id'])
         self.assertEqual(new_version.files.count(), new_version.previous_dataset_version.files.count() + 2)
@@ -291,6 +294,7 @@ def test_refresh_adds_new_files_multiple_locations(self):
         self._freeze_files_to_root()
         response = self.client.post(self.url % (cr_id, dir_id), format="json")
         self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+        self.assertEqual(response.data['number_of_files_added'], 4)

         new_version = CatalogRecord.objects.get(id=response.data['new_version_created']['id'])
         self.assertEqual(new_version.files.count(), new_version.previous_dataset_version.files.count() + 4)
@@ -308,7 +312,8 @@ def test_refresh_adds_no_new_files_from_upper_dirs(self):
         self._freeze_files_to_root()
         response = self.client.post(self.url % (cr_id, dir_id), format="json")
-        self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT, response.data)
+        self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+        self.assertEqual(response.data['number_of_files_added'], 0)

         cr_after = CatalogRecord.objects.get(identifier=cr_id)
         self.assertEqual(cr_after.next_dataset_version, None, 'should not have new dataset version')
@@ -330,7 +335,8 @@ def test_refresh_with_cumulative_state_yes(self):
         self._freeze_new_files()
         self._freeze_files_to_root()
         response = self.client.post(self.url % (cr_id, dir_id), format="json")
-        self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT, response.data)
+        self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
+        self.assertEqual(response.data['number_of_files_added'], 4)

         cr_after = CatalogRecord.objects.get(identifier=cr_id)
         file_size_after = cr_after.research_dataset['total_files_byte_size']
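Note (catalog_record_serializer.py): the new 'state' field is made write-protected not by declaring it read_only in the serializer, but by popping it from initial_data before validation, so any client-supplied value is silently discarded and the model default 'published' wins. A minimal sketch of the pattern, assuming DRF's ModelSerializer (simplified, not the full Metax serializer):

    from rest_framework.serializers import ModelSerializer

    class CatalogRecordSerializer(ModelSerializer):

        def is_valid(self, raise_exception=False):
            # server-controlled fields are stripped from the incoming payload
            # before validation ever sees them, so clients cannot set them
            self.initial_data.pop('state', None)
            return super().is_valid(raise_exception=raise_exception)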
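Note (common_view.py): the pagination fix depends on reassigning the result of QuerySet.order_by(), because Django querysets are immutable and order_by() returns a new queryset instead of mutating the receiver. A minimal sketch of the pitfall (illustrative helper, not part of the diff):

    def apply_ordering(queryset, ordering_param):
        # ordering_param mirrors DRF's ?ordering=field1,-field2 query parameter
        ordering = ordering_param.split(',')
        # wrong: queryset.order_by(*ordering)  -- return value discarded, no effect
        queryset = queryset.order_by(*ordering)  # correct: keep the ordered queryset
        return queryset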
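Note (dataset_rpc.py): refresh_directory_content() now always answers 200 OK with number_of_files_added, and includes new_version_created only when a non-cumulative dataset received a new version; the old empty 204 response is gone. A hedged client sketch; the host is a placeholder and the RPC path is assumed from the route name:

    import requests

    resp = requests.post(
        'https://metax.example.fi/rpc/datasets/refresh_directory_content',  # placeholder host, assumed path
        params={'cr_identifier': 'some-cr-id', 'dir_identifier': 'some-dir-id'},
    )
    resp.raise_for_status()
    data = resp.json()

    print('files added:', data['number_of_files_added'])
    if 'new_version_created' in data:
        # present only when the dataset is not cumulative, so a new version was created
        print('new version:', data['new_version_created'])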
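Note (catalog_record.py): for service users, the access_granter query parameter is a urlsafe-base64-encoded JSON object with userid, name and email. _get_user_info_for_rems() appends '===' before decoding; this works because the base64 decoder tolerates surplus padding, so the parameter may be sent with or without its own padding characters. A round-trip sketch with illustrative values:

    import json
    from base64 import urlsafe_b64decode, urlsafe_b64encode

    # encode, as the test helper _get_access_granter() does
    user_info = {'userid': 'granter1', 'name': 'Grant Er', 'email': 'granter@example.com'}
    param = urlsafe_b64encode(json.dumps(user_info).encode('utf-8')).decode('utf-8')

    # decode, as _get_user_info_for_rems() does: over-padding with '===' is ignored
    decoded = json.loads(urlsafe_b64decode(f'{param}===').decode('utf-8'))
    assert decoded == user_info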
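Note (catalog_record.py, _create_pas_version): the clear()/add() pair resets the m2m relation so that the PAS copy ends up with exactly the origin dataset's files. Django's RelatedManager.set() reaches the same end state in a single call; a possible alternative under the same model names, not what the diff uses:

    # equivalent end state to the clear()/add() pair in _create_pas_version()
    pas_version.files.set(origin_version.files.all())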
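Note (rabbitmq_service.py): the encoder switch matters because published messages now carry values such as date_removed: the stdlib encoder raises TypeError on datetime objects, while DjangoJSONEncoder serializes datetimes (ISO 8601, with a trailing Z for UTC), dates, Decimals and UUIDs. A small demonstration:

    import json
    from datetime import datetime, timezone

    from django.core.serializers.json import DjangoJSONEncoder

    message = {'identifier': 'abc', 'date_removed': datetime(2020, 2, 14, tzinfo=timezone.utc)}

    # json.dumps(message)  # TypeError: Object of type datetime is not JSON serializable
    print(json.dumps(message, cls=DjangoJSONEncoder))
    # {"identifier": "abc", "date_removed": "2020-02-14T00:00:00Z"}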
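Note (rems_service.py): every REMS call follows a single convention: the URL is {BASE_URL}/{entity}s/{action}, the acting user is set via the x-rems-user-id header, and a 200 status alone is not success, since the response body must also contain success: true. A condensed sketch of that contract; the URL and key are placeholders for the settings.REMS values:

    import requests

    BASE_URL = 'https://rems.example.fi/api'  # placeholder for settings.REMS['BASE_URL']
    HEADERS = {
        'x-rems-api-key': 'key',    # settings.REMS['API_KEY']
        'x-rems-user-id': 'metax',  # acting user; _close_applications() swaps this per request
        'Content-Type': 'application/json',
    }

    def post_rems(entity, body, action='create'):
        # mirrors REMSService._post_rems(): POST {base}/{entity}s/{action}, then
        # inspect the body, since REMS reports failures inside a 200 response
        response = requests.post(f'{BASE_URL}/{entity}s/{action}', json=body, headers=HEADERS)
        if response.status_code != 200:
            raise RuntimeError(f'REMS returned bad status: {response.text}')
        resp = response.json()
        if not resp['success']:
            raise RuntimeError(f'REMS refused to {action} {entity}: {resp["errors"]}')
        return resp

    # creation order used by create_rems_entity():
    # user -> workflow -> license -> resource -> catalogue-item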