From e6d2687389abe8082d2fe9814c8b3eb1a4bd800e Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Mon, 6 Apr 2020 15:24:05 +0300 Subject: [PATCH 01/10] CSCFAIRMETA-179: [ADD] directory paginator class to serve directory view pagination --- .../api/rest/base/views/directory_view.py | 13 ++ src/metax_api/services/file_service.py | 8 +- src/metax_api/services/pagination.py | 57 ++++++ .../api/rest/base/views/directories/read.py | 170 ++++++++++++++++++ 4 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 src/metax_api/services/pagination.py diff --git a/src/metax_api/api/rest/base/views/directory_view.py b/src/metax_api/api/rest/base/views/directory_view.py index 10c1888b..92590d61 100644 --- a/src/metax_api/api/rest/base/views/directory_view.py +++ b/src/metax_api/api/rest/base/views/directory_view.py @@ -11,6 +11,8 @@ from rest_framework.response import Response from metax_api.api.rest.base.serializers import DirectorySerializer +from metax_api.services.pagination import DirectoryPagination +from rest_framework.pagination import LimitOffsetPagination from metax_api.exceptions import Http400, Http403, Http501 from metax_api.models import Directory from metax_api.services import CommonService, FileService @@ -20,6 +22,7 @@ class DirectoryViewSet(CommonViewSet): serializer_class = DirectorySerializer + pagination_class = DirectoryPagination object = Directory select_related = ['parent_directory'] lookup_field_other = 'identifier' @@ -56,6 +59,7 @@ def _get_directory_contents(self, request, identifier=None): A wrapper to call FS to collect and validate parameters from the request, and then call FS.get_directory_contents(). """ + paginate = CommonService.get_boolean_query_param(request, 'pagination') include_parent = CommonService.get_boolean_query_param(request, 'include_parent') dirs_only = CommonService.get_boolean_query_param(request, 'directories_only') recursive = CommonService.get_boolean_query_param(request, 'recursive') @@ -86,6 +90,15 @@ def _get_directory_contents(self, request, identifier=None): request=request ) + if paginate: + if isinstance(files_and_dirs, dict): + a = self.paginate_queryset(files_and_dirs) + return self.get_paginated_response(a) + else: + paginator = LimitOffsetPagination() + context = paginator.paginate_queryset(files_and_dirs, request) + return paginator.get_paginated_response(context) + return Response(files_and_dirs) @detail_route(methods=['get'], url_path="files") diff --git a/src/metax_api/services/file_service.py b/src/metax_api/services/file_service.py index 031515c2..7f4f4579 100644 --- a/src/metax_api/services/file_service.py +++ b/src/metax_api/services/file_service.py @@ -729,8 +729,12 @@ def _get_directory_contents(cls, directory_id, recursive=False, max_depth=1, dep if cr_id: try: - dirs, files = cls._get_directory_contents_for_catalog_record(directory_id, cr_id, - dirs_only=dirs_only, directory_fields=directory_fields, file_fields=file_fields) + dirs, files = cls._get_directory_contents_for_catalog_record( + directory_id, + cr_id, + dirs_only=dirs_only, + directory_fields=directory_fields, + file_fields=file_fields) except Http404: if recursive: return {'directories': []} diff --git a/src/metax_api/services/pagination.py b/src/metax_api/services/pagination.py new file mode 100644 index 00000000..1021de0f --- /dev/null +++ b/src/metax_api/services/pagination.py @@ -0,0 +1,57 @@ +from rest_framework.pagination import LimitOffsetPagination + + +class DirectoryPagination(LimitOffsetPagination): + + page_size = 10 + 
page_size_query_param = 'page_size' + + def paginate_queryset(self, queryset, request, view=None): + self.count = self.get_count(queryset) + self.limit = self.get_limit(request) + if self.limit is None: + return None + + self.offset = self.get_offset(request) + self.request = request + if self.count > self.limit and self.template is not None: + self.display_page_controls = True + + if self.count == 0 or self.offset > self.count: + return [] + + # serves filters directories_only and files_only which returnes dictionaries + if len(queryset) == 1: + key = list(queryset.keys())[0] + return dict({key: queryset[key][self.offset:self.offset + self.limit]}) + + dirs = [] + files = [] + dir_len = len(queryset['directories']) + + # if no directories left to show + if self.offset >= dir_len: + offset = self.offset - dir_len + files = queryset['files'][offset:offset + self.limit] + + # if directories are not enough for one page limit + elif (self.offset + self.limit) >= dir_len: + dirs = queryset['directories'][self.offset:] + files_to_show = self.limit - (dir_len - self.offset) + if files_to_show > 0: + files = queryset['files'][0:files_to_show] + + # if enough directories for page limit + else: + dirs = queryset['directories'][self.offset:self.offset + self.limit] + + return dict({'directories': dirs, 'files': files}) + + def get_count(self, queryset): + """ + Determine a count of directory dictionary. + """ + count = 0 + for q, v in queryset.items(): + count = count + len(v) + return count diff --git a/src/metax_api/tests/api/rest/base/views/directories/read.py b/src/metax_api/tests/api/rest/base/views/directories/read.py index e0760492..850082d9 100644 --- a/src/metax_api/tests/api/rest/base/views/directories/read.py +++ b/src/metax_api/tests/api/rest/base/views/directories/read.py @@ -10,6 +10,7 @@ from rest_framework import status from rest_framework.test import APITestCase import responses +from django.db import transaction from metax_api.models import CatalogRecord, Directory from metax_api.models.catalog_record import ACCESS_TYPES @@ -610,3 +611,172 @@ def test_browsing_in_cr_context(self): self._use_http_authorization(method='bearer', token=self.token) response = self.client.get('/rest/directories/3/files?cr_identifier=%s' % cr_pk) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + +class DirectoryApiReadPaginationTests(DirectoryApiReadCommon): + + """ + Test paginated directory and file browsing. + Should return directories and/or files depending on limit and offset parameters. 
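    # Behaviour sketch (see paginate_queryset below): the limit/offset window is
    # applied to the combined directory + file listing, and directories are always
    # consumed before files. E.g. with 14 directories, 16 files and limit=10,
    # page 1 holds 10 directories, page 2 the remaining 4 directories plus 6 files.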
    Default limit is 10.
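    For example: GET /rest/directories/24/files?limit=4&offset=12&pagination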
self.assertEqual(response.data['results']['files'][0]['id'], file_dict['files'][0]) + self.assertEqual(response.data['results']['files'][5]['id'], file_dict['files'][5]) + + def test_read_directory_with_custom_limit_pagination(self): + file_dict = self._get_dirs_files_ids('/rest/directories/24/files') + + response = self.client.get('/rest/directories/24/files?limit=4&offset=12&pagination') + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']['directories']), 2) + self.assertEqual(response.data['results']['directories'][0]['id'], file_dict['directories'][12]) + self.assertEqual(response.data['results']['directories'][1]['id'], file_dict['directories'][13]) + self.assertEqual(len(response.data['results']['files']), 2) + self.assertEqual(response.data['results']['files'][0]['id'], file_dict['files'][0]) + self.assertEqual(response.data['results']['files'][1]['id'], file_dict['files'][1]) + + next_link = response.data['next'].split('http://testserver')[1] + prev_link = response.data['previous'].split('http://testserver')[1] + + response = self.client.get(next_link) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']['directories']), 0) + self.assertEqual(len(response.data['results']['files']), 4) + self.assertEqual(response.data['results']['files'][0]['id'], file_dict['files'][2]) + self.assertEqual(response.data['results']['files'][3]['id'], file_dict['files'][5]) + + response = self.client.get(prev_link) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']['directories']), 4) + self.assertEqual(len(response.data['results']['files']), 0) + self.assertEqual(response.data['results']['directories'][0]['id'], file_dict['directories'][8]) + self.assertEqual(response.data['results']['directories'][3]['id'], file_dict['directories'][11]) + + def test_read_directory_with_recursive_and_pagination(self): + ''' + Query with recursive flag must return only files as a list + ''' + file_list = self._get_dirs_files_ids('/rest/directories/24/files?recursive') + + response = self.client.get('/rest/directories/24/files?recursive&pagination') + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']), 10) + self.assertEqual(response.data['results'][0]['id'], file_list[0]) + self.assertEqual(response.data['results'][9]['id'], file_list[9]) + + next_link = response.data['next'].split('http://testserver')[1] + response = self.client.get(next_link) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']), 10) + self.assertEqual(response.data['results'][0]['id'], file_list[10]) + self.assertEqual(response.data['results'][9]['id'], file_list[19]) + + prev_link = response.data['previous'].split('http://testserver')[1] + response = self.client.get(prev_link) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']), 10) + self.assertEqual(response.data['results'][0]['id'], file_list[0]) + self.assertEqual(response.data['results'][9]['id'], file_list[9]) + + def test_read_directory_with_dirs_only_and_pagination(self): + ''' + Query with directories_only flag must return only directories + ''' + file_dict = self._get_dirs_files_ids('/rest/directories/24/files?directories_only')['directories'] + + response = self.client.get('/rest/directories/24/files?directories_only&pagination=true') + 
self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']['directories']), 10) + self.assertEqual(response.data['results']['directories'][0]['id'], file_dict[0]) + self.assertEqual(response.data['results']['directories'][9]['id'], file_dict[9]) + + next_link = response.data['next'].split('http://testserver')[1] + response = self.client.get(next_link) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']['directories']), 4) + self.assertEqual(response.data['results']['directories'][0]['id'], file_dict[10]) + self.assertEqual(response.data['results']['directories'][3]['id'], file_dict[13]) + + prev_link = response.data['previous'].split('http://testserver')[1] + response = self.client.get(prev_link) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']['directories']), 10) + self.assertEqual(response.data['results']['directories'][0]['id'], file_dict[0]) + self.assertEqual(response.data['results']['directories'][9]['id'], file_dict[9]) \ No newline at end of file From 75ff659ea22369cc83f0512a5b0a8dd24b9ccea2 Mon Sep 17 00:00:00 2001 From: katrite Date: Thu, 9 Apr 2020 19:21:43 +0300 Subject: [PATCH 02/10] CSCFAIRMETA-435: [ADD] Drafts-OAI-PMH-API-Should-only-list-published-datasets Add filters to remove drafts from verbs ListIdentifiers, ListSets and GetRecord [FIX] Change testname for verb Identity to Identify --- .../api/oaipmh/base/metax_oai_server.py | 8 ++- src/metax_api/tests/api/oaipmh/minimal_api.py | 53 +++++++++++++++++-- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/src/metax_api/api/oaipmh/base/metax_oai_server.py b/src/metax_api/api/oaipmh/base/metax_oai_server.py index cf75d686..a920efbc 100644 --- a/src/metax_api/api/oaipmh/base/metax_oai_server.py +++ b/src/metax_api/api/oaipmh/base/metax_oai_server.py @@ -110,6 +110,8 @@ def _get_filtered_records_data(self, verb, metadata_prefix, set, cursor, batch_s data_catalog__catalog_json__identifier__in=settings.OAI['SET_MAPPINGS'][set]) else: query_set = query_set.filter(data_catalog__catalog_json__identifier__in=self._get_default_set_filter()) + query_set = query_set.filter(state='published') + data = [] for record in query_set: if verb == 'ListRecords': @@ -440,6 +442,10 @@ def getRecord(self, metadataPrefix, identifier): if metadataPrefix == OAI_DC_URNRESOLVER_MDPREFIX: raise BadArgumentError('Invalid metadataPrefix value. It can be only used with ListRecords verb') record = CatalogRecord.objects.get(identifier__exact=identifier) + if record.state == 'published': + pass + else: + raise IdDoesNotExistError("No record with identifier %s is available." 
% identifier) except CatalogRecord.DoesNotExist: try: record = DataCatalog.objects.get(catalog_json__identifier__exact=identifier) @@ -455,4 +461,4 @@ def getRecord(self, metadataPrefix, identifier): raise NoRecordsMatchError return (common.Header('', identifier, self._get_header_timestamp(record), ['metax'], False), - common.Metadata('', metadata), None) + common.Metadata('', metadata), None) \ No newline at end of file diff --git a/src/metax_api/tests/api/oaipmh/minimal_api.py b/src/metax_api/tests/api/oaipmh/minimal_api.py index 64a12e2a..043180f4 100644 --- a/src/metax_api/tests/api/oaipmh/minimal_api.py +++ b/src/metax_api/tests/api/oaipmh/minimal_api.py @@ -62,10 +62,15 @@ def _get_single_result(self, data, xpath): results = self._get_results(data, xpath) return results[0] -# VERB: Identity + def _set_dataset_as_draft(self, cr_id): + cr = CatalogRecord.objects.get(pk=cr_id) + cr.state = 'draft' + cr.force_save() - def test_identity(self): - response = self.client.get('/oai/?verb=Identity') +# VERB: Identify + + def test_identify(self): + response = self.client.get('/oai/?verb=Identify') self.assertEqual(response.status_code, status.HTTP_200_OK) # VERB: ListMetadataFormats @@ -115,9 +120,23 @@ def test_list_identifiers(self): errors = self._get_results(response.content, '//o:error[@code="badArgument"]') self.assertTrue(len(errors) == 1, response.content) + def test_list_identifiers_for_drafts(self): + ''' Tests that drafts are not returned from ListIdentifiers ''' + ms = settings.OAI['BATCH_SIZE'] + allRecords = CatalogRecord.objects.filter( + data_catalog__catalog_json__identifier__in=MetaxOAIServer._get_default_set_filter())[:ms] + + self._set_dataset_as_draft(25) + self._set_dataset_as_draft(26) + + # headers should be reduced when some datasets are set as drafts + response = self.client.get('/oai/?verb=ListIdentifiers&metadataPrefix=oai_dc') + self.assertEqual(response.status_code, status.HTTP_200_OK) + headers = self._get_results(response.content, '//o:header') + self.assertFalse(len(headers) == len(allRecords), len(headers)) + def test_list_identifiers_from_datacatalogs_set(self): allRecords = DataCatalog.objects.all()[:settings.OAI['BATCH_SIZE']] - response = self.client.get('/oai/?verb=ListIdentifiers&metadataPrefix=oai_dc&set=datacatalogs') self.assertEqual(response.status_code, status.HTTP_200_OK) records = self._get_results(response.content, '//o:header') @@ -145,6 +164,20 @@ def test_list_records(self): records = self._get_results(response.content, '//o:record') self.assertTrue(len(records) == len(allRecords)) + def test_list_records_for_drafts(self): + ''' Tests that drafts are not returned from ListRecords ''' + ms = settings.OAI['BATCH_SIZE'] + allRecords = CatalogRecord.objects.filter( + data_catalog__catalog_json__identifier__in=MetaxOAIServer._get_default_set_filter())[:ms] + + self._set_dataset_as_draft(25) + self._set_dataset_as_draft(26) + + response = self.client.get('/oai/?verb=ListRecords&metadataPrefix=oai_fairdata_datacite') + self.assertEqual(response.status_code, status.HTTP_200_OK) + records = self._get_results(response.content, '//o:record') + self.assertFalse(len(records) == len(allRecords)) + def test_list_records_urnresolver_from_datacatalogs_set(self): response = self.client.get('/oai/?verb=ListRecords&metadataPrefix=oai_dc_urnresolver&set=datacatalogs') self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -267,6 +300,18 @@ def test_get_record(self): '//o:record/o:header/o:identifier[text()="%s"]' % self.identifier) 
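                # Draft records are deliberately reported as nonexistent here, so
                # that OAI-PMH harvesters only ever see published datasets.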
self.assertTrue(len(identifiers) == 1, response.content) + def test_get_record_for_drafts(self): + ''' Tests that GetRecord doesn't return drafts ''' + + self._set_dataset_as_draft(1) + + response = self.client.get( + '/oai/?verb=GetRecord&identifier=%s&metadataPrefix=oai_dc' % self.identifier) + self.assertEqual(response.status_code, status.HTTP_200_OK) + identifiers = self._get_results(response.content, + '//o:record/o:header/o:identifier[text()="%s"]' % self.identifier) + self.assertFalse(len(identifiers) == 1, response.content) + def test_get_record_non_existing(self): response = self.client.get('/oai/?verb=GetRecord&identifier=urn:non:existing&metadataPrefix=oai_dc') self.assertEqual(response.status_code, status.HTTP_200_OK) From f29483b2f36486a8e19d6f5c0a6e753e22bd4b91 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Thu, 9 Apr 2020 19:34:32 +0300 Subject: [PATCH 03/10] CSCFAIRMETA-179: [ADD] information on parent directory when pagination with include_parent flag --- .../api/rest/base/views/directory_view.py | 8 +++-- src/metax_api/services/pagination.py | 3 +- .../api/rest/base/views/directories/read.py | 31 +++++++++++++++++-- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/metax_api/api/rest/base/views/directory_view.py b/src/metax_api/api/rest/base/views/directory_view.py index 92590d61..a19ceb5e 100644 --- a/src/metax_api/api/rest/base/views/directory_view.py +++ b/src/metax_api/api/rest/base/views/directory_view.py @@ -92,8 +92,12 @@ def _get_directory_contents(self, request, identifier=None): if paginate: if isinstance(files_and_dirs, dict): - a = self.paginate_queryset(files_and_dirs) - return self.get_paginated_response(a) + paginated = self.paginate_queryset(files_and_dirs) + if include_parent: + for k, v in files_and_dirs.items(): + if k not in ['directories', 'files']: + paginated[k] = v + return self.get_paginated_response(paginated) else: paginator = LimitOffsetPagination() context = paginator.paginate_queryset(files_and_dirs, request) diff --git a/src/metax_api/services/pagination.py b/src/metax_api/services/pagination.py index 1021de0f..5d4fcc0b 100644 --- a/src/metax_api/services/pagination.py +++ b/src/metax_api/services/pagination.py @@ -53,5 +53,6 @@ def get_count(self, queryset): """ count = 0 for q, v in queryset.items(): - count = count + len(v) + if q in ['directories', 'files']: + count = count + len(v) return count diff --git a/src/metax_api/tests/api/rest/base/views/directories/read.py b/src/metax_api/tests/api/rest/base/views/directories/read.py index 850082d9..35df7861 100644 --- a/src/metax_api/tests/api/rest/base/views/directories/read.py +++ b/src/metax_api/tests/api/rest/base/views/directories/read.py @@ -633,7 +633,8 @@ def _create_test_dirs(self): def _get_dirs_files_ids(self, url): file_data = self.client.get(url).data if isinstance(file_data, dict): - return {key: [f['id'] for f in file_data[key]] for key in file_data.keys()} + return {key: [f['id'] for f in file_data[key]] for key in file_data.keys() + if key in ['directories', 'files']} else: return [f['id'] for f in file_data] @@ -779,4 +780,30 @@ def test_read_directory_with_dirs_only_and_pagination(self): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(len(response.data['results']['directories']), 10) self.assertEqual(response.data['results']['directories'][0]['id'], file_dict[0]) - self.assertEqual(response.data['results']['directories'][9]['id'], file_dict[9]) \ No newline at end of file + 
self.assertEqual(response.data['results']['directories'][9]['id'], file_dict[9]) + + def test_read_directory_with_parent_and_pagination(self): + ''' + Query with directories_only flag must return only directories + ''' + file_dict = self._get_dirs_files_ids('/rest/directories/24/files?include_parent') + + response = self.client.get('/rest/directories/24/files?include_parent&pagination=true') + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']['directories']), 10) + self.assertEqual(response.data['results']['directories'][0]['id'], file_dict['directories'][0]) + self.assertEqual(response.data['results']['directories'][9]['id'], file_dict['directories'][9]) + self.assertEqual(response.data['results']['id'], 24) + self.assertEqual(response.data['results']['directory_name'], "10") + + next_link = response.data['next'].split('http://testserver')[1] + response = self.client.get(next_link) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']['directories']), 4) + self.assertEqual(len(response.data['results']['files']), 6) + self.assertEqual(response.data['results']['directories'][0]['id'], file_dict['directories'][10]) + self.assertEqual(response.data['results']['directories'][3]['id'], file_dict['directories'][13]) + self.assertEqual(response.data['results']['files'][0]['id'], file_dict['files'][0]) + self.assertEqual(response.data['results']['files'][5]['id'], file_dict['files'][5]) + self.assertEqual(response.data['results']['id'], 24) + self.assertEqual(response.data['results']['directory_name'], "10") \ No newline at end of file From 072f698db8c801e81873151e36b7f6dce841fee3 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 14 Apr 2020 10:29:16 +0300 Subject: [PATCH 04/10] CSCFAIRMETA-179: [ADD] updates swagger with description of pagination --- swagger/swagger.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index fff49b0e..9c0a6286 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -542,6 +542,22 @@ paths: description: Comma-separated list of field names to retrieve for directories required: false type: string + - name: pagination + in: query + description: sets paging on with default limit of 10 + required: false + type: bolean + - name: offset + in: query + description: offset for paging + required: false + type: integer + - name: limit + in: query + description: limit for paging + required: false + type: integer + default: 10 responses: '200': description: | From 1f606a0de2b619dff7d01c770bb09583f6d5c479 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 14 Apr 2020 13:18:19 +0300 Subject: [PATCH 05/10] CSCFAIRMETA-213: [ADD|REF] Adds delete descriptions. Minor refactoring --- swagger/swagger.yaml | 574 ++++++++++++++++++++++++++----------------- 1 file changed, 350 insertions(+), 224 deletions(-) diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index fff49b0e..2624305b 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -8,7 +8,6 @@ schemes: basePath: /rest/v1/ produces: - application/json - paths: # File Storage API @@ -18,12 +17,12 @@ paths: parameters: - name: ordering in: query - description: specify ordering of results by fields. accepts a list of field names separated by a comma. ordering can be reversed by prefixing field name with a '-' char. + description: Specify ordering of results by fields. Accepts a list of field names separated by a comma. 
Ordering can be reversed by prefixing field name with a '-' char. required: false type: string responses: "200": - description: return list of file storages + description: Return list of file storages tags: - File Storage API post: @@ -34,11 +33,13 @@ paths: - application/json responses: '201': - description: new file storage created, returns created object + description: New file storage created, returns created object '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details '401': description: Unauthorized + '403': + description: Forbidden. Must have permission for resource tags: - File Storage API /rest/filestorages/{PID}: @@ -47,14 +48,16 @@ paths: parameters: - name: PID in: path - description: identifier of file storage + description: Identifier of file storage required: true type: string responses: '200': - description: return file storage metadata + description: Return file storage metadata + '403': + description: Forbidden. Must have permission for resource '404': - description: not found + description: Not found tags: - File Storage API put: @@ -62,17 +65,19 @@ paths: parameters: - name: PID in: path - description: identifier of file storage + description: Identifier of file storage required: true type: string - $ref: "#/parameters/dryrun" responses: '200': - description: successful operation. modified content returned + description: Successful operation. Modified content returned '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details '401': description: Unauthorized. Reserved for admins only + '403': + description: Forbidden. Must have permission for resource tags: - File Storage API patch: @@ -80,17 +85,19 @@ paths: parameters: - name: PID in: path - description: identifier of file storage + description: Identifier of file storage required: true type: string - $ref: "#/parameters/dryrun" responses: '200': - description: successful operation. modified content returned + description: Successful operation. Modified content returned '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details '401': description: Unauthorized. Reserved for admins only + '403': + description: Forbidden. Must have permission for resource tags: - File Storage API delete: @@ -98,60 +105,65 @@ paths: parameters: - name: PID in: path - description: identifier of file storage + description: Identifier of file storage required: true type: string - $ref: "#/parameters/dryrun" responses: '204': - description: successful operation + description: Successful operation '401': description: Unauthorized. Reserved for admins only + '403': + description: Forbidden. Must have permission for resource '404': - description: not found + description: Not found tags: - File Storage API + # File API /rest/files: get: - summary: get list of files + summary: Get list of files parameters: - name: project_identifier in: query - description: filter files by project + description: Filter files by project required: false type: string - name: file_path in: query - description: filters files by path. requires query parameter project_identifier. + description: Filters files by path. Requires query parameter project_identifier. 
required: false type: string - name: offset in: query - description: offset for paging + description: Offset for paging required: false type: integer - name: limit in: query - description: limit for paging + description: Limit for paging required: false type: integer default: 10 - name: ordering in: query - description: specify ordering of results by fields. accepts a list of field names separated by a comma. ordering can be reversed by prefixing field name with a '-' char. + description: Specify ordering of results by fields. Accepts a list of field names separated by a comma. Ordering can be reversed by prefixing field name with a '-' char. required: false type: string responses: "200": - description: successful operation, return list of files + description: Successful operation, return list of files schema: $ref: '#/definitions/FileList' + '404': + description: Not found. Also when not authenticated request tags: - File API post: - summary: create new file metadata + summary: Create new file metadata consumes: - application/json parameters: @@ -166,17 +178,19 @@ paths: '201': description: Returns the created object, or if a list was given, a list of objects and errors. '400': - description: parameters contained errors, response includes details. + description: Parameters contained errors, response includes details. + '403': + description: Forbidden. Must have permission for resource tags: - File API put: - summary: bulk update + summary: Bulk update consumes: - application/json parameters: - in: "body" name: "body" - description: "A list of objects to update." + description: A list of objects to update. required: true schema: $ref: '#/definitions/FileList' @@ -187,10 +201,12 @@ paths: description: Successful operation. Return values include a list of errors, if any. '400': description: All updates failed. A list of errors is returned. + '403': + description: Forbidden. Must have permission for resource tags: - File API patch: - summary: bulk update partial + summary: Bulk update partial description: | The payload must include a field that can be used to identify the resource being updated. Acceptable identifier fields are: id, identifier consumes: @@ -198,7 +214,7 @@ paths: parameters: - in: "body" name: "body" - description: "A list of (partial) objects to update." + description: A list of (partial) objects to update. required: true schema: $ref: '#/definitions/FileList' @@ -209,10 +225,12 @@ paths: description: Some or all objects were updated. Return values contain list of full updated objects, and may include a list of errors. '400': description: All updates failed. A list of errors is returned. + '403': + description: Forbidden. Must have permission for resource tags: - File API delete: - summary: bulk delete + summary: Bulk delete description: Mark files as deleted en masse. consumes: - application/json @@ -231,13 +249,15 @@ paths: identifiers were not found, those are ignored. Returns count of deleted files in json body. '400': description: All updates failed. A list of errors is returned. + '403': + description: Forbidden. Must have permission for resource '404': description: None of the provided identifiers were found. tags: - File API /rest/files/{PID}: get: - summary: get file metadata + summary: Get file metadata parameters: - name: PID in: path @@ -246,15 +266,15 @@ paths: type: string responses: '200': - description: return file metadata + description: Return file metadata schema: $ref: '#/definitions/File' '404': - description: not found + description: Not found. 
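        # Example query (values taken from the test data in this patch series):
        # GET /rest/files?project_identifier=research_project_112&file_path=/prj_112_root/science_data_C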
Also returned when the request is not authenticated
If XML metadata already exists for the given namespace, an error is returned
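        # Illustrative example (the namespace value is an assumption):
        # POST /rest/files/{PID}/XML?namespace=http://example.com/schema  (XML document in the request body)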
description: | note: does not restore possible deprecated datasets (due to files having been removed) to back to non-deprecated state! produces: @@ -460,21 +494,23 @@ paths: parameters: - name: body in: body - description: a list of file identifiers of files to restore. + description: A list of file identifiers of files to restore. required: true schema: $ref: '#/definitions/StringList' - $ref: "#/parameters/dryrun" responses: '200': - description: numbers of files restored. + description: Numbers of files restored. '400': - description: request ended in an error, response contains details. + description: Request ended in an error, response contains details. + '403': + description: Forbidden. Must have permission for resource tags: - File API /rest/directories/{PID}: get: - summary: get details of a directory + summary: Get details of a directory description: Does not contain the directory's files and sub-directories. For that, use /rest/directories/{PID}/files parameters: - name: PID @@ -484,16 +520,16 @@ paths: type: string responses: '200': - description: return directory details + description: Return directory details schema: $ref: '#/definitions/Directory' '404': - description: not found + description: Not found. Also when not authenticated request tags: - File API /rest/directories/{PID}/files: get: - summary: get list of files and directories contained by a directory + summary: Get list of files and directories contained by a directory parameters: - name: PID in: path @@ -503,33 +539,33 @@ paths: - name: recursive in: query description: | - return a flat list of file objects contained by the target directory and its sub-directories. - if directories_only=true is also specified, returns a hierarchial directory tree instead, of x depth. + Return a flat list of file objects contained by the target directory and its sub-directories. + If directories_only=true is also specified, returns a hierarchial directory tree instead, of x depth. required: false type: boolean - name: depth in: query - description: max depth of recursion. value must be an integer > 0, or *. default value is 1. + description: Max depth of recursion. Value must be an integer > 0, or *. default value is 1. required: false type: string - name: directories_only in: query description: | - omit files entirely from the returned results. use together with recursive=true and depth=x + Omit files entirely from the returned results. Use together with recursive=true and depth=x to get a directory tree. required: false type: boolean - name: include_parent in: query description: | - includes the 'parent directory' of the contents being fetched in the results also. example: + Includes the 'parent directory' of the contents being fetched in the results also. example: GET /rest/directories/3/files?include_parent=true also includes data about directory id: 3 in the results. - otherwise, one would query for its data separately by GET /rest/directories/3. + Otherwise, one would query for its data separately by GET /rest/directories/3. required: false type: boolean - name: cr_identifier in: query - description: identifier of a catalog record. browse only files that have been selected for that record. + description: Identifier of a catalog record. Browse only files that have been selected for that record. required: false type: string - name: file_fields @@ -545,110 +581,109 @@ paths: responses: '200': description: | - returns a dict like { 'directories': [], 'files': [] }, where the lists contain directory and file objects. 
+ Returns a dict like { 'directories': [], 'files': [] }, where the lists contain directory and file objects. note: if parameter 'recursive' is used, return value is a flat list of file objects instead. '404': - description: directory not found + description: Directory not found. Also when not authenticated request tags: - File API /rest/directories/files: get: - summary: get list of files and directories contained by a directory, queried by dir path and project + summary: Get list of files and directories contained by a directory, queried by dir path and project description: | - functions the same as /rest/directories/pid, except queried by dir path and project identifier, instead of directly by directory identifier. + Functions the same as /rest/directories/pid, except queried by dir path and project identifier, instead of directly by directory identifier. parameters: - name: path in: path - description: path of the directory to browse + description: Path of the directory to browse required: true type: string - name: project in: query - description: project_identifier of the project to browse from + description: Project_identifier of the project to browse from required: true type: string - name: recursive in: query description: | - return a flat list of file objects contained by the target directory and its sub-directories. - if directories_only=true is also specified, returns a hierarchial directory tree instead, of x depth. + Return a flat list of file objects contained by the target directory and its sub-directories. + If directories_only=true is also specified, returns a hierarchial directory tree instead, of x depth. required: false type: boolean - name: depth in: query - description: max depth of recursion. value must be an integer > 0, or *. default value is 1. + description: Max depth of recursion. Value must be an integer > 0, or *. Default value is 1. required: false type: string - name: directories_only in: query description: | - omit files entirely from the returned results. use together with recursive=true and depth=x + Omit files entirely from the returned results. Use together with recursive=true and depth=x to get a directory tree. required: false type: boolean - name: include_parent in: query description: | - includes the 'parent directory' of the contents being fetched in the results also. example: + Includes the 'parent directory' of the contents being fetched in the results also. example: GET /rest/directories/3/files?include_parent=true also includes data about directory id: 3 in the results. - otherwise, one would query for its data separately by GET /rest/directories/3. + Otherwise, one would query for its data separately by GET /rest/directories/3. required: false type: boolean - name: preferred_identifier in: query - description: preferred_identifier of a dataset. browse only files that have been selected for that record. + description: Preferred_identifier of a dataset. Browse only files that have been selected for that record. required: false type: string responses: '200': description: | - returns a dict like { 'directories': [], 'files': [] }, where the lists contain directory and file objects. - note: if parameter 'recursive' is used, return value is a flat list of file objects instead. + Returns a dict like { 'directories': [], 'files': [] }, where the lists contain directory and file objects. + note: If parameter 'recursive' is used, return value is a flat list of file objects instead. 
'404': - description: directory not found + description: Directory not found. Also when not authenticated request tags: - File API /rest/directories/root: get: - summary: return root directory for a project, and its files and directories + summary: Return root directory for a project, and its files and directories description: Useful when starting to browse files for a project, when individual root-level directory identifier is not yet known. parameters: - name: project in: query - description: project_identifier of the project for which to find root directory + description: Project_identifier of the project for which to find root directory required: true type: string responses: '200': description: | - returns the root directory for the requested project. - returned object additionally contains fields 'directories' and 'files', which contain the child directory and file objects of the root directory, similar to what API /directories/{PID}/files does. + Returns the root directory for the requested project. + Returned object additionally contains fields 'directories' and 'files', which contain the child directory and file objects of the root directory, similar to what API /directories/{PID}/files does. '400': - description: bad parameters, details in body + description: Bad parameters, details in body '404': - description: directory not found + description: Directory not found. Also when not authenticated request tags: - File API - # Data Catalog API /rest/datacatalogs: get: - summary: "list of data catalogs" + summary: List of data catalogs parameters: - name: ordering in: query - description: specify ordering of results by fields. accepts a list of field names separated by a comma. ordering can be reversed by prefixing field name with a '-' char. + description: Specify ordering of results by fields. Accepts a list of field names separated by a comma. Ordering can be reversed by prefixing field name with a '-' char. required: false type: string responses: "200": - description: return list of file data catalogs + description: Return list of file data catalogs tags: - Data Catalog API post: - summary: create new data catalog + summary: Create new data catalog consumes: - application/json parameters: @@ -661,16 +696,18 @@ paths: - $ref: "#/parameters/dryrun" responses: '201': - description: new data catalog created + description: New data catalog created schema: $ref: '#/definitions/DataCatalog' '401': description: Unauthorized. Reserved for admins only + '403': + description: Forbidden. Must have permission for resource tags: - Data Catalog API /rest/datacatalogs/{PID}: get: - summary: get data catalog metadata + summary: Get data catalog metadata parameters: - name: PID in: path @@ -679,15 +716,15 @@ paths: type: string responses: '200': - description: return data catalog metadata + description: Return data catalog metadata schema: $ref: '#/definitions/DataCatalog' '404': - description: not found + description: Not found tags: - Data Catalog API put: - summary: replace data catalog metadata + summary: Replace data catalog metadata description: | # catalog_json read-only fields - identifier @@ -706,15 +743,17 @@ paths: - $ref: "#/parameters/dryrun" responses: '200': - description: successful operation. modified content returned + description: Successful operation. modified content returned '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details '401': description: Unauthorized. 
Reserved for admins only + '403': + description: Forbidden. Must have permission for resource tags: - Data Catalog API patch: - summary: replace part of catalog metadata + summary: Replace part of catalog metadata description: | # catalog_json read-only fields - identifier @@ -733,43 +772,61 @@ paths: - $ref: "#/parameters/dryrun" responses: '200': - description: successful operation, full content returned + description: Successful operation, full content returned schema: $ref: '#/definitions/DataCatalog' '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details + '403': + description: Forbidden. Must have permission for resource + tags: + - Data Catalog API + delete: + summary: Delete catalog metadata + parameters: + - name: PID + in: path + description: Persistent ID of the resource OR the internal pk + required: true + type: string + - $ref: "#/parameters/dryrun" + responses: + '204': + description: Successful operation + '401': + description: Unauthorized. Reserved for admins only + '403': + description: Forbidden. Must have permission for resource + '404': + description: Not found tags: - Data Catalog API - - - - # Dataset API /rest/datasets: # some of the parameters and returned fields are for TPAS usage only get: - summary: "list datasets" + summary: List datasets parameters: - name: latest in: query - description: only return latest versions + description: Only return latest versions required: false type: boolean - name: owner_id in: query - description: id of the person who owns the record in metax + description: Id of the person who owns the record in metax required: false type: string - name: user_created in: query - description: id of the person who created the record in metax + description: Id of the person who created the record in metax required: false type: string - name: curator in: query - description: curator identifier (field research_dataset-> curator-> identifier) + description: Curator identifier (field research_dataset-> curator-> identifier) required: false type: string - name: preferred_identifier @@ -782,7 +839,7 @@ paths: type: string - name: state in: query - description: TPAS state (field preservation_state). multiple states using OR-logic are queriable in the same request, e.g. state=5,6. see valid values from http://iow.csc.fi/model/mrd/CatalogRecord/ field preservation_state + description: TPAS state (field preservation_state). Multiple states using OR-logic are queriable in the same request, e.g. state=5,6. See valid values from http://iow.csc.fi/model/mrd/CatalogRecord/ field preservation_state required: false type: string - name: metadata_owner_org @@ -807,7 +864,7 @@ paths: type: string - name: editor in: query - description: identifier of the editor used to modify the record, i.e. qvain. + description: Identifier of the editor used to modify the record, i.e. qvain. type: string - name: data_catalog in: query @@ -815,24 +872,24 @@ paths: type: string - name: offset in: query - description: offset for paging + description: Offset for paging required: false type: integer - name: limit in: query - description: limit for paging + description: Limit for paging required: false type: integer default: 10 - name: ordering in: query - description: specify ordering of results by fields. accepts a list of field names separated by a comma. ordering can be reversed by prefixing field name with a '-' char. + description: Specify ordering of results by fields. 
Accepts a list of field names separated by a comma. Ordering can be reversed by prefixing field name with a '-' char. required: false type: string - name: actor_filter in: query description: | - actor_filters are a collection of filter parameters for filtering according to the name + Actor_filters are a collection of filter parameters for filtering according to the name of creator, curator, publisher or rights_holder actors. Actor type must be defined as a suffix in the filter name ('_person' or '_organization'). Actor type '_organization' finds matches from "is_member_of" -field if actor is a person. Multiple actor_filters can be applied simultaneously (AND) @@ -846,13 +903,13 @@ paths: type: string responses: "200": - description: successful operation, returns a list of datasets containing full dataset objects including their data catalog and contract information. when using query parameters, search result can be an empty list. + description: Successful operation, returns a list of datasets containing full dataset objects including their data catalog and contract information. When using query parameters, search result can be an empty list. schema: $ref: '#/definitions/CatalogRecord' tags: - Dataset API post: - summary: create new dataset metadata + summary: Create new dataset metadata description: | # research_dataset identifiers The fields metadata_version_identifier and preferred_identifier are generally always generated server-side. Exception: In harvested catalogs, preferred_identifier value can be provided by the user. @@ -876,11 +933,13 @@ paths: '201': description: Returns the created object, or if a list was given, a list of objects and errors. '400': - description: parameters contained errors, response includes details. + description: Parameters contained errors, response includes details. + '403': + description: Forbidden. Must have permission for resource tags: - Dataset API put: - summary: bulk update + summary: Bulk update consumes: - application/json parameters: @@ -897,10 +956,12 @@ paths: description: Sucessful operation. Return values include a list of errors, if any. '400': description: All updates failed. A list of errors is returned. + '403': + description: Forbidden. Must have permission for resource tags: - Dataset API patch: - summary: bulk update partial + summary: Bulk update partial description: | The payload must include a field that can be used to identify the resource being updated. Acceptable identifier fields are: id, identifier consumes: @@ -919,79 +980,81 @@ paths: description: Some or all objects were updated. Return values contain list of full updated objects, and may include a list of errors. '400': description: All updates failed. A list of errors is returned. + '403': + description: Forbidden. 
Must have permission for resource tags: - Dataset API /rest/datasets/identifiers: get: - summary: "list all dataset identifiers" + summary: List all dataset identifiers parameters: - name: latest in: query - description: only return latest versions + description: Only return latest versions required: false type: boolean - name: owner_id in: query - description: id of the person who owns the record in metax + description: Id of the person who owns the record in metax required: false type: string - name: user_created in: query - description: id of the person who created the record in metax + description: Id of the person who created the record in metax required: false type: string - name: curator in: query - description: curator identifier (field research_dataset-> curator-> identifier) + description: Curator identifier (field research_dataset-> curator-> identifier) required: false type: string - name: state in: query - description: TPAS state (field preservation_state). multiple states using OR-logic are queriable in the same request, e.g. state=5,6. see valid values from http://iow.csc.fi/model/mrd/CatalogRecord/ field preservation_state + description: TPAS state (field preservation_state). Multiple states using OR-logic are queriable in the same request, e.g. state=5,6. See valid values from http://iow.csc.fi/model/mrd/CatalogRecord/ field preservation_state required: false type: string responses: "200": - description: successful operation, returns a list of all dataset metadata version identifiers. + description: Successful operation, returns a list of all dataset metadata version identifiers. tags: - Dataset API /rest/datasets/unique_preferred_identifiers: get: - summary: "list all unique dataset preferred identifiers" + summary: List all unique dataset preferred identifiers parameters: - name: latest in: query - description: only return latest dataset versions + description: Only return latest dataset versions required: false type: boolean - name: owner_id in: query - description: id of the person who owns the record in metax + description: Id of the person who owns the record in metax required: false type: string - name: user_created in: query - description: id of the person who created the record in metax + description: Id of the person who created the record in metax required: false type: string - name: curator in: query - description: curator identifier (field research_dataset-> curator-> identifier) + description: Curator identifier (field research_dataset-> curator-> identifier) required: false type: string - name: state in: query - description: TPAS state (field preservation_state). multiple states using OR-logic are queriable in the same request, e.g. state=5,6. see valid values from http://iow.csc.fi/model/mrd/CatalogRecord/ field preservation_state + description: TPAS state (field preservation_state). Multiple states using OR-logic are queriable in the same request, e.g. state=5,6. see valid values from http://iow.csc.fi/model/mrd/CatalogRecord/ field preservation_state required: false type: string responses: "200": - description: successful operation, returns a list of all unique dataset preferred identifiers. + description: Successful operation, returns a list of all unique dataset preferred identifiers. 
tags: - Dataset API /rest/datasets/{PID}: get: - summary: get dataset metadata + summary: Get dataset metadata parameters: - name: PID in: path @@ -1045,15 +1108,15 @@ paths: type: boolean responses: '200': - description: return dataset metadata + description: Return dataset metadata schema: $ref: '#/definitions/CatalogRecord' '404': - description: not found + description: Not found tags: - Dataset API put: - summary: replace dataset metadata + summary: Replace dataset metadata description: | # research_dataset read-only fields - metadata_version_identifier @@ -1076,13 +1139,15 @@ paths: - $ref: "#/parameters/dryrun" responses: '200': - description: successful operation. modified content returned + description: Successful operation. Modified content returned '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details + '403': + description: Forbidden. Must have permission for resource tags: - Dataset API patch: - summary: replace part of dataset metadata + summary: Replace part of dataset metadata description: | # research_dataset read-only fields - metadata_version_identifier @@ -1105,16 +1170,38 @@ paths: - $ref: "#/parameters/dryrun" responses: '200': - description: successful operation, full content returned + description: Successful operation, full content returned schema: $ref: '#/definitions/CatalogRecord' '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details + '403': + description: Forbidden. Must have permission for resource + tags: + - Dataset API + delete: + summary: Delete dataset metadata + parameters: + - name: PID + in: path + description: Persistent ID of the resource OR the internal pk + required: true + type: string + - $ref: "#/parameters/dryrun" + responses: + '204': + description: Successful operation + '401': + description: Unauthorized. Reserved for admins only + '403': + description: Forbidden. Must have permission for resource + '404': + description: Not found tags: - Dataset API /rest/datasets/{PID}/files: get: - summary: get list of files in a dataset + summary: Get list of files in a dataset parameters: - name: PID in: path @@ -1128,17 +1215,17 @@ paths: type: string - name: removed_files in: query - description: return only deleted files + description: Return only deleted files required: false type: boolean responses: '200': - description: successful operation, return list of full files metadata + description: Successful operation, return list of full files metadata tags: - Dataset API /rest/datasets/{PID}/metadata_versions: get: - summary: "list old research_dataset entries of a record" + summary: List old research_dataset entries of a record parameters: - name: PID in: path @@ -1147,14 +1234,14 @@ paths: type: string responses: "200": - description: successful operation, return a list of entries. may return an empty list. + description: Successful operation, return a list of entries. May return an empty list. schema: $ref: '#/definitions/StringList' tags: - Dataset API /rest/datasets/{PID}/metadata_versions/{MVI}: get: - summary: "get contents of a specific old research_dataset of a record." + summary: Get contents of a specific old research_dataset of a record. 
parameters: - name: PID in: path @@ -1163,63 +1250,68 @@ paths: type: string - name: MVI in: path - description: the metadata_version_identifier of the research_dataset + description: The metadata_version_identifier of the research_dataset required: true type: string responses: "200": - description: successful operation, return a list of entries. may return an empty list. + description: Successful operation, return a list of entries. May return an empty list. "404": - description: resource not found. + description: Resource not found. tags: - Dataset API + # Contract API /rest/contracts: get: - summary: "list contracts" + summary: List contracts parameters: - name: organization in: query - description: organization ID (field contract_json-> organization-> organization_identifier) + description: Organization ID (field contract_json-> organization-> organization_identifier) required: false type: string - name: offset in: query - description: offset for paging + description: Offset for paging required: false type: integer - name: limit in: query - description: limit for paging + description: Limit for paging required: false type: integer - name: ordering in: query - description: specify ordering of results by fields. accepts a list of field names separated by a comma. ordering can be reversed by prefixing field name with a '-' char. + description: Specify ordering of results by fields. accepts a list of field names separated by a comma. ordering can be reversed by prefixing field name with a '-' char. required: false type: string responses: "200": - description: successful operation, return list of contracts + description: Successful operation, return list of contracts + '403': + description: Forbidden. Must have permission for resource tags: - Contract API post: - summary: create new contract metadata + summary: Create new contract metadata parameters: - $ref: "#/parameters/dryrun" consumes: - application/json responses: '201': - description: new contract metadata created, returns the created object, or if a list was given, a list of objects + description: New contract metadata created, returns the created object, or if a list was given, a list of objects '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details + '403': + description: Forbidden. Must have permission for resource tags: - Contract API /rest/contracts/{PID}: get: - summary: get contract metadata + summary: Get contract metadata parameters: - name: PID in: path @@ -1228,13 +1320,15 @@ paths: type: string responses: '200': - description: return contract metadata + description: Successful operation, return contract metadata + '403': + description: Forbidden. Must have permission for resource '404': - description: not found + description: Not found tags: - Contract API put: - summary: replace contract metadata + summary: Replace contract metadata parameters: - name: PID in: path @@ -1244,13 +1338,15 @@ paths: - $ref: "#/parameters/dryrun" responses: '200': - description: successful operation. modified content returned + description: Successful operation. Modified content returned '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details + '403': + description: Forbidden. 
Must have permission for resource tags: - Contract API patch: - summary: replace part of contract metadata + summary: Replace part of contract metadata parameters: - name: PID in: path @@ -1260,9 +1356,31 @@ paths: - $ref: "#/parameters/dryrun" responses: '200': - description: successful operation, full content returned + description: Successful operation, full content returned '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details + '403': + description: Forbidden. Must have permission for resource + tags: + - Contract API + delete: + summary: Delete contract metadata + parameters: + - name: PID + in: path + description: Persistent ID of the resource OR the internal pk + required: true + type: string + - $ref: "#/parameters/dryrun" + responses: + '204': + description: Successful operation + '401': + description: Unauthorized. Reserved for admins only + '403': + description: Forbidden. Must have permission for resource + '404': + description: Not found tags: - Contract API @@ -1270,49 +1388,57 @@ paths: # ApiErrors API /rest/apierrors: get: - summary: "list errors produced during api requests" + summary: List errors produced during api requests responses: '200': - description: returns a list of error entries. may return an empty list. + description: Returns a list of error entries. May return an empty list. + '403': + description: Forbidden. Must have permission for resource tags: - ApiErrors API /rest/apierrors/{identifier}: get: - summary: get details of a single error entry + summary: Get details of a single error entry parameters: - name: identifier in: path - description: error_identifier of an error response + description: Error_identifier of an error response required: true type: string responses: '200': - description: return error details as a json object + description: Return error details as a json object + '403': + description: Forbidden. Must have permission for resource '404': - description: not found + description: Not found tags: - ApiErrors API delete: - summary: delete a single error entry + summary: Delete a single error entry parameters: - name: identifier in: path - description: error_identifier of an error response + description: Error identifier of an error response required: true type: string responses: '204': - description: entry deleted + description: Entry deleted + '403': + description: Forbidden. Must have permission for resource '404': - description: not found + description: Not found tags: - ApiErrors API /rest/apierrors/flush: post: - summary: "delete all error entries" + summary: "Delete all error entries" responses: '200': - description: all entries deleted. return a json object telling how many items were deleted. + description: All entries deleted. Return a json object telling how many items were deleted. + '403': + description: Forbidden. 
Must have permission for resource tags: - ApiErrors API @@ -1320,26 +1446,26 @@ paths: # Schema API /rest/schemas: get: - summary: "list schemas" + summary: "List schemas" responses: '200': - description: successful operation, return list of schema names + description: Successful operation, return list of schema names tags: - Schema API /rest/schemas/{name}: get: - summary: get schema content + summary: Get schema content parameters: - name: name in: path - description: name of the schema to be retrieved + description: Name of the schema to be retrieved required: true type: string responses: '200': - description: return JSON schema + description: Return JSON schema '404': - description: not found + description: Not found tags: - Schema API @@ -1357,7 +1483,7 @@ paths: type: string responses: '200': - description: successful operation, return dataset template as json. + description: Successful operation, return dataset template as json. schema: $ref: '#/definitions/CatalogRecord' tags: @@ -1374,7 +1500,7 @@ paths: type: string responses: '200': - description: successful operation, return catalog record preservation identifier. + description: Successful operation, return catalog record preservation identifier. schema: $ref: '#/definitions/CatalogRecord' tags: @@ -1394,9 +1520,9 @@ paths: description: Directory identifier from where the new files will be searched. responses: '200': - description: successful operation. Files were added and new dataset version was created. Returns information about the new dataset version. + description: Successful operation. Files were added and new dataset version was created. Returns information about the new dataset version. '204': - description: successful operation. Files were added and new dataset version was not created or no new files were found. Returns no content. + description: Successful operation. Files were added and new dataset version was not created or no new files were found. Returns no content. /rpc/datasets/fix_deprecated: post: summary: Fixes deprecated dataset @@ -1409,7 +1535,7 @@ paths: type: string responses: '200': - description: successful operation, returns information about the new dataset version. + description: Successful operation, returns information about the new dataset version. tags: - Dataset RPC # /rpc/statistics/count_datasets: @@ -1469,11 +1595,11 @@ paths: type: boolean responses: '200': - description: successful operation. + description: Successful operation. schema: $ref: "#/examples/all_datasets_cumulative" '400': - description: parameters contained errors, response includes details + description: Parameters contained errors, response includes details tags: - Statistics RPC # /rpc/statistics/catalog_datasets_cumulative: @@ -1549,11 +1675,11 @@ paths: - $ref: "#/parameters/metadata_owner_org_filter" responses: '200': - description: successful operation. + description: Successful operation. schema: $ref: "#/examples/organization_datasets_cumulative" '400': - description: required parameters missing. + description: Required parameters missing. tags: - Statistics RPC # /rpc/statistics/unused_files: @@ -1590,14 +1716,14 @@ parameters: dryrun: name: dryrun in: query - description: execute the operation normally, returning the same response as normally, but do not save anything to db, or publish anything to remote services. + description: Execute the operation normally, returning the same response as normally, but do not save anything to db, or publish anything to remote services. 
required: false type: boolean allowed_projects: name: allowed_projects in: query - description: ensures that content is part of intended projects. + description: Ensures that content is part of intended projects. required: false type: string @@ -1650,7 +1776,7 @@ parameters: preservation_state_filter: name: state in: query - description: TPAS state (field preservation_state). multiple states using OR-logic are queriable in the same request, e.g. state=5,6. see valid values from http://iow.csc.fi/model/mrd/CatalogRecord/ field preservation_state + description: TPAS state (field preservation_state). Multiple states using OR-logic are queriable in the same request, e.g. state=5,6. See valid values from http://iow.csc.fi/model/mrd/CatalogRecord/ field preservation_state required: false type: string @@ -1678,14 +1804,14 @@ parameters: latest_filter: name: latest in: query - description: only return latest versions + description: Only return latest versions required: false type: boolean harvested_filter: name: harvested in: query - description: only return harvested datasets + description: Only return harvested datasets required: false type: boolean From a546e29e105703dbec1efdf85392e6b93a12f8da Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 14 Apr 2020 15:55:54 +0300 Subject: [PATCH 06/10] CSCFAIRMETA-499: [FIX] Datacatalog get object without authentication fix (#560) * CSCFAIRMETA-499: [FIX|ADD] Changed data catalog GET permission checking from permission file to data catalog model. Expands not authenticated permitted datacatalog requests for 'head' and 'options' querying --- .../api/rest/base/views/common_view.py | 2 +- src/metax_api/models/data_catalog.py | 11 +++++++++++ src/metax_api/permissions/permissions.py | 4 ---- .../tests/api/rest/base/views/common/auth.py | 17 ++++++++++++++--- .../api/rest/base/views/datacatalogs/read.py | 10 ++++++++++ 5 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/metax_api/api/rest/base/views/common_view.py b/src/metax_api/api/rest/base/views/common_view.py index fcdabfa6..77680252 100644 --- a/src/metax_api/api/rest/base/views/common_view.py +++ b/src/metax_api/api/rest/base/views/common_view.py @@ -34,7 +34,7 @@ class CommonViewSet(ModelViewSet): api_type = 'rest' authentication_classes = () - permission_classes = (EndUserPermissions, ServicePermissions) + permission_classes = [EndUserPermissions, ServicePermissions] cache = RedisCacheService diff --git a/src/metax_api/models/data_catalog.py b/src/metax_api/models/data_catalog.py index 52e0f31e..c61f9dec 100644 --- a/src/metax_api/models/data_catalog.py +++ b/src/metax_api/models/data_catalog.py @@ -35,6 +35,8 @@ class DataCatalog(Common): # END OF MODEL FIELD DEFINITIONS # + READ_METHODS = ('GET', 'HEAD', 'OPTIONS') + def __init__(self, *args, **kwargs): super(DataCatalog, self).__init__(*args, **kwargs) self.track_fields('catalog_json.identifier') @@ -60,3 +62,12 @@ def __repr__(self): self.catalog_json['research_dataset_schema'], self.catalog_json['dataset_versioning'], ) + + def user_has_access(self, request): + """ + Overriding inherited operation to check permissions for datacatalogs + """ + + if request.method in self.READ_METHODS or request.user.is_service: + return True + return False \ No newline at end of file diff --git a/src/metax_api/permissions/permissions.py b/src/metax_api/permissions/permissions.py index 18ef730e..ea719502 100644 --- a/src/metax_api/permissions/permissions.py +++ b/src/metax_api/permissions/permissions.py @@ -112,16 +112,13 @@ def 
_check_rest_perms(self, request, api_name): execute specific operation type on given API endpoint. """ if request.method in METHOD_MAP: - operation_type = METHOD_MAP[request.method] - if 'all' in self.perms['rest'][api_name].get(operation_type, []): has_perm = True else: has_perm = self._check_user_rest_perms(request, api_name, operation_type) else: raise MethodNotAllowed - return has_perm def _check_rpc_perms(self, request, api_name): @@ -147,7 +144,6 @@ def _check_rpc_perms(self, request, api_name): def has_object_permission(self, request, view, obj): has_perm = obj.user_has_access(request) - if not has_perm: self.message = 'You are not permitted to access this resource.' return has_perm diff --git a/src/metax_api/tests/api/rest/base/views/common/auth.py b/src/metax_api/tests/api/rest/base/views/common/auth.py index 4a6c8b79..6d2eba96 100644 --- a/src/metax_api/tests/api/rest/base/views/common/auth.py +++ b/src/metax_api/tests/api/rest/base/views/common/auth.py @@ -75,13 +75,24 @@ def test_delete_access_error(self): response = self.client.delete('/rest/files/1') self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) - def test_read_for_world_ok(self): + def test_read_for_datasets_world_ok(self): """ Reading datasets api should be permitted even without any authorization. """ self.client._credentials = {} - response = self.client.get('/rest/datasets/1') - self.assertEqual(response.status_code, status.HTTP_200_OK) + + for req in ['/rest/datasets', '/rest/datasets/1']: + response = self.client.get(req) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + def test_read_for_datacatalogs_world_ok(self): + """ + Reading datacatalogs api should be permitted even without any authorization. + """ + self.client._credentials = {} + for req in ['/rest/datacatalogs', '/rest/datacatalogs/1']: + response = self.client.get(req) + self.assertEqual(response.status_code, status.HTTP_200_OK) class ApiEndUserAccessAuthorization(CatalogRecordApiWriteCommon): diff --git a/src/metax_api/tests/api/rest/base/views/datacatalogs/read.py b/src/metax_api/tests/api/rest/base/views/datacatalogs/read.py index 23129d72..74c35576 100644 --- a/src/metax_api/tests/api/rest/base/views/datacatalogs/read.py +++ b/src/metax_api/tests/api/rest/base/views/datacatalogs/read.py @@ -30,3 +30,13 @@ def setUp(self): def test_basic_get(self): response = self.client.get('/rest/datacatalogs/%s' % self.identifier) self.assertEqual(response.status_code, status.HTTP_200_OK) + + def test_allowed_read_methods(self): + self.client._credentials = {} + for req in ['/rest/datacatalogs', '/rest/datacatalogs/1']: + response = self.client.get(req) + self.assertEqual(response.status_code, status.HTTP_200_OK) + response = self.client.head(req) + self.assertEqual(response.status_code, status.HTTP_200_OK) + response = self.client.options(req) + self.assertEqual(response.status_code, status.HTTP_200_OK) \ No newline at end of file From b42e2f4b8f9f045c8687a83c839dcb342afb002c Mon Sep 17 00:00:00 2001 From: Katri Tegel Date: Tue, 14 Apr 2020 16:35:42 +0300 Subject: [PATCH 07/10] =?UTF-8?q?CSCFAIRMETA-434:=20[ADD]=20Drafts-Statist?= =?UTF-8?q?ics-API-SQL-queries-should-be-mo=E2=80=A6=20(#563)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * CSCFAIRMETA-434: [ADD] Drafts-Statistics-API-SQL-queries-should-be-modified-to-only-count-published-datasets Add sql filters to rpc/statistics APIs so that drafts are not taken into account --- src/metax_api/services/statistic_service.py | 11 +- 
.../tests/api/rpc/base/views/statistic_rpc.py | 102 ++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/src/metax_api/services/statistic_service.py b/src/metax_api/services/statistic_service.py index 31cdf069..09a71c34 100644 --- a/src/metax_api/services/statistic_service.py +++ b/src/metax_api/services/statistic_service.py @@ -64,6 +64,8 @@ def count_datasets(cls, where_args = [] sql_args = [] + where_args.append("and state = 'published'") + if from_date: where_args.append('and cr.date_created >= %s::date') sql_args.append(from_date) @@ -156,6 +158,7 @@ def total_datasets(cls, from_date, to_date, latest=True, legacy=None, removed=No FROM metax_api_catalogrecord cr JOIN metax_api_datacatalog AS dc on dc.id = cr.data_catalog_id WHERE 1=1 + and state = 'published' OPTIONAL_WHERE_FILTERS GROUP BY mon ) @@ -174,6 +177,7 @@ def total_datasets(cls, from_date, to_date, latest=True, legacy=None, removed=No FROM metax_api_catalogrecord AS cr JOIN metax_api_datacatalog AS dc ON dc.id = cr.data_catalog_id WHERE 1=1 + and state = 'published' OPTIONAL_WHERE_FILTERS GROUP BY mon ) cr USING (mon) @@ -258,6 +262,7 @@ def _total_data_catalog_datasets(cls, from_date, to_date, access_types, dc_id): FROM metax_api_catalogrecord cr JOIN metax_api_datacatalog as dc on dc.id = cr.data_catalog_id where dc.id = %s + and state = 'published' and cr.research_dataset->'access_rights'->'access_type'->>'identifier' = %s GROUP BY mon ) @@ -281,6 +286,7 @@ def _total_data_catalog_datasets(cls, from_date, to_date, access_types, dc_id): FROM metax_api_catalogrecord AS cr JOIN metax_api_datacatalog as dc on dc.id = cr.data_catalog_id where dc.id = %s + and state = 'published' and cr.research_dataset->'access_rights'->'access_type'->>'identifier' = %s GROUP BY mon, access_type ) cr USING (mon) @@ -353,6 +359,7 @@ def _total_organization_datasets(cls, from_date, to_date, metadata_owner_org): FROM metax_api_catalogrecord cr JOIN metax_api_datacatalog as dc on dc.id = cr.data_catalog_id where dc.id = %s + and state = 'published' and cr.metadata_owner_org = %s GROUP BY mon ) @@ -373,6 +380,7 @@ def _total_organization_datasets(cls, from_date, to_date, metadata_owner_org): FROM metax_api_catalogrecord AS cr JOIN metax_api_datacatalog as dc on dc.id = cr.data_catalog_id where dc.id = %s + and state = 'published' and cr.metadata_owner_org = %s GROUP BY mon ) cr USING (mon) @@ -535,6 +543,7 @@ def total_end_user_datasets(cls, from_date, to_date): date_trunc('month', cr.date_created) AS mon FROM metax_api_catalogrecord AS cr where service_created is null + and state = 'published' GROUP BY mon ) cr USING (mon) GROUP BY mon, count @@ -575,4 +584,4 @@ def unused_files(cls): _logger.info('Done retrieving total counts') - return file_stats + return file_stats \ No newline at end of file diff --git a/src/metax_api/tests/api/rpc/base/views/statistic_rpc.py b/src/metax_api/tests/api/rpc/base/views/statistic_rpc.py index 83aa0a53..b0c80c98 100644 --- a/src/metax_api/tests/api/rpc/base/views/statistic_rpc.py +++ b/src/metax_api/tests/api/rpc/base/views/statistic_rpc.py @@ -271,6 +271,21 @@ def _get_total_dataset_count(self): """ return CatalogRecord.objects_unfiltered.count() + def _set_cr_datacatalog(self, cr_id, catalog_id): + cr = CatalogRecord.objects.get(pk=cr_id) + cr.data_catalog_id = DataCatalog.objects.get(catalog_json__identifier=catalog_id).id + cr.force_save() + + def _set_dataset_as_draft(self, cr_id): + cr = CatalogRecord.objects.get(pk=cr_id) + cr.state = 'draft' + cr.force_save() + + def 
_set_cr_organization(self, cr_id, org): + cr = CatalogRecord.objects.get(pk=cr_id) + cr.metadata_owner_org = org + cr.force_save() + class StatisticRPCCountDatasets(StatisticRPCCommon, CatalogRecordApiWriteCommon): """ @@ -678,3 +693,90 @@ def test_all_datasets_cumulative_latest_legacy(self): self.assertEqual(not_leg_not_lat[-1]['ida_byte_size'], march_size - legacy_size, not_leg_not_lat) self.assertEqual(not_leg_not_lat[-1]['count_cumulative'], total_count - legacy_count, not_leg_not_lat) self.assertEqual(not_leg_not_lat[-1]['ida_byte_size_cumulative'], total_size - legacy_size, not_leg_not_lat) + + +class StatisticRPCforDrafts(StatisticRPCCommon, CatalogRecordApiWriteCommon): + """ + Tests that drafts are not taken into account when calculating statistics + """ + def test_count_datasets_api_for_drafts(self): + """ + Tests that rpc/statistics/count_datasets returns only count of published datasets + """ + response_1 = self.client.get('/rpc/statistics/count_datasets').data + + self._set_dataset_as_draft(1) + self.assertEqual(CatalogRecord.objects.get(pk=1).state, 'draft', + 'Dataset with id=1 should have changed state to draft') + + response_2 = self.client.get('/rpc/statistics/count_datasets').data + self.assertNotEqual(response_1['count'], response_2['count'], + 'Drafts should not be returned in count_datasets api') + + def test_all_datasets_cumulative_for_drafts(self): + """ + Tests that /rpc/statistics/all_datasets_cumulative returns only published datasets + """ + url = '/rpc/statistics/all_datasets_cumulative?from_date=2019-06&to_date=2019-06' + + self._set_dataset_creation_date(1, '2019-06-15') + response_1 = self.client.get(url).data + + self._set_dataset_as_draft(1) + response_2 = self.client.get(url).data + + # ensure the counts and byte sizes are calculated without drafts + self.assertNotEqual(response_1[0]['count'], response_2[0]['count'], + 'Count for June should reduce by one as dataset id=1 was set as draft') + self.assertNotEqual(response_1[0]['ida_byte_size'], response_2[0]['ida_byte_size'], + 'Byte size for June should reduce by one as dataset id=1 was set as draft') + + def test_catalog_datasets_cumulative_for_drafts(self): + """ + Tests that /rpc/statistics/catalog_datasets_cumulative returns only published datasets + """ + + url = '/rpc/statistics/catalog_datasets_cumulative?from_date=2019-06-01&to_date=2019-06-30' + catalog = "urn:nbn:fi:att:2955e904-e3dd-4d7e-99f1-3fed446f96d3" + + self._set_dataset_creation_date(1, '2019-06-15') + self._set_cr_datacatalog(1, catalog) # Adds id=1 to catalog + + count_1 = self.client.get(url).data[catalog]['open'][0]['count'] + total_1 = self.client.get(url).data[catalog]['total'] + + self._set_dataset_as_draft(1) + + count_2 = self.client.get(url).data[catalog]['open'][0]['count'] + total_2 = self.client.get(url).data[catalog]['total'] + + # ensure the count and total are calculated without drafts + self.assertNotEqual(count_1, count_2, 'Count should reduce by one as dataset id=1 was set as draft') + self.assertNotEqual(total_1, total_2, 'Total should reduce by one as dataset id=1 was set as draft') + + def test_end_user_datasets_cumulative_for_drafts(self): + ''' End user api should return only published data ''' + url = '/rpc/statistics/end_user_datasets_cumulative?from_date=2019-06-01&to_date=2019-06-30' + + self._set_dataset_creation_date(10, '2019-06-15') + count_1 = self.client.get(url).data[0]['count'] + + self._set_dataset_as_draft(10) + count_2 = self.client.get(url).data[0]['count'] + + # ensure the count are calculated 
without drafts + self.assertNotEqual(count_1, count_2, 'Count should be reduced by one after setting id=10 as draft') + + def test_organization_datasets_cumulative_for_drafts(self): + ''' Organization api should return only published data ''' + url = "/rpc/statistics/organization_datasets_cumulative?from_date=2019-06-01&to_date=2019-06-30" + + self._set_dataset_creation_date(1, '2019-06-15') + self._set_cr_organization(1, 'org_2') + total_1 = self.client.get(url).data['org_2']['total'] + + self._set_dataset_as_draft(1) + total_2 = self.client.get(url).data['org_2']['total'] + + # ensure the totals are calculated without drafts + self.assertNotEqual(total_1, total_2, 'Count be reduced by one after setting id=1 as draft') \ No newline at end of file From 2353e58ae660f075c1d6d441193e5c0e6afe251b Mon Sep 17 00:00:00 2001 From: Tommi Pulli <43133956+tompulli@users.noreply.github.com> Date: Tue, 14 Apr 2020 16:36:23 +0300 Subject: [PATCH 08/10] CSCFAIRMETA-428: [ADD|FIX] Access granter changed to external field (#561) Access granter is visible for privileged users only Access granter is deleted when REMS deletion occurs --- .../serializers/catalog_record_serializer.py | 5 +- .../migrations/0018_auto_20200330_1101.py | 18 +++++++ src/metax_api/models/catalog_record.py | 47 +++++++++---------- .../api/rest/base/views/datasets/write.py | 13 ++++- 4 files changed, 56 insertions(+), 27 deletions(-) create mode 100644 src/metax_api/migrations/0018_auto_20200330_1101.py diff --git a/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py b/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py index ae5f379d..02a35148 100644 --- a/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py +++ b/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py @@ -82,7 +82,8 @@ class Meta: 'date_cumulation_started', 'date_cumulation_ended', 'date_last_cumulative_addition', - 'rems_identifier' + 'rems_identifier', + 'access_granter' ) + CommonSerializer.Meta.fields extra_kwargs = { @@ -124,6 +125,7 @@ def is_valid(self, raise_exception=False): self.initial_data.pop('preservation_dataset_version', None) self.initial_data.pop('preservation_dataset_origin_version', None) self.initial_data.pop('rems_identifier', None) + self.initial_data.pop('access_granter', None) if self._data_catalog_is_changed(): # updating data catalog, but not necessarily research_dataset. 
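The effect of popping these keys in is_valid() is that client-supplied values for the read-only REMS fields are silently discarded rather than rejected. A minimal illustrative sketch (not part of the diff above), assuming an authenticated service client, the standard test fixtures and an existing record with pk=1:

    from rest_framework.test import APIClient

    client = APIClient()  # assumed to carry privileged service credentials
    cr = client.get('/rest/datasets/1').data
    cr['rems_identifier'] = 'attempted:override'            # read-only, dropped in is_valid()
    cr['access_granter'] = {'name': 'Attempted Override'}   # read-only, dropped in is_valid()
    response = client.put('/rest/datasets/1', cr, format='json')
    # the attempted overrides never reach the saved record
    assert response.data.get('rems_identifier') != 'attempted:override'
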
@@ -302,6 +304,7 @@ def _check_and_strip_sensitive_fields(self, instance, res): CRS.remove_contact_info_metadata(res['research_dataset'])) res.pop('rems_identifier', None) + res.pop('access_granter', None) return res diff --git a/src/metax_api/migrations/0018_auto_20200330_1101.py b/src/metax_api/migrations/0018_auto_20200330_1101.py new file mode 100644 index 00000000..6436ce95 --- /dev/null +++ b/src/metax_api/migrations/0018_auto_20200330_1101.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.10 on 2020-03-30 08:01 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('metax_api', '0017_catalogrecord_rems_identifier'), + ] + + operations = [ + migrations.RenameField( + model_name='catalogrecord', + old_name='_access_granter', + new_name='access_granter', + ), + ] diff --git a/src/metax_api/models/catalog_record.py b/src/metax_api/models/catalog_record.py index bc3bf3b9..229b4bca 100644 --- a/src/metax_api/models/catalog_record.py +++ b/src/metax_api/models/catalog_record.py @@ -310,7 +310,7 @@ class CatalogRecord(Common): date_last_cumulative_addition = models.DateTimeField(null=True, default=None, help_text='Date of last file addition while actively cumulative.') - _access_granter = JSONField(null=True, default=None, + access_granter = JSONField(null=True, default=None, help_text='Stores data of REMS user who is currently granting access to this dataset') rems_identifier = models.CharField(max_length=200, null=True, default=None, @@ -908,11 +908,8 @@ def delete(self, *args, **kwargs): 'delete')) if self._dataset_has_rems_managed_access() and settings.REMS['ENABLED']: - self.add_post_request_callable( - REMSUpdate(self, 'close', rems_id=self.rems_identifier, reason='dataset deletion') - ) - self.rems_identifier = None - super().save(update_fields=['rems_identifier']) + self._pre_rems_deletion('dataset deletion') + super().save(update_fields=['rems_identifier', 'access_granter']) self.add_post_request_callable(RabbitMQPublishRecord(self, 'delete')) @@ -940,11 +937,8 @@ def deprecate(self, timestamp=None): self.date_deprecated = self.date_modified = timestamp or get_tz_aware_now_without_micros() if self._dataset_has_rems_managed_access() and settings.REMS['ENABLED']: - self.add_post_request_callable( - REMSUpdate(self, 'close', rems_id=self.rems_identifier, reason='dataset deprecation') - ) - self.rems_identifier = None - super().save(update_fields=['rems_identifier']) + self._pre_rems_deletion('dataset deprecation') + super().save(update_fields=['rems_identifier', 'access_granter']) super().save(update_fields=['deprecated', 'date_deprecated', 'date_modified']) self.add_post_request_callable(DelayedLog( @@ -1159,8 +1153,8 @@ def _post_create_operations(self): 'create')) if self._dataset_has_rems_managed_access() and settings.REMS['ENABLED']: - self._handle_rems_managed_access() - super().save(update_fields=['rems_identifier']) + self._pre_rems_creation() + super().save(update_fields=['rems_identifier', 'access_granter']) self.add_post_request_callable(RabbitMQPublishRecord(self, 'create')) @@ -1256,12 +1250,9 @@ def _pre_update_operations(self): if self._dataset_rems_changed(): if self._dataset_rems_access_type_changed(): if self._dataset_has_rems_managed_access(): - self._handle_rems_managed_access() + self._pre_rems_creation() else: - self.add_post_request_callable( - REMSUpdate(self, 'close', rems_id=self.rems_identifier, reason='access type change') - ) - self.rems_identifier = None + self._pre_rems_deletion(reason='access type change') 
elif self._dataset_license_changed() and self._dataset_has_rems_managed_access(): if self._dataset_has_license(): @@ -1271,10 +1262,7 @@ def _pre_update_operations(self): self.rems_identifier = generate_uuid_identifier() else: - self.add_post_request_callable( - REMSUpdate(self, 'close', rems_id=self.rems_identifier, reason='license deletion') - ) - self.rems_identifier = None + self._pre_rems_deletion(reason='license deletion') if self.field_changed('research_dataset'): if self.preservation_state in ( @@ -1482,17 +1470,28 @@ def _files_added_for_first_time(self): # creating a new dataset version already occurred once return not metadata_versions_with_files_exist - def _handle_rems_managed_access(self): + def _pre_rems_creation(self): """ Ensure that all necessary information is avaliable for REMS access and save post request callable to create correspoding REMS entity. """ self._validate_for_rems() user_info = self._get_user_info_for_rems() - self._access_granter = user_info + self.access_granter = user_info self.rems_identifier = generate_uuid_identifier() self.add_post_request_callable(REMSUpdate(self, 'create', user_info=user_info)) + def _pre_rems_deletion(self, reason): + """ + Delete rems information and save post request callable to close + corresponding REMS entity. + """ + self.add_post_request_callable( + REMSUpdate(self, 'close', rems_id=self.rems_identifier, reason=reason) + ) + self.rems_identifier = None + self.access_granter = None + def _dataset_has_rems_managed_access(self): """ Check if dataset uses REMS for managing access. diff --git a/src/metax_api/tests/api/rest/base/views/datasets/write.py b/src/metax_api/tests/api/rest/base/views/datasets/write.py index 3edc19d5..6a13f2f1 100644 --- a/src/metax_api/tests/api/rest/base/views/datasets/write.py +++ b/src/metax_api/tests/api/rest/base/views/datasets/write.py @@ -4287,6 +4287,7 @@ def test_creating_permit_dataset_creates_catalogue_item_service_succeeds(self): response = self._create_new_rems_dataset() self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data) self.assertTrue(response.data.get('rems_identifier') is not None, 'rems_identifier should be present') + self.assertTrue(response.data.get('access_granter') is not None, 'access_granter should be present') @responses.activate def test_creating_permit_dataset_creates_catalogue_item_service_fails_1(self): @@ -4342,6 +4343,7 @@ def test_changing_dataset_to_permit_creates_new_catalogue_item_succeeds(self): response = self.client.put(f'/rest/datasets/{cr["id"]}?access_granter={granter}', cr, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK, response.data) self.assertTrue(response.data.get('rems_identifier') is not None, 'rems_identifier should be present') + self.assertTrue(response.data.get('access_granter') is not None, 'access_granter should be present') @responses.activate def test_changing_dataset_to_permit_creates_new_catalogue_item_fails(self): @@ -4432,6 +4434,7 @@ def test_deleting_license_updates_rems(self): cr_after = response.data self.assertTrue(cr_after.get('rems_identifier') is None, 'REMS identifier should have been deleted') + self.assertTrue(cr_after.get('access_granter') is None, 'access_granter should have been deleted') @responses.activate def test_creating_permit_dataset_creates_catalogue_item_end_user(self): @@ -4468,6 +4471,7 @@ def test_deleting_permit_dataset_removes_catalogue_item_succeeds(self): cr = self.client.get(f'/rest/datasets/{cr_id}?removed').data self.assertTrue(cr.get('rems_identifier') is 
None, 'rems_identifier should not be present') + self.assertTrue(cr.get('access_granter') is None, 'access_granter should not be present') @responses.activate def test_deleting_permit_dataset_removes_catalogue_item_fails(self): @@ -4492,6 +4496,7 @@ def test_deprecating_permit_dataset_removes_catalogue_item_succeeds(self): cr_after = self.client.get(f'/rest/datasets/{cr_before["id"]}').data self.assertTrue(cr_after.get('rems_identifier') is None, 'rems_identifier should not be present') + self.assertTrue(cr_after.get('access_granter') is None, 'access_granter should not be present') @responses.activate def test_deprecating_permit_dataset_removes_catalogue_item_fails(self): @@ -4563,27 +4568,31 @@ def test_missing_license_in_dataset(self): self.assertTrue('must define license' in response.data['detail'], response.data) @responses.activate - def test_only_return_rems_identifier_to_privileged(self): + def test_only_return_rems_info_to_privileged(self): self._set_http_authorization('service') response = self._create_new_rems_dataset() self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data) self.assertTrue(response.data.get('rems_identifier') is not None, 'rems_identifier should be returned to owner') + self.assertTrue(response.data.get('access_granter') is not None, 'access_granter should be returned to owner') self._set_http_authorization('no') response = self.client.get(f'/rest/datasets/{response.data["id"]}') self.assertEqual(response.status_code, status.HTTP_200_OK, response.data) self.assertTrue(response.data.get('rems_identifier') is None, 'rems_identifier should not be returned to Anon') + self.assertTrue(response.data.get('access_granter') is None, 'access_granter should not be returned to Anon') @responses.activate - def test_rems_identifier_cannot_be_changed(self): + def test_rems_info_cannot_be_changed(self): response = self._create_new_rems_dataset() self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data) cr = response.data cr['rems_identifier'] = 'some:new:identifier' + cr['access_granter']['name'] = 'New Name' response = self.client.put(f'/rest/datasets/{cr["id"]}', cr, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK, response.data) self.assertNotEqual(response.data['rems_identifier'], 'some:new:identifier', 'rems_id should not be changed') + self.assertNotEqual(response.data['access_granter'], 'New Name', 'access_granter should not be changed') From 71b3548b859ea73720ee022822e965135c5b3f74 Mon Sep 17 00:00:00 2001 From: Evgenia Lyjina Date: Tue, 14 Apr 2020 16:36:46 +0300 Subject: [PATCH 09/10] CSCFAIRMETA-179: [REF] swagger descriptions with capital letters --- swagger/swagger.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 9c0a6286..30b7634a 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -544,17 +544,17 @@ paths: type: string - name: pagination in: query - description: sets paging on with default limit of 10 + description: Sets paging on with default limit of 10 required: false type: bolean - name: offset in: query - description: offset for paging + description: Offset for paging required: false type: integer - name: limit in: query - description: limit for paging + description: Limit for paging required: false type: integer default: 10 From 48ec52087e5a39485fca81c380fc5a1070c88471 Mon Sep 17 00:00:00 2001 From: katrite Date: Wed, 15 Apr 2020 15:30:46 +0300 Subject: [PATCH 10/10] 
CSCFAIRMETA-435-Drafts-OAI-PMH-API-Should-only-list-published-datasets Modify draft handling in getRecord --- src/metax_api/api/oaipmh/base/metax_oai_server.py | 4 +--- src/metax_api/tests/api/oaipmh/minimal_api.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/metax_api/api/oaipmh/base/metax_oai_server.py b/src/metax_api/api/oaipmh/base/metax_oai_server.py index a920efbc..062c0a9b 100644 --- a/src/metax_api/api/oaipmh/base/metax_oai_server.py +++ b/src/metax_api/api/oaipmh/base/metax_oai_server.py @@ -442,9 +442,7 @@ def getRecord(self, metadataPrefix, identifier): if metadataPrefix == OAI_DC_URNRESOLVER_MDPREFIX: raise BadArgumentError('Invalid metadataPrefix value. It can be only used with ListRecords verb') record = CatalogRecord.objects.get(identifier__exact=identifier) - if record.state == 'published': - pass - else: + if record.state == 'draft': raise IdDoesNotExistError("No record with identifier %s is available." % identifier) except CatalogRecord.DoesNotExist: try: diff --git a/src/metax_api/tests/api/oaipmh/minimal_api.py b/src/metax_api/tests/api/oaipmh/minimal_api.py index 043180f4..ef35776a 100644 --- a/src/metax_api/tests/api/oaipmh/minimal_api.py +++ b/src/metax_api/tests/api/oaipmh/minimal_api.py @@ -49,6 +49,7 @@ def setUp(self): # some cr that has publisher set... cr = CatalogRecord.objects.filter(research_dataset__publisher__isnull=False).first() self.identifier = cr.identifier + self.id = cr.id self.preferred_identifier = cr.preferred_identifier self._use_http_authorization() @@ -303,14 +304,22 @@ def test_get_record(self): def test_get_record_for_drafts(self): ''' Tests that GetRecord doesn't return drafts ''' - self._set_dataset_as_draft(1) + response = self.client.get( + '/oai/?verb=GetRecord&identifier=%s&metadataPrefix=oai_dc' % self.identifier) + self.assertEqual(response.status_code, status.HTTP_200_OK) + identifiers = self._get_results(response.content, + '//o:record/o:header/o:identifier[text()="%s"]' % self.identifier) + self.assertTrue(len(identifiers) == 1, response.content) + + # Set same dataset as draft + self._set_dataset_as_draft(self.id) response = self.client.get( '/oai/?verb=GetRecord&identifier=%s&metadataPrefix=oai_dc' % self.identifier) self.assertEqual(response.status_code, status.HTTP_200_OK) identifiers = self._get_results(response.content, '//o:record/o:header/o:identifier[text()="%s"]' % self.identifier) - self.assertFalse(len(identifiers) == 1, response.content) + self.assertTrue(len(identifiers) == 0, response.content) def test_get_record_non_existing(self): response = self.client.get('/oai/?verb=GetRecord&identifier=urn:non:existing&metadataPrefix=oai_dc')
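
For reference, a rough client-side sketch of the GetRecord behaviour for draft datasets introduced in the last patch. The base URL and the identifier below are placeholder assumptions, not values taken from this patch; it assumes a locally running Metax instance and a dataset whose state has been set to 'draft':

    import requests

    OAI_URL = 'http://localhost:8000/oai/'              # assumed local instance
    draft_id = 'urn:nbn:fi:att:example-draft-dataset'   # hypothetical draft identifier

    params = {'verb': 'GetRecord', 'identifier': draft_id, 'metadataPrefix': 'oai_dc'}
    response = requests.get(OAI_URL, params=params)

    # OAI-PMH reports errors inside an HTTP 200 response; a draft should now be
    # answered like a missing record, with the standard idDoesNotExist error code.
    assert response.status_code == 200
    assert 'idDoesNotExist' in response.text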