diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7a31fe95..adb2390b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -4,36 +4,55 @@ workflow:
     - if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME !~ /^(demo|master|stable|staging|test)$/
 
 stages:
+  - clean_build
   - deploy
   - test
-  - clean_test
-  - clean_env
   - update
-  - clean_build
-
-deploy:
-  stage: deploy
-  environment: $CI_COMMIT_REF_NAME
-  script:
-    - ansible-playbook -i $ANSIBLE_INVENTORY $DEPLOY_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
+  - clean_env
 
 integration_test:
   stage: test
+  tags:
+    - docker
+  image:
+    name: python:3.8
+  services:
+    - postgres:9.6
+    - rabbitmq:3
+    - redis:latest
+    - name: elasticsearch:7.9.2
+      alias: elasticsearch
+      command: [ "bin/elasticsearch", "-Expack.security.enabled=false", "-Ediscovery.type=single-node" ]
+  variables:
+    POSTGRES_USER: metax_user
+    POSTGRES_PASSWORD: password
+    POSTGRES_DB: metax_db
+    METAX_DATABASE: metax_db
+    METAX_DATABASE_PASSWORD: password
+    METAX_DATABASE_USER: metax_user
+    METAX_DATABASE_HOST: postgres
+    ELASTIC_SEARCH_HOSTS: elasticsearch
+    RABBIT_MQ_HOSTS: rabbitmq
+    REDIS_HOST: redis
+    REDIS_PORT: 6379
+    DJANGO_SECRET_KEY: django-insecure-22388&4#2_#u0e%$%!)5vo3mpys)#f7@vsk9az354!jgze--&e
+    DJANGO_ENV: unittests
   environment: $CI_COMMIT_REF_NAME
   script:
-    - ansible-playbook -i $ANSIBLE_INVENTORY $TEST_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
-
-clean_test:
-  stage: clean_test
-  environment:
-    name: $CI_COMMIT_REF_NAME
-    on_stop: clean_gitlab_env
-  script:
-    - ansible-playbook -i $ANSIBLE_INVENTORY $DELETE_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
-  rules:
-    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
-      when: always
-    - when: never
+    - apt-get update && apt-get install -y xqilla libxerces-c-dev build-essential libssl-dev libffi-dev python-dev libxqilla-dev
+    - mkdir -p /var/log/metax-api/errors
+    - pip install -r requirements.txt
+    - cd src
+    - python manage.py first_time_setup
+    - coverage run manage.py test --parallel
+    - coverage combine
+    - coverage report -m
+    - coverage xml
+  artifacts:
+    reports:
+      cobertura: src/coverage.xml
+    paths:
+      - src/coverage.xml
 
 clean_gitlab_env:
   stage: clean_env
@@ -45,16 +64,25 @@ clean_gitlab_env:
   script:
     - echo "Cleaning deleted branches from environments"
   rules:
-    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME !~ /^(demo|master|stable|staging|test)$/
      when: never
    - when: manual
 
+deploy:
+  stage: deploy
+  environment: $CI_COMMIT_REF_NAME
+  script:
+    - ansible-playbook -i $ANSIBLE_INVENTORY $DEPLOY_PLAYBOOK --vault-id $ANSIBLE_VAULT_FILE -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
+  rules:
+    - if: $CI_COMMIT_BRANCH =~ /^(demo|master|stable|staging|test)$/
+      when: always
+
 update_metax:
   stage: update
   environment: $CI_COMMIT_REF_NAME
   script:
-    - ansible-playbook -i $ANSIBLE_INVENTORY $UPDATE_PROXY_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
-    - ansible-playbook -i $ANSIBLE_INVENTORY $MANAGE_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
+    - ansible-playbook -i $ANSIBLE_INVENTORY $UPDATE_PROXY_PLAYBOOK --vault-id $ANSIBLE_VAULT_FILE -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
+    - ansible-playbook -i $ANSIBLE_INVENTORY $MANAGE_PLAYBOOK --vault-id $ANSIBLE_VAULT_FILE -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
   rules:
     - if: $CI_COMMIT_BRANCH =~ /^(demo|stable|staging|test)$/
       when: always
@@ -64,9 +92,33 @@ clean_previous_build:
   stage: clean_build
   environment: $CI_COMMIT_REF_NAME
   script:
-    - ansible-playbook -i $ANSIBLE_INVENTORY $DELETE_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
+    - ansible-playbook -i $ANSIBLE_INVENTORY $DELETE_PLAYBOOK --vault-id $ANSIBLE_VAULT_FILE -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
   rules:
     - if: $CI_COMMIT_BRANCH =~ /^(staging|test|stable)$/
-      when: never
-    - when: manual
+      when: always
 
+sonarqube-check:
+  tags:
+    - docker
+  stage: test
+  image:
+    name: sonarsource/sonar-scanner-cli:latest
+    entrypoint: [""]
+  variables:
+    SONAR_USER_HOME: "${CI_PROJECT_DIR}/.sonar"  # Defines the location of the analysis task cache
+    GIT_DEPTH: "0"  # Tells git to fetch all the branches of the project, required by the analysis task
+  cache:
+    key: "${CI_JOB_NAME}"
+    paths:
+      - .sonar/cache
+  script:
+    - sonar-scanner
+  allow_failure: true
+  needs:
+    - job: integration_test
+      artifacts: true
+  only:
+    - merge_requests
+    - master
+    - test
+    - staging
diff --git a/README.md b/README.md
index b753029d..59210c65 100755
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+# Metax API
+
 This repository contains the code for Metax API service.
 
 ## License
diff --git a/sonar-project.properties b/sonar-project.properties
new file mode 100644
index 00000000..98d55388
--- /dev/null
+++ b/sonar-project.properties
@@ -0,0 +1,9 @@
+sonar.projectKey=fairdata_fairdata-metax_AX-2XrAIExyDtAUSxNVt
+sonar.qualitygate.wait=true
+# Scan settings.
+#sonar.projectBaseDir=.
+# Define the directories that should be scanned. Comma separated.
+sonar.sources=src/metax_api/
+sonar.exclusions=src/metax_api/tests/**/*
+sonar.tests=src/metax_api/tests/
+sonar.python.coverage.reportPaths=src/coverage*.xml
diff --git a/src/.coveragerc b/src/.coveragerc
index 63a369dc..3d5d6384 100755
--- a/src/.coveragerc
+++ b/src/.coveragerc
@@ -10,6 +10,8 @@ omit =
     */urls.py
     */router.py
     metax_api/onappstart.py
+    */site-packages/*
+
 branch = True
 concurrency = multiprocessing
 data_file = coverage_data/coverage.db
@@ -29,6 +31,8 @@ exclude_lines =
     raise NotImplementedError
     if 0:
    if __name__ == .__main__.:
+
+skip_covered = True
 
 omit =
     */migrations/*
@@ -40,4 +44,5 @@ omit =
     */manage.py
     */urls.py
     */router.py
-    metax_api/onappstart.py
\ No newline at end of file
+    metax_api/onappstart.py
+    */site-packages/*
diff --git a/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py b/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py
index c262c020..303459d7 100755
--- a/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py
+++ b/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py
@@ -254,6 +254,29 @@ def _check_end_user_allowed_catalogs(self, dc_identifier):
                 }
             )
 
+    def _filter_research_dataset_fields(self, res):
+        """
+        If research_dataset_fields query parameter is supplied, return only
+        requested fields from research_dataset.
+ """ + if ( + "research_dataset" in res + and "view" in self.context + and "research_dataset_fields" in self.context["view"].request.query_params + ): + research_dataset_fields = set( + self.context["view"] + .request.query_params.get("research_dataset_fields", "") + .split(",") + ) + research_dataset = { + key: value + for (key, value) in res["research_dataset"].items() + if key in research_dataset_fields + } + return {**res, "research_dataset": research_dataset} + return res + def to_representation(self, instance): res = super(CatalogRecordSerializer, self).to_representation(instance) @@ -281,7 +304,14 @@ def to_representation(self, instance): res["alternate_record_set"] = [ar.identifier for ar in alternate_records] if "dataset_version_set" in res: - res["dataset_version_set"] = instance.dataset_version_set.get_listing() + # avoid querying records when there are no other datasets in dataset_version_set + if ( + hasattr(instance, "dataset_version_set__records__count") + and instance.dataset_version_set__records__count == 1 + ): + res["dataset_version_set"] = [instance.version_dict] + else: + res["dataset_version_set"] = instance.dataset_version_set.get_listing() if "next_dataset_version" in res: if instance.next_dataset_version.state == CatalogRecord.STATE_PUBLISHED: @@ -322,6 +352,7 @@ def to_representation(self, instance): if "request" in self.context and "file_details" in self.context["request"].query_params: CRS.populate_file_details(res, self.context["request"]) + res = self._filter_research_dataset_fields(res) res = self._check_and_strip_sensitive_fields(instance, res) return res @@ -441,6 +472,7 @@ def validate_research_dataset(self, value): self._validate_research_dataset_uniqueness(value) CRS.validate_reference_data(value, cache) + self._validate_org_name_is_set(self.initial_data["research_dataset"]) return value diff --git a/src/metax_api/api/rest/base/views/common_view.py b/src/metax_api/api/rest/base/views/common_view.py index 053a5a40..1833da7a 100755 --- a/src/metax_api/api/rest/base/views/common_view.py +++ b/src/metax_api/api/rest/base/views/common_view.py @@ -174,7 +174,6 @@ def get_queryset(self): """ additional_filters = {} q_filters = [] - deduplicated_q_filters = [] if hasattr(self, "queryset_search_params"): additional_filters.update(**self.queryset_search_params) @@ -185,10 +184,6 @@ def get_queryset(self): # Q-filter objects, which can contain more complex filter options such as OR-clauses q_filters = additional_filters.pop("q_filters") - if "deduplicated_q_filters" in additional_filters: - # Q-filter objects that may produce duplicate results - deduplicated_q_filters = additional_filters.pop("deduplicated_q_filters") - if CS.get_boolean_query_param(self.request, "removed"): additional_filters.update({"removed": True}) self.queryset = self.queryset_unfiltered @@ -210,11 +205,6 @@ def get_queryset(self): self.select_related = [rel for rel in self.select_related if rel in self.fields] queryset = super().get_queryset() - if deduplicated_q_filters: - # run filters that may produce duplicates and deduplicate the results. deduplicating just the ids - # in a subquery is faster than deduplicating the full results when there are a lot of duplicates. 
-            id_query = queryset.filter(*deduplicated_q_filters).values("id").distinct()
-            queryset = queryset.filter(id__in=id_query)
         queryset = queryset.filter(*q_filters, **additional_filters)
 
         if self.request.META["REQUEST_METHOD"] in WRITE_OPERATIONS:
diff --git a/src/metax_api/api/rest/base/views/dataset_view.py b/src/metax_api/api/rest/base/views/dataset_view.py
index 648feb45..8bd0554c 100755
--- a/src/metax_api/api/rest/base/views/dataset_view.py
+++ b/src/metax_api/api/rest/base/views/dataset_view.py
@@ -10,6 +10,7 @@
 
 from django.conf import settings
 from django.http import Http404
+from django.db.models import Count
 from rest_framework import status
 from rest_framework.decorators import action
 
@@ -74,6 +75,26 @@ def get_queryset(self):
                     data_catalog__catalog_json__identifier__in=settings.LEGACY_CATALOGS
                 )
 
+        if self.request.META["REQUEST_METHOD"] == "GET":
+            # Optimize dataset listing by prefetching related objects.
+            # Annotate results with number of records in dataset_version_set
+            # to allow the serializer to skip querying other versions when there
+            # is only one.
+            return (
+                super()
+                .get_queryset()
+                .prefetch_related(
+                    "data_catalog",
+                    "dataset_version_set",
+                    "preservation_dataset_version",
+                    "preservation_dataset_origin_version",
+                    "next_draft",
+                    "draft_of",
+                    "editor_permissions",
+                )
+                .annotate(Count("dataset_version_set__records"))
+            )
+
         return super().get_queryset()
 
     def retrieve(self, request, *args, **kwargs):
diff --git a/src/metax_api/api/rpc/v2/views/dataset_rpc.py b/src/metax_api/api/rpc/v2/views/dataset_rpc.py
index 4ca86b70..b0d77130 100755
--- a/src/metax_api/api/rpc/v2/views/dataset_rpc.py
+++ b/src/metax_api/api/rpc/v2/views/dataset_rpc.py
@@ -82,7 +82,8 @@ def create_draft(self, request):
                 data={"id": cr.next_draft.id, "identifier": cr.next_draft.identifier},
                 status=status.HTTP_201_CREATED,
             )
-        except DatabaseError:
+        except DatabaseError as e:
+            _logger.error(f"DatabaseError: {e}")
             return Response({'error': 'Failed to create draft'}, status=status.HTTP_400_BAD_REQUEST)
 
     @action(detail=False, methods=["post"], url_path="create_new_version")
@@ -100,7 +101,8 @@ def create_new_version(self, request):
                 },
                 status=status.HTTP_201_CREATED,
             )
-        except DatabaseError:
+        except DatabaseError as e:
+            _logger.error(f"DatabaseError: {e}")
             return Response({'error': 'Failed to create a new version'}, status=status.HTTP_400_BAD_REQUEST)
 
     @action(detail=False, methods=["post"], url_path="publish_dataset")
@@ -115,7 +117,8 @@ def publish_dataset(self, request):
                 data={"preferred_identifier": cr.preferred_identifier},
                 status=status.HTTP_200_OK,
             )
-        except DatabaseError:
+        except DatabaseError as e:
+            _logger.error(f"DatabaseError: {e}")
             return Response({'error': 'Failed to publish dataset'}, status=status.HTTP_400_BAD_REQUEST)
 
     @action(detail=False, methods=["post"], url_path="merge_draft")
diff --git a/src/metax_api/management/commands/create_statistic_report.py b/src/metax_api/management/commands/create_statistic_report.py
index a68d8021..e9011a43 100644
--- a/src/metax_api/management/commands/create_statistic_report.py
+++ b/src/metax_api/management/commands/create_statistic_report.py
@@ -28,11 +28,13 @@ def handle(self, *args, **options):
             file_pids = ret[1]
 
             if len(file_pids) == 0:
-                catalog_records = ""
+                published_catalog_record_pids = ""
             else:
-                catalog_records = FileService.get_identifiers(file_pids, "noparams", True, get_pids=True).data
+                all_catalog_records = FileService.get_identifiers(file_pids, "noparams", True).data
+                published_catalog_records = CatalogRecordV2.objects.filter(identifier__in = all_catalog_records, state = "published")
+                published_catalog_record_pids = list(published_catalog_records.values_list('research_dataset__preferred_identifier', flat = True).distinct())
 
-            stat = ProjectStatistics(project_id, count, size, catalog_records)
+            stat = ProjectStatistics(project_id, count, size, published_catalog_record_pids)
             stat.save()
 
@@ -44,4 +46,4 @@ def handle(self, *args, **options):
             stat = OrganizationStatistics(org_id, ret["count"], ret["ida_byte_size"])
             stat.save()
 
-        logger.info("Statistic summary created")
\ No newline at end of file
+        logger.info("Statistic summary created")
diff --git a/src/metax_api/models/catalog_record.py b/src/metax_api/models/catalog_record.py
index dd9561b0..ef2db7a8 100755
--- a/src/metax_api/models/catalog_record.py
+++ b/src/metax_api/models/catalog_record.py
@@ -158,25 +158,17 @@ def get_listing(self):
             self.records(manager="objects_unfiltered")
             .filter(state=CatalogRecord.STATE_PUBLISHED)
             .order_by("-date_created")
+            .only(
+                "id",
+                "identifier",
+                "research_dataset",
+                "dataset_version_set_id",
+                "date_created",
+                "date_removed",
+                "removed",
+            )
         )
-
-        versions = [
-            {
-                "identifier": r.identifier,
-                "preferred_identifier": r.preferred_identifier,
-                "removed": r.removed,
-                "date_created": r.date_created.astimezone().isoformat(),
-                "date_removed": r.date_removed.astimezone().isoformat() if r.date_removed else None,
-            }
-            for r in records
-        ]
-
-        # dont show the date_removed field at all if the value is None (record has not been removed)
-        versions = [
-            {key: value for (key, value) in i.items() if value is not None} for i in versions
-        ]
-
-        return versions
+        return [r.version_dict for r in records]
 
     def print_records(self):  # pragma: no cover
         for r in self.records.all():
@@ -1295,6 +1287,21 @@ def identifiers_dict(self):
         except:
             return {}
 
+    @property
+    def version_dict(self):
+        try:
+            val = {
+                "identifier": self.identifier,
+                "preferred_identifier": self.research_dataset["preferred_identifier"],
+                "date_created": self.date_created.astimezone().isoformat(),
+                "removed": self.removed,
+            }
+            if self.removed and self.date_removed:
+                val['date_removed'] = self.date_removed
+            return val
+        except:
+            return {}
+
     @property
     def preferred_identifier(self):
         try:
@@ -2278,6 +2285,7 @@ def _create_new_dataset_version_template(self):
         new_version_template.next_dataset_version = None
         new_version_template.previous_dataset_version = None
         new_version_template.dataset_version_set = None
+        new_version_template.preservation_dataset_version = None
         new_version_template.identifier = generate_uuid_identifier()
         new_version_template.research_dataset[
             "metadata_version_identifier"
diff --git a/src/metax_api/models/common.py b/src/metax_api/models/common.py
index 62b5f0b6..2c7c374c 100755
--- a/src/metax_api/models/common.py
+++ b/src/metax_api/models/common.py
@@ -5,7 +5,7 @@
 # :author: CSC - IT Center for Science Ltd., Espoo Finland
 # :license: MIT
 
-from copy import deepcopy
+import pickle
 
 from dateutil import parser
 from django.core.exceptions import FieldError
@@ -132,6 +132,16 @@ def modified_since(self, timestamp):
 
         return timestamp < self.date_modified
 
+    def _deepcopy_field(self, field_value):
+        """
+        Deep copy field value.
+
+        Pickle can be an order of magnitude faster than copy.deepcopy for
+        deeply nested fields like CatalogRecord.research_dataset.
+ """ + return pickle.loads(pickle.dumps(field_value)) + + def track_fields(self, *fields): """ Save initial values from object fields when object is created (= retrieved from db), @@ -141,17 +151,16 @@ def track_fields(self, *fields): field_name is a dict (a JSON field). For now only one level of nesting is supported. If a need arises, can be made mega generic. """ - for field_name in fields: - - self._tracked_fields.append(field_name) + self._tracked_fields.extend(fields) + for field_name in fields: if "." in field_name: self._track_json_field(field_name) else: if self._field_is_loaded(field_name): requested_field = getattr(self, field_name) if isinstance(requested_field, dict): - self._initial_data[field_name] = deepcopy(requested_field) + self._initial_data[field_name] = self._deepcopy_field(requested_field) else: self._initial_data[field_name] = requested_field @@ -173,7 +182,7 @@ def _track_json_field(self, field_name): self._initial_data[field_name] = {} if isinstance(json_field_value, dict): - self._initial_data[field_name][json_field_name] = deepcopy(json_field_value) + self._initial_data[field_name][json_field_name] = self._deepcopy_field(json_field_value) else: self._initial_data[field_name][json_field_name] = json_field_value diff --git a/src/metax_api/services/catalog_record_service.py b/src/metax_api/services/catalog_record_service.py index 73277998..a707e004 100755 --- a/src/metax_api/services/catalog_record_service.py +++ b/src/metax_api/services/catalog_record_service.py @@ -144,10 +144,18 @@ def filter_by_editor_permissions_user(request, queryset_search_params): # non-service users can only query their own datasets if not request.user.is_service: - if request.user.username == '': - raise Http403({"detail": ["Query by editor_permissions_user is only supported for authenticated users"]}) + if request.user.username == "": + raise Http403( + { + "detail": [ + "Query by editor_permissions_user is only supported for authenticated users" + ] + } + ) if request.user.username != user_id: - raise Http403({"detail": ["Provided editor_permissions_user does not match current user"]}) + raise Http403( + {"detail": ["Provided editor_permissions_user does not match current user"]} + ) queryset_search_params["editor_permissions__users__user_id"] = user_id queryset_search_params["editor_permissions__users__removed"] = False @@ -227,8 +235,12 @@ def _get_org_filter(agent, org): if agent == "publisher": name_filter |= Q(**{f"research_dataset__{agent}__{name_en}__iregex": org}) name_filter |= Q(**{f"research_dataset__{agent}__{name_fi}__iregex": org}) - name_filter |= Q(**{f"research_dataset__{agent}__member_of__{name_en}__iregex": org}) - name_filter |= Q(**{f"research_dataset__{agent}__member_of__{name_fi}__iregex": org}) + name_filter |= Q( + **{f"research_dataset__{agent}__member_of__{name_en}__iregex": org} + ) + name_filter |= Q( + **{f"research_dataset__{agent}__member_of__{name_fi}__iregex": org} + ) else: for i in range(3): name_filter |= Q(**{f"research_dataset__{agent}__{i}__{name_en}__iregex": org}) @@ -262,15 +274,9 @@ def _get_org_filter(agent, org): } ) else: + name_filter |= Q(**{f"research_dataset__{agent}__contains": [{name: org}]}) name_filter |= Q( - **{f"research_dataset__{agent}__contains": [{name: org}]} - ) - name_filter |= Q( - **{ - f"research_dataset__{agent}__contains": [ - {"member_of": {name: org}} - ] - } + **{f"research_dataset__{agent}__contains": [{"member_of": {name: org}}]} ) return name_filter @@ -352,15 +358,20 @@ def 
set_projects_filter(queryset_search_params, request): if not set(projects).issubset(user_projects): raise Http403({"detail": ["User is not member of project"]}) - q_filter = Q(files__project_identifier__in=projects) - if "deduplicated_q_filters" in queryset_search_params: - queryset_search_params["deduplicated_q_filters"].append(q_filter) + project_cr_ids = ( + File.objects_unfiltered.filter(project_identifier__in=projects) + .values("record__id") + .distinct() + ) + q_filter = Q(id__in=project_cr_ids) + + if "q_filters" in queryset_search_params: + queryset_search_params["q_filters"].append(q_filter) else: - queryset_search_params["deduplicated_q_filters"] = [q_filter] + queryset_search_params["q_filters"] = [q_filter] return queryset_search_params - @staticmethod def populate_file_details(cr_json, request): """ diff --git a/src/metax_api/swagger/v1/swagger.yaml b/src/metax_api/swagger/v1/swagger.yaml index 49c82192..ce0f1c51 100755 --- a/src/metax_api/swagger/v1/swagger.yaml +++ b/src/metax_api/swagger/v1/swagger.yaml @@ -1007,6 +1007,7 @@ paths: required: false type: string - $ref: "#/parameters/fields" + - $ref: "#/parameters/research_dataset_fields" - $ref: "#/parameters/include_legacy" responses: "200": @@ -1261,6 +1262,7 @@ paths: required: false type: boolean - $ref: "#/parameters/fields" + - $ref: "#/parameters/research_dataset_fields" - $ref: "#/parameters/include_legacy" responses: '200': @@ -2181,7 +2183,14 @@ parameters: fields: name: fields in: query - description: Comma separated list of fields that is returned. Note that nested fields are not supported. + description: Comma separated list of fields that are returned. Note that nested fields are not supported. + required: false + type: string + + research_dataset_fields: + name: research_dataset_fields + in: query + description: Comma separated list of fields in research_dataset that are returned. required: false type: string diff --git a/src/metax_api/swagger/v2/swagger.yaml b/src/metax_api/swagger/v2/swagger.yaml index 606c7ed6..7b11ce04 100755 --- a/src/metax_api/swagger/v2/swagger.yaml +++ b/src/metax_api/swagger/v2/swagger.yaml @@ -1013,6 +1013,7 @@ paths: type: string - $ref: "#/parameters/include_user_metadata" - $ref: "#/parameters/fields" + - $ref: "#/parameters/research_dataset_fields" - $ref: "#/parameters/include_legacy" responses: "200": @@ -1282,6 +1283,7 @@ paths: type: boolean - $ref: "#/parameters/include_user_metadata" - $ref: "#/parameters/fields" + - $ref: "#/parameters/research_dataset_fields" - $ref: "#/parameters/include_legacy" responses: '200': @@ -2482,7 +2484,14 @@ parameters: fields: name: fields in: query - description: Comma separated list of fields that is returned. Note that nested fields are not supported. + description: Comma separated list of fields that are returned. Note that nested fields are not supported. + required: false + type: string + + research_dataset_fields: + name: research_dataset_fields + in: query + description: Comma separated list of fields in research_dataset that are returned. 
     required: false
     type: string
 
diff --git a/src/metax_api/tasks/refdata/refdata_indexer/resources/organizations/organizations.csv b/src/metax_api/tasks/refdata/refdata_indexer/resources/organizations/organizations.csv
index 0c412f4a..826cb10f 100755
--- a/src/metax_api/tasks/refdata/refdata_indexer/resources/organizations/organizations.csv
+++ b/src/metax_api/tasks/refdata/refdata_indexer/resources/organizations/organizations.csv
@@ -451,6 +451,7 @@ Itä-Suomen yliopisto,University of Eastern Finland,Östra Finlands universitet,
 Itä-Suomen yliopisto,University of Eastern Finland,Östra Finlands universitet,10088,,505010,Yhteiskuntatieteiden laitos,,
 Itä-Suomen yliopisto,University of Eastern Finland,Östra Finlands universitet,10088,,100000,"Yliopiston johto, yhteiset ja yliopistopalvelut",,
 Itä-Suomen yliopisto,University of Eastern Finland,Östra Finlands universitet,10088,,307010,Ympäristö- ja biotieteiden laitos,,
+Jane ja Aatos Erkon säätiö,Jane and Aatos Erkko Foundation,Jane och Aatos Erkkos stiftelse,17584348,,,,,
 Jyväskylän ammattikorkeakoulu,JAMK University of Applied Sciences,,02504,,,,,
 Jyväskylän ammattikorkeakoulu,JAMK University of Applied Sciences,Jyväskylän ammattikorkeakoulu,02504,,1,Ammatillinen opettajakorkeakoulu,,
 Jyväskylän ammattikorkeakoulu,JAMK University of Applied Sciences,Jyväskylän ammattikorkeakoulu,02504,,5,Hallintoyksikkö,,
@@ -2013,7 +2014,7 @@ Oulun yliopisto,University of Oulu,Uleåborgs universitet,01904,,240532,Älykkäät
 Oulun yliopisto,University of Oulu,Uleåborgs universitet,01904,,2405320,Älykkäät koneet ja järjestelmät,,
 Oulun yliopistollisen sairaalan erityisvastuualue,Oulu University Hospital Catchment Area,,06794809,,,,,
 Poliisiammattikorkeakoulu,Police University College,,02557,,,,,
-Ruokavirasto,Finnish Food Authority,,430001,,,,,
+Ruokavirasto,Finnish Food Authority,Livsmedelsverket,430001,,,,,
 Saimaan ammattikorkeakoulu,Saimaa University of Applied Sciences,,02609,,,,http://isni.org/isni/0000000404184038,31
 Saimaan ammattikorkeakoulu,Saimaa University of Applied Sciences,,02609,,2099,AMK yhteiset palvelut,,
 Saimaan ammattikorkeakoulu,Saimaa University of Applied Sciences,,02609,,2310,Hotelli- ja ravintola-ala,,
diff --git a/src/metax_api/tests/api/rest/base/views/datasets/read.py b/src/metax_api/tests/api/rest/base/views/datasets/read.py
index 688e99f2..66af746d 100755
--- a/src/metax_api/tests/api/rest/base/views/datasets/read.py
+++ b/src/metax_api/tests/api/rest/base/views/datasets/read.py
@@ -611,7 +611,11 @@ def test_agents_and_actors_with_ids(self):
         )
         cr.research_dataset["creator"] = []
         cr.research_dataset["creator"].append(
-            {"@type": "Organization", "name": {"en": "Unique Organization"}, "identifier": "http://uri.suomi.fi/codelist/fairdata/organization/code/1234567"}
+            {
+                "@type": "Organization",
+                "name": {"en": "Unique Organization"},
+                "identifier": "http://uri.suomi.fi/codelist/fairdata/organization/code/1234567",
+            }
         )
         cr.force_save()
 
@@ -892,7 +896,10 @@ def test_filter_by_projects_for_end_user(self):
         self._mock_token_validation_succeeds()
         self._use_http_authorization(
             method="bearer",
-            token={"group_names": ["IDA01:project_x", "IDA01:no_datasets_here"], "CSCUserName": "testi"}
+            token={
+                "group_names": ["IDA01:project_x", "IDA01:no_datasets_here"],
+                "CSCUserName": "testi",
+            },
         )
 
         response = self.client.get("/rest/datasets?projects=project_x&pagination=false")
@@ -967,17 +974,30 @@ def test_filter_by_legacy(self):
 
     def test_filter_by_editor_permissions_user_ok(self):
         cr = CatalogRecord.objects.get(pk=1)
-        cr.editor_permissions.users.update(user_id='test_user_x')
+        cr.editor_permissions.users.update(user_id="test_user_x")
         response = self.client.get(f"/rest/datasets?editor_permissions_user=test_user_x")
         self.assertEqual(response.data["count"], 1)
 
     def test_filter_by_editor_permissions_user_removed(self):
         cr = CatalogRecord.objects.get(pk=1)
-        cr.editor_permissions.users.update(user_id='test_user_x')
+        cr.editor_permissions.users.update(user_id="test_user_x")
         cr.editor_permissions.users.first().delete()
         response = self.client.get(f"/rest/datasets?editor_permissions_user=test_user_x")
         self.assertEqual(response.data["count"], 0)
 
+    def test_research_dataset_fields(self):
+        cr = CatalogRecord.objects.get(pk=1)
+        expected_fields = {
+            "title": cr.research_dataset["title"],
+            "description": cr.research_dataset["description"],
+            "preferred_identifier": cr.research_dataset["preferred_identifier"],
+        }
+        response = self.client.get(
+            f"/rest/datasets/1?research_dataset_fields=title,description,preferred_identifier"
+        )
+        returned_fields = response.data["research_dataset"]
+        self.assertEqual(returned_fields, expected_fields)
+
 
 class CatalogRecordApiReadXMLTransformationTests(CatalogRecordApiReadCommon):
 
@@ -1082,7 +1102,6 @@ def test_read_dataset_format_dummy_datacite_doi(self):
 
     def _check_dataset_xml_format_response(self, response, element_name):
         self.assertEqual(response.status_code, status.HTTP_200_OK)
-        self.assertEqual("<?xml version" in response.data[:20], True)
diff --git a/src/metax_api/tests/api/rest/v2/views/datasets/pas.py b/src/metax_api/tests/api/rest/v2/views/datasets/pas.py
--- a/src/metax_api/tests/api/rest/v2/views/datasets/pas.py
+++ b/src/metax_api/tests/api/rest/v2/views/datasets/pas.py
@@ ... @@
-    def _create_pas_dataset_from_id(self, id):
-        """
-        Helper method to create a pas dataset by updating the given dataset's
-        preservation_state to 80.
-        """
-        cr_data = self.client.get("/rest/v2/datasets/%d" % id, format="json").data
-        self.assertEqual(cr_data["preservation_state"], 0)
-
-        # update state to "accepted to pas" -> should create pas version
-        cr_data["preservation_state"] = 80
-        response = self.client.put("/rest/v2/datasets/%d" % id, cr_data, format="json")
-        self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
-        return response.data
-
     def setUp(self):
         super().setUp()
         dc = DataCatalog.objects.get(pk=1)
diff --git a/src/metax_api/tests/api/rpc/v2/views/dataset_rpc.py b/src/metax_api/tests/api/rpc/v2/views/dataset_rpc.py
index d61fce67..5256d4c2 100755
--- a/src/metax_api/tests/api/rpc/v2/views/dataset_rpc.py
+++ b/src/metax_api/tests/api/rpc/v2/views/dataset_rpc.py
@@ -9,9 +9,10 @@
 from django.conf import settings
 from rest_framework import status
 
-from metax_api.models import CatalogRecordV2
+from metax_api.models import CatalogRecordV2, DataCatalog
 from metax_api.tests.api.rest.base.views.datasets.write import CatalogRecordApiWriteCommon
 from metax_api.tests.utils import get_test_oidc_token
+from metax_api.utils import get_tz_aware_now_without_micros
 
 CR = CatalogRecordV2
 
@@ -96,6 +97,19 @@ class CatalogRecordVersionHandling(CatalogRecordApiWriteCommon):
     New dataset versions can only be created by explicitly calling related RPC API.
     """
 
+    def setUp(self):
+        super().setUp()
+        dc = DataCatalog.objects.get(pk=1)
+        catalog_json = dc.catalog_json
+        catalog_json["identifier"] = settings.PAS_DATA_CATALOG_IDENTIFIER
+        catalog_json["dataset_versioning"] = False
+        dc = DataCatalog.objects.create(
+            catalog_json=catalog_json,
+            date_created=get_tz_aware_now_without_micros(),
+            catalog_record_services_create="testuser,api_auth_user,metax",
+            catalog_record_services_edit="testuser,api_auth_user,metax",
+        )
+
     def test_create_new_version(self):
         """
         A new dataset version can be created for datasets in data catalogs that support versioning.
@@ -201,6 +215,48 @@ def test_new_version_removes_deprecated_files(self):
             new_cr.files.count() < original_cr.files(manager="objects_unfiltered").count()
         )
 
+    def test_new_version_of_pas_catalog_record(self):
+        """
+        Trying to create a new version of a PAS dataset should fail.
+ """ + cr_data = self.client.get("/rest/v2/datasets/1", format="json").data + self.assertEqual(cr_data["preservation_state"], 0) + + origin_dataset = self._create_pas_dataset_from_id(1) + + pas_id = origin_dataset["preservation_dataset_version"]["identifier"] + + response = self.client.post( + f"/rpc/v2/datasets/create_new_version?identifier={pas_id}", format="json" + ) + self.assertEqual(response.status_code, 400) + self.assertTrue("Data catalog does not allow dataset versioning" in response.data["detail"][0]) + + def test_new_version_of_copy_of_pas_catalog_record(self): + """ + Trying to create a new version from a origin version of a PAS dataset should succeed. + New version should have preservation_state 0 and it shouldn't contain + preservation_dataset_version. + """ + cr_data = self.client.get("/rest/v2/datasets/1", format="json").data + self.assertEqual(cr_data["preservation_state"], 0) + + origin_dataset = self._create_pas_dataset_from_id(1) + + cr_id = origin_dataset["identifier"] + + response = self.client.post( + f"/rpc/v2/datasets/create_new_version?identifier={cr_id}", format="json" + ) + self.assertEqual(response.status_code, 201) + + new_version_id = response.data["identifier"] + + cr_data = self.client.get(f"/rest/v2/datasets/{new_version_id}", format="json").data + self.assertEqual(cr_data["preservation_state"], 0) + self.assertFalse("preservation_dataset_version" in cr_data) + + def test_version_from_draft(self): """ New versions cannot be created from drafts diff --git a/src/metax_api/tests/utils.py b/src/metax_api/tests/utils.py index d4cbb3e0..003298eb 100755 --- a/src/metax_api/tests/utils.py +++ b/src/metax_api/tests/utils.py @@ -411,6 +411,20 @@ def get_embargoed_cr_with_files_and_dirs_from_api_with_file_details(self, is_ava return response.data + def _create_pas_dataset_from_id(self, id): + """ + Helper method to create a pas dataset by updating the given dataset's + preservation_state to 80. + """ + cr_data = self.client.get("/rest/v2/datasets/%d" % id, format="json").data + self.assertEqual(cr_data["preservation_state"], 0) + + # update state to "accepted to pas" -> should create pas version + cr_data["preservation_state"] = 80 + response = self.client.put("/rest/v2/datasets/%d" % id, cr_data, format="json") + self.assertEqual(response.status_code, status.HTTP_200_OK, response.data) + return response.data + def _get_ida_dataset_without_files(self): data = self._get_object_from_test_data("catalogrecord", requested_index=0)