diff --git a/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py b/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py
index c262c020..3126b432 100755
--- a/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py
+++ b/src/metax_api/api/rest/base/serializers/catalog_record_serializer.py
@@ -254,6 +254,29 @@ def _check_end_user_allowed_catalogs(self, dc_identifier):
                 }
             )
 
+    def _filter_research_dataset_fields(self, res):
+        """
+        If research_dataset_fields query parameter is supplied, return only
+        requested fields from research_dataset.
+        """
+        if (
+            "research_dataset" in res
+            and "view" in self.context
+            and "research_dataset_fields" in self.context["view"].request.query_params
+        ):
+            research_dataset_fields = set(
+                self.context["view"]
+                .request.query_params.get("research_dataset_fields", "")
+                .split(",")
+            )
+            research_dataset = {
+                key: value
+                for (key, value) in res["research_dataset"].items()
+                if key in research_dataset_fields
+            }
+            return {**res, "research_dataset": research_dataset}
+        return res
+
     def to_representation(self, instance):
         res = super(CatalogRecordSerializer, self).to_representation(instance)
 
@@ -281,7 +304,14 @@ def to_representation(self, instance):
             res["alternate_record_set"] = [ar.identifier for ar in alternate_records]
 
         if "dataset_version_set" in res:
-            res["dataset_version_set"] = instance.dataset_version_set.get_listing()
+            # avoid querying records when there are no other datasets in dataset_version_set
+            if (
+                hasattr(instance, "dataset_version_set__records__count")
+                and instance.dataset_version_set__records__count == 1
+            ):
+                res["dataset_version_set"] = [instance.version_dict]
+            else:
+                res["dataset_version_set"] = instance.dataset_version_set.get_listing()
 
         if "next_dataset_version" in res:
             if instance.next_dataset_version.state == CatalogRecord.STATE_PUBLISHED:
@@ -322,6 +352,7 @@ def to_representation(self, instance):
         if "request" in self.context and "file_details" in self.context["request"].query_params:
             CRS.populate_file_details(res, self.context["request"])
 
+        res = self._filter_research_dataset_fields(res)
        res = self._check_and_strip_sensitive_fields(instance, res)
 
         return res
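The filtering itself is a plain dict comprehension, so its behaviour can be checked in isolation. A minimal sketch with a made-up payload (the field names and values below are illustrative only, not real Metax data):

```python
# Standalone sketch of the filtering added in _filter_research_dataset_fields;
# the payload is fabricated for illustration.
research_dataset_fields = set("title,description".split(","))

res = {
    "identifier": "example-identifier",
    "research_dataset": {
        "title": {"en": "Example title"},
        "description": {"en": "Example description"},
        "creator": [{"name": "Example Creator"}],
    },
}

filtered = {
    **res,
    "research_dataset": {
        key: value
        for (key, value) in res["research_dataset"].items()
        if key in research_dataset_fields
    },
}

# Only the requested keys remain; other top-level fields are untouched.
print(sorted(filtered["research_dataset"]))  # ['description', 'title']
print(filtered["identifier"])                # example-identifier
```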
diff --git a/src/metax_api/api/rest/base/views/dataset_view.py b/src/metax_api/api/rest/base/views/dataset_view.py
index 648feb45..8bd0554c 100755
--- a/src/metax_api/api/rest/base/views/dataset_view.py
+++ b/src/metax_api/api/rest/base/views/dataset_view.py
@@ -10,6 +10,7 @@
 
 from django.conf import settings
 from django.http import Http404
+from django.db.models import Count
 
 from rest_framework import status
 from rest_framework.decorators import action
@@ -74,6 +75,26 @@ def get_queryset(self):
                 data_catalog__catalog_json__identifier__in=settings.LEGACY_CATALOGS
             )
 
+        if self.request.META["REQUEST_METHOD"] == "GET":
+            # Optimize dataset listing by prefetching related objects.
+            # Annotate results with number of records in dataset_version_set
+            # to allow the serializer to skip querying other versions when
+            # there is only one.
+            return (
+                super()
+                .get_queryset()
+                .prefetch_related(
+                    "data_catalog",
+                    "dataset_version_set",
+                    "preservation_dataset_version",
+                    "preservation_dataset_origin_version",
+                    "next_draft",
+                    "draft_of",
+                    "editor_permissions",
+                )
+                .annotate(Count("dataset_version_set__records"))
+            )
+
         return super().get_queryset()
 
     def retrieve(self, request, *args, **kwargs):
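For reference, an unaliased `Count` annotation is exposed by Django under the default name `<lookup>__count`, which is exactly the attribute the serializer change above probes with `hasattr()`. A rough sketch, assuming a configured Metax Django environment and that `CatalogRecord` is importable from `metax_api.models` (an assumption here):

```python
# Sketch only: requires the Metax Django settings to be loaded; the import
# path for CatalogRecord is assumed, not taken from this diff.
from django.db.models import Count

from metax_api.models import CatalogRecord

qs = CatalogRecord.objects.annotate(Count("dataset_version_set__records"))
cr = qs.first()

# Django names an unaliased Count annotation "<lookup>__count", so each
# instance carries the attribute the serializer checks before skipping
# dataset_version_set.get_listing().
print(cr.dataset_version_set__records__count)
```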
diff --git a/src/metax_api/models/catalog_record.py b/src/metax_api/models/catalog_record.py
index 5b644ff6..ef2db7a8 100755
--- a/src/metax_api/models/catalog_record.py
+++ b/src/metax_api/models/catalog_record.py
@@ -158,25 +158,17 @@ def get_listing(self):
             self.records(manager="objects_unfiltered")
             .filter(state=CatalogRecord.STATE_PUBLISHED)
             .order_by("-date_created")
+            .only(
+                "id",
+                "identifier",
+                "research_dataset",
+                "dataset_version_set_id",
+                "date_created",
+                "date_removed",
+                "removed",
+            )
         )
-
-        versions = [
-            {
-                "identifier": r.identifier,
-                "preferred_identifier": r.preferred_identifier,
-                "removed": r.removed,
-                "date_created": r.date_created.astimezone().isoformat(),
-                "date_removed": r.date_removed.astimezone().isoformat() if r.date_removed else None,
-            }
-            for r in records
-        ]
-
-        # dont show the date_removed field at all if the value is None (record has not been removed)
-        versions = [
-            {key: value for (key, value) in i.items() if value is not None} for i in versions
-        ]
-
-        return versions
+        return [r.version_dict for r in records]
 
     def print_records(self):  # pragma: no cover
         for r in self.records.all():
@@ -1295,6 +1287,21 @@ def identifiers_dict(self):
         except:
             return {}
 
+    @property
+    def version_dict(self):
+        try:
+            val = {
+                "identifier": self.identifier,
+                "preferred_identifier": self.research_dataset["preferred_identifier"],
+                "date_created": self.date_created.astimezone().isoformat(),
+                "removed": self.removed,
+            }
+            if self.removed and self.date_removed:
+                val["date_removed"] = self.date_removed.astimezone().isoformat()
+            return val
+        except:
+            return {}
+
     @property
     def preferred_identifier(self):
         try:
diff --git a/src/metax_api/models/common.py b/src/metax_api/models/common.py
index 62b5f0b6..2c7c374c 100755
--- a/src/metax_api/models/common.py
+++ b/src/metax_api/models/common.py
@@ -5,7 +5,7 @@
 # :author: CSC - IT Center for Science Ltd., Espoo Finland
 # :license: MIT
 
-from copy import deepcopy
+import pickle
 
 from dateutil import parser
 from django.core.exceptions import FieldError
@@ -132,6 +132,16 @@ def modified_since(self, timestamp):
 
         return timestamp < self.date_modified
 
+    def _deepcopy_field(self, field_value):
+        """
+        Deep copy field value.
+
+        Pickle can be an order of magnitude faster than copy.deepcopy for
+        deeply nested fields like CatalogRecord.research_dataset.
+        """
+        return pickle.loads(pickle.dumps(field_value))
+
+
     def track_fields(self, *fields):
         """
         Save initial values from object fields when object is created (= retrieved from db),
@@ -141,17 +151,16 @@ def track_fields(self, *fields):
         field_name is a dict (a JSON field). For now only one level of nesting is supported.
         If a need arises, can be made mega generic.
         """
-        for field_name in fields:
-
-            self._tracked_fields.append(field_name)
+        self._tracked_fields.extend(fields)
 
+        for field_name in fields:
             if "." in field_name:
                 self._track_json_field(field_name)
             else:
                 if self._field_is_loaded(field_name):
                     requested_field = getattr(self, field_name)
                     if isinstance(requested_field, dict):
-                        self._initial_data[field_name] = deepcopy(requested_field)
+                        self._initial_data[field_name] = self._deepcopy_field(requested_field)
                     else:
                         self._initial_data[field_name] = requested_field
 
@@ -173,7 +182,7 @@ def _track_json_field(self, field_name):
             self._initial_data[field_name] = {}
 
         if isinstance(json_field_value, dict):
-            self._initial_data[field_name][json_field_name] = deepcopy(json_field_value)
+            self._initial_data[field_name][json_field_name] = self._deepcopy_field(json_field_value)
         else:
             self._initial_data[field_name][json_field_name] = json_field_value
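The pickle round-trip claim in `_deepcopy_field` is easy to check with a self-contained timing sketch. The nested payload below is synthetic, so the measured speed-up will differ from real `research_dataset` documents; the approach also assumes the copied values are picklable, which holds for JSON-like dicts:

```python
# Quick comparison of the two deep-copy strategies; payload is made up.
import pickle
import timeit
from copy import deepcopy

nested = {
    f"field_{i}": {"values": list(range(50)), "meta": [{"depth": {"x": i}}] * 10}
    for i in range(100)
}

t_deepcopy = timeit.timeit(lambda: deepcopy(nested), number=200)
t_pickle = timeit.timeit(lambda: pickle.loads(pickle.dumps(nested)), number=200)

print(f"copy.deepcopy:     {t_deepcopy:.3f} s")
print(f"pickle round-trip: {t_pickle:.3f} s")
```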
diff --git a/src/metax_api/swagger/v1/swagger.yaml b/src/metax_api/swagger/v1/swagger.yaml
index 49c82192..ce0f1c51 100755
--- a/src/metax_api/swagger/v1/swagger.yaml
+++ b/src/metax_api/swagger/v1/swagger.yaml
@@ -1007,6 +1007,7 @@ paths:
           required: false
           type: string
         - $ref: "#/parameters/fields"
+        - $ref: "#/parameters/research_dataset_fields"
         - $ref: "#/parameters/include_legacy"
       responses:
         "200":
@@ -1261,6 +1262,7 @@ paths:
           required: false
           type: boolean
         - $ref: "#/parameters/fields"
+        - $ref: "#/parameters/research_dataset_fields"
         - $ref: "#/parameters/include_legacy"
       responses:
         '200':
@@ -2181,7 +2183,14 @@ parameters:
   fields:
     name: fields
     in: query
-    description: Comma separated list of fields that is returned. Note that nested fields are not supported.
+    description: Comma separated list of fields that are returned. Note that nested fields are not supported.
+    required: false
+    type: string
+
+  research_dataset_fields:
+    name: research_dataset_fields
+    in: query
+    description: Comma separated list of fields in research_dataset that are returned.
     required: false
     type: string
 
diff --git a/src/metax_api/swagger/v2/swagger.yaml b/src/metax_api/swagger/v2/swagger.yaml
index 606c7ed6..7b11ce04 100755
--- a/src/metax_api/swagger/v2/swagger.yaml
+++ b/src/metax_api/swagger/v2/swagger.yaml
@@ -1013,6 +1013,7 @@ paths:
           type: string
         - $ref: "#/parameters/include_user_metadata"
         - $ref: "#/parameters/fields"
+        - $ref: "#/parameters/research_dataset_fields"
         - $ref: "#/parameters/include_legacy"
       responses:
         "200":
@@ -1282,6 +1283,7 @@ paths:
           type: boolean
         - $ref: "#/parameters/include_user_metadata"
         - $ref: "#/parameters/fields"
+        - $ref: "#/parameters/research_dataset_fields"
         - $ref: "#/parameters/include_legacy"
       responses:
         '200':
@@ -2482,7 +2484,14 @@ parameters:
   fields:
     name: fields
     in: query
-    description: Comma separated list of fields that is returned. Note that nested fields are not supported.
+    description: Comma separated list of fields that are returned. Note that nested fields are not supported.
+    required: false
+    type: string
+
+  research_dataset_fields:
+    name: research_dataset_fields
+    in: query
+    description: Comma separated list of fields in research_dataset that are returned.
     required: false
     type: string
 
diff --git a/src/metax_api/tests/api/rest/base/views/datasets/read.py b/src/metax_api/tests/api/rest/base/views/datasets/read.py
index 688e99f2..66af746d 100755
--- a/src/metax_api/tests/api/rest/base/views/datasets/read.py
+++ b/src/metax_api/tests/api/rest/base/views/datasets/read.py
@@ -611,7 +611,11 @@ def test_agents_and_actors_with_ids(self):
         )
         cr.research_dataset["creator"] = []
         cr.research_dataset["creator"].append(
-            {"@type": "Organization", "name": {"en": "Unique Organization"}, "identifier": "http://uri.suomi.fi/codelist/fairdata/organization/code/1234567"}
+            {
+                "@type": "Organization",
+                "name": {"en": "Unique Organization"},
+                "identifier": "http://uri.suomi.fi/codelist/fairdata/organization/code/1234567",
+            }
         )
         cr.force_save()
 
@@ -892,7 +896,10 @@ def test_filter_by_projects_for_end_user(self):
         self._mock_token_validation_succeeds()
         self._use_http_authorization(
             method="bearer",
-            token={"group_names": ["IDA01:project_x", "IDA01:no_datasets_here"], "CSCUserName": "testi"}
+            token={
+                "group_names": ["IDA01:project_x", "IDA01:no_datasets_here"],
+                "CSCUserName": "testi",
+            },
         )
 
         response = self.client.get("/rest/datasets?projects=project_x&pagination=false")
@@ -967,17 +974,30 @@ def test_filter_by_legacy(self):
 
     def test_filter_by_editor_permissions_user_ok(self):
         cr = CatalogRecord.objects.get(pk=1)
-        cr.editor_permissions.users.update(user_id='test_user_x')
+        cr.editor_permissions.users.update(user_id="test_user_x")
         response = self.client.get(f"/rest/datasets?editor_permissions_user=test_user_x")
         self.assertEqual(response.data["count"], 1)
 
     def test_filter_by_editor_permissions_user_removed(self):
         cr = CatalogRecord.objects.get(pk=1)
-        cr.editor_permissions.users.update(user_id='test_user_x')
+        cr.editor_permissions.users.update(user_id="test_user_x")
         cr.editor_permissions.users.first().delete()
         response = self.client.get(f"/rest/datasets?editor_permissions_user=test_user_x")
         self.assertEqual(response.data["count"], 0)
 
+    def test_research_dataset_fields(self):
+        cr = CatalogRecord.objects.get(pk=1)
+        expected_fields = {
+            "title": cr.research_dataset["title"],
+            "description": cr.research_dataset["description"],
+            "preferred_identifier": cr.research_dataset["preferred_identifier"],
+        }
+        response = self.client.get(
+            f"/rest/datasets/1?research_dataset_fields=title,description,preferred_identifier"
+        )
+        returned_fields = response.data["research_dataset"]
+        self.assertEqual(returned_fields, expected_fields)
+
 
 class CatalogRecordApiReadXMLTransformationTests(CatalogRecordApiReadCommon):
 
@@ -1082,7 +1102,6 @@ def test_read_dataset_format_dummy_datacite_doi(self):
 
     def _check_dataset_xml_format_response(self, response, element_name):
         self.assertEqual(response.status_code, status.HTTP_200_OK)
-        self.assertEqual("
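From an API consumer's point of view, the parameter documented and tested above could be exercised roughly as follows; the host name is a placeholder, not a real Metax deployment, and the `requests` dependency is assumed to be available:

```python
# Hypothetical client-side call for the new research_dataset_fields parameter.
import requests

response = requests.get(
    "https://metax.example.org/rest/datasets/1",
    params={"research_dataset_fields": "title,description,preferred_identifier"},
)

# Only the requested research_dataset keys are expected in the response body.
print(response.json().get("research_dataset", {}).keys())
```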