Skip to content
This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Commit

Permalink
Merge branch 'CSCFAIRMETA-1416-faster-project-datasets' into 'test'
Browse files Browse the repository at this point in the history
CSCFAIRMETA-1416: Faster project datasets filtering

See merge request fairdata/fairdata-metax!175
  • Loading branch information
Toni Nurmi committed Apr 1, 2022
2 parents 4e13dcd + a6b0195 commit 98ca59b
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 28 deletions.
10 changes: 0 additions & 10 deletions src/metax_api/api/rest/base/views/common_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ def get_queryset(self):
"""
additional_filters = {}
q_filters = []
deduplicated_q_filters = []

if hasattr(self, "queryset_search_params"):
additional_filters.update(**self.queryset_search_params)
Expand All @@ -185,10 +184,6 @@ def get_queryset(self):
# Q-filter objects, which can contain more complex filter options such as OR-clauses
q_filters = additional_filters.pop("q_filters")

if "deduplicated_q_filters" in additional_filters:
# Q-filter objects that may produce duplicate results
deduplicated_q_filters = additional_filters.pop("deduplicated_q_filters")

if CS.get_boolean_query_param(self.request, "removed"):
additional_filters.update({"removed": True})
self.queryset = self.queryset_unfiltered
Expand All @@ -210,11 +205,6 @@ def get_queryset(self):
self.select_related = [rel for rel in self.select_related if rel in self.fields]

queryset = super().get_queryset()
if deduplicated_q_filters:
# Run the filters that may produce duplicates, then deduplicate the results. Deduplicating just the ids
# in a subquery is faster than deduplicating the full results when there are a lot of duplicates.
id_query = queryset.filter(*deduplicated_q_filters).values("id").distinct()
queryset = queryset.filter(id__in=id_query)
queryset = queryset.filter(*q_filters, **additional_filters)

if self.request.META["REQUEST_METHOD"] in WRITE_OPERATIONS:
Expand Down
47 changes: 29 additions & 18 deletions src/metax_api/services/catalog_record_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,18 @@ def filter_by_editor_permissions_user(request, queryset_search_params):

# non-service users can only query their own datasets
if not request.user.is_service:
if request.user.username == '':
raise Http403({"detail": ["Query by editor_permissions_user is only supported for authenticated users"]})
if request.user.username == "":
raise Http403(
{
"detail": [
"Query by editor_permissions_user is only supported for authenticated users"
]
}
)
if request.user.username != user_id:
raise Http403({"detail": ["Provided editor_permissions_user does not match current user"]})
raise Http403(
{"detail": ["Provided editor_permissions_user does not match current user"]}
)

queryset_search_params["editor_permissions__users__user_id"] = user_id
queryset_search_params["editor_permissions__users__removed"] = False
Expand Down Expand Up @@ -227,8 +235,12 @@ def _get_org_filter(agent, org):
if agent == "publisher":
name_filter |= Q(**{f"research_dataset__{agent}__{name_en}__iregex": org})
name_filter |= Q(**{f"research_dataset__{agent}__{name_fi}__iregex": org})
name_filter |= Q(**{f"research_dataset__{agent}__member_of__{name_en}__iregex": org})
name_filter |= Q(**{f"research_dataset__{agent}__member_of__{name_fi}__iregex": org})
name_filter |= Q(
**{f"research_dataset__{agent}__member_of__{name_en}__iregex": org}
)
name_filter |= Q(
**{f"research_dataset__{agent}__member_of__{name_fi}__iregex": org}
)
else:
for i in range(3):
name_filter |= Q(**{f"research_dataset__{agent}__{i}__{name_en}__iregex": org})
Expand Down Expand Up @@ -262,15 +274,9 @@ def _get_org_filter(agent, org):
}
)
else:
name_filter |= Q(**{f"research_dataset__{agent}__contains": [{name: org}]})
name_filter |= Q(
**{f"research_dataset__{agent}__contains": [{name: org}]}
)
name_filter |= Q(
**{
f"research_dataset__{agent}__contains": [
{"member_of": {name: org}}
]
}
**{f"research_dataset__{agent}__contains": [{"member_of": {name: org}}]}
)

return name_filter
Expand Down Expand Up @@ -352,15 +358,20 @@ def set_projects_filter(queryset_search_params, request):
if not set(projects).issubset(user_projects):
raise Http403({"detail": ["User is not member of project"]})

q_filter = Q(files__project_identifier__in=projects)
if "deduplicated_q_filters" in queryset_search_params:
queryset_search_params["deduplicated_q_filters"].append(q_filter)
project_cr_ids = (
File.objects_unfiltered.filter(project_identifier__in=projects)
.values("record__id")
.distinct()
)
q_filter = Q(id__in=project_cr_ids)

if "q_filters" in queryset_search_params:
queryset_search_params["q_filters"].append(q_filter)
else:
queryset_search_params["deduplicated_q_filters"] = [q_filter]
queryset_search_params["q_filters"] = [q_filter]

return queryset_search_params


@staticmethod
def populate_file_details(cr_json, request):
"""
Expand Down

0 comments on commit 98ca59b

Please sign in to comment.