Skip to content

Commit

Permalink
initial working version of though resource instance search, no pagin…
Browse files Browse the repository at this point in the history
…g, re #8
  • Loading branch information
apeters committed Dec 19, 2024
1 parent 6a0a70d commit b143b67
Showing 4 changed files with 350 additions and 6 deletions.
5 changes: 2 additions & 3 deletions afrc/src/afrc/Search/SearchPage.vue
Original file line number Diff line number Diff line change
@@ -39,8 +39,7 @@ watch(queryString, () => {
function updateFilter(componentName: string, value: object) {
console.log(value);
// Test for an empty object
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function isEmpty(value: any) {
function isEmpty(value: unknown) {
if (value === null || value === undefined) {
return true;
}
@@ -89,7 +88,7 @@ const doQuery = function () {
const qs = new URLSearchParams(queryObj);
fetch(arches.urls.search_results + "?" + qs.toString())
fetch(arches.urls["api-search"] + "?" + qs.toString())
.then((response) => response.json())
.then((data) => {
console.log(data);
1 change: 1 addition & 0 deletions afrc/templates/arches_urls.htm
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@
{% block arches_urls %}
{{ block.super }}
<div class="arches-urls"
api-search="{% url 'api-search' %}"
api-settings="{% url 'api-settings' %}"
api-map-data="{% url 'api-map-data' %}"
api-feature-buffer="{% url 'api-feature-buffer' %}"
15 changes: 12 additions & 3 deletions afrc/urls.py
Original file line number Diff line number Diff line change
@@ -3,17 +3,26 @@
from django.conf.urls.i18n import i18n_patterns
from django.urls import include, path, re_path
from afrc.views.settings_api import SettingsAPI
from afrc.views.map_api import MapDataAPI, FeatureBufferAPI, GeoJSONBoundsAPI, ReferenceCollectionMVT
from afrc.views.search_api import SearchAPI
from afrc.views.map_api import (
MapDataAPI,
FeatureBufferAPI,
GeoJSONBoundsAPI,
ReferenceCollectionMVT,
)

urlpatterns = [
# project-level urls
path("api-search", SearchAPI.as_view(), name="api-search"),
path("api-settings", SettingsAPI.as_view(), name="api-settings"),
path("api-map-data", MapDataAPI.as_view(), name="api-map-data"),
path("api-feature-buffer", FeatureBufferAPI.as_view(), name="api-feature-buffer"),
path("api-geojson-bounds", GeoJSONBoundsAPI.as_view(), name="api-geojson-bounds"),
re_path(r"^api-reference-collection-mvt/(?P<zoom>[0-9]+|\{z\})/(?P<x>[0-9]+|\{x\})/(?P<y>[0-9]+|\{y\}).pbf$",
re_path(
r"^api-reference-collection-mvt/(?P<zoom>[0-9]+|\{z\})/(?P<x>[0-9]+|\{x\})/(?P<y>[0-9]+|\{y\}).pbf$",
ReferenceCollectionMVT.as_view(),
name="api-reference-collection-mvt"),
name="api-reference-collection-mvt",
),
]

# Ensure Arches core urls are superseded by project-level urls
335 changes: 335 additions & 0 deletions afrc/views/search_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
"""
ARCHES - a program developed to inventory and manage immovable cultural heritage.
Copyright (C) 2013 J. Paul Getty Trust and World Monuments Fund
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import logging
import os
import json

from django.views.generic import View
from django.contrib.gis.geos import GEOSGeometry
from django.core.cache import cache
from django.db import connection
from django.http import Http404
from django.shortcuts import render
from django.utils.translation import get_language, gettext as _
from django.db.models import Q, OuterRef, Subquery
from arches.app.models.models import (
MapMarker,
GraphModel,
DDataType,
Widget,
ReportTemplate,
CardComponent,
Geocoder,
Node,
SearchExportHistory,
ResourceXResource,
)
from arches.app.search.components.search_results import get_localized_descriptor
from arches.app.search.mappings import RESOURCES_INDEX
from arches.app.models.concept import Concept, get_preflabel_from_conceptid
from arches.app.utils.response import JSONResponse, JSONErrorResponse
from arches.app.utils.betterJSONSerializer import JSONSerializer, JSONDeserializer
from arches.app.search.search_engine_factory import SearchEngineFactory
from arches.app.search.elasticsearch_dsl_builder import (
Bool,
Match,
Query,
Ids,
Term,
Terms,
MaxAgg,
Aggregation,
)
from arches.app.search.search_export import SearchResultsExporter
from arches.app.search.time_wheel import TimeWheel
from arches.app.search.components.base import SearchFilterFactory
from arches.app.views.base import MapBaseManagerView
from arches.app.utils import permission_backend
from arches.app.utils.permission_backend import (
get_nodegroups_by_perm,
user_is_resource_reviewer,
)
from arches.app.utils.decorators import group_required
import arches.app.utils.zip as zip_utils
import arches.app.utils.task_management as task_management
from arches.app.utils.data_management.resources.formats.htmlfile import HtmlWriter
import arches.app.tasks as tasks
from io import StringIO
from tempfile import NamedTemporaryFile
from arches.app.models.system_settings import settings

logger = logging.getLogger(__name__)


def search_results(request, returnDsl=False):
    """
    Run the configured search-view component for ``request``.

    Args:
        request: the incoming Django request; it is handed to
            ``SearchFilterFactory`` which resolves the search-view component.
        returnDsl: when truthy, return the ``"query"`` entry popped from the
            search query object instead of the response object.

    Returns:
        The response object produced by the search-view component (or the
        query when ``returnDsl`` is truthy). On failure a
        ``JSONErrorResponse`` is returned rather than raised: status 400 when
        no search-view component is available, status 500 when the search
        itself raises.
    """
    search_filter_factory = SearchFilterFactory(request)
    searchview_component_instance = search_filter_factory.get_searchview_instance()

    if not searchview_component_instance:
        unavailable_searchview_name = search_filter_factory.get_searchview_name()
        message = _("No search-view named {0}").format(unavailable_searchview_name)
        return JSONErrorResponse(
            _("Search Failed"),
            message,
            status=400,
        )

    try:
        response_object, search_query_object = (
            searchview_component_instance.handle_search_results_query(
                search_filter_factory, returnDsl
            )
        )
        if returnDsl:
            return search_query_object.pop("query")
        return response_object
    except Exception as e:
        message = _("There was an error retrieving the search results")
        try:
            # Some errors carry a mapping with a user-facing "message" as
            # their first argument; prefer it over the generic text.
            message = e.args[0].get("message", message)
        except (IndexError, AttributeError, TypeError):
            # No structured message available: log the original search error
            # (not the lookup error raised just above) with its traceback.
            logger.exception("Error retrieving search results:", exc_info=e)

        return JSONErrorResponse(
            _("Search Failed"),
            message,
            status=500,
        )


class SearchAPI(View):
    """
    Search endpoint that returns resources of one base graph, backfilled
    with resources of that graph that are related to other matching resources.
    """

    def get(self, request):
        """
        Handle a search request.

        First searches with the resource-type filter restricted to the base
        graph (named "Group" in the filter). If those direct hits fill the
        requested page they are returned as-is. Otherwise the same search is
        repeated with the filter inverted, the first page of those hits is
        crawled through ``search_relationships_via_ORM`` (depth 3), and any
        related resources found are fetched from the resources index and
        appended to the direct hits.

        Returns:
            A ``JSONResponse`` with the (possibly backfilled) results, or the
            ``JSONErrorResponse`` produced by a failed search / an invalid
            ``paging-filter`` value.
        """
        base_resource_type_filter = [
            {
                "graphid": "d6774bfc-b4b4-11ea-84f7-3af9d3b32b71",
                "name": "Group",
                "inverted": False,
            }
        ]

        # Query-string values are strings; without the int() conversion the
        # page arithmetic below would repeat the string instead of multiplying.
        try:
            current_page = int(request.GET.get("paging-filter", 1))
        except (TypeError, ValueError):
            return JSONErrorResponse(
                _("Search Failed"),
                _("The paging-filter parameter must be an integer"),
                status=400,
            )
        page_size = int(settings.SEARCH_ITEMS_PER_PAGE)

        request_copy = request.GET.copy()
        request_copy["resource-type-filter"] = json.dumps(base_resource_type_filter)
        request.GET = request_copy
        direct_results = search_results(request)
        if isinstance(direct_results, JSONErrorResponse):
            # search_results reports failures as a response object; pass it on
            # instead of trying to read result keys from it.
            return direct_results

        logger.debug(
            "page=%s page_size=%s direct total_results=%s",
            current_page,
            page_size,
            direct_results["total_results"],
        )

        if direct_results["total_results"] >= current_page * page_size:
            # Enough direct hits to fill the requested page; reuse the search
            # that was already run rather than executing it a second time.
            return JSONResponse(content=direct_results)

        # we have no more direct hits on reference collections and we need to
        # backfill with results of hits based on potential resources related to reference collections
        # So first we need to search for resources that aren't reference collections and that match our search criteria
        # then we take those resource instance ids and do a recursive search for any of those
        # resources that might be related to reference collections
        # and return a list of those reference collections
        base_resource_type_filter[0]["inverted"] = True

        request_copy = request.GET.copy()
        request_copy["resource-type-filter"] = json.dumps(base_resource_type_filter)
        request_copy["paging-filter"] = 1
        request.GET = request_copy
        backfill_results = search_results(request)
        if isinstance(backfill_results, JSONErrorResponse):
            return backfill_results

        # first page of hits of potentially related resources
        resourceinstanceids = [
            hit["_source"]["resourceinstanceid"]
            for hit in backfill_results["results"]["hits"]["hits"]
        ]

        related_resource_ids = list(
            search_relationships_via_ORM(resourceinstanceids, depth=3)
        )
        if not related_resource_ids:
            # Nothing to backfill with, so skip the index lookup entirely.
            return JSONResponse(direct_results)

        se = SearchEngineFactory().create()
        query = Query(se, start=0, limit=30)
        query.add_query(Ids(ids=related_resource_ids))
        results = query.search(index=RESOURCES_INDEX)
        related_hits = results["hits"]["hits"]

        self._localize_descriptors(related_hits)

        direct_results["results"]["hits"]["hits"] += related_hits
        direct_results["total_results"] += len(related_hits)
        return JSONResponse(direct_results)

    @staticmethod
    def _localize_descriptors(hits):
        """Replace each hit's display name/description with its localized value."""
        descriptor_types = ("displaydescription", "displayname")
        active_and_default_language_codes = (get_language(), settings.LANGUAGE_CODE)
        for result in hits:
            for descriptor_type in descriptor_types:
                descriptor = get_localized_descriptor(
                    result, descriptor_type, active_and_default_language_codes
                )
                if descriptor:
                    result["_source"][descriptor_type] = descriptor["value"]
                    if descriptor_type == "displayname":
                        result["_source"]["displayname_language"] = descriptor[
                            "language"
                        ]
                else:
                    result["_source"][descriptor_type] = _("Undefined")


def search_relationships_via_ORM(
    resourceinstanceids=None,
    target_graphid="d6774bfc-b4b4-11ea-84f7-3af9d3b32b71",
    depth=1,
):
    """
    Find resources of ``target_graphid`` related to the given resources.

    Walks ``ResourceXResource`` rows in both directions, level by level,
    starting from ``resourceinstanceids``. Every endpoint whose graph id
    equals ``target_graphid`` is collected as a hit; every other endpoint is
    queued to be crawled at the next level. Hits themselves are not crawled
    further.

    Args:
        resourceinstanceids: iterable of resource instance ids to start from.
            ``None`` or an empty iterable yields an empty result.
        target_graphid: graph id (string) that identifies a hit.
        depth: number of relationship levels to follow. At least one level is
            always queried, even when ``depth`` is 0 or negative.

    Returns:
        A set of resource instance ids (as returned by the ORM) whose graph
        is ``target_graphid``.
    """
    hits = set()
    if not resourceinstanceids:
        return hits

    # Ids are tracked as strings so that string ids passed in by the caller
    # and id values coming back from the database compare equal.
    frontier = {str(rid) for rid in resourceinstanceids if rid is not None}
    visited = set()
    remaining = depth

    while frontier:
        to_crawl = set()
        frontier_ids = list(frontier)
        relations = ResourceXResource.objects.filter(
            Q(resourceinstanceidfrom__in=frontier_ids)
            | Q(resourceinstanceidto__in=frontier_ids)
        ).values_list(
            "resourceinstanceidfrom",
            "resourceinstancefrom_graphid",
            "resourceinstanceidto",
            "resourceinstanceto_graphid",
        )

        for from_id, from_graphid, to_id, to_graphid in relations:
            for resource_id, graphid in ((from_id, from_graphid), (to_id, to_graphid)):
                if str(graphid) == target_graphid:
                    hits.add(resource_id)
                elif resource_id is not None:
                    to_crawl.add(str(resource_id))

        visited |= frontier
        remaining -= 1
        if remaining <= 0:
            break
        # Relations touching already-visited resources were fully processed
        # at an earlier level, so only genuinely new resources are queried.
        frontier = to_crawl - visited

    return hits


def search_relationships(
    resourceinstanceids=None, target_graphid="d6774bfc-b4b4-11ea-84f7-3af9d3b32b71"
):
    """
    Raw-SQL variant of the relationship search.

    Runs two recursive CTEs over ``resource_x_resource``: one follows
    relations from the "from" side to the "to" side, the other follows them
    from "to" to "from". Each traversal stops expanding a row once its graph
    id equals ``target_graphid``, and the final selects keep only rows of
    that graph with a depth below 3. The two result sets are combined with
    ``UNION``.

    Args:
        resourceinstanceids: list of resource instance ids to start from.
            ``None`` or an empty list yields an empty result without
            touching the database.
        target_graphid: graph id that identifies the resources to return.

    Returns:
        A list of matching resource ids as strings.
    """
    if not resourceinstanceids:
        return []

    sql = """
        WITH RECURSIVE resource_traversal_from(resourcexid, resourceid, graphid, depth) AS (
            -- Anchor member: start with the given list of starting resource IDs
            SELECT
                resource_x_resource.resourcexid, resourceinstanceidto AS resourceid, resourceinstanceto_graphid AS graphid, 0 AS depth
            FROM
                resource_x_resource
            WHERE
                resourceinstanceidfrom = ANY(%s::uuid[])
            UNION ALL
            -- Recursive member: traverse the table bidirectionally
            SELECT
                resource_x_resource.resourcexid, resource_x_resource.resourceinstanceidto AS resourceid, resourceinstanceto_graphid AS graphid, rt.depth + 1
            FROM
                resource_x_resource
            INNER JOIN
                resource_traversal_from rt
            ON
                resource_x_resource.resourceinstanceidfrom = rt.resourceid
            WHERE
                rt.graphid != %s::uuid
        ) CYCLE resourcexid SET is_cycle USING path
        SELECT DISTINCT resourceid
        FROM resource_traversal_from
        WHERE graphid = %s::uuid
        AND DEPTH < 3
        UNION (
            WITH RECURSIVE resource_traversal_to(resourcexid, resourceid, graphid, depth) AS (
                -- Anchor member: start with the given list of starting resource IDs
                SELECT
                    resource_x_resource.resourcexid, resourceinstanceidfrom AS resourceid, resourceinstancefrom_graphid AS graphid, 0 AS depth
                FROM
                    resource_x_resource
                WHERE
                    resourceinstanceidto = ANY(%s::uuid[])
                UNION ALL
                SELECT
                    resource_x_resource.resourcexid, resource_x_resource.resourceinstanceidfrom AS resourceid, resourceinstancefrom_graphid AS graphid, rt.depth + 1
                FROM
                    resource_x_resource
                INNER JOIN
                    resource_traversal_to rt
                ON
                    resource_x_resource.resourceinstanceidto = rt.resourceid
                WHERE
                    rt.graphid != %s::uuid
            ) CYCLE resourcexid SET is_cycle USING path
            SELECT DISTINCT resourceid
            FROM resource_traversal_to
            WHERE graphid = %s::uuid
            AND DEPTH < 3
        )
    """
    # Both halves of the UNION take the same three parameters, in order:
    # starting ids, graph id that stops the traversal, graph id to select.
    params = [resourceinstanceids, target_graphid, target_graphid] * 2

    with connection.cursor() as cursor:
        logger.debug("search_relationships parameters: %s", params)
        cursor.execute(sql, params)
        hits = [str(row[0]) for row in cursor.fetchall()]

    logger.debug("search_relationships found %s hits", len(hits))
    return hits


# {"query": {"ids": {"values": ["fba9bdb3-29a6-3cc2-bd7e-2d3fa7a08c78"]}}, "start": 0, "limit": 0}

0 comments on commit b143b67

Please sign in to comment.