From 9991732140451327ce98bcc0965e7dede9796cec Mon Sep 17 00:00:00 2001 From: Paige Gulley Date: Tue, 15 Oct 2024 15:28:16 -0400 Subject: [PATCH 1/2] Update utils.py - remove prefix wildcards Prefix wildcards have a huge performance cost. Removing here as a precursor. --- mcweb/backend/search/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mcweb/backend/search/utils.py b/mcweb/backend/search/utils.py index dd2dd587f..c25a9cb2c 100644 --- a/mcweb/backend/search/utils.py +++ b/mcweb/backend/search/utils.py @@ -235,7 +235,7 @@ def _for_media_cloud(collections: List, sources: List, all_params: Dict) -> Dict and s.name not in domains] sources_with_url_search_strs += [s for s in selected_sources_in_collections if bool(s.url_search_string) is not False and s.name not in domains] - domain_url_filters = ["(canonical_domain:{} AND url:*{}*)".format(s.name, s.url_search_string) + domain_url_filters = ["(canonical_domain:{} AND url:{}*)".format(s.name, s.url_search_string) for s in sources_with_url_search_strs] # 3. assemble and add in other supported params supported_extra_props = ['pagination_token', 'page_size', 'sort_field', 'sort_order', From 6598cd970038e951a027f17ca6b135b95d3b84fb Mon Sep 17 00:00:00 2001 From: Paige Gulley Date: Thu, 17 Oct 2024 18:15:13 -0400 Subject: [PATCH 2/2] Update utils.py - scheme-safe url-search-string --- mcweb/backend/search/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mcweb/backend/search/utils.py b/mcweb/backend/search/utils.py index c25a9cb2c..6afded1d1 100644 --- a/mcweb/backend/search/utils.py +++ b/mcweb/backend/search/utils.py @@ -235,7 +235,8 @@ def _for_media_cloud(collections: List, sources: List, all_params: Dict) -> Dict and s.name not in domains] sources_with_url_search_strs += [s for s in selected_sources_in_collections if bool(s.url_search_string) is not False and s.name not in domains] - domain_url_filters = ["(canonical_domain:{} AND url:{}*)".format(s.name, s.url_search_string) + + domain_url_filters = [f"(canonical_domain:{s.name} AND (url:http\://{s.url_search_string} OR url:https\://{s.url_search_string}))" for s in sources_with_url_search_strs] # 3. assemble and add in other supported params supported_extra_props = ['pagination_token', 'page_size', 'sort_field', 'sort_order',