From 4de95ded96f869b976854bf184e3bc71c53153b9 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 2 Nov 2024 05:39:53 -0400 Subject: [PATCH 1/8] possible fix for sid indexing problem --- CHANGELOG.rst | 6 ++++++ pyproject.toml | 2 +- snovault/elasticsearch/indexer.py | 11 +++++++++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index aafd4a9f3..dde04d767 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,12 @@ snovault Change Log ---------- +11.23.0 +======= +* 2024-11-02/dmichaels + - Possible fix for intermittent 'sid' indexing problem. + + 11.22.0 ======= * 2024-09-03/dmichaels diff --git a/pyproject.toml b/pyproject.toml index ad8bb4f73..b484fee25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicsnovault" -version = "11.22.0" +version = "11.22.0.1b1" # TODO: To become 11.23.0 description = "Storage support for 4DN Data Portals." authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/snovault/elasticsearch/indexer.py b/snovault/elasticsearch/indexer.py index 50b75d976..02f81152d 100644 --- a/snovault/elasticsearch/indexer.py +++ b/snovault/elasticsearch/indexer.py @@ -465,6 +465,11 @@ def update_object(self, request, uuid, add_to_secondary=None, sid=None, if add_to_secondary is not None: add_to_secondary.update(result['rev_linked_to_me']) + # xyzzy/dmichaels/20241102 + result_sid = result['sid'] + del result['sid'] + # xyzzy/dmichaels/20241102 + last_exc = None # We intend to set it to something else later, but this is just in case we goof ignorable(last_exc) for backoff in [0, 1, 2]: @@ -473,7 +478,8 @@ def update_object(self, request, uuid, add_to_secondary=None, sid=None, namespaced_index = get_namespaced_index(request, result['item_type']) self.es.index( index=namespaced_index, body=result, - id=str(uuid), version=result['sid'], version_type='external_gte', + # id=str(uuid), version=result['sid'], version_type='external_gte', + id=str(uuid), version=result_sid, version_type='external_gte', # xyzzy/dmichaels/20241102 request_timeout=30 ) except ConflictError: @@ -481,7 +487,8 @@ def update_object(self, request, uuid, add_to_secondary=None, sid=None, # this may be somewhat common and is not harmful # do not return an error so item is removed from queue duration = timer() - start - log.warning('Conflict indexing', sid=result['sid'], duration=duration, cat=cat) + # log.warning('Conflict indexing', sid=result['sid'], duration=duration, cat=cat) + log.warning('Conflict indexing', sid=result_sid, duration=duration, cat=cat) # xyzzy/dmichaels/20241102 return except (ConnectionError, ReadTimeoutError, TransportError) as e: duration = timer() - start From 93abf581c707b82fc6dfadc802b1a09ef0e7c780 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 2 Nov 2024 07:28:39 -0400 Subject: [PATCH 2/8] effectively back out proposed sid fix - it breaks tests. --- snovault/elasticsearch/indexer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/snovault/elasticsearch/indexer.py b/snovault/elasticsearch/indexer.py index 02f81152d..301d0c403 100644 --- a/snovault/elasticsearch/indexer.py +++ b/snovault/elasticsearch/indexer.py @@ -467,7 +467,8 @@ def update_object(self, request, uuid, add_to_secondary=None, sid=None, # xyzzy/dmichaels/20241102 result_sid = result['sid'] - del result['sid'] + # NEVERMIND - BREAKS TEST: FAILED snovault/tests/test_indexing.py::test_indexing_info[False] - KeyError: 'sid' + # del result['sid'] # xyzzy/dmichaels/20241102 last_exc = None # We intend to set it to something else later, but this is just in case we goof From b2ce98e1948f4c7bc24f9b758b196afcbee86c2f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Nov 2024 10:33:28 -0500 Subject: [PATCH 3/8] another possible fix for sid indexing problem --- snovault/indexing_views.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/snovault/indexing_views.py b/snovault/indexing_views.py index ae0646a23..506bd615c 100644 --- a/snovault/indexing_views.py +++ b/snovault/indexing_views.py @@ -108,6 +108,10 @@ def item_index_data(context, request): with indexing_timer(indexing_stats, 'upgrade_properties'): properties = context.upgrade_properties() + # 2024-11-03/xyzzy: Possible fix for unexpected 'sid' property ... + if "sid" in properties: + del properties["sid"] + # 2024-07-09: Make sure that the uuid gets into the frame=raw view. if not properties.get('uuid'): properties['uuid'] = uuid From 5051b1a18b315201cce8a802621907059cc5990f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Nov 2024 10:33:40 -0500 Subject: [PATCH 4/8] version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b484fee25..3af7afe1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicsnovault" -version = "11.22.0.1b1" # TODO: To become 11.23.0 +version = "11.22.0.1b2" # TODO: To become 11.23.0 description = "Storage support for 4DN Data Portals." authors = ["4DN-DCIC Team "] license = "MIT" From 96319f909134b6bc68596616d8cb0b461b231eed Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Nov 2024 15:02:37 -0500 Subject: [PATCH 5/8] backed out previous sid "fix" --- snovault/elasticsearch/indexer.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/snovault/elasticsearch/indexer.py b/snovault/elasticsearch/indexer.py index 301d0c403..50b75d976 100644 --- a/snovault/elasticsearch/indexer.py +++ b/snovault/elasticsearch/indexer.py @@ -465,12 +465,6 @@ def update_object(self, request, uuid, add_to_secondary=None, sid=None, if add_to_secondary is not None: add_to_secondary.update(result['rev_linked_to_me']) - # xyzzy/dmichaels/20241102 - result_sid = result['sid'] - # NEVERMIND - BREAKS TEST: FAILED snovault/tests/test_indexing.py::test_indexing_info[False] - KeyError: 'sid' - # del result['sid'] - # xyzzy/dmichaels/20241102 - last_exc = None # We intend to set it to something else later, but this is just in case we goof ignorable(last_exc) for backoff in [0, 1, 2]: @@ -479,8 +473,7 @@ def update_object(self, request, uuid, add_to_secondary=None, sid=None, namespaced_index = get_namespaced_index(request, result['item_type']) self.es.index( index=namespaced_index, body=result, - # id=str(uuid), version=result['sid'], version_type='external_gte', - id=str(uuid), version=result_sid, version_type='external_gte', # xyzzy/dmichaels/20241102 + id=str(uuid), version=result['sid'], version_type='external_gte', request_timeout=30 ) except ConflictError: @@ -488,8 +481,7 @@ def update_object(self, request, uuid, add_to_secondary=None, sid=None, # this may be somewhat common and is not harmful # do not return an error so item is removed from queue duration = timer() - start - # log.warning('Conflict indexing', sid=result['sid'], duration=duration, cat=cat) - log.warning('Conflict indexing', sid=result_sid, duration=duration, cat=cat) # xyzzy/dmichaels/20241102 + log.warning('Conflict indexing', sid=result['sid'], duration=duration, cat=cat) return except (ConnectionError, ReadTimeoutError, TransportError) as e: duration = timer() - start From eb15178aa0a381e337cb61ad45ace4f3d06c7539 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 3 Nov 2024 15:03:32 -0500 Subject: [PATCH 6/8] backed out previous sid "fix" --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3af7afe1b..2ae91a6cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicsnovault" -version = "11.22.0.1b2" # TODO: To become 11.23.0 +version = "11.22.0.1b3" # TODO: To become 11.23.0 description = "Storage support for 4DN Data Portals." authors = ["4DN-DCIC Team "] license = "MIT" From bf8224370b21cbe6899a105fbe18ccd5edb1c477 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 5 Nov 2024 11:28:15 -0500 Subject: [PATCH 7/8] new fix for unexpected-sid validation-error in storage.py --- pyproject.toml | 2 +- snovault/indexing_views.py | 4 ---- snovault/storage.py | 12 ++++++++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2ae91a6cb..6ddae8c9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicsnovault" -version = "11.22.0.1b3" # TODO: To become 11.23.0 +version = "11.22.0.1b4" # TODO: To become 11.23.0 description = "Storage support for 4DN Data Portals." authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/snovault/indexing_views.py b/snovault/indexing_views.py index 506bd615c..ae0646a23 100644 --- a/snovault/indexing_views.py +++ b/snovault/indexing_views.py @@ -108,10 +108,6 @@ def item_index_data(context, request): with indexing_timer(indexing_stats, 'upgrade_properties'): properties = context.upgrade_properties() - # 2024-11-03/xyzzy: Possible fix for unexpected 'sid' property ... - if "sid" in properties: - del properties["sid"] - # 2024-07-09: Make sure that the uuid gets into the frame=raw view. if not properties.get('uuid'): properties['uuid'] = uuid diff --git a/snovault/storage.py b/snovault/storage.py index 556e718c2..9cb45d3e9 100644 --- a/snovault/storage.py +++ b/snovault/storage.py @@ -1,4 +1,5 @@ import boto3 +from copy import deepcopy import structlog import uuid @@ -566,8 +567,15 @@ def revision_history(self, *, rid): session = self.DBSession revisions = [] for revision in session.query(PropertySheet).filter_by(rid=rid).order_by(PropertySheet.sid): - revision.properties['sid'] = revision.sid - revisions.append(revision.properties) + # 2024-11-04/C4-1188/PR-306/dmichaels: + # Fix for "sid" appearing in properties in some situations, and ultimately ending up + # with validation-errors = Additional properties are not allowed ('sid' was unexpected). + # See smaht-portal/.../test_types_file.py for 92e8371b-bcdf-44de-ad49-3a5f108e91eb (from workbook-inserts). + # revision.properties['sid'] = revision.sid + # revisions.append(revision.properties) + revision_properties = deepcopy(revision.properties) + revision_properties['sid'] = revision.sid + revisions.append(revision_properties) return revisions From c6ee163249d55845e50276bd9b262365d960512f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 7 Nov 2024 10:53:31 -0500 Subject: [PATCH 8/8] etc --- CHANGELOG.rst | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index dde04d767..62072b3ae 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,7 +9,7 @@ Change Log 11.23.0 ======= * 2024-11-02/dmichaels - - Possible fix for intermittent 'sid' indexing problem. + - Fix for unexpected 'sid' indexing problem. 11.22.0 diff --git a/pyproject.toml b/pyproject.toml index 6ddae8c9d..7ec98d2d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicsnovault" -version = "11.22.0.1b4" # TODO: To become 11.23.0 +version = "11.23.0" description = "Storage support for 4DN Data Portals." authors = ["4DN-DCIC Team "] license = "MIT"