Skip to content

Commit

Permalink
[kitsune] Include unique field in enriched index
Browse files Browse the repository at this point in the history
This commit updates the Kitsune enriched index. The enriched index
did not include a unique identifier for questions and answers,
which caused an error when trying to autorefresh.

Also include the default raw fields that were missing from answer items:
"metadata__updated_on", "metadata__timestamp", "offset",
"origin", "tag", and "uuid".

Signed-off-by: Jose Javier Merchante <[email protected]>
  • Loading branch information
jjmerchante committed Mar 11, 2024
1 parent 2c4ea08 commit dbc3c18
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
19 changes: 15 additions & 4 deletions grimoire_elk/enriched/kitsune.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def get_elastic_mappings(es_major):
"tags_analyzed": {
"type": "text",
"index": true
},
"id": {
"type": "keyword"
}
}
} """
Expand All @@ -71,6 +74,9 @@ def __init__(self, *args, **kwargs):
def get_field_author(self):
return "creator"

def get_field_unique_id(self):
return "id"

def get_sh_identity(self, item, identity_field=None):
identity = {}

Expand Down Expand Up @@ -152,6 +158,8 @@ def get_rich_item(self, item, kind='question'):
eitem['lifetime_days'] = \
get_time_diff_days(question['created'], question['updated'])

# Add id info to allow the coexistence of items of different types in the same index
eitem['id'] = 'question_{}'.format(question['id'])
eitem.update(self.get_grimoire_fields(question['created'], "question"))

eitem['author'] = question['creator']['username']
Expand Down Expand Up @@ -200,6 +208,8 @@ def get_rich_item(self, item, kind='question'):
eitem['lifetime_days'] = \
get_time_diff_days(answer['created'], answer['updated'])

# Add id info to allow the coexistence of items of different types in the same index
eitem['id'] = 'question_{}_answer_{}'.format(answer['question'], answer['id'])
eitem.update(self.get_grimoire_fields(answer['created'], "answer"))

eitem['author'] = answer['creator']['username']
Expand Down Expand Up @@ -237,7 +247,7 @@ def enrich_items(self, ocean_backend):
rich_item = self.get_rich_item(item)
data_json = json.dumps(rich_item)
bulk_json += '{"index" : {"_id" : "%s" } }\n' % \
(item[self.get_field_unique_id()])
(rich_item[self.get_field_unique_id()])
bulk_json += data_json + "\n" # Bulk document
current += 1
# Time to also enrich the answers
Expand All @@ -249,10 +259,11 @@ def enrich_items(self, ocean_backend):
if answer['id'] == item['data']['solution']:
answer['solution'] = 1
rich_answer = self.get_rich_item(answer, kind='answer')
self.copy_raw_fields(self.RAW_FIELDS_COPY, item, rich_answer)

data_json = json.dumps(rich_answer)
bulk_json += '{"index" : {"_id" : "%s_%i" } }\n' % \
(item[self.get_field_unique_id()],
rich_answer['answer_id'])
bulk_json += '{"index" : {"_id" : "%s" } }\n' % \
(rich_answer[self.get_field_unique_id()])
bulk_json += data_json + "\n" # Bulk document
current += 1

Expand Down
5 changes: 4 additions & 1 deletion releases/unreleased/kitsune-demography-study.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@ category: added
author: Jose Javier Merchante <[email protected]>
issue: null
notes: >
Include demography study in Kitsune (SUMO).
Include demography study in Kitsune (SUMO). Also update
the index to include standard fields such as a unique
identifier (`id`) and some missing fields like `origin`
or `uuid`.

0 comments on commit dbc3c18

Please sign in to comment.