Skip to content

Commit

Permalink
Optimize reports
Browse files Browse the repository at this point in the history
Avoid unnecessary DB requests for finding the list of documents where the client and the service are the same subsystem. This information can be computed from the document data itself.
Additionally, fix invalid duplicate detection when the client and producer requestInTs fall in different report periods: "get_faulty_documents" did not find the duplicates in that case.
  • Loading branch information
VitaliStupin committed Feb 15, 2024
1 parent f71479c commit 60c8663
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 73 deletions.
20 changes: 12 additions & 8 deletions integration_tests/ci_reports/test_report_worker_CI.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,21 +171,25 @@ def test_get_matching_documents_different_cases(self):
docs.append(doc_test_machine)
mongodb_h.add_clean_documents(docs)

# Query faulty documents
faulty_doc_set = db_manager.get_faulty_documents(
test_machine[2], test_machine[3], test_machine[1],
test_machine[0], start_timestamp, end_timestamp)
faulty_docs_found = set()
duplicate_docs_found = set()

# member_code, subsystem_code, member_class, x_road_instance, start_time_timestamp, end_time_timestamp
matching_docs = []
for doc in db_manager.get_matching_documents(test_machine[2], test_machine[3], test_machine[1],
test_machine[0], start_timestamp, end_timestamp):

if doc['_id'] in faulty_docs_found:
if doc['_id'] in duplicate_docs_found:
continue
if doc['_id'] in faulty_doc_set:
faulty_docs_found.add(doc['_id'])
doc = ReportManager.reduce_to_plain_json(doc)
# If client and service are the same subsystem,
# then matching_docs contains two identical documents
if (
doc['serviceXRoadInstance'] == doc['clientXRoadInstance']
and doc['serviceMemberClass'] == doc['clientMemberClass']
and doc['serviceMemberCode'] == doc['clientMemberCode']
and doc['serviceSubsystemCode'] == doc['clientSubsystemCode']
):
duplicate_docs_found.add(doc['_id'])
matching_docs.append(doc)

self.assertEqual(len(matching_docs), total_ref_count)
Expand Down
56 changes: 0 additions & 56 deletions reports_module/opmon_reports/database_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,62 +176,6 @@ def get_matching_documents(self, target, start_time, end_time):
self.logger_m.log_error('DatabaseManager.get_matching_documents', '{0}'.format(repr(e)))
raise e

def get_faulty_documents(self, target, start_time, end_time):
    """
    Collect the ids of documents in which the target subsystem is both the client and the service.

    Two queries are run against the clean data collection: one matching the
    service fields of the "producer" record and the client fields of the
    "client" record, and one with the two records swapped.

    NOTE: both "producer.requestInTs" and "client.requestInTs" must lie within
    [start_time, end_time] (inclusive), so a document whose two timestamps
    fall on different sides of the range boundary is not returned.

    :param target: Object providing xroad_instance, member_class, member_code and subsystem_code.
    :param start_time: Lower bound (inclusive) for requestInTs.
    :param end_time: Upper bound (inclusive) for requestInTs.
    :return: Set of "_id" values of the matching documents.
    :raises Exception: Any error raised while querying is logged and re-raised.
    """
    try:
        db = self.mongodb_handler.get_query_db()
        collection = db[CLEAN_DATA_COLLECTION]

        # Same inclusive range is applied to both timestamps in both queries.
        time_range = {"$gte": start_time, "$lte": end_time}

        query_a = {
            "query": {
                "producer.serviceXRoadInstance": target.xroad_instance,
                "producer.serviceMemberCode": target.member_code,
                "producer.serviceSubsystemCode": target.subsystem_code,
                "producer.serviceMemberClass": target.member_class,
                "producer.requestInTs": time_range,
                "client.clientXRoadInstance": target.xroad_instance,
                "client.clientMemberCode": target.member_code,
                "client.clientSubsystemCode": target.subsystem_code,
                "client.clientMemberClass": target.member_class,
                "client.requestInTs": time_range,
            },
            "hint": [
                ("client.clientMemberCode", 1),
                ("client.clientSubsystemCode", 1),
                ("client.requestInTs", 1),
            ],
        }

        query_b = {
            "query": {
                "client.serviceXRoadInstance": target.xroad_instance,
                "client.serviceMemberCode": target.member_code,
                "client.serviceSubsystemCode": target.subsystem_code,
                "client.serviceMemberClass": target.member_class,
                "client.requestInTs": time_range,
                "producer.clientXRoadInstance": target.xroad_instance,
                "producer.clientMemberCode": target.member_code,
                "producer.clientSubsystemCode": target.subsystem_code,
                "producer.clientMemberClass": target.member_class,
                "producer.requestInTs": time_range,
            },
            "hint": [
                ("producer.clientMemberCode", 1),
                ("producer.clientSubsystemCode", 1),
                ("producer.requestInTs", 1),
            ],
        }

        faulty_set = set()

        for q in (query_a, query_b):
            # Only the "_id" field is projected; nothing else is needed here.
            for doc in collection.find(q["query"], {"_id": 1}).hint(q["hint"]):
                faulty_set.add(doc['_id'])

    except Exception as e:
        # Log under this method's own name (the label previously pointed at
        # get_matching_documents) and re-raise with the original traceback.
        self.logger_m.log_error('DatabaseManager.get_faulty_documents', '{0}'.format(repr(e)))
        raise
    return faulty_set

def get_documents_within_time_frame(self, start_time, end_time):
"""
Get all the documents for specified time period.
Expand Down
20 changes: 11 additions & 9 deletions reports_module/opmon_reports/report_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,7 @@ def merge_document_fields(document, merged_fields, new_field_name, separator):
def get_documents(self):
report_map = dict()

faulty_doc_set = self.database_manager.get_faulty_documents(
self.target,
self.reports_arguments.start_time_milliseconds,
self.reports_arguments.end_time_milliseconds
)
faulty_docs_found = set()
duplicate_docs_found = set()

matching_docs = self.database_manager.get_matching_documents(
self.target,
Expand All @@ -154,12 +149,19 @@ def get_documents(self):

# Iterate over all the docs and append to report map
for doc in matching_docs:
if doc['_id'] in faulty_docs_found:
if doc['_id'] in duplicate_docs_found:
continue
if doc['_id'] in faulty_doc_set:
faulty_docs_found.add(doc['_id'])

doc = self.reduce_to_plain_json(doc)
# If client and service are the same subsystem,
# then matching_docs contains two identical documents
if (
doc['serviceXRoadInstance'] == doc['clientXRoadInstance']
and doc['serviceMemberClass'] == doc['clientMemberClass']
and doc['serviceMemberCode'] == doc['clientMemberCode']
and doc['serviceSubsystemCode'] == doc['clientSubsystemCode']
):
duplicate_docs_found.add(doc['_id'])

# "ps" / "pms" / "cs" / "cms"
sorted_service_type = self.get_service_type(doc)
Expand Down

0 comments on commit 60c8663

Please sign in to comment.