From 6422e48103e5f76de147cf54c1e9462a5f79d2b4 Mon Sep 17 00:00:00 2001 From: bruntib Date: Mon, 14 Oct 2024 12:59:16 +0200 Subject: [PATCH] [fix] Better SQL SELECT instead of a timeout query This query runs when filtering by files or components with the "anywhere on bugpath" option. In this case the following query was generated: SELECT ... FROM reports WHERE reports.id IN (id1, id2, ...); The ID list in the "IN" block can be so huge that it eats up all the memory and times out the query. This ID list is now replaced with a nested select: SELECT ... FROM reports WHERE reports.id IN (SELECT report_id FROM ...); --- .../codechecker_server/api/report_server.py | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/web/server/codechecker_server/api/report_server.py b/web/server/codechecker_server/api/report_server.py index f3e2a7a6b5..c31e9576b8 100644 --- a/web/server/codechecker_server/api/report_server.py +++ b/web/server/codechecker_server/api/report_server.py @@ -534,39 +534,31 @@ def get_source_component_file_query( def get_reports_by_bugpath_filter(session, file_filter_q) -> Set[int]: """ - This function returns a set of report IDs that are related to any file + This function returns a query for report IDs that are related to any file described by the query in the second parameter, either because their bug path goes through these files, or there is any bug note, etc. in these files. """ - def first_col_values(query): - """ - This function executes a query and returns the set of first columns' - values. 
- """ - return set(map(lambda x: x[0], query.all())) - - report_ids = set() - - q = session.query(Report.id) \ + q_report = session.query(Report.id) \ .join(File, File.id == Report.file_id) \ .filter(file_filter_q) - report_ids.update(first_col_values(q)) - - q = session.query(BugPathEvent.report_id) \ + q_bugpathevent = session.query(BugPathEvent.report_id) \ .join(File, File.id == BugPathEvent.file_id) \ .filter(file_filter_q) - report_ids.update(first_col_values(q)) + q_bugreportpoint = session.query(BugReportPoint.report_id) \ + .join(File, File.id == BugReportPoint.file_id) \ + .filter(file_filter_q) - q = session.query(ExtendedReportData.report_id) \ + q_extendedreportdata = session.query(ExtendedReportData.report_id) \ .join(File, File.id == ExtendedReportData.file_id) \ .filter(file_filter_q) - report_ids.update(first_col_values(q)) - - return report_ids + return q_report.union( + q_bugpathevent, + q_extendedreportdata, + q_bugreportpoint) def get_reports_by_components(session, component_names: List[str]) -> Set[int]: