From de63bb53945d2699e5247a0ff301abeecae8e0a0 Mon Sep 17 00:00:00 2001 From: Eric Date: Thu, 6 Mar 2025 16:16:43 -0600 Subject: [PATCH 1/4] Website: update query generator --- .../query-generator/get-llm-generated-sql.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/website/api/controllers/query-generator/get-llm-generated-sql.js b/website/api/controllers/query-generator/get-llm-generated-sql.js index 11ef473776b1..c911624fde5e 100644 --- a/website/api/controllers/query-generator/get-llm-generated-sql.js +++ b/website/api/controllers/query-generator/get-llm-generated-sql.js @@ -120,11 +120,13 @@ module.exports = { When generating the SQL: 1. Please do not use the SQL "AS" operator, nor alias tables. Always reference tables by their full name. - 2. If this question is related to an application or program, consider using LIKE instead of something verbatim. - 3. If this question is not possible to ask given the tables and columns available in the provided context (the osquery schema) for a particular operating system, then use empty string. - 4. If this question is a "yes" or "no" question, or a "how many people" question, or a "how many hosts" question, then build the query such that a "yes" returns exactly one row and a "no" returns zero rows. In other words, if this question is about finding out which hosts match a "yes" or "no" question, then if a host does not match, do not include any rows for it. - 5. Use only tables that are supported for each target platform, as documented in the provided context, considering the examples if they exist, and the available columns. 6. For each table that you use, only use columns that are documented for that table, as documented in the provided context. + 2. When generating a query that uses the "LIKE" operator, you should include wildcard characters. + 3. If this question is related to an application or program, consider using LIKE instead of something verbatim. + 4. If this question is not possible to ask given the tables and columns available in the provided context (the osquery schema) for a particular operating system, then use empty string. + 5. If this question is a "yes" or "no" question, or a "how many people" question, or a "how many hosts" question, then build the query such that a "yes" returns exactly one row and a "no" returns zero rows. In other words, if this question is about finding out which hosts match a "yes" or "no" question, then if a host does not match, do not include any rows for it. + 6. Use only tables that are supported for each target platform, as documented in the provided context, considering the examples if they exist, and the available columns. + 7. For each table that you use, only use columns that are documented for that table, as documented in the provided context. Provided context: \`\`\` From f3dd57f1773b3c223e26ef8e37a1057938225bd2 Mon Sep 17 00:00:00 2001 From: Eric Date: Thu, 6 Mar 2025 16:17:27 -0600 Subject: [PATCH 2/4] Update get-llm-generated-sql.js --- website/api/controllers/query-generator/get-llm-generated-sql.js | 1 - 1 file changed, 1 deletion(-) diff --git a/website/api/controllers/query-generator/get-llm-generated-sql.js b/website/api/controllers/query-generator/get-llm-generated-sql.js index c911624fde5e..6198a248b4d2 100644 --- a/website/api/controllers/query-generator/get-llm-generated-sql.js +++ b/website/api/controllers/query-generator/get-llm-generated-sql.js @@ -120,7 +120,6 @@ module.exports = { When generating the SQL: 1. Please do not use the SQL "AS" operator, nor alias tables. Always reference tables by their full name. - 6. For each table that you use, only use columns that are documented for that table, as documented in the provided context. 2. When generating a query that uses the "LIKE" operator, you should include wildcard characters. 3. If this question is related to an application or program, consider using LIKE instead of something verbatim. 4. If this question is not possible to ask given the tables and columns available in the provided context (the osquery schema) for a particular operating system, then use empty string. From b0562626e368a6bf65515c9340ef91937115a9e0 Mon Sep 17 00:00:00 2001 From: Eric Date: Thu, 6 Mar 2025 16:50:06 -0600 Subject: [PATCH 3/4] Website: Update query generator errors --- .../query-generator/get-llm-generated-sql.js | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/website/api/controllers/query-generator/get-llm-generated-sql.js b/website/api/controllers/query-generator/get-llm-generated-sql.js index 6198a248b4d2..95f6b9bd5d73 100644 --- a/website/api/controllers/query-generator/get-llm-generated-sql.js +++ b/website/api/controllers/query-generator/get-llm-generated-sql.js @@ -18,8 +18,9 @@ module.exports = { description: 'A SQL query was generated' }, - errorFromOpenAi: { - description: 'The Open AI API reutrned an error.' + couldNotGenerateQueries: { + description: 'A set of queries could not be generated for a user using the provided question.', + responseType: 'badRequest' } }, @@ -60,12 +61,13 @@ module.exports = { Please respond in JSON, with the same data shape as the provided context, but with the array filtered to include only relevant tables.`; let filteredTables = await sails.helpers.ai.prompt(schemaFiltrationPrompt, 'gpt-4o-mini-2024-07-18', true, 'Please only respond in valid JSON with no codefences or backticks.') .intercept((err)=>{ + sails.log.warn(`When trying to get a subset of tables to use to generate a query for a user, an error occurred. Full error: ${require('util').inspect(err, {depth: 2})}`); if(this.req.isSocket){ // If this request was from a socket and an error occurs, broadcast an 'error' event and unsubscribe the socket from this room. sails.sockets.broadcast(roomId, 'error', {error: err}); sails.sockets.leave(this.req, roomId); } - return new Error(`When trying to get a subset of tables to use to generate a query for an Admin user, an error occurred. Full error: ${require('util').inspect(err, {depth: 2})}`); + return 'couldNotGenerateQueries'; }); @@ -143,6 +145,13 @@ module.exports = { "windowsCaveats": "TODO", "linuxCaveats": "TODO", "chromeOSCaveats": "TODO", + } + + + If no queries can be generated from the provided instructions do not return the datashape above and instead return this JSON in this exact data shape: + + { + "couldNotGenerateQueries": true }`; let sqlReport = await sails.helpers.ai.prompt.with({prompt:sqlPrompt, baseModel:'o3-mini-2025-01-31', expectJson: true}) @@ -152,8 +161,18 @@ module.exports = { sails.sockets.broadcast(roomId, 'error', {error: err}); sails.sockets.leave(this.req, roomId); } - return new Error(`When trying to generate a query for an Admin user, an error occurred. Full error: ${require('util').inspect(err, {depth: 2})}`); + sails.log.warn(`When trying to generate a query for a user, an error occurred. Full error: ${require('util').inspect(err, {depth: 2})}`); + return 'couldNotGenerateQueries'; }); + let jsonResult = JSON.parse(sqlReport); + if(jsonResult.couldNotGenerateQueries){ + if(this.req.isSocket){ + sails.sockets.broadcast(roomId, 'error', 'couldNotGenerateQueries'); + sails.sockets.leave(this.req, roomId); + } else { + throw 'couldNotGenerateQueries'; + } + } // If this request was from a socket, we'll broadcast a 'queryGenerated' event with the sqlReport and unsubscribe the socket if(this.req.isSocket){ From a51e3f4d5833b614d7dc21d5d3f7fb4da9a16ff8 Mon Sep 17 00:00:00 2001 From: Eric Date: Thu, 6 Mar 2025 17:29:22 -0600 Subject: [PATCH 4/4] update error handling --- .../query-generator/get-llm-generated-sql.js | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/website/api/controllers/query-generator/get-llm-generated-sql.js b/website/api/controllers/query-generator/get-llm-generated-sql.js index 95f6b9bd5d73..6aea15ad398a 100644 --- a/website/api/controllers/query-generator/get-llm-generated-sql.js +++ b/website/api/controllers/query-generator/get-llm-generated-sql.js @@ -58,7 +58,15 @@ module.exports = { return lighterTable;}))} \`\`\` - Please respond in JSON, with the same data shape as the provided context, but with the array filtered to include only relevant tables.`; + Please respond in JSON, with the same data shape as the provided context, but with the array filtered to include only relevant tables. + + + If no queries can be generated from the provided instructions do not return the datashape above and instead return this JSON in this exact data shape: + + { + "couldNotGenerateQueries": true + }`; + let filteredTables = await sails.helpers.ai.prompt(schemaFiltrationPrompt, 'gpt-4o-mini-2024-07-18', true, 'Please only respond in valid JSON with no codefences or backticks.') .intercept((err)=>{ sails.log.warn(`When trying to get a subset of tables to use to generate a query for a user, an error occurred. Full error: ${require('util').inspect(err, {depth: 2})}`); @@ -70,6 +78,14 @@ module.exports = { return 'couldNotGenerateQueries'; }); + if(filteredTables.couldNotGenerateQueries){ + if(this.req.isSocket){ + sails.sockets.broadcast(roomId, 'error', {error: 'couldNotGenerateQueries'}); + sails.sockets.leave(this.req, roomId); + } else { + throw 'couldNotGenerateQueries'; + } + } // 2024-02-26: Testing using a system prompt with a single API request. // let systemPrompt = `You are an AI that generates osquery SQL queries for IT admin questions. Use the following osquery schema as context: @@ -164,10 +180,10 @@ module.exports = { sails.log.warn(`When trying to generate a query for a user, an error occurred. Full error: ${require('util').inspect(err, {depth: 2})}`); return 'couldNotGenerateQueries'; }); - let jsonResult = JSON.parse(sqlReport); - if(jsonResult.couldNotGenerateQueries){ + + if(sqlReport.couldNotGenerateQueries){ if(this.req.isSocket){ - sails.sockets.broadcast(roomId, 'error', 'couldNotGenerateQueries'); + sails.sockets.broadcast(roomId, 'error', {error: 'couldNotGenerateQueries'}); sails.sockets.leave(this.req, roomId); } else { throw 'couldNotGenerateQueries';