From f64d5040a7ea0fb9fa50b1454174e93c1e0e8c70 Mon Sep 17 00:00:00 2001 From: "J.C. Zhong" Date: Thu, 14 Nov 2024 23:07:27 +0000 Subject: [PATCH 1/4] feat: add sql completions powered by LLM --- containers/bundled_querybook_config.yaml | 4 + package.json | 2 +- querybook/config/querybook_public_config.yaml | 3 + querybook/config/user_setting.yaml | 8 ++ querybook/server/const/ai_assistant.py | 1 + .../datasources_socketio/ai_assistant.py | 11 +++ .../lib/ai_assistant/base_ai_assistant.py | 51 +++++++++++ .../prompts/sql_complete_prompt.py | 43 +++++++++ .../QueryEditor/BoundQueryEditor.tsx | 10 ++- .../components/QueryEditor/QueryEditor.tsx | 11 +++ .../UserSettingsMenu/UserSettingsMenu.tsx | 16 +++- querybook/webapp/config.d.ts | 4 + querybook/webapp/const/aiAssistant.ts | 1 + .../extensions/useCopilotExtension.ts | 15 ---- .../extensions/useSqlCompleteExtension.ts | 87 +++++++++++++++++++ .../hooks/redux/useUserQueryEditorConfig.ts | 5 +- 16 files changed, 250 insertions(+), 22 deletions(-) create mode 100644 querybook/server/lib/ai_assistant/prompts/sql_complete_prompt.py delete mode 100644 querybook/webapp/hooks/queryEditor/extensions/useCopilotExtension.ts create mode 100644 querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts diff --git a/containers/bundled_querybook_config.yaml b/containers/bundled_querybook_config.yaml index 40af55a05..e30b64969 100644 --- a/containers/bundled_querybook_config.yaml +++ b/containers/bundled_querybook_config.yaml @@ -28,6 +28,10 @@ ELASTICSEARCH_HOST: http://elasticsearch:9200 # model_args: # model_name: gpt-4o-mini # temperature: 0 +# sql_complete: +# model_args: +# model_name: gpt-4o-mini +# temperature: 0 # Uncomment below to enable vector store to support embedding based table search. # Please check langchain doc for the configs of each provider. 
diff --git a/package.json b/package.json index 3afa4a61b..03ba9e16e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "querybook", - "version": "3.36.0", + "version": "3.37.0", "description": "A Big Data Webapp", "private": true, "scripts": { diff --git a/querybook/config/querybook_public_config.yaml b/querybook/config/querybook_public_config.yaml index 44df88a14..87281cbcb 100644 --- a/querybook/config/querybook_public_config.yaml +++ b/querybook/config/querybook_public_config.yaml @@ -15,6 +15,9 @@ ai_assistant: table_vector_search: enabled: false + sql_complete: + enabled: false + survey: global_response_cooldown: 2592000 # 30 days global_trigger_cooldown: 600 # 10 minutes diff --git a/querybook/config/user_setting.yaml b/querybook/config/user_setting.yaml index 20d62ee50..56c56f551 100644 --- a/querybook/config/user_setting.yaml +++ b/querybook/config/user_setting.yaml @@ -87,3 +87,11 @@ tab: - 'tab space 4' - tab helper: The spaces setting for code editor, this does not modify the existing code. + +sql_complete: + default: disabled + tab: editor + options: + - enabled + - disabled + helper: (Experimental) Enable it to receive inline AI-generated SQL completions as you type within the editor. 
diff --git a/querybook/server/const/ai_assistant.py b/querybook/server/const/ai_assistant.py index 1db8b888f..211548bf6 100644 --- a/querybook/server/const/ai_assistant.py +++ b/querybook/server/const/ai_assistant.py @@ -9,6 +9,7 @@ class AICommandType(Enum): SQL_SUMMARY = "sql_summary" TABLE_SUMMARY = "table_summary" TABLE_SELECT = "table_select" + SQL_COMPLETE = "sql_complete" AI_ASSISTANT_NAMESPACE = "/ai_assistant" diff --git a/querybook/server/datasources_socketio/ai_assistant.py b/querybook/server/datasources_socketio/ai_assistant.py index 263e8cc91..1a120ebbc 100644 --- a/querybook/server/datasources_socketio/ai_assistant.py +++ b/querybook/server/datasources_socketio/ai_assistant.py @@ -30,3 +30,14 @@ def sql_fix(payload={}): ai_assistant.query_auto_fix( query_execution_id=query_execution_id, ) + + +@register_socket(AICommandType.SQL_COMPLETE.value, namespace=AI_ASSISTANT_NAMESPACE) +def sql_complete(payload={}): + prefix = payload["prefix"] + suffix = payload["suffix"] + query_engine_id = payload["query_engine_id"] + tables = payload.get("tables", []) + ai_assistant.get_sql_completion( + query_engine_id=query_engine_id, tables=tables, prefix=prefix, suffix=suffix + ) diff --git a/querybook/server/lib/ai_assistant/base_ai_assistant.py b/querybook/server/lib/ai_assistant/base_ai_assistant.py index b5b1ae3f1..835cc81f9 100644 --- a/querybook/server/lib/ai_assistant/base_ai_assistant.py +++ b/querybook/server/lib/ai_assistant/base_ai_assistant.py @@ -29,6 +29,7 @@ from .prompts.table_select_prompt import TABLE_SELECT_PROMPT from .prompts.table_summary_prompt import TABLE_SUMMARY_PROMPT from .prompts.text_to_sql_prompt import TEXT_TO_SQL_PROMPT +from .prompts.sql_complete_prompt import SQL_AUTOCOMPLETE_PROMPT from .tools.table_schema import ( get_slimmed_table_schemas, get_table_schema_by_name, @@ -160,6 +161,14 @@ def _get_table_select_prompt(self, top_n, question, table_schemas): table_schemas=table_schemas, ) + def _get_sql_complete_prompt(self, dialect, 
table_schemas, prefix, suffix): + return SQL_AUTOCOMPLETE_PROMPT.format( + dialect=dialect, + table_schemas=table_schemas, + prefix=prefix, + suffix=suffix, + ) + def _get_error_msg(self, error) -> str: """Override this method to return specific error messages for your own assistant.""" if isinstance(error, ValidationError): @@ -458,3 +467,45 @@ def find_tables(self, metastore_id, question, session=None): except Exception as e: LOG.error(e, exc_info=True) return [] + + @catch_error + @with_session + @with_ai_socket(command_type=AICommandType.SQL_COMPLETE) + def get_sql_completion( + self, + query_engine_id: int, + tables: list[str], + prefix: str, + suffix: str, + socket=None, + session=None, + ): + """ + Generate SQL completions based on the given context. + """ + query_engine = admin_logic.get_query_engine_by_id( + query_engine_id, session=session + ) + table_schemas = get_table_schemas_by_names( + metastore_id=query_engine.metastore_id, + full_table_names=tables, + should_skip_column=self._should_skip_column, + session=session, + ) + prompt = self._get_sql_complete_prompt( + dialect=query_engine.language, + table_schemas=table_schemas, + prefix=prefix, + suffix=suffix, + ) + llm = self._get_llm( + ai_command=AICommandType.SQL_COMPLETE.value, + prompt_length=self._get_token_count( + AICommandType.SQL_COMPLETE.value, prompt + ), + ) + + chain = llm | JsonOutputParser() + response = chain.invoke(prompt) + socket.send_data(response) + socket.close() diff --git a/querybook/server/lib/ai_assistant/prompts/sql_complete_prompt.py b/querybook/server/lib/ai_assistant/prompts/sql_complete_prompt.py new file mode 100644 index 000000000..5a251a33c --- /dev/null +++ b/querybook/server/lib/ai_assistant/prompts/sql_complete_prompt.py @@ -0,0 +1,43 @@ +from langchain.prompts import PromptTemplate + +prompt_template = """You are an expert in the {dialect} SQL dialect, skilled in providing precise SQL code completions. 
+Your task is to complete the SQL query based on the given context. + + + +===Table Schemas +{table_schemas} + +===Response Guidelines: +1. Analyze the partial query and table schemas to understand the context and determine the query's goal. +2. Identify the relevant tables and columns necessary for the query. +3. Replace with appropriate SQL code, or leave it empty if no completion is needed. +4. Make sure the completion does not overlap with the prefix or suffix. +5. Respond in JSON format + +===Response Format: +{{ + "completion": "the SQL code to replace , if any" +}} + + +===Example +Input: +sele from some_table + +Reasoning: +The prefix "sele" suggests that the query is likely a SELECT statement. The table schemas indicate the available columns. The completion should be a list of columns to select from the table "some_table". +As it already has a partial query, the completion should be starting from "ct" to complete the word "select", and then followed by the columns to select. + +Output: +{{ + "completion": "ct *" +}} + +===Input +{prefix}{suffix} + +""" + + +SQL_AUTOCOMPLETE_PROMPT = PromptTemplate.from_template(prompt_template) diff --git a/querybook/webapp/components/QueryEditor/BoundQueryEditor.tsx b/querybook/webapp/components/QueryEditor/BoundQueryEditor.tsx index cf671d2fc..a9b01dae8 100644 --- a/querybook/webapp/components/QueryEditor/BoundQueryEditor.tsx +++ b/querybook/webapp/components/QueryEditor/BoundQueryEditor.tsx @@ -38,8 +38,13 @@ export const BoundQueryEditor = React.forwardRef< const editorRef = useForwardedRef(ref); // Code Editor related Props - const { codeEditorTheme, options, fontSize, autoCompleteType } = - useUserQueryEditorConfig(); + const { + codeEditorTheme, + options, + fontSize, + autoCompleteType, + sqlCompleteEnabled, + } = useUserQueryEditorConfig(); const combinedOptions = useMemo( () => ({ ...options, @@ -79,6 +84,7 @@ export const BoundQueryEditor = React.forwardRef< searchContext={searchContext} cellId={cellId} 
engineId={engine?.id} + sqlCompleteEnabled={sqlCompleteEnabled} /> ); }); diff --git a/querybook/webapp/components/QueryEditor/QueryEditor.tsx b/querybook/webapp/components/QueryEditor/QueryEditor.tsx index 0a74dd44e..e0795ae8b 100644 --- a/querybook/webapp/components/QueryEditor/QueryEditor.tsx +++ b/querybook/webapp/components/QueryEditor/QueryEditor.tsx @@ -22,6 +22,7 @@ import { useKeyMapExtension } from 'hooks/queryEditor/extensions/useKeyMapExtens import { useLintExtension } from 'hooks/queryEditor/extensions/useLintExtension'; import { useOptionsExtension } from 'hooks/queryEditor/extensions/useOptionsExtension'; import { useSearchExtension } from 'hooks/queryEditor/extensions/useSearchExtension'; +import { useSqlCompleteExtension } from 'hooks/queryEditor/extensions/useSqlCompleteExtension'; import { useStatusBarExtension } from 'hooks/queryEditor/extensions/useStatusBarExtension'; import { useAutoComplete } from 'hooks/queryEditor/useAutoComplete'; import { useCodeAnalysis } from 'hooks/queryEditor/useCodeAnalysis'; @@ -49,6 +50,7 @@ export interface IQueryEditorProps { keyMap?: CodeMirrorKeyMap; className?: string; autoCompleteType?: AutoCompleteType; + sqlCompleteEnabled?: boolean; engineId: number; templatedVariables?: TDataDocMetaVariables; @@ -103,6 +105,7 @@ export const QueryEditor: React.FC< keyMap = {}, className, autoCompleteType = 'all', + sqlCompleteEnabled = false, engineId, cellId, templatedVariables = [], @@ -355,6 +358,12 @@ export const QueryEditor: React.FC< [onSelection] ); + const sqlCompleteExtension = useSqlCompleteExtension({ + enabled: sqlCompleteEnabled, + engineId, + tables: tableNamesSet, + }); + const extensions = useMemo( () => [ mixedSQL(), @@ -368,6 +377,7 @@ export const QueryEditor: React.FC< optionsExtension, searchExtension, selectionExtension, + sqlCompleteExtension, ], [ keyMapExtention, @@ -379,6 +389,7 @@ export const QueryEditor: React.FC< optionsExtension, searchExtension, selectionExtension, + sqlCompleteExtension, 
] ); diff --git a/querybook/webapp/components/UserSettingsMenu/UserSettingsMenu.tsx b/querybook/webapp/components/UserSettingsMenu/UserSettingsMenu.tsx index 88dabbb6a..4b599a924 100644 --- a/querybook/webapp/components/UserSettingsMenu/UserSettingsMenu.tsx +++ b/querybook/webapp/components/UserSettingsMenu/UserSettingsMenu.tsx @@ -2,6 +2,7 @@ import React, { useMemo } from 'react'; import { useDispatch, useSelector } from 'react-redux'; import { UserSettingsTab } from 'components/EnvironmentAppRouter/modalRoute/UserSettingsMenuRoute'; +import PublicConfig from 'config/querybook_public_config.yaml'; import userSettingConfig from 'config/user_setting.yaml'; import { titleize } from 'lib/utils'; import { availableEnvironmentsSelector } from 'redux/environment/selector'; @@ -13,6 +14,8 @@ import { makeSelectOptions, Select } from 'ui/Select/Select'; import './UserSettingsMenu.scss'; +const AIAssistantConfig = PublicConfig.ai_assistant; + export const UserSettingsMenu: React.FC<{ tab: UserSettingsTab }> = ({ tab, }) => { @@ -35,9 +38,16 @@ export const UserSettingsMenu: React.FC<{ tab: UserSettingsTab }> = ({ const settingsToShow = useMemo( () => - Object.entries(userSettingConfig).filter( - ([key, value]) => value.tab === tab - ), + Object.entries(userSettingConfig).filter(([key, value]) => { + if (key === 'sql_complete') { + return ( + AIAssistantConfig.enabled && + AIAssistantConfig.sql_complete.enabled && + value.tab === tab + ); + } + return value.tab === tab; + }), [tab] ); diff --git a/querybook/webapp/config.d.ts b/querybook/webapp/config.d.ts index 69069788c..9961d9686 100644 --- a/querybook/webapp/config.d.ts +++ b/querybook/webapp/config.d.ts @@ -102,6 +102,10 @@ declare module 'config/querybook_public_config.yaml' { table_vector_search: { enabled: boolean; }; + + sql_complete: { + enabled: boolean; + }; }; survey?: { global_response_cooldown?: number; diff --git a/querybook/webapp/const/aiAssistant.ts b/querybook/webapp/const/aiAssistant.ts index 
0f5ee6a33..b87998616 100644 --- a/querybook/webapp/const/aiAssistant.ts +++ b/querybook/webapp/const/aiAssistant.ts @@ -5,6 +5,7 @@ export enum AICommandType { TEXT_TO_SQL = 'text_to_sql', TABLE_SUMMARY = 'table_summary', TABLE_SELECT = 'table_select', + SQL_COMPLETE = 'sql_complete', } export enum AISocketEvent { diff --git a/querybook/webapp/hooks/queryEditor/extensions/useCopilotExtension.ts b/querybook/webapp/hooks/queryEditor/extensions/useCopilotExtension.ts deleted file mode 100644 index 37d51b9d7..000000000 --- a/querybook/webapp/hooks/queryEditor/extensions/useCopilotExtension.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { inlineCopilot } from 'codemirror-copilot'; -import { useMemo } from 'react'; - -export const useCopilotExtension = () => { - const extension = useMemo( - () => - inlineCopilot(async (prefix, suffix) => { - // TODO: To be implemented - return null; - }), - [] - ); - - return extension; -}; diff --git a/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts b/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts new file mode 100644 index 000000000..f7dae431a --- /dev/null +++ b/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts @@ -0,0 +1,87 @@ +import { inlineCopilot } from 'codemirror-copilot'; +import { useMemo } from 'react'; + +import { AICommandType, AISocketEvent } from 'const/aiAssistant'; +import aiAssistantSocket from 'lib/ai-assistant/ai-assistant-socketio'; + +/** + * Remove overlapping part with prefix from completion. + * + * e.g. 
removeOverlapPrefix('elect *', 'sel') => 'ect *' + */ +const removeOverlapPrefix = (completion: string, prefix: string) => { + let commonPrefixLength = 0; + for (let i = 0; i < prefix.length; i++) { + if (completion.startsWith(prefix.slice(i))) { + commonPrefixLength = prefix.length - i; + break; + } + } + if (commonPrefixLength > 0 || prefix === '') { + completion = completion.slice(commonPrefixLength); + } + return completion; +}; + +const getCodeCompletionFromWebSocket = async (requestPayload: { + query_engine_id: number; + tables: string[]; + prefix: string; + suffix: string; +}): Promise => { + return new Promise((resolve) => { + const eventHandler = (event, payload) => { + switch (event) { + case AISocketEvent.DATA: + const completion = removeOverlapPrefix( + payload.completion, + requestPayload.prefix + ); + resolve(completion); + return; + + case AISocketEvent.CLOSE: + case AISocketEvent.ERROR: + aiAssistantSocket.removeListener( + AICommandType.SQL_COMPLETE, + eventHandler + ); + break; + default: + console.error('Unknown ai websocket event', event); + } + + resolve(''); + }; + + aiAssistantSocket.addListener(AICommandType.SQL_COMPLETE, eventHandler); + aiAssistantSocket.emit(AICommandType.SQL_COMPLETE, requestPayload); + }); +}; + +export const useSqlCompleteExtension = ({ + engineId, + tables = null, + enabled = false, +}: { + engineId: number; + tables?: Set; + enabled?: boolean; +}) => { + const extension = useMemo(() => { + if (!enabled) { + return []; + } + + return inlineCopilot(async (prefix, suffix) => { + return await getCodeCompletionFromWebSocket({ + query_engine_id: engineId, + tables: Array.from(tables ?? 
[]), + prefix, + suffix, + }); + }, 1000); + }, [enabled, tables]); + + return extension; +}; diff --git a/querybook/webapp/hooks/redux/useUserQueryEditorConfig.ts b/querybook/webapp/hooks/redux/useUserQueryEditorConfig.ts index eb91c2ffe..cea45625c 100644 --- a/querybook/webapp/hooks/redux/useUserQueryEditorConfig.ts +++ b/querybook/webapp/hooks/redux/useUserQueryEditorConfig.ts @@ -12,6 +12,7 @@ export function useUserQueryEditorConfig(): { fontSize: string; options: CodeMirror.EditorConfiguration; autoCompleteType: AutoCompleteType; + sqlCompleteEnabled: boolean; } { const editorSettings = useShallowSelector((state: IStoreState) => ({ theme: state.user.computedSettings['theme'], @@ -21,6 +22,8 @@ export function useUserQueryEditorConfig(): { ], autoComplete: state.user.computedSettings['auto_complete'], tab: state.user.computedSettings['tab'], + sqlCompleteEnabled: + state.user.computedSettings['sql_complete'] === 'enabled', })); const indentWithTabs = editorSettings.tab === 'tab'; const tabSize = @@ -39,7 +42,7 @@ export function useUserQueryEditorConfig(): { codeEditorTheme: editorSettings.theme, fontSize: editorSettings.fontSize, autoCompleteType: editorSettings.autoComplete as AutoCompleteType, - // From: https://github.com/codemirror/CodeMirror/issues/988 + sqlCompleteEnabled: editorSettings.sqlCompleteEnabled, options, }; } From f311e10590b353e8b67c8258377628e96e51117f Mon Sep 17 00:00:00 2001 From: "J.C. 
Zhong" Date: Thu, 14 Nov 2024 23:20:15 +0000 Subject: [PATCH 2/4] fix linter --- .../prompts/sql_complete_prompt.py | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/querybook/server/lib/ai_assistant/prompts/sql_complete_prompt.py b/querybook/server/lib/ai_assistant/prompts/sql_complete_prompt.py index 5a251a33c..f5dc31735 100644 --- a/querybook/server/lib/ai_assistant/prompts/sql_complete_prompt.py +++ b/querybook/server/lib/ai_assistant/prompts/sql_complete_prompt.py @@ -1,29 +1,29 @@ from langchain.prompts import PromptTemplate -prompt_template = """You are an expert in the {dialect} SQL dialect, skilled in providing precise SQL code completions. -Your task is to complete the SQL query based on the given context. - - - -===Table Schemas -{table_schemas} - -===Response Guidelines: -1. Analyze the partial query and table schemas to understand the context and determine the query's goal. -2. Identify the relevant tables and columns necessary for the query. -3. Replace with appropriate SQL code, or leave it empty if no completion is needed. +prompt_template = """You are an expert in the {dialect} SQL dialect, skilled in providing precise SQL code completions. +Your task is to complete the SQL query based on the given context. + + + +===Table Schemas +{table_schemas} + +===Response Guidelines: +1. Analyze the partial query and table schemas to understand the context and determine the query's goal. +2. Identify the relevant tables and columns necessary for the query. +3. Replace with appropriate SQL code, or leave it empty if no completion is needed. 4. Make sure the completion does not overlap with the prefix or suffix. 5. Respond in JSON format - -===Response Format: -{{ + +===Response Format: +{{ "completion": "the SQL code to replace , if any" -}} +}} ===Example Input: -sele from some_table +sele from some_table Reasoning: The prefix "sele" suggests that the query is likely a SELECT statement. 
The table schemas indicate the available columns. The completion should be a list of columns to select from the table "some_table". @@ -35,8 +35,7 @@ }} ===Input -{prefix}{suffix} - +{prefix}{suffix} """ From 283f92700c8f083a7e7d74e50f02ad4e92bedf69 Mon Sep 17 00:00:00 2001 From: "J.C. Zhong" Date: Thu, 14 Nov 2024 23:39:05 +0000 Subject: [PATCH 3/4] fix linter --- .../extensions/useSqlCompleteExtension.ts | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts b/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts index f7dae431a..041f1edd0 100644 --- a/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts +++ b/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts @@ -5,7 +5,7 @@ import { AICommandType, AISocketEvent } from 'const/aiAssistant'; import aiAssistantSocket from 'lib/ai-assistant/ai-assistant-socketio'; /** - * Remove overlapping part with prefix from completion. + * A hacky workaround to remove overlapping part with prefix from completion. * * e.g. removeOverlapPrefix('elect *', 'sel') => 'ect *' */ @@ -73,14 +73,16 @@ export const useSqlCompleteExtension = ({ return []; } - return inlineCopilot(async (prefix, suffix) => { - return await getCodeCompletionFromWebSocket({ - query_engine_id: engineId, - tables: Array.from(tables ?? []), - prefix, - suffix, - }); - }, 1000); + return inlineCopilot( + (prefix, suffix) => + getCodeCompletionFromWebSocket({ + query_engine_id: engineId, + tables: Array.from(tables ?? []), + prefix, + suffix, + }), + 1000 + ); }, [enabled, tables]); return extension; From 753143b536fce09a9252ecff8655028a63502498 Mon Sep 17 00:00:00 2001 From: "J.C. 
Zhong" Date: Fri, 15 Nov 2024 00:08:56 +0000 Subject: [PATCH 4/4] address comment --- .../hooks/queryEditor/extensions/useSqlCompleteExtension.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts b/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts index 041f1edd0..11f08d396 100644 --- a/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts +++ b/querybook/webapp/hooks/queryEditor/extensions/useSqlCompleteExtension.ts @@ -17,7 +17,7 @@ const removeOverlapPrefix = (completion: string, prefix: string) => { break; } } - if (commonPrefixLength > 0 || prefix === '') { + if (commonPrefixLength > 0) { completion = completion.slice(commonPrefixLength); } return completion;