From 096da9f9138f5964bec9fb977c96d85e8c5f82a9 Mon Sep 17 00:00:00 2001 From: JinmingYang <2214962083@qq.com> Date: Mon, 7 Oct 2024 14:02:15 +0800 Subject: [PATCH] feat: add doc crawler and indexer --- .../strategies/chat-strategy/chat-workflow.ts | 3 +- .../chat-strategy/nodes/agent-node.ts | 5 +- .../nodes/codebase-search-node.ts | 2 +- .../chat-strategy/nodes/doc-retriever-node.ts | 126 ++++++++++++------ .../strategies/chat-strategy/nodes/state.ts | 3 +- .../chat-strategy/nodes/web-visit-node.ts | 97 ++++++++++++++ .../types/chat-context/doc-context.ts | 6 +- .../utils/doc-crawler.ts | 93 ++++++++++--- .../vectordb/codebase-indexer.ts | 6 +- .../vectordb/doc-indexer.ts | 3 +- .../webview-api/controllers/doc.controller.ts | 11 +- .../webview-api/lowdb/doc-sites-db.ts | 2 +- src/shared/utils/common.ts | 36 +++++ .../get-default-conversation-attachments.ts | 3 +- .../components/chat/editor/chat-input.tsx | 3 +- .../custom-renders/doc-management.tsx | 2 +- src/webview/hooks/api/use-doc-sites.ts | 8 ++ .../hooks/chat/use-mention-options.tsx | 21 ++- .../code/code-chunks-mention-strategy.ts | 2 +- ...-search-doc-site-names-mention-strategy.ts | 32 +++++ ...w-search-doc-site-urls-mention-strategy.ts | 32 ----- .../files/selected-files-mention-strategy.ts | 2 +- .../files/selected-images-mention-strategy.ts | 2 +- .../selected-folders-mention-strategy.ts | 2 +- .../git/git-commits-mention-strategy.ts | 2 +- .../git/git-diffs-mention-strategy.ts | 2 +- src/webview/types/chat.ts | 2 + src/webview/utils/common.ts | 36 ----- 28 files changed, 382 insertions(+), 162 deletions(-) create mode 100644 src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/web-visit-node.ts create mode 100644 src/webview/hooks/api/use-doc-sites.ts create mode 100644 src/webview/lexical/mentions/docs/allow-search-doc-site-names-mention-strategy.ts delete mode 100644 src/webview/lexical/mentions/docs/allow-search-doc-site-urls-mention-strategy.ts diff --git a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/chat-workflow.ts b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/chat-workflow.ts index d6904ff..ed0fe27 100644 --- a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/chat-workflow.ts +++ b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/chat-workflow.ts @@ -11,6 +11,7 @@ import { type ChatGraphState } from './nodes/state' import { webSearchNode } from './nodes/web-search-node' +import { webVisitNode } from './nodes/web-visit-node' const createSmartRoute = (nextNodeName: ChatGraphNodeName) => (state: ChatGraphState) => @@ -21,7 +22,7 @@ const chatWorkflow = new StateGraph(chatGraphState) .addNode( ChatGraphNodeName.Tools, combineNode( - [codebaseSearchNode, docRetrieverNode, webSearchNode], + [codebaseSearchNode, docRetrieverNode, webSearchNode, webVisitNode], chatGraphState ) ) diff --git a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/agent-node.ts b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/agent-node.ts index da6a5a8..fe8cc93 100644 --- a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/agent-node.ts +++ b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/agent-node.ts @@ -9,6 +9,7 @@ import { createCodebaseSearchTool } from './codebase-search-node' import { createDocRetrieverTool } from './doc-retriever-node' import type { ChatGraphNode } from './state' import { createWebSearchTool } from './web-search-node' +import { createWebVisitTool } from './web-visit-node' export const agentNode: ChatGraphNode = async state => { const modelProvider = await createModelProvider() @@ -21,7 +22,9 @@ export const agentNode: ChatGraphNode = async state => { // doc await createDocRetrieverTool(state), // web search - await createWebSearchTool(state) + await createWebSearchTool(state), + // web visit + await createWebVisitTool(state) ].filter(Boolean) as LangchainTool[] const chatMessagesConstructor = new ChatMessagesConstructor(state.chatContext) diff --git a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/codebase-search-node.ts b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/codebase-search-node.ts index c452085..41506da 100644 --- a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/codebase-search-node.ts +++ b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/codebase-search-node.ts @@ -40,7 +40,7 @@ export const createCodebaseSearchTool = async (state: ChatGraphState) => { if (!indexer) return searchResults const searchPromisesResult = await Promise.allSettled( - queryParts?.map(queryPart => indexer.searchSimilarCode(queryPart)) || [] + queryParts?.map(queryPart => indexer.searchSimilarRow(queryPart)) || [] ) const searchCodeSnippets: CodeSnippet[] = searchPromisesResult diff --git a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/doc-retriever-node.ts b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/doc-retriever-node.ts index 0fbef8b..2544cb8 100644 --- a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/doc-retriever-node.ts +++ b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/doc-retriever-node.ts @@ -1,13 +1,13 @@ +import { aidePaths } from '@extension/file-utils/paths' +import { DocInfo } from '@extension/webview-api/chat-context-processor/types/chat-context/doc-context' import type { LangchainTool } from '@extension/webview-api/chat-context-processor/types/langchain-message' +import { DocCrawler } from '@extension/webview-api/chat-context-processor/utils/doc-crawler' import { findCurrentToolsCallParams } from '@extension/webview-api/chat-context-processor/utils/find-current-tools-call-params' -import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio' -import type { DocumentInterface } from '@langchain/core/documents' +import { DocIndexer } from '@extension/webview-api/chat-context-processor/vectordb/doc-indexer' +import { docSitesDB } from '@extension/webview-api/lowdb/doc-sites-db' import type { ToolMessage } from '@langchain/core/messages' import { DynamicStructuredTool } from '@langchain/core/tools' -import type { VectorStoreRetriever } from '@langchain/core/vectorstores' -import { OpenAIEmbeddings } from '@langchain/openai' -import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters' -import { MemoryVectorStore } from 'langchain/vectorstores/memory' +import { removeDuplicates } from '@shared/utils/common' import { z } from 'zod' import { @@ -17,7 +17,7 @@ import { } from './state' interface DocRetrieverToolResult { - relevantDocs: DocumentInterface>[] + relevantDocs: DocInfo[] } export const createDocRetrieverTool = async (state: ChatGraphState) => { @@ -28,52 +28,93 @@ export const createDocRetrieverTool = async (state: ChatGraphState) => { if (!docContext) return null - const { allowSearchDocSiteUrls } = docContext + const { allowSearchDocSiteNames } = docContext - if (!allowSearchDocSiteUrls.length) return null + if (!allowSearchDocSiteNames.length) return null - let _retriever: VectorStoreRetriever + const getRelevantDocs = async ( + queryParts: { siteName: string; keywords: string[] }[] + ): Promise => { + const docSites = await docSitesDB.getAll() - const getRetriever = async () => { - if (_retriever) return _retriever + const docPromises = queryParts.map(async ({ siteName, keywords }) => { + const docSite = docSites.find(site => site.name === siteName) - // TODO: Deep search - const docs = await Promise.all( - allowSearchDocSiteUrls.map(url => new CheerioWebBaseLoader(url).load()) - ) - const docsList = docs.flat() + if (!docSite?.isIndexed || !allowSearchDocSiteNames.includes(siteName)) { + return [] + } - const textSplitter = new RecursiveCharacterTextSplitter({ - chunkSize: 500, - chunkOverlap: 50 + const docIndexer = new DocIndexer( + DocCrawler.getDocCrawlerFolderPath(docSite.url), + aidePaths.getGlobalLanceDbPath() + ) + + await docIndexer.initialize() + + const searchResults = await Promise.allSettled( + keywords.map(keyword => docIndexer.searchSimilarRow(keyword)) + ) + + const searchRows = removeDuplicates( + searchResults + .filter( + (result): result is PromiseFulfilledResult => + result.status === 'fulfilled' + ) + .flatMap(result => result.value), + ['fullPath'] + ).slice(0, 3) + + const docInfoResults = await Promise.allSettled( + searchRows.map(async row => ({ + content: await docIndexer.getRowFileContent(row), + path: docSite.url + })) + ) + + return docInfoResults + .filter( + (result): result is PromiseFulfilledResult => + result.status === 'fulfilled' + ) + .map(result => result.value) }) - const docSplits = await textSplitter.splitDocuments(docsList) - const vectorStore = await MemoryVectorStore.fromDocuments( - docSplits, - new OpenAIEmbeddings() - ) + const results = await Promise.allSettled(docPromises) + const relevantDocs = results + .filter( + (result): result is PromiseFulfilledResult => + result.status === 'fulfilled' + ) + .flatMap(result => result.value) - _retriever = vectorStore.asRetriever() - - return _retriever + return relevantDocs } return new DynamicStructuredTool({ name: ChatGraphToolName.DocRetriever, - description: 'Search and return information about question in Docs.', - func: async ({ query }, runManager): Promise => { - const retriever = await getRetriever() - - return { - relevantDocs: await retriever.invoke( - query, - runManager?.getChild('retriever') - ) - } - }, + description: + 'Search for relevant information in specified documentation sites. This tool can search across multiple doc sites, with multiple keywords for each site. Use this tool to find documentation on specific topics or understand how certain features are described in the documentation.', + func: async ({ queryParts }): Promise => ({ + relevantDocs: await getRelevantDocs(queryParts) + }), schema: z.object({ - query: z.string().describe('query to look up in retriever') + queryParts: z + .array( + z.object({ + siteName: z + .enum(allowSearchDocSiteNames as unknown as [string, ...string[]]) + .describe('The name of the documentation site to search'), + keywords: z + .array(z.string()) + .describe( + 'List of keywords to search for in the specified doc site' + ) + }) + ) + .describe( + "The AI should break down the user's query into multiple parts, each targeting a specific doc site with relevant keywords. This allows for a more comprehensive search across multiple documentation sources." + ) }) }) } @@ -105,10 +146,7 @@ export const docRetrieverNode: ChatGraphNode = async state => { lastConversation.attachments!.docContext.relevantDocs = [ ...lastConversation.attachments!.docContext.relevantDocs, - ...result.relevantDocs.map(doc => ({ - path: doc.metadata?.filePath, - content: doc.pageContent - })) + ...result.relevantDocs ] }) diff --git a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/state.ts b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/state.ts index d362bca..b906680 100644 --- a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/state.ts +++ b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/state.ts @@ -7,7 +7,8 @@ import { baseState } from '../../base-state' export enum ChatGraphToolName { DocRetriever = 'docRetriever', WebSearch = 'webSearch', - CodebaseSearch = 'codebaseSearch' + CodebaseSearch = 'codebaseSearch', + WebVisit = 'webVisit' } export enum ChatGraphNodeName { diff --git a/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/web-visit-node.ts b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/web-visit-node.ts new file mode 100644 index 0000000..8a59f93 --- /dev/null +++ b/src/extension/webview-api/chat-context-processor/strategies/chat-strategy/nodes/web-visit-node.ts @@ -0,0 +1,97 @@ +import type { LangchainTool } from '@extension/webview-api/chat-context-processor/types/langchain-message' +import { DocCrawler } from '@extension/webview-api/chat-context-processor/utils/doc-crawler' +import { findCurrentToolsCallParams } from '@extension/webview-api/chat-context-processor/utils/find-current-tools-call-params' +import type { ToolMessage } from '@langchain/core/messages' +import { DynamicStructuredTool } from '@langchain/core/tools' +import { z } from 'zod' + +import { + ChatGraphToolName, + type ChatGraphNode, + type ChatGraphState +} from './state' + +interface WebVisitToolResult { + contents: { url: string; content: string }[] +} + +// eslint-disable-next-line unused-imports/no-unused-vars +export const createWebVisitTool = async (state: ChatGraphState) => { + const getPageContents = async ( + urls: string[] + ): Promise<{ url: string; content: string }[]> => { + const docCrawler = new DocCrawler(urls![0]!) + const promises = await Promise.allSettled( + urls.map(async url => ({ + url, + content: + (await docCrawler.getPageContent(url)) || 'Failed to retrieve content' + })) + ) + return promises + .filter(promise => promise.status === 'fulfilled') + .map( + promise => + (promise as PromiseFulfilledResult<{ url: string; content: string }>) + .value + ) + } + + return new DynamicStructuredTool({ + name: ChatGraphToolName.WebVisit, + description: + 'Visit specific web pages and retrieve their content. Use this tool when you need to access and analyze the content of one or more web pages.', + func: async ({ urls }): Promise => { + const contents = await getPageContents(urls) + return { contents } + }, + schema: z.object({ + urls: z + .array(z.string().url()) + .describe( + 'An array of URLs to visit and retrieve content from. Each URL should be a valid web address.' + ) + }) + }) +} + +export const webVisitNode: ChatGraphNode = async state => { + const { messages, chatContext } = state + const { conversations } = chatContext + const lastConversation = conversations.at(-1) + const docContext = lastConversation?.attachments?.docContext + + if (!docContext) return {} + + const webVisitTool = await createWebVisitTool(state) + + if (!webVisitTool) return {} + + const tools: LangchainTool[] = [webVisitTool] + const lastMessage = messages.at(-1) + const toolCalls = findCurrentToolsCallParams(lastMessage, tools) + + if (!toolCalls.length) return {} + + const toolCallsPromises = toolCalls.map(async toolCall => { + const toolMessage = (await webVisitTool.invoke(toolCall)) as ToolMessage + + const result = JSON.parse( + toolMessage?.lc_kwargs.content + ) as WebVisitToolResult + + lastConversation.attachments!.docContext.relevantDocs = [ + ...lastConversation.attachments!.docContext.relevantDocs, + ...result.contents.map(item => ({ + path: item.url, + content: item.content + })) + ] + }) + + await Promise.allSettled(toolCallsPromises) + + return { + chatContext + } +} diff --git a/src/extension/webview-api/chat-context-processor/types/chat-context/doc-context.ts b/src/extension/webview-api/chat-context-processor/types/chat-context/doc-context.ts index 320a08d..bf34e55 100644 --- a/src/extension/webview-api/chat-context-processor/types/chat-context/doc-context.ts +++ b/src/extension/webview-api/chat-context-processor/types/chat-context/doc-context.ts @@ -1,11 +1,9 @@ -import type { BaseToolContext } from './base-tool-context' - export interface DocInfo { content: string path: string // file path or url } -export interface DocContext extends BaseToolContext { - allowSearchDocSiteUrls: string[] +export interface DocContext { + allowSearchDocSiteNames: string[] relevantDocs: DocInfo[] } diff --git a/src/extension/webview-api/chat-context-processor/utils/doc-crawler.ts b/src/extension/webview-api/chat-context-processor/utils/doc-crawler.ts index 9a890e9..52872f9 100644 --- a/src/extension/webview-api/chat-context-processor/utils/doc-crawler.ts +++ b/src/extension/webview-api/chat-context-processor/utils/doc-crawler.ts @@ -1,6 +1,7 @@ -import * as fs from 'fs/promises' -import * as path from 'path' -import * as url from 'url' +/* eslint-disable func-names */ +import fs from 'fs/promises' +import path from 'path' +import url from 'url' import { aidePaths, getSemanticHashName } from '@extension/file-utils/paths' import { logger } from '@extension/logger' import * as cheerio from 'cheerio' @@ -38,12 +39,19 @@ export class DocCrawler { private turndownService: TurndownService - private outputDir: string - private domainDir: string progressReporter = new ProgressReporter() + static getDocCrawlerFolderPath(baseUrl: string) { + const parsedUrl = new URL(baseUrl) + const domainFolderName = getSemanticHashName( + parsedUrl.hostname, + parsedUrl.hostname + ) + return path.join(aidePaths.getDocCrawlerPath(), domainFolderName) + } + constructor(baseUrl: string, options: Partial = {}) { this.baseUrl = baseUrl this.options = { @@ -122,14 +130,7 @@ export class DocCrawler { this.content = {} this.depthMap = new Map([[baseUrl, 0]]) this.turndownService = new TurndownService() - this.outputDir = aidePaths.getDocCrawlerPath() - - const parsedUrl = new URL(baseUrl) - const domainFolderName = getSemanticHashName( - parsedUrl.hostname, - parsedUrl.hostname - ) - this.domainDir = path.join(this.outputDir, domainFolderName) + this.domainDir = DocCrawler.getDocCrawlerFolderPath(baseUrl) } private generateRandomUserAgent(): string { @@ -162,7 +163,52 @@ export class DocCrawler { await new Promise(resolve => setTimeout(resolve, this.options.delay)) this.progressReporter.setProcessedItems(this.visited.size) } - await this.saveResults() + } + + public async getPageContent( + pageUrl: string, + retries: number = 3 + ): Promise { + try { + const randomIP = this.generateRandomIP() + const response = await fetch(pageUrl, { + headers: { + 'User-Agent': this.generateRandomUserAgent(), + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + 'Cache-Control': 'no-cache', + Pragma: 'no-cache', + 'X-Forwarded-For': randomIP, + 'X-Real-IP': randomIP, + 'X-Originating-IP': randomIP, + 'CF-Connecting-IP': randomIP, + 'True-Client-IP': randomIP + } + }) + + if (!response.ok) { + if (response.status === 404) { + logger.error(`Page not found: ${pageUrl}`) + return null + } + throw new Error(`HTTP error! status: ${response.status}`) + } + + const html = await response.text() + const $ = cheerio.load(html) + + const content = this.extractContent($) + return content + } catch (error) { + logger.error(`Error crawling ${pageUrl}:`, error) + if (retries > 0) { + logger.log(`Retrying... (${retries} attempts left)`) + await new Promise(resolve => setTimeout(resolve, 5000)) + await this.getPageContent(pageUrl, retries - 1) + } + return null + } } private async crawlPage( @@ -224,7 +270,19 @@ export class DocCrawler { mainContent = this.detectMainContent($) } - // Convert HTML to Markdown + mainContent + .find('script, style, noscript, iframe, img, svg, header, footer, nav') + .remove() + + mainContent + .find('p, div') + .filter(function (this: any) { + return $(this).text().trim() === '' + }) + .remove() + + mainContent.find('*').removeAttr('class').removeAttr('id') + return this.turndownService.turndown(mainContent.html() || '') } @@ -446,11 +504,6 @@ export class DocCrawler { await fs.writeFile(filePath, content) } - private async saveResults(): Promise { - const indexFilePath = path.join(this.domainDir, 'index.json') - await fs.writeFile(indexFilePath, JSON.stringify(this.content, null, 2)) - } - dispose() { this.progressReporter.dispose() } diff --git a/src/extension/webview-api/chat-context-processor/vectordb/codebase-indexer.ts b/src/extension/webview-api/chat-context-processor/vectordb/codebase-indexer.ts index 5517eef..6f092b9 100644 --- a/src/extension/webview-api/chat-context-processor/vectordb/codebase-indexer.ts +++ b/src/extension/webview-api/chat-context-processor/vectordb/codebase-indexer.ts @@ -77,15 +77,15 @@ export class CodebaseIndexer extends BaseIndexer { } async getAllIndexedFilePaths(): Promise { - const filePaths = await traverseFileOrFolders({ + return await traverseFileOrFolders({ type: 'file', filesOrFolders: ['./'], isGetFileContent: false, workspacePath: this.workspaceRootPath, + customShouldIgnore: (fullFilePath: string) => + !this.isAvailableFile(fullFilePath), itemCallback: fileInfo => fileInfo.fullPath }) - - return filePaths.filter(filePath => this.isAvailableExtFile(filePath)) } private isAvailableExtFile(filePath: string): boolean { diff --git a/src/extension/webview-api/chat-context-processor/vectordb/doc-indexer.ts b/src/extension/webview-api/chat-context-processor/vectordb/doc-indexer.ts index 125b212..4f2734a 100644 --- a/src/extension/webview-api/chat-context-processor/vectordb/doc-indexer.ts +++ b/src/extension/webview-api/chat-context-processor/vectordb/doc-indexer.ts @@ -80,7 +80,8 @@ export class DocIndexer extends BaseIndexer { filesOrFolders: [this.docsRootPath], isGetFileContent: false, workspacePath: this.docsRootPath, - customShouldIgnore: () => false, + customShouldIgnore: (fullFilePath: string) => + !this.isAvailableFile(fullFilePath), itemCallback: fileInfo => fileInfo.fullPath }) } diff --git a/src/extension/webview-api/controllers/doc.controller.ts b/src/extension/webview-api/controllers/doc.controller.ts index c842b02..75f3cd8 100644 --- a/src/extension/webview-api/controllers/doc.controller.ts +++ b/src/extension/webview-api/controllers/doc.controller.ts @@ -62,10 +62,10 @@ export class DocController extends Controller { ): AsyncGenerator { try { const site = await this.findSiteById(request.id) - if (!site) throw new Error('找不到文档站点') - if (!site.isCrawled) throw new Error('请先爬取站点,然后再索引') + if (!site) throw new Error('can not find doc site') + if (!site.isCrawled) throw new Error('please crawl the site first') - const indexer = this.initiateIndexer(request.id) + const indexer = await this.initiateIndexer(request.id) await indexer.initialize() const indexingCompleted = indexer.reindexWorkspace(type) @@ -109,9 +109,10 @@ export class DocController extends Controller { return crawler } - private initiateIndexer(id: string) { + private async initiateIndexer(id: string) { if (this.docIndexers[id]) return this.docIndexers[id]! - const docsPath = aidePaths.getDocCrawlerPath() + const site = await this.findSiteById(id) + const docsPath = DocCrawler.getDocCrawlerFolderPath(site!.url) const dbPath = aidePaths.getGlobalLanceDbPath() const indexer = new DocIndexer(docsPath, dbPath) this.docIndexers[id] = indexer diff --git a/src/extension/webview-api/lowdb/doc-sites-db.ts b/src/extension/webview-api/lowdb/doc-sites-db.ts index 5c9fd60..1b8e303 100644 --- a/src/extension/webview-api/lowdb/doc-sites-db.ts +++ b/src/extension/webview-api/lowdb/doc-sites-db.ts @@ -3,7 +3,7 @@ import { aidePaths } from '@extension/file-utils/paths' import { BaseDB, BaseItem } from './base-db' -interface DocSite extends BaseItem { +export interface DocSite extends BaseItem { name: string url: string isCrawled: boolean diff --git a/src/shared/utils/common.ts b/src/shared/utils/common.ts index 6252b01..0ffd83a 100644 --- a/src/shared/utils/common.ts +++ b/src/shared/utils/common.ts @@ -1,2 +1,38 @@ export const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)) + +export const removeDuplicates = ( + arr: T[], + keys?: (keyof T)[] | ((item: T) => any) +): T[] => { + if (!keys) { + return Array.from(new Set(arr)) + } + + const keyFn = + typeof keys === 'function' + ? keys + : (item: T) => keys.map(k => item[k]).join('|') + + const seen = new Set() + return arr.filter(item => { + const key = keyFn(item) + return seen.has(key) ? false : seen.add(key) + }) +} + +export const tryParseJSON = (jsonString: string) => { + try { + return JSON.parse(jsonString) + } catch (error) { + return null + } +} + +export const tryStringifyJSON = (obj: any) => { + try { + return JSON.stringify(obj) + } catch (error) { + return null + } +} diff --git a/src/shared/utils/get-default-conversation-attachments.ts b/src/shared/utils/get-default-conversation-attachments.ts index 5f4498c..1239822 100644 --- a/src/shared/utils/get-default-conversation-attachments.ts +++ b/src/shared/utils/get-default-conversation-attachments.ts @@ -9,8 +9,7 @@ export const getDefaultConversationAttachments = (): Attachments => ({ relevantCodeSnippets: [] }, docContext: { - enableTool: false, - allowSearchDocSiteUrls: [], + allowSearchDocSiteNames: [], relevantDocs: [] }, fileContext: { diff --git a/src/webview/components/chat/editor/chat-input.tsx b/src/webview/components/chat/editor/chat-input.tsx index 2fd8de6..3265870 100644 --- a/src/webview/components/chat/editor/chat-input.tsx +++ b/src/webview/components/chat/editor/chat-input.tsx @@ -1,4 +1,5 @@ import { useEffect, useRef, type FC } from 'react' +import { tryParseJSON, tryStringifyJSON } from '@shared/utils/common' import { convertToLangchainMessageContents } from '@shared/utils/convert-to-langchain-message-contents' import { getAllTextFromLangchainMessageContents } from '@shared/utils/get-all-text-from-langchain-message-contents' import { getDefaultConversationAttachments } from '@shared/utils/get-default-conversation-attachments' @@ -6,7 +7,7 @@ import { mergeLangchainMessageContents } from '@shared/utils/merge-langchain-mes import { Button } from '@webview/components/ui/button' import { useCloneState } from '@webview/hooks/use-clone-state' import type { ChatContext, Conversation, FileInfo } from '@webview/types/chat' -import { cn, tryParseJSON, tryStringifyJSON } from '@webview/utils/common' +import { cn } from '@webview/utils/common' import { $createParagraphNode, $createTextNode, diff --git a/src/webview/components/settings/custom-renders/doc-management.tsx b/src/webview/components/settings/custom-renders/doc-management.tsx index 6b4d1c1..a7d6c62 100644 --- a/src/webview/components/settings/custom-renders/doc-management.tsx +++ b/src/webview/components/settings/custom-renders/doc-management.tsx @@ -251,7 +251,7 @@ export const DocManagement = () => { className="w-full md:w-auto text-xs h-6" size="sm" onClick={() => handler(site.id)} - disabled={loading[site.id] || isCompleted} + disabled={loading[site.id]} > {loading[site.id] && ( diff --git a/src/webview/hooks/api/use-doc-sites.ts b/src/webview/hooks/api/use-doc-sites.ts new file mode 100644 index 0000000..01bec70 --- /dev/null +++ b/src/webview/hooks/api/use-doc-sites.ts @@ -0,0 +1,8 @@ +import { useQuery } from '@tanstack/react-query' +import { api } from '@webview/services/api-client' + +export const useDocSites = () => + useQuery({ + queryKey: ['realtime', 'docSites'], + queryFn: () => api.doc.getDocSites({}) + }) diff --git a/src/webview/hooks/chat/use-mention-options.tsx b/src/webview/hooks/chat/use-mention-options.tsx index 6564411..8384f61 100644 --- a/src/webview/hooks/chat/use-mention-options.tsx +++ b/src/webview/hooks/chat/use-mention-options.tsx @@ -14,6 +14,7 @@ import { MentionFilePreview } from '@webview/components/chat/selectors/mention-s import { MentionFolderPreview } from '@webview/components/chat/selectors/mention-selector/folders/mention-folder-preview' import { FileIcon as FileIcon2 } from '@webview/components/file-icon' import { RelevantCodeSnippetsMentionStrategy } from '@webview/lexical/mentions/codebase/relevant-code-snippets-mention-strategy' +import { AllowSearchDocSiteNamesToolMentionStrategy } from '@webview/lexical/mentions/docs/allow-search-doc-site-names-mention-strategy' import { SelectedFilesMentionStrategy } from '@webview/lexical/mentions/files/selected-files-mention-strategy' import { SelectedFoldersMentionStrategy } from '@webview/lexical/mentions/folders/selected-folders-mention-strategy' import { GitCommitsMentionStrategy } from '@webview/lexical/mentions/git/git-commits-mention-strategy' @@ -26,6 +27,7 @@ import { } from '@webview/types/chat' import { getFileNameFromPath } from '@webview/utils/path' +import { useDocSites } from '../api/use-doc-sites' import { useFiles } from '../api/use-files' import { useFolders } from '../api/use-folders' import { useGitCommits } from '../api/use-git-commits' @@ -34,6 +36,7 @@ export const useMentionOptions = () => { const { data: files = [] } = useFiles() const { data: folders = [] } = useFolders() const { data: gitCommits = [] } = useGitCommits() + const { data: docSites = [] } = useDocSites() const filesMentionOptions: MentionOption[] = files.map( file => @@ -103,6 +106,20 @@ export const useMentionOptions = () => { }) satisfies MentionOption ) + const docSitesMentionOptions: MentionOption[] = docSites.map(site => ({ + id: `doc-site#${site.id}`, + label: site.name, + category: MentionCategory.Docs, + mentionStrategy: new AllowSearchDocSiteNamesToolMentionStrategy(), + searchKeywords: [site.name, site.url], + data: site, + itemLayoutProps: { + icon: , + label: site.name, + details: site.url + } + })) + const mentionOptions: MentionOption[] = [ { id: 'files', @@ -156,8 +173,8 @@ export const useMentionOptions = () => { itemLayoutProps: { icon: , label: 'Docs' - } - // mentionStrategy: new AllowSearchDocSiteUrlsToolMentionStrategy() + }, + children: docSitesMentionOptions }, { id: 'git', diff --git a/src/webview/lexical/mentions/code/code-chunks-mention-strategy.ts b/src/webview/lexical/mentions/code/code-chunks-mention-strategy.ts index 5f20972..5a3bb06 100644 --- a/src/webview/lexical/mentions/code/code-chunks-mention-strategy.ts +++ b/src/webview/lexical/mentions/code/code-chunks-mention-strategy.ts @@ -1,10 +1,10 @@ +import { removeDuplicates } from '@shared/utils/common' import { MentionCategory, type Attachments, type CodeChunk, type IMentionStrategy } from '@webview/types/chat' -import { removeDuplicates } from '@webview/utils/common' export class CodeChunksMentionStrategy implements IMentionStrategy { category = MentionCategory.Code as const diff --git a/src/webview/lexical/mentions/docs/allow-search-doc-site-names-mention-strategy.ts b/src/webview/lexical/mentions/docs/allow-search-doc-site-names-mention-strategy.ts new file mode 100644 index 0000000..ec20db2 --- /dev/null +++ b/src/webview/lexical/mentions/docs/allow-search-doc-site-names-mention-strategy.ts @@ -0,0 +1,32 @@ +import { removeDuplicates } from '@shared/utils/common' +import { + IMentionStrategy, + MentionCategory, + type Attachments, + type DocSite +} from '@webview/types/chat' + +export class AllowSearchDocSiteNamesToolMentionStrategy + implements IMentionStrategy +{ + category = MentionCategory.Docs as const + + name = 'AllowSearchDocSiteNamesToolMentionStrategy' as const + + async buildNewAttachmentsAfterAddMention( + data: DocSite | DocSite[], + currentAttachments: Attachments + ): Promise> { + const sites = Array.isArray(data) ? data : [data] + + return { + docContext: { + ...currentAttachments.docContext, + allowSearchDocSiteNames: removeDuplicates([ + ...(currentAttachments.docContext?.allowSearchDocSiteNames || []), + ...sites.map(site => site.name) + ]) + } + } + } +} diff --git a/src/webview/lexical/mentions/docs/allow-search-doc-site-urls-mention-strategy.ts b/src/webview/lexical/mentions/docs/allow-search-doc-site-urls-mention-strategy.ts deleted file mode 100644 index 7044afc..0000000 --- a/src/webview/lexical/mentions/docs/allow-search-doc-site-urls-mention-strategy.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { - IMentionStrategy, - MentionCategory, - type Attachments -} from '@webview/types/chat' -import { removeDuplicates } from '@webview/utils/common' - -export class AllowSearchDocSiteUrlsToolMentionStrategy - implements IMentionStrategy -{ - category = MentionCategory.Docs as const - - name = 'AllowSearchDocSiteUrlsToolMentionStrategy' as const - - async buildNewAttachmentsAfterAddMention( - data: string | string[], - currentAttachments: Attachments - ): Promise> { - const urls = Array.isArray(data) ? data : [data] - - return { - docContext: { - ...currentAttachments.docContext, - enableTool: true, - allowSearchDocSiteUrls: removeDuplicates([ - ...(currentAttachments.docContext?.allowSearchDocSiteUrls || []), - ...urls - ]) - } - } - } -} diff --git a/src/webview/lexical/mentions/files/selected-files-mention-strategy.ts b/src/webview/lexical/mentions/files/selected-files-mention-strategy.ts index f9e1819..d39e137 100644 --- a/src/webview/lexical/mentions/files/selected-files-mention-strategy.ts +++ b/src/webview/lexical/mentions/files/selected-files-mention-strategy.ts @@ -1,10 +1,10 @@ +import { removeDuplicates } from '@shared/utils/common' import { IMentionStrategy, MentionCategory, type Attachments, type FileInfo } from '@webview/types/chat' -import { removeDuplicates } from '@webview/utils/common' export class SelectedFilesMentionStrategy implements IMentionStrategy { category = MentionCategory.Files as const diff --git a/src/webview/lexical/mentions/files/selected-images-mention-strategy.ts b/src/webview/lexical/mentions/files/selected-images-mention-strategy.ts index aae082b..0917762 100644 --- a/src/webview/lexical/mentions/files/selected-images-mention-strategy.ts +++ b/src/webview/lexical/mentions/files/selected-images-mention-strategy.ts @@ -1,10 +1,10 @@ +import { removeDuplicates } from '@shared/utils/common' import { IMentionStrategy, MentionCategory, type Attachments, type ImageInfo } from '@webview/types/chat' -import { removeDuplicates } from '@webview/utils/common' export class SelectedImagesMentionStrategy implements IMentionStrategy { category = MentionCategory.Files as const diff --git a/src/webview/lexical/mentions/folders/selected-folders-mention-strategy.ts b/src/webview/lexical/mentions/folders/selected-folders-mention-strategy.ts index d4ab190..ab3b351 100644 --- a/src/webview/lexical/mentions/folders/selected-folders-mention-strategy.ts +++ b/src/webview/lexical/mentions/folders/selected-folders-mention-strategy.ts @@ -1,10 +1,10 @@ +import { removeDuplicates } from '@shared/utils/common' import { IMentionStrategy, MentionCategory, type Attachments, type FolderInfo } from '@webview/types/chat' -import { removeDuplicates } from '@webview/utils/common' export class SelectedFoldersMentionStrategy implements IMentionStrategy { category = MentionCategory.Folders as const diff --git a/src/webview/lexical/mentions/git/git-commits-mention-strategy.ts b/src/webview/lexical/mentions/git/git-commits-mention-strategy.ts index a10c97b..1db2d63 100644 --- a/src/webview/lexical/mentions/git/git-commits-mention-strategy.ts +++ b/src/webview/lexical/mentions/git/git-commits-mention-strategy.ts @@ -1,10 +1,10 @@ +import { removeDuplicates } from '@shared/utils/common' import { MentionCategory, type Attachments, type GitCommit, type IMentionStrategy } from '@webview/types/chat' -import { removeDuplicates } from '@webview/utils/common' export class GitCommitsMentionStrategy implements IMentionStrategy { category = MentionCategory.Git as const diff --git a/src/webview/lexical/mentions/git/git-diffs-mention-strategy.ts b/src/webview/lexical/mentions/git/git-diffs-mention-strategy.ts index 3d6ac4d..0b9b89f 100644 --- a/src/webview/lexical/mentions/git/git-diffs-mention-strategy.ts +++ b/src/webview/lexical/mentions/git/git-diffs-mention-strategy.ts @@ -1,10 +1,10 @@ +import { removeDuplicates } from '@shared/utils/common' import { MentionCategory, type Attachments, type GitDiff, type IMentionStrategy } from '@webview/types/chat' -import { removeDuplicates } from '@webview/utils/common' export class GitDiffsMentionStrategy implements IMentionStrategy { category = MentionCategory.Git as const diff --git a/src/webview/types/chat.ts b/src/webview/types/chat.ts index 615c2e0..77ca2cf 100644 --- a/src/webview/types/chat.ts +++ b/src/webview/types/chat.ts @@ -5,6 +5,8 @@ import type { } from '@extension/webview-api/chat-context-processor/types/chat-context' import type { MentionItemLayoutProps } from '@webview/components/chat/selectors/mention-selector/mention-item-layout' +export type { DocSite } from '@extension/webview-api/lowdb/doc-sites-db' + export type { ProgressInfo } from '@extension/webview-api/chat-context-processor/utils/process-reporter' export * from '@extension/webview-api/chat-context-processor/types/chat-context' diff --git a/src/webview/utils/common.ts b/src/webview/utils/common.ts index 481024c..3de3b44 100644 --- a/src/webview/utils/common.ts +++ b/src/webview/utils/common.ts @@ -15,39 +15,3 @@ export const getErrorMsg = (error: any) => { return errorMessage } - -export const removeDuplicates = ( - arr: T[], - keys?: (keyof T)[] | ((item: T) => any) -): T[] => { - if (!keys) { - return Array.from(new Set(arr)) - } - - const keyFn = - typeof keys === 'function' - ? keys - : (item: T) => keys.map(k => item[k]).join('|') - - const seen = new Set() - return arr.filter(item => { - const key = keyFn(item) - return seen.has(key) ? false : seen.add(key) - }) -} - -export const tryParseJSON = (jsonString: string) => { - try { - return JSON.parse(jsonString) - } catch (error) { - return null - } -} - -export const tryStringifyJSON = (obj: any) => { - try { - return JSON.stringify(obj) - } catch (error) { - return null - } -}