diff --git a/HACKTOBERFEST.md b/HACKTOBERFEST.md index d3bed0fce..631f73ba7 100644 --- a/HACKTOBERFEST.md +++ b/HACKTOBERFEST.md @@ -7,7 +7,7 @@ All contributors with accepted PRs will receive a cool Holopin! 🤩 (Watch out ### 🏆 Top 50 contributors will recieve a special T-shirt ### 🏆 [LLM Document analysis by LexEU competition](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md): -A separate competition is available for those sumbit best new retrieval / workflow method that will analyze a Document using EU laws. +A separate competition is available for those who sumbit new retrieval / workflow method that will analyze a Document using EU laws. With 200$, 100$, 50$ prize for 1st, 2nd and 3rd place respectively. You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md) @@ -15,7 +15,11 @@ You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/ ```text 🛠️ Code: This is the golden ticket! Make meaningful contributions through PRs. -🧩 API extention: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent. +🧩 API extension: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent. +They can be a completely separate repo. +For example: +https://github.com/arc53/tg-bot-docsgpt-extenstion or +https://github.com/arc53/DocsGPT-cli Non-Code Contributions: diff --git a/README.md b/README.md index 7ceb75b95..a88f2fc5f 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,11 @@ Say goodbye to time-consuming manual searches, and let + Let's chat + + +[Send Email :email:](mailto:contact@arc53.com?subject=DocsGPT%20support%2Fsolutions) ![video-example-of-docs-gpt](https://d3dg1063dc54p9.cloudfront.net/videos/demov3.gif) diff --git a/application/worker.py b/application/worker.py index 53f6c06a6..2000523c8 100755 --- a/application/worker.py +++ b/application/worker.py @@ -22,22 +22,23 @@ db = mongo["docsgpt"] sources_collection = db["sources"] +# Constants +MIN_TOKENS = 150 +MAX_TOKENS = 1250 +RECURSION_DEPTH = 2 # Define a function to extract metadata from a given filename. def metadata_from_filename(title): return {"title": title} - # Define a function to generate a random string of a given length. def generate_random_string(length): return "".join([string.ascii_letters[i % 52] for i in range(length)]) - current_dir = os.path.dirname( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ) - def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5): """ Recursively extract zip files with a limit on recursion depth. @@ -52,9 +53,13 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5): logging.warning(f"Reached maximum recursion depth of {max_depth}") return - with zipfile.ZipFile(zip_path, "r") as zip_ref: - zip_ref.extractall(extract_to) - os.remove(zip_path) # Remove the zip file after extracting + try: + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(extract_to) + os.remove(zip_path) # Remove the zip file after extracting + except Exception as e: + logging.error(f"Error extracting zip file {zip_path}: {e}") + return # Check for nested zip files and extract them for root, dirs, files in os.walk(extract_to): @@ -64,6 +69,38 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5): file_path = os.path.join(root, file) extract_zip_recursive(file_path, root, current_depth + 1, max_depth) +def download_file(url, params, dest_path): + try: + response = requests.get(url, params=params) + response.raise_for_status() + with open(dest_path, "wb") as f: + f.write(response.content) + except requests.RequestException as e: + logging.error(f"Error downloading file: {e}") + raise + +def upload_index(full_path, file_data): + try: + if settings.VECTOR_STORE == "faiss": + files = { + "file_faiss": open(full_path + "/index.faiss", "rb"), + "file_pkl": open(full_path + "/index.pkl", "rb"), + } + response = requests.post( + urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data + ) + else: + response = requests.post( + urljoin(settings.API_URL, "/api/upload_index"), data=file_data + ) + response.raise_for_status() + except requests.RequestException as e: + logging.error(f"Error uploading index: {e}") + raise + finally: + if settings.VECTOR_STORE == "faiss": + for file in files.values(): + file.close() # Define the main function for ingesting and processing documents. def ingest_worker( @@ -84,39 +121,25 @@ def ingest_worker( Returns: dict: Information about the completed ingestion task, including input parameters and a "limited" flag. """ - # directory = 'inputs' or 'temp' - # formats = [".rst", ".md"] input_files = None recursive = True limit = None exclude = True - # name_job = 'job1' - # filename = 'install.rst' - # user = 'local' sample = False token_check = True - min_tokens = 150 - max_tokens = 1250 - recursion_depth = 2 full_path = os.path.join(directory, user, name_job) logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job}) - # check if API_URL env variable is set file_data = {"name": name_job, "file": filename, "user": user} - response = requests.get( - urljoin(settings.API_URL, "/api/download"), params=file_data - ) - file = response.content + download_file(urljoin(settings.API_URL, "/api/download"), file_data, os.path.join(full_path, filename)) if not os.path.exists(full_path): os.makedirs(full_path) - with open(os.path.join(full_path, filename), "wb") as f: - f.write(file) # check if file is .zip and extract it if filename.endswith(".zip"): extract_zip_recursive( - os.path.join(full_path, filename), full_path, 0, recursion_depth + os.path.join(full_path, filename), full_path, 0, RECURSION_DEPTH ) self.update_state(state="PROGRESS", meta={"current": 1}) @@ -132,8 +155,8 @@ def ingest_worker( ).load_data() raw_docs = group_split( documents=raw_docs, - min_tokens=min_tokens, - max_tokens=max_tokens, + min_tokens=MIN_TOKENS, + max_tokens=MAX_TOKENS, token_check=token_check, ) @@ -148,28 +171,13 @@ def ingest_worker( for i in range(min(5, len(raw_docs))): logging.info(f"Sample document {i}: {raw_docs[i]}") - # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl - # and send them to the server (provide user and name in form) - file_data = { - "name": name_job, - "user": user, + file_data.update({ "tokens": tokens, "retriever": retriever, "id": str(id), "type": "local", - } - if settings.VECTOR_STORE == "faiss": - files = { - "file_faiss": open(full_path + "/index.faiss", "rb"), - "file_pkl": open(full_path + "/index.pkl", "rb"), - } - response = requests.post( - urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data - ) - else: - response = requests.post( - urljoin(settings.API_URL, "/api/upload_index"), data=file_data - ) + }) + upload_index(full_path, file_data) # delete local shutil.rmtree(full_path) @@ -183,7 +191,6 @@ def ingest_worker( "limited": False, } - def remote_worker( self, source_data, @@ -197,16 +204,14 @@ def remote_worker( doc_id=None, ): token_check = True - min_tokens = 150 - max_tokens = 1250 - full_path = directory + "/" + user + "/" + name_job + full_path = os.path.join(directory, user, name_job) if not os.path.exists(full_path): os.makedirs(full_path) self.update_state(state="PROGRESS", meta={"current": 1}) logging.info( f"Remote job: {full_path}", - extra={"user": user, "job": name_job, source_data: source_data}, + extra={"user": user, "job": name_job, "source_data": source_data}, ) remote_loader = RemoteCreator.create_loader(loader) @@ -214,11 +219,10 @@ def remote_worker( docs = group_split( documents=raw_docs, - min_tokens=min_tokens, - max_tokens=max_tokens, + min_tokens=MIN_TOKENS, + max_tokens=MAX_TOKENS, token_check=token_check, ) - # docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs] tokens = count_tokens_docs(docs) if operation_mode == "upload": id = ObjectId() @@ -230,7 +234,6 @@ def remote_worker( call_openai_api(docs, full_path, id, self) self.update_state(state="PROGRESS", meta={"current": 100}) - # Proceed with uploading and cleaning as in the original function file_data = { "name": name_job, "user": user, @@ -241,23 +244,12 @@ def remote_worker( "remote_data": source_data, "sync_frequency": sync_frequency, } - if settings.VECTOR_STORE == "faiss": - files = { - "file_faiss": open(full_path + "/index.faiss", "rb"), - "file_pkl": open(full_path + "/index.pkl", "rb"), - } - - requests.post( - urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data - ) - else: - requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data) + upload_index(full_path, file_data) shutil.rmtree(full_path) return {"urls": source_data, "name_job": name_job, "user": user, "limited": False} - def sync( self, source_data, @@ -283,10 +275,10 @@ def sync( doc_id, ) except Exception as e: + logging.error(f"Error during sync: {e}") return {"status": "error", "error": str(e)} return {"status": "success"} - def sync_worker(self, frequency): sync_counts = Counter() sources = sources_collection.find() diff --git a/frontend/package-lock.json b/frontend/package-lock.json index a09e14c61..1a6e0ce38 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -24,7 +24,9 @@ "react-redux": "^8.0.5", "react-router-dom": "^6.8.1", "react-syntax-highlighter": "^15.5.0", - "remark-gfm": "^4.0.0" + "rehype-katex": "^7.0.1", + "remark-gfm": "^4.0.0", + "remark-math": "^6.0.0" }, "devDependencies": { "@types/react": "^18.0.27", @@ -1636,6 +1638,12 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/katex": { + "version": "0.16.7", + "resolved": "https://registry.npmjs.org/@types/katex/-/katex-0.16.7.tgz", + "integrity": "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ==", + "license": "MIT" + }, "node_modules/@types/mdast": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", @@ -3104,7 +3112,6 @@ "version": "4.5.0", "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", - "dev": true, "engines": { "node": ">=0.12" }, @@ -4582,6 +4589,193 @@ "node": ">= 0.4" } }, + "node_modules/hast-util-from-dom": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/hast-util-from-dom/-/hast-util-from-dom-5.0.0.tgz", + "integrity": "sha512-d6235voAp/XR3Hh5uy7aGLbM3S4KamdW0WEgOaU1YoewnuYw4HXb5eRtv9g65m/RFGEfUY1Mw4UqCc5Y8L4Stg==", + "license": "ISC", + "dependencies": { + "@types/hast": "^3.0.0", + "hastscript": "^8.0.0", + "web-namespaces": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-dom/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/hast-util-from-dom/node_modules/hast-util-parse-selector": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz", + "integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-dom/node_modules/hastscript": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-8.0.0.tgz", + "integrity": "sha512-dMOtzCEd3ABUeSIISmrETiKuyydk1w0pa+gE/uormcTpSYuaNJPbX1NU3JLyscSLjwAQM8bWMhhIlnCqnRvDTw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-parse-selector": "^4.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-html": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/hast-util-from-html/-/hast-util-from-html-2.0.3.tgz", + "integrity": "sha512-CUSRHXyKjzHov8yKsQjGOElXy/3EKpyX56ELnkHH34vDVw1N1XSQ1ZcAvTyAPtGqLTuKP/uxM+aLkSPqF/EtMw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "devlop": "^1.1.0", + "hast-util-from-parse5": "^8.0.0", + "parse5": "^7.0.0", + "vfile": "^6.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-html-isomorphic": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/hast-util-from-html-isomorphic/-/hast-util-from-html-isomorphic-2.0.0.tgz", + "integrity": "sha512-zJfpXq44yff2hmE0XmwEOzdWin5xwH+QIhMLOScpX91e/NSGPsAzNCvLQDIEPyO2TXi+lBmU6hjLIhV8MwP2kw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "hast-util-from-dom": "^5.0.0", + "hast-util-from-html": "^2.0.0", + "unist-util-remove-position": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-html-isomorphic/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/hast-util-from-html/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/hast-util-from-parse5": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/hast-util-from-parse5/-/hast-util-from-parse5-8.0.1.tgz", + "integrity": "sha512-Er/Iixbc7IEa7r/XLtuG52zoqn/b3Xng/w6aZQ0xGVxzhw5xUFxcRqdPzP6yFi/4HBYRaifaI5fQ1RH8n0ZeOQ==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "devlop": "^1.0.0", + "hastscript": "^8.0.0", + "property-information": "^6.0.0", + "vfile": "^6.0.0", + "vfile-location": "^5.0.0", + "web-namespaces": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-parse5/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/hast-util-from-parse5/node_modules/hast-util-parse-selector": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz", + "integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-parse5/node_modules/hastscript": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-8.0.0.tgz", + "integrity": "sha512-dMOtzCEd3ABUeSIISmrETiKuyydk1w0pa+gE/uormcTpSYuaNJPbX1NU3JLyscSLjwAQM8bWMhhIlnCqnRvDTw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-parse-selector": "^4.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-is-element": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/hast-util-is-element/-/hast-util-is-element-3.0.0.tgz", + "integrity": "sha512-Val9mnv2IWpLbNPqc/pUem+a7Ipj2aHacCwgNfTiK0vJKl0LF+4Ba4+v1oPHFpf3bLYmreq0/l3Gud9S5OH42g==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-is-element/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/hast-util-parse-selector": { "version": "2.2.5", "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-2.2.5.tgz", @@ -4625,6 +4819,31 @@ "@types/unist": "*" } }, + "node_modules/hast-util-to-text": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-4.0.2.tgz", + "integrity": "sha512-KK6y/BN8lbaq654j7JgBydev7wuNMcID54lkRav1P0CaE1e47P72AWWPiGKXTJU271ooYzcvTAn/Zt0REnvc7A==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "hast-util-is-element": "^3.0.0", + "unist-util-find-after": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-to-text/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/hast-util-whitespace": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz", @@ -5421,6 +5640,31 @@ "node": ">=4.0" } }, + "node_modules/katex": { + "version": "0.16.11", + "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.11.tgz", + "integrity": "sha512-RQrI8rlHY92OLf3rho/Ts8i/XvjgguEjOkO1BEXcU3N8BqPpSzBNwV/G0Ukr+P/l3ivvJUE/Fa/CwbS6HesGNQ==", + "funding": [ + "https://opencollective.com/katex", + "https://github.com/sponsors/katex" + ], + "license": "MIT", + "dependencies": { + "commander": "^8.3.0" + }, + "bin": { + "katex": "cli.js" + } + }, + "node_modules/katex/node_modules/commander": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz", + "integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, "node_modules/levn": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", @@ -5837,6 +6081,34 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/mdast-util-math": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-math/-/mdast-util-math-3.0.0.tgz", + "integrity": "sha512-Tl9GBNeG/AhJnQM221bJR2HPvLOSnLE/T9cJI9tlc6zwQk2nPk/4f0cHkOdEixQPC/j8UtKDdITswvLAy1OZ1w==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "longest-streak": "^3.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.1.0", + "unist-util-remove-position": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-math/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/mdast-util-mdx-expression": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.0.tgz", @@ -6269,6 +6541,25 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/micromark-extension-math": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-math/-/micromark-extension-math-3.1.0.tgz", + "integrity": "sha512-lvEqd+fHjATVs+2v/8kg9i5Q0AP2k85H0WUOwpIVvUML8BapsMvh1XAogmQjOCsLpoKRCVQqEkQBB3NhVBcsOg==", + "license": "MIT", + "dependencies": { + "@types/katex": "^0.16.0", + "devlop": "^1.0.0", + "katex": "^0.16.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/micromark-factory-destination": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.0.tgz", @@ -7055,6 +7346,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/parse5": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz", + "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==", + "license": "MIT", + "dependencies": { + "entities": "^4.4.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -7831,6 +8134,34 @@ "url": "https://github.com/sponsors/mysticatea" } }, + "node_modules/rehype-katex": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/rehype-katex/-/rehype-katex-7.0.1.tgz", + "integrity": "sha512-OiM2wrZ/wuhKkigASodFoo8wimG3H12LWQaH8qSPVJn9apWKFSH3YOCtbKpBorTVw/eI7cuT21XBbvwEswbIOA==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/katex": "^0.16.0", + "hast-util-from-html-isomorphic": "^2.0.0", + "hast-util-to-text": "^4.0.0", + "katex": "^0.16.0", + "unist-util-visit-parents": "^6.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/rehype-katex/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/remark-gfm": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.0.tgz", @@ -7848,6 +8179,22 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/remark-math": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/remark-math/-/remark-math-6.0.0.tgz", + "integrity": "sha512-MMqgnP74Igy+S3WwnhQ7kqGlEerTETXMvJhrUzDikVZ2/uogJCb+WHUg97hK9/jcfc0dkD73s3LN8zU49cTEtA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-math": "^3.0.0", + "micromark-extension-math": "^3.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/remark-parse": { "version": "11.0.0", "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz", @@ -8962,6 +9309,20 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/unist-util-find-after": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-find-after/-/unist-util-find-after-5.0.0.tgz", + "integrity": "sha512-amQa0Ep2m6hE2g72AugUItjbuM8X8cGQnFoHk0pGfrFeT9GZhzN5SW8nRsiGKK7Aif4CrACPENkA6P/Lw6fHGQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/unist-util-is": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.0.tgz", @@ -9105,6 +9466,20 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/vfile-location": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/vfile-location/-/vfile-location-5.0.3.tgz", + "integrity": "sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/vfile-message": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.2.tgz", @@ -9200,6 +9575,16 @@ "node": ">=0.10.0" } }, + "node_modules/web-namespaces": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz", + "integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/frontend/package.json b/frontend/package.json index 4619fd2e7..176c4fd9e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -35,7 +35,9 @@ "react-redux": "^8.0.5", "react-router-dom": "^6.8.1", "react-syntax-highlighter": "^15.5.0", - "remark-gfm": "^4.0.0" + "rehype-katex": "^7.0.1", + "remark-gfm": "^4.0.0", + "remark-math": "^6.0.0" }, "devDependencies": { "@types/react": "^18.0.27", diff --git a/frontend/src/conversation/ConversationBubble.tsx b/frontend/src/conversation/ConversationBubble.tsx index 3741bfa1f..543699ed5 100644 --- a/frontend/src/conversation/ConversationBubble.tsx +++ b/frontend/src/conversation/ConversationBubble.tsx @@ -4,6 +4,9 @@ import { useSelector } from 'react-redux'; import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'; import { vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism'; import remarkGfm from 'remark-gfm'; +import remarkMath from 'remark-math'; +import rehypeKatex from 'rehype-katex'; +import 'katex/dist/katex.min.css'; import Alert from '../assets/alert.svg'; import DocsGPT3 from '../assets/cute_docsgpt3.svg'; @@ -62,6 +65,21 @@ const ConversationBubble = forwardRef< ); } else { + const preprocessLaTeX = (content: string) => { + // Replace block-level LaTeX delimiters \[ \] with $$ $$ + const blockProcessedContent = content.replace( + /\\\[(.*?)\\\]/gs, + (_, equation) => `$$${equation}$$`, + ); + + // Replace inline LaTeX delimiters \( \) with $ $ + const inlineProcessedContent = blockProcessedContent.replace( + /\\\((.*?)\\\)/gs, + (_, equation) => `$${equation}$`, + ); + + return inlineProcessedContent; + }; bubble = (
- {message} + {preprocessLaTeX(message)}
diff --git a/frontend/src/locale/en.json b/frontend/src/locale/en.json index 7ba8add18..b1a322d9f 100644 --- a/frontend/src/locale/en.json +++ b/frontend/src/locale/en.json @@ -54,14 +54,16 @@ "name": "Document Name", "date": "Vector Date", "type": "Type", - "tokenUsage": "Token Usage" + "tokenUsage": "Token Usage", + "noData": "No existing Documents" }, "apiKeys": { "label": "Chatbots", "name": "Name", "key": "API Key", "sourceDoc": "Source Document", - "createNew": "Create New" + "createNew": "Create New", + "noData": "No existing Chatbots" }, "analytics": { "label": "Analytics" diff --git a/frontend/src/locale/es.json b/frontend/src/locale/es.json index 49aa5d538..7b7dbec08 100644 --- a/frontend/src/locale/es.json +++ b/frontend/src/locale/es.json @@ -54,14 +54,16 @@ "name": "Nombre del Documento", "date": "Fecha Vector", "type": "Tipo", - "tokenUsage": "Uso de Tokens" + "tokenUsage": "Uso de Tokens", + "noData": "No hay documentos existentes" }, "apiKeys": { "label": "Chatbots", "name": "Nombre", "key": "Clave de API", "sourceDoc": "Documento Fuente", - "createNew": "Crear Nuevo" + "createNew": "Crear Nuevo", + "noData": "No hay chatbots existentes" }, "analytics": { "label": "Analítica" diff --git a/frontend/src/locale/jp.json b/frontend/src/locale/jp.json index 9e3673304..fa61c2913 100644 --- a/frontend/src/locale/jp.json +++ b/frontend/src/locale/jp.json @@ -54,14 +54,16 @@ "name": "ドキュメント名", "date": "ベクトル日付", "type": "タイプ", - "tokenUsage": "トークン使用量" + "tokenUsage": "トークン使用量", + "noData": "既存のドキュメントはありません" }, "apiKeys": { "label": "チャットボット", "name": "名前", "key": "APIキー", "sourceDoc": "ソースドキュメント", - "createNew": "新規作成" + "createNew": "新規作成", + "noData": "既存のチャットボットはありません" }, "analytics": { "label": "分析" diff --git a/frontend/src/locale/zh.json b/frontend/src/locale/zh.json index 81eff996d..080c4ee31 100644 --- a/frontend/src/locale/zh.json +++ b/frontend/src/locale/zh.json @@ -54,14 +54,16 @@ "name": "文件名称", "date": "向量日期", "type": "类型", - "tokenUsage": "令牌使用" + "tokenUsage": "令牌使用", + "noData": "没有现有的文档" }, "apiKeys": { "label": "聊天机器人", "name": "名称", "key": "API 密钥", "sourceDoc": "源文档", - "createNew": "创建新的" + "createNew": "创建新的", + "noData": "没有现有的聊天机器人" }, "analytics": { "label": "分析" diff --git a/frontend/src/settings/APIKeys.tsx b/frontend/src/settings/APIKeys.tsx index ebb322681..e27f56964 100644 --- a/frontend/src/settings/APIKeys.tsx +++ b/frontend/src/settings/APIKeys.tsx @@ -116,6 +116,13 @@ export default function APIKeys() { + {!apiKeys?.length && ( + + + {t('settings.apiKeys.noData')} + + + )} {apiKeys?.map((element, index) => ( {element.name} diff --git a/frontend/src/settings/Documents.tsx b/frontend/src/settings/Documents.tsx index ee88a98f1..288160629 100644 --- a/frontend/src/settings/Documents.tsx +++ b/frontend/src/settings/Documents.tsx @@ -74,6 +74,13 @@ const Documents: React.FC = ({ + {!documents?.length && ( + + + {t('settings.documents.noData')} + + + )} {documents && documents.map((document, index) => (