diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..778ad55 --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +OPENAI_API_KEY= + +# Update these with your Pinecone API key, environment, and index name from the Pinecone dashboard +PINECONE_API_KEY= +PINECONE_ENVIRONMENT= +PINECONE_INDEX_NAME= diff --git a/.eslintrc.json b/.eslintrc.json new file mode 100644 index 0000000..bffb357 --- /dev/null +++ b/.eslintrc.json @@ -0,0 +1,3 @@ +{ + "extends": "next/core-web-vitals" +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1759b24 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +/node_modules +/.pnp +.pnp.js + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# local env files +.env*.local +.env + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts + +#Notion_db +/Notion_DB diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..b58b603 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/gpt4-pdf-chatbot-langchain.iml b/.idea/gpt4-pdf-chatbot-langchain.iml new file mode 100644 index 0000000..0c8867d --- /dev/null +++ b/.idea/gpt4-pdf-chatbot-langchain.iml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..03d9549 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..d62182f --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8
@@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..0238bf0 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,6 @@ +{ + "trailingComma": "all", + "singleQuote": true, + "printWidth": 80, + "tabWidth": 2 +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..9819d49 --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +# GPT-4 & LangChain - Create a ChatGPT Chatbot for articles + +This is a demo project, based on [this repo](https://github.com/hwchase17/chat-langchain) + +Frontend [repo](https://github.com/idapgroup/chat-gpt-extension) + +Use the new GPT-4 API to build a ChatGPT chatbot for getting information from articles. + +The tech stack includes LangChain, Pinecone, TypeScript, OpenAI, and Next.js. LangChain is a framework that makes it easier to build scalable AI/LLM apps and chatbots. Pinecone is a vector store for storing embeddings and the text of your PDFs so that similar docs can be retrieved later. + +[Tutorial video](https://www.youtube.com/watch?v=ih9PBGVVOO4) + +[Get in touch via twitter if you have questions](https://twitter.com/mayowaoshin) + +The visual guide of this repo and tutorial is in the `visual guide` folder. + +**If you run into errors, please review the troubleshooting section further down this page.** + +## Development + +1. Clone the repo + +``` +git clone [github https url] +``` + +2. Install packages + +``` +yarn install +``` + +3. Set up your `.env` file + +- Copy `.env.example` into `.env` + Your `.env` file should look like this: + +``` +OPENAI_API_KEY= + +PINECONE_API_KEY= +PINECONE_ENVIRONMENT= + +PINECONE_INDEX_NAME= + +``` + +- Visit [openai](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key) to retrieve API keys and insert into your `.env` file. 
+- Visit [pinecone](https://pinecone.io/) to create and retrieve your API keys, and also retrieve your environment and index name from the dashboard. + +4. In the `config` folder, replace the `PINECONE_NAME_SPACE` with a `namespace` where you'd like to store your embeddings on Pinecone when you run `pnpm run ingest`. This namespace will later be used for queries and retrieval. + +5. In `utils/makechain.ts`, change the `QA_PROMPT` for your own use case. Change `modelName` in `new OpenAIChat` to `gpt-3.5-turbo` if you don't have access to `gpt-4`. Please verify outside this repo that you have access to `gpt-4`, otherwise the application will not work with it. + +## Convert your PDF files to embeddings + +**This repo can load multiple PDF files** + +1. Inside the `docs` folder, add your pdf files or folders that contain pdf files. + +2. Run the script `npm run ingest` to 'ingest' and embed your docs. If you run into errors, see the Troubleshooting section below. + +3. Check the Pinecone dashboard to verify your namespace and vectors have been added. + +## Run the app + +Once you've verified that the embeddings and content have been successfully added to your Pinecone index, you can run the app with `pnpm run dev` to launch the local dev environment, and then type a question in the chat interface. + +## Troubleshooting + +In general, keep an eye out in the `issues` and `discussions` sections of this repo for solutions. + +**General errors** + +- Make sure you're running the latest Node version. Run `node -v` +- Make sure you're using the same versions of LangChain and Pinecone as this repo. +- Check that you've created an `.env` file that contains your valid (and working) API keys, environment and index name. +- If you change `modelName` in `OpenAIChat`, note that the correct name of the alternative model is `gpt-3.5-turbo` +- Make sure you have access to `gpt-4` if you decide to use it. Test your OpenAI keys outside the repo and make sure they work and that you have enough API credits. 
+- Check that your PDF file is not corrupted and can be parsed. + +**Pinecone errors** + +- Make sure your Pinecone dashboard `environment` and `index` match the ones in the `pinecone.ts` and `.env` files. +- Check that you've set the vector dimensions to `1536`. +- Make sure your Pinecone namespace is in lowercase. +- Pinecone indexes of users on the Starter (free) plan are deleted after 7 days of inactivity. To prevent this, send an API request to Pinecone to reset the counter. +- Retry from scratch with a new Pinecone index and cloned repo. diff --git a/components/layout.tsx b/components/layout.tsx new file mode 100644 index 0000000..5e3d207 --- /dev/null +++ b/components/layout.tsx @@ -0,0 +1,24 @@ +interface LayoutProps { + children?: React.ReactNode; +} + +export default function Layout({ children }: LayoutProps) { + return ( +
+
+
+ +
+
+
+
+ {children} +
+
+
+ ); +} diff --git a/components/ui/LoadingDots.tsx b/components/ui/LoadingDots.tsx new file mode 100644 index 0000000..46f2b91 --- /dev/null +++ b/components/ui/LoadingDots.tsx @@ -0,0 +1,23 @@ +import styles from '@/styles/loading-dots.module.css'; + +const LoadingDots = ({ + color = '#000', + style = 'small', +}: { + color: string; + style: string; +}) => { + return ( + + + + + + ); +}; + +export default LoadingDots; + +LoadingDots.defaultProps = { + style: 'small', +}; diff --git a/components/ui/TextArea.tsx b/components/ui/TextArea.tsx new file mode 100644 index 0000000..aa1f87a --- /dev/null +++ b/components/ui/TextArea.tsx @@ -0,0 +1,23 @@ +import * as React from 'react'; +import { cn } from '@/utils/cn'; + +export interface TextareaProps + extends React.TextareaHTMLAttributes {} + +const Textarea = React.forwardRef( + ({ className, ...props }, ref) => { + return ( +