From bc119cf7e885b0c05efccc1dac10356cf05806f0 Mon Sep 17 00:00:00 2001 From: kgrofelnik Date: Mon, 3 Jun 2024 12:51:36 +0200 Subject: [PATCH 1/2] Switch RAG tools from nft.storage to pinata.cloud --- rag_tools/README.md | 2 +- rag_tools/add_knowledge_base.py | 2 +- rag_tools/knowledgebase/upload_documents_use_case.py | 11 ++++++----- rag_tools/settings.py | 2 +- rag_tools/template.env | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/rag_tools/README.md b/rag_tools/README.md index b4d4783..8f23295 100644 --- a/rag_tools/README.md +++ b/rag_tools/README.md @@ -14,7 +14,7 @@ The RAG (Retrieval-Augmented Generation) Knowledge Base Script is designed to au Before you begin, ensure you have the following: - Python 3.11 or later installed on your system. - A funded wallet and corresponding private key -- An API key for `nft.storage` to facilitate document uploading to IPFS. You can obtain this key by registering at [nft.storage](https://nft.storage). +- An API key for `pinata.cloud` to facilitate document uploading to IPFS. You can obtain this key by registering at [pinata.cloud](https://www.pinata.cloud). ## Setup diff --git a/rag_tools/add_knowledge_base.py b/rag_tools/add_knowledge_base.py index d4221a2..ee3d304 100644 --- a/rag_tools/add_knowledge_base.py +++ b/rag_tools/add_knowledge_base.py @@ -39,7 +39,7 @@ def main(directory: str, chunk_size: int, chunk_overlap: int) -> None: parser.add_argument("-o", "--chunk-overlap", type=int, default=100) args = parser.parse_args() - assert settings.STORAGE_KEY, "NFT_STORAGE_API_KEY missing from .env" + assert settings.STORAGE_KEY, "PINATA_API_KEY missing from .env" if not os.path.exists(args.directory): print(f"Directory {args.directory} does not exist, exiting.") diff --git a/rag_tools/knowledgebase/upload_documents_use_case.py b/rag_tools/knowledgebase/upload_documents_use_case.py index 64c33c9..f3a3bb4 100644 --- a/rag_tools/knowledgebase/upload_documents_use_case.py +++ b/rag_tools/knowledgebase/upload_documents_use_case.py @@ -7,16 +7,17 @@ def execute(documents: List[Document]) -> str: serialized_data = _serialize_documents(documents) + multipart_data = { + "file": ("file", serialized_data, ), + } response = requests.post( - "https://api.nft.storage/upload", + "https://api.pinata.cloud/pinning/pinFileToIPFS", headers={ "Authorization": f"Bearer {settings.STORAGE_KEY}", - "Content-Type": "text/plain", }, - data=serialized_data, + files=multipart_data, ) - response.raise_for_status() - return response.json().get("value").get("cid") + return response.json().get("IpfsHash") def _serialize_documents(documents: List[Document]) -> str: diff --git a/rag_tools/settings.py b/rag_tools/settings.py index 9ddb844..f2233eb 100644 --- a/rag_tools/settings.py +++ b/rag_tools/settings.py @@ -6,7 +6,7 @@ CHAIN_ID = int(os.getenv("CHAIN_ID", "696969")) RPC_URL = os.getenv("RPC_URL", "https://devnet.galadriel.com") PRIVATE_KEY = os.getenv("PRIVATE_KEY") -STORAGE_KEY = os.getenv("NFT_STORAGE_API_KEY") +STORAGE_KEY = os.getenv("PINATA_API_KEY") ORACLE_ADDRESS = os.getenv("ORACLE_ADDRESS") ORACLE_ABI_PATH = os.getenv( "ORACLE_ABI_PATH", diff --git a/rag_tools/template.env b/rag_tools/template.env index 1887864..30775d1 100644 --- a/rag_tools/template.env +++ b/rag_tools/template.env @@ -2,4 +2,4 @@ CHAIN_ID=696969 RPC_URL="https://devnet.galadriel.com" PRIVATE_KEY="0x" ORACLE_ADDRESS="0x" -NFT_STORAGE_API_KEY="0x" \ No newline at end of file +PINATA_API_KEY="0x" \ No newline at end of file From b85d83579a22d72ae6d8ce6b86304876390239be Mon Sep 17 00:00:00 2001 From: kgrofelnik Date: Mon, 3 Jun 2024 12:53:31 +0200 Subject: [PATCH 2/2] Puts back exception raising --- rag_tools/knowledgebase/upload_documents_use_case.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rag_tools/knowledgebase/upload_documents_use_case.py b/rag_tools/knowledgebase/upload_documents_use_case.py index f3a3bb4..354db15 100644 --- a/rag_tools/knowledgebase/upload_documents_use_case.py +++ b/rag_tools/knowledgebase/upload_documents_use_case.py @@ -17,6 +17,7 @@ def execute(documents: List[Document]) -> str: }, files=multipart_data, ) + response.raise_for_status() return response.json().get("IpfsHash")