diff --git a/CHANGELOG.md b/CHANGELOG.md index a12de107dc..9d4fdaa727 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## Unreleased +### New Features +- Loader for Macrometa GDN (#484) + ### Smaller Features + Bug Fixes - fix: PyMuPDF Reader broken (#547) - Add page id to extra_info (#542) diff --git a/llama_hub/library.json b/llama_hub/library.json index fef038cfef..e80719b119 100644 --- a/llama_hub/library.json +++ b/llama_hub/library.json @@ -919,11 +919,13 @@ "ZepReader": { "id": "zep", "author": "zep", + "keywords": ["zep", "retriever", "memory", "storage"] + }, + "MacrometaGDNReader": { + "id": "macrometa_gdn", + "author": "Dain Im", "keywords": [ - "zep", - "retriever", - "memory", - "storage" + "macrometa" ] }, "BagelReader": { diff --git a/llama_hub/macrometa_gdn/README.md b/llama_hub/macrometa_gdn/README.md new file mode 100644 index 0000000000..716ba44e54 --- /dev/null +++ b/llama_hub/macrometa_gdn/README.md @@ -0,0 +1,17 @@ +# Macrometa GDN Loader + +This loader takes in a Macrometa federation URL, API key, and collection name and returns a list of vectors. + +## Usage + +To use this loader, you need to pass the URL and API key through the class constructor, and then load the data using an array of collection names. 
"""Macrometa GDN Reader."""

import json
import logging
from typing import List

import requests
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document

logger = logging.getLogger(__name__)


class MacrometaGDNReader(BaseReader):
    """Macrometa GDN Reader.

    Reads every record of one or more Macrometa GDN collections through the
    cursor REST endpoint of the ``_system`` fabric.

    Args:
        url: Base URL of the Macrometa federation; a trailing slash is
            tolerated.
        apikey: API key sent in the ``Authorization`` header.
        timeout: Seconds to wait on each HTTP request before giving up.
    """

    def __init__(self, url: str, apikey: str, timeout: float = 60.0):
        self.url = url
        self.apikey = apikey
        self.timeout = timeout

    def load_data(self, collection_list: List[str]) -> List[Document]:
        """Load each requested collection into one ``Document``.

        Args:
            collection_list: Names of the collections to read. A single name
                passed as a plain string is treated as a one-element list.

        Returns:
            One ``Document`` per collection, in input order. Its text is the
            string form of the list of records fetched, and its
            ``extra_info`` holds the ``collection_name``.

        Raises:
            ValueError: If ``collection_list`` is ``None``.
        """
        if collection_list is None:
            raise ValueError("Must specify collection name(s)")
        if isinstance(collection_list, str):
            # Iterating a bare string would issue one query per character.
            collection_list = [collection_list]

        return [
            Document(
                text=self._load_collection(collection_name),
                extra_info={"collection_name": collection_name},
            )
            for collection_name in collection_list
        ]

    def _load_collection(self, collection_name: str) -> str:
        """Fetch all records of a collection from the database.

        The first batch is requested with a POST that creates a cursor;
        further batches are pulled with PUT requests on that cursor for as
        long as the response reports ``hasMore``.

        This is best effort: when a request returns an unexpected status
        code, a warning is logged and whatever was fetched so far is
        returned.

        Args:
            collection_name: Name of the collection to read from.

        Returns:
            ``str()`` of the list of records fetched (``"[]"`` when the
            initial request fails).
        """
        cursor_url = self.url.rstrip("/") + "/_fabric/_system/_api/cursor"
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "Authorization": "apikey " + self.apikey,
        }
        payload = {
            "batchSize": 1000,
            "ttl": 60,
            # The collection name is passed as a bind parameter instead of
            # being spliced into the query text, so it cannot alter the query.
            "query": "FOR doc IN @@collection RETURN doc",
            "bindVars": {"@collection": collection_name},
        }

        all_documents = []
        response = requests.post(
            cursor_url,
            headers=headers,
            data=json.dumps(payload),
            timeout=self.timeout,
        )
        # Check the status before decoding: an error body may not be JSON.
        if response.status_code != 201:
            logger.warning(
                "Initial request failed with status code %s",
                response.status_code,
            )
            return str(all_documents)

        response_json = response.json()
        all_documents.extend(response_json.get("result", []))

        while response_json.get("hasMore"):
            cursor_id = response_json.get("id")
            if cursor_id is None:
                logger.warning(
                    "Response reports more results but carries no cursor id"
                )
                break

            response = requests.put(
                f"{cursor_url}/{cursor_id}",
                headers=headers,
                timeout=self.timeout,
            )
            if response.status_code != 200:
                logger.warning(
                    "Request failed with status code %s", response.status_code
                )
                break

            response_json = response.json()
            all_documents.extend(response_json.get("result", []))

        return str(all_documents)


if __name__ == "__main__":
    reader = MacrometaGDNReader("https://api-anurag.eng.macrometa.io", "test")
    print(reader.load_data(collection_list=["test"]))