Merge branch 'emptycrown:main' into main

Tachikoma000 · Sep 29, 2023 · 85a0732 · 85a0732
2 parents 342f0e6 + a207c69
commit 85a0732
Show file tree

Hide file tree

Showing 6 changed files with 119 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,8 @@
 ## Unreleased
 
+### New Features
+- Loader for Macrometa GDN (#484)
+
 ### Smaller Features + Bug Fixes
 - fix: PyMuPDF Reader broken (#547)
 - Add page id to extra_info (#542) 

diff --git a/llama_hub/library.json b/llama_hub/library.json
@@ -919,11 +919,13 @@
   "ZepReader": {
     "id": "zep",
     "author": "zep",
+    "keywords": ["zep", "retriever", "memory", "storage"]
+  },
+  "MacrometaGDNReader": {
+  	"id": "macrometa_gdn",
+  	"author": "Dain Im",
     "keywords": [
-      "zep",
-      "retriever",
-      "memory",
-      "storage"
+      "macrometa"
     ]
   },
   "BagelReader": {

diff --git a/llama_hub/macrometa_gdn/README.md b/llama_hub/macrometa_gdn/README.md
@@ -0,0 +1,17 @@
+# Macrometa GDN Loader
+
+This loader takes in a Macrometa federation URL, an API key, and a list of collection names, and returns the vectors stored in each collection as a list of documents (one document per collection).
+
+## Usage
+
+To use this loader, pass the URL and API key to the class constructor, then load the data by passing a list of collection names to `load_data`.
+
+```python
+from llama_index import download_loader
+
+MacrometaGDNReader = download_loader('MacrometaGDNReader')
+
+collections = ['test_collection']
+loader = MacrometaGDNReader(url="https://api-macrometa.io",apikey="test")
+vectors= loader.load_data(collection_list=collections)
+```
diff --git a/llama_hub/macrometa_gdn/__init__.py b/llama_hub/macrometa_gdn/__init__.py
@@ -0,0 +1 @@
+"""Init file."""
diff --git a/llama_hub/macrometa_gdn/base.py b/llama_hub/macrometa_gdn/base.py
@@ -0,0 +1,90 @@
+"""Macrometa GDN Reader."""
+
+from typing import List
+import requests
+import json
+
+from llama_index.readers.base import BaseReader
+from llama_index.readers.schema.base import Document
+
+
+class MacrometaGDNReader(BaseReader):
+    """Macrometa GDN Reader.
+
+    Reads the records stored in Macrometa GDN collections through the GDN
+    cursor REST API and wraps each collection in a ``Document``.
+
+    Args:
+        url: Base URL of the Macrometa GDN federation.
+        apikey: API key sent in the ``Authorization`` header.
+    """
+
+    # Seconds to wait on each HTTP call so that a stalled connection cannot
+    # block the loader forever (requests has no timeout by default).
+    request_timeout = 60
+
+    def __init__(self, url: str, apikey: str):
+        self.url = url
+        self.apikey = apikey
+
+    def load_data(self, collection_list: List[str]) -> List[Document]:
+        """Load every requested collection as one ``Document``.
+
+        Args:
+            collection_list: Names of the collections to read. A single name
+                passed as a plain string is treated as a one-item list.
+
+        Returns:
+            One ``Document`` per collection, in input order. Its text is the
+            stringified list of records and its ``extra_info`` carries the
+            collection name.
+
+        Raises:
+            ValueError: If ``collection_list`` is ``None``.
+        """
+        if collection_list is None:
+            raise ValueError("Must specify collection name(s)")
+        if isinstance(collection_list, str):
+            # Iterating a bare string would query one collection per character.
+            collection_list = [collection_list]
+
+        return [
+            Document(
+                text=self._load_collection(collection_name),
+                extra_info={"collection_name": collection_name},
+            )
+            for collection_name in collection_list
+        ]
+
+    def _load_collection(self, collection_name: str) -> str:
+        """Fetch all records of one collection through the cursor API.
+
+        This is best effort: when the server answers with an unexpected HTTP
+        status, a message is printed and whatever was collected so far is
+        returned.
+
+        Args:
+            collection_name: Name of the collection to read from.
+
+        Returns:
+            ``str()`` of the list of records that were retrieved.
+
+        Raises:
+            requests.RequestException: On a network failure or timeout.
+        """
+        # Tolerate a trailing slash on the configured base URL.
+        cursor_url = self.url.rstrip("/") + "/_fabric/_system/_api/cursor"
+        headers = {
+            "accept": "application/json",
+            "content-type": "application/json",
+            "Authorization": "apikey " + self.apikey,
+        }
+
+        # The collection name travels as a bind parameter instead of being
+        # spliced into the query text, so it can neither inject query syntax
+        # nor break on names that would need quoting.
+        payload = {
+            "batchSize": 1000,
+            "ttl": 60,
+            "query": "FOR doc IN @@collection RETURN doc",
+            "bindVars": {"@collection": collection_name},
+        }
+
+        all_documents = []
+        response = requests.post(
+            cursor_url,
+            headers=headers,
+            data=json.dumps(payload),
+            timeout=self.request_timeout,
+        )
+        if response.status_code != 201:
+            print(f"Initial request failed with status code {response.status_code}")
+            return str(all_documents)
+
+        # Decode the body only after the status check: an error response is
+        # not guaranteed to contain JSON.
+        response_json = response.json()
+        all_documents.extend(response_json.get("result", []))
+
+        while response_json.get("hasMore"):
+            cursor_id = response_json.get("id")
+            if not cursor_id:
+                # Without a cursor id there is no next page to request.
+                break
+
+            response = requests.put(
+                f"{cursor_url}/{cursor_id}",
+                headers=headers,
+                timeout=self.request_timeout,
+            )
+            if response.status_code != 200:
+                print(f"Request failed with status code {response.status_code}")
+                break
+
+            response_json = response.json()
+            all_documents.extend(response_json.get("result", []))
+
+        return str(all_documents)
+
+
+if __name__ == "__main__":
+    # Manual smoke test: read the "test" collection and print the documents.
+    demo_reader = MacrometaGDNReader("https://api-anurag.eng.macrometa.io", "test")
+    loaded_documents = demo_reader.load_data(collection_list=["test"])
+    print(loaded_documents)
diff --git a/llama_hub/macrometa_gdn/requirements.txt b/llama_hub/macrometa_gdn/requirements.txt
@@ -0,0 +1,2 @@
+requests
+json