Skip to content

Commit

Permalink
Update migrate_to_v1_vectorstore.py
Browse files Browse the repository at this point in the history
Enhancement made in the code by  error handling and logging batch processing and new functionalities like backup , progress tracking
  • Loading branch information
Yash-2707 authored Oct 2, 2024
1 parent 8c8bf87 commit 2989be4
Showing 1 changed file with 53 additions and 19 deletions.
72 changes: 53 additions & 19 deletions scripts/migrate_to_v1_vectorstore.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,89 @@
import pymongo
import os
import shutil
import logging
from tqdm import tqdm

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

# Configuration
MONGO_URI = "mongodb://localhost:27017/"
MONGO_ATLAS_URI = "mongodb+srv://<username>:<password>@<cluster>/<dbname>?retryWrites=true&w=majority"
DB_NAME = "docsgpt"

def backup_collection(collection, backup_collection_name):
logger.info(f"Backing up collection {collection.name} to {backup_collection_name}")
collection.aggregate([{"$out": backup_collection_name}])
logger.info("Backup completed")

def migrate_to_v1_vectorstore_mongo():
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["docsgpt"]
client = pymongo.MongoClient(MONGO_URI)
db = client[DB_NAME]
vectors_collection = db["vectors"]
sources_collection = db["sources"]

for vector in vectors_collection.find():
# Backup collections before migration
backup_collection(vectors_collection, "vectors_backup")
backup_collection(sources_collection, "sources_backup")

vectors = list(vectors_collection.find())
for vector in tqdm(vectors, desc="Updating vectors"):
if "location" in vector:
del vector["location"]
if "retriever" not in vector:
vector["retriever"] = "classic"
vector["remote_data"] = None
vectors_collection.update_one({"_id": vector["_id"]}, {"$set": vector})

# move data from vectors_collection to sources_collection
for vector in vectors_collection.find():
# Move data from vectors_collection to sources_collection
for vector in tqdm(vectors, desc="Moving to sources"):
sources_collection.insert_one(vector)

vectors_collection.drop()

client.close()
logger.info("Migration completed")

def migrate_faiss_to_v1_vectorstore():
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["docsgpt"]
client = pymongo.MongoClient(MONGO_URI)
db = client[DB_NAME]
vectors_collection = db["vectors"]

for vector in vectors_collection.find():
vectors = list(vectors_collection.find())
for vector in tqdm(vectors, desc="Migrating FAISS vectors"):
old_path = f"./application/indexes/{vector['user']}/{vector['name']}"
new_path = f"./application/indexes/{vector['_id']}"
try:
os.rename(old_path, new_path)
os.makedirs(os.path.dirname(new_path), exist_ok=True)
shutil.move(old_path, new_path)
except OSError as e:
print(f"Error moving {old_path} to {new_path}: {e}")
logger.error(f"Error moving {old_path} to {new_path}: {e}")

client.close()
logger.info("FAISS migration completed")

def migrate_mongo_atlas_vector_to_v1_vectorstore():
client = pymongo.MongoClient("mongodb+srv://<username>:<password>@<cluster>/<dbname>?retryWrites=true&w=majority")
db = client["docsgpt"]
client = pymongo.MongoClient(MONGO_ATLAS_URI)
db = client[DB_NAME]
vectors_collection = db["vectors"]

# mongodb atlas collection
documents_collection = db["documents"]

for vector in vectors_collection.find():
documents_collection.update_many({"store": vector["user"] + "/" + vector["name"]}, {"$set": {"source_id": str(vector["_id"])}})
# Backup collections before migration
backup_collection(vectors_collection, "vectors_backup")
backup_collection(documents_collection, "documents_backup")

vectors = list(vectors_collection.find())
for vector in tqdm(vectors, desc="Updating Mongo Atlas vectors"):
documents_collection.update_many(
{"store": vector["user"] + "/" + vector["name"]},
{"$set": {"source_id": str(vector["_id"])}}
)

client.close()
logger.info("Mongo Atlas migration completed")

migrate_faiss_to_v1_vectorstore()
migrate_to_v1_vectorstore_mongo()
if __name__ == "__main__":
migrate_faiss_to_v1_vectorstore()
migrate_to_v1_vectorstore_mongo()
migrate_mongo_atlas_vector_to_v1_vectorstore()

0 comments on commit 2989be4

Please sign in to comment.