Skip to content

Commit

Permalink
Breaks document upload to chroma into batches
Browse files (browse the repository at this point in the history)
  • Loading branch information
matthewcoole committed Nov 8, 2024
1 parent cb9522b commit 63f9d0c
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions scripts/upload_to_docstore.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,6 +6,7 @@

import chromadb
from chromadb.utils import embedding_functions
from chromadb.utils.batch_utils import create_batches


def main(
Expand Down Expand Up @@ -33,8 +34,17 @@ def main(
collection = client.create_collection(
name=collection_name, embedding_function=func
)
collection.add(documents=docs, metadatas=metas, embeddings=embs, ids=ids)

batches = create_batches(
api=client, ids=ids, documents=docs, embeddings=embs, metadatas=metas
)
for batch in batches:
collection.add(
documents=batch[3],
metadatas=batch[2],
embeddings=batch[1],
ids=batch[0],
)


if __name__ == "__main__":
parser = ArgumentParser("prepare_data.py")
Expand Down

0 comments on commit 63f9d0c

Please sign in to comment.