# Use only one model; it must be installed in Ollama first,
# from https://ollama.com/library
LLM_MODEL_NAME = mistral
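# For example, the model above can be installed from a terminal with:
#   ollama pull mistral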
# The location of your embeddings; delete this directory whenever
# you want to recreate the embeddings with a fresh set
# of documents
PERSISTENT_DATABASE = chroma_db
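# For example, on Linux/macOS the store can be reset with:
#   rm -rf chroma_db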
# Enable or disable Chroma anonymous telemetry with True or False
ANONYMIZED_TELEMETRY = False
# Documents placed in this location will be used to
# create the embeddings stored in the PERSISTENT_DATABASE location
SOURCE_DIRECTORY = private_documents
# https://www.sbert.net/docs/pretrained_models.html
# The sentence-transformers/all-mpnet-base-v2 model provides the best quality,
# while sentence-transformers/all-MiniLM-L6-v2 is 5 times faster and still offers good quality
# https://huggingface.co/BAAI/bge-m3
# Try the BAAI/bge-m3 model for multilingual support, coupled with an
# LLM in Ollama fine-tuned for your language of choice.
# Make LLM and embeddings model choices based on your needs and hardware capabilities.
# Important: use only one model, e.g.:
# sentence-transformers/all-MiniLM-L6-v2
# sentence-transformers/all-mpnet-base-v2
# BAAI/bge-m3
EMBEDDINGS_MODEL_NAME = sentence-transformers/all-MiniLM-L6-v2
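# For example, to switch to the multilingual model, replace the line
# above with the commented-out setting below:
# EMBEDDINGS_MODEL_NAME = BAAI/bge-m3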
# Number of documents per batch when splitting documents;
# adjust it to your needs and hardware
DOCUMENTS_BATCH_SIZE = 200
# Embeddings processing batch size; 5461 is the maximum batch size Chroma accepts
BATCH_SIZE = 5461
# Maximum number of tokens in a document chunk
CHUNK_SIZE = 500
# Number of tokens of overlap between chunks; 3 seems to
# be the ideal value, but test your own values with your documents
CHUNK_OVERLAP = 3
# Number of most relevant source chunks included in the
# answer. Change it to include more or fewer documents;
# at least 5 is recommended for a well-sourced answer.
TARGET_SOURCE_CHUNKS = 10
# Sanity check: maximum size in MB for ingested files;
# adjust it to your needs and hardware
MAX_FILE_SIZE_MB = 200
# Limit the number of processes used for document ingestion,
# useful to manage overhead on systems with many cores.
# If not defined, all detected cores are used
# DEFAULT_NUM_PROCESSES = 16
# Enable PyTorch CUDA memory allocation configuration
# if you get torch.cuda.OutOfMemoryError
# PYTORCH_CUDA_ALLOC_CONF = expandable_segments:True