# Use only one model; it must be installed in Ollama first,
# from https://ollama.com/library
LLM_MODEL_NAME = mistral
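# For example, the model above can be installed from a terminal with:
#   ollama pull mistral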
# The location of your embeddings; delete this directory whenever
# you want to recreate the embeddings with a fresh set
# of documents
PERSISTENT_DATABASE = chroma_db
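# For example, on Linux/macOS the store can be reset with:
#   rm -rf chroma_db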
# Enable or disable Chroma anonymous telemetry with True or False
ANONYMIZED_TELEMETRY = False
# Documents placed in this location will be used to
# create the embeddings stored in the PERSISTENT_DATABASE location
SOURCE_DIRECTORY = private_documents
# https://www.sbert.net/docs/pretrained_models.html
# The sentence-transformers/all-mpnet-base-v2 model provides the best quality,
# while sentence-transformers/all-MiniLM-L6-v2 is 5 times faster and still offers good quality
# https://huggingface.co/BAAI/bge-m3
# Try the BAAI/bge-m3 model for multilingual support, coupled with an
# LLM in Ollama fine-tuned for your language of choice.
# Make LLM and embeddings model choices based on your needs and hardware capabilities.
# Important: use only one model, e.g.:
# sentence-transformers/all-MiniLM-L6-v2
# sentence-transformers/all-mpnet-base-v2
# BAAI/bge-m3
EMBEDDINGS_MODEL_NAME = sentence-transformers/all-MiniLM-L6-v2
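# For example, to switch to the multilingual model, replace the line
# above with the commented-out setting below:
# EMBEDDINGS_MODEL_NAME = BAAI/bge-m3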
# Number of documents per batch when splitting documents;
# adjust it to your needs and hardware
DOCUMENTS_BATCH_SIZE = 200
# Embeddings processing batch size; 5461 is the maximum batch size Chroma accepts
BATCH_SIZE = 5461
# Maximum number of tokens in a document chunk
CHUNK_SIZE = 500
# Number of tokens of overlap between chunks; 3 seems to
# be the ideal value, but test your own values with your documents
CHUNK_OVERLAP = 3
# Number of most relevant source chunks included in the
# answer. Change it to include more or fewer documents;
# at least 5 is recommended for a well-sourced answer.
TARGET_SOURCE_CHUNKS = 10
# Sanity check: maximum size in MB for ingested files;
# adjust it to your needs and hardware
MAX_FILE_SIZE_MB = 200
# Limit the number of processes used for document ingestion,
# useful to manage overhead on systems with many cores.
# If not defined, all detected cores are used
# DEFAULT_NUM_PROCESSES = 16
# Enable PyTorch CUDA memory allocation configuration
# if you get torch.cuda.OutOfMemoryError
# PYTORCH_CUDA_ALLOC_CONF = expandable_segments:True