Skip to content

Commit

Permalink
add openai_cookbooks data
Browse files (browse the repository at this point in the history)
  • Loading branch information
omar-sol committed Jul 28, 2024
1 parent a24bc71 commit 139a897
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 25 deletions.
36 changes: 25 additions & 11 deletions scripts/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,36 +10,42 @@
AVAILABLE_SOURCES,
AVAILABLE_SOURCES_UI,
CONCURRENCY_COUNT,
custom_retriever_llamaindex,
custom_retriever_llama_index,
custom_retriever_openai_cookbooks,
custom_retriever_peft,
custom_retriever_tf,
custom_retriever_transformers,
custom_retriever_trl,
)


def update_query_engine_tools(selected_sources):
tools = []
source_mapping = {
"HF Transformers": (
custom_retriever_tf,
"Transformers Docs": (
custom_retriever_transformers,
"Transformers_information",
"""Useful for general questions asking about the artificial intelligence (AI) field. Employ this tool to fetch general information on topics such as language models theory (transformer architectures), tips on prompting, models, quantization, etc.""",
),
"PEFT": (
"PEFT Docs": (
custom_retriever_peft,
"PEFT_information",
"""Useful for questions asking about efficient LLM fine-tuning. Employ this tool to fetch information on topics such as LoRA, QLoRA, etc.""",
),
"TRL": (
"TRL Docs": (
custom_retriever_trl,
"TRL_information",
"""Useful for questions asking about fine-tuning LLMs with reinforcement learning (RLHF). Includes information about the Supervised Fine-tuning step (SFT), Reward Modeling step (RM), and the Proximal Policy Optimization (PPO) step.""",
),
"LlamaIndex Docs": (
custom_retriever_llamaindex,
custom_retriever_llama_index,
"LlamaIndex_information",
"""Useful for questions asking about retrieval augmented generation (RAG) with LLMs and embedding models. It is the documentation of the LlamaIndex framework, includes info about fine-tuning embedding models, building chatbots, and agents with llms, using vector databases, embeddings, information retrieval with cosine similarity or bm25, etc.""",
),
"OpenAI Cookbooks": (
custom_retriever_openai_cookbooks,
"openai_cookbooks_info",
"""Useful for questions asking about accomplishing common tasks with the OpenAI API. Returns example code and guides stored in Jupyter notebooks, including info about ChatGPT GPT actions, OpenAI Assistants API, and How to fine-tune OpenAI's GPT-4o and GPT-4o-mini models with the OpenAI API.""",
),
}

for source in selected_sources:
Expand Down Expand Up @@ -148,9 +154,11 @@ def format_sources(completion) -> str:
)
all_documents.append(document)

documents = "\n".join(all_documents)

return documents_answer_template.format(documents=documents)
if len(all_documents) == 0:
return ""
else:
documents = "\n".join(all_documents)
return documents_answer_template.format(documents=documents)


def save_completion(completion, history):
Expand All @@ -165,7 +173,13 @@ def vote(data: gr.LikeData):
sources = gr.CheckboxGroup(
AVAILABLE_SOURCES_UI,
label="Sources",
value=["HF Transformers", "PEFT", "TRL", "LlamaIndex Docs"],
value=[
"Transformers Docs",
"PEFT Docs",
"TRL Docs",
"LlamaIndex Docs",
"OpenAI Cookbooks",
],
interactive=True,
)
model = gr.Dropdown(
Expand Down
35 changes: 21 additions & 14 deletions scripts/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,35 +63,41 @@ def setup_database(db_collection, dict_file_name):


# Setup retrievers
custom_retriever_tf = setup_database(
custom_retriever_transformers = setup_database(
"chroma-db-transformers",
"document_dict_tf.pkl",
"document_dict_transformers.pkl",
)
custom_retriever_peft = setup_database("chroma-db-peft", "document_dict_peft.pkl")
custom_retriever_trl = setup_database("chroma-db-trl", "document_dict_trl.pkl")
custom_retriever_llamaindex = setup_database(
"chroma-db-llama-index",
"document_dict_llamaindex.pkl",
custom_retriever_llama_index = setup_database(
"chroma-db-llama_index",
"document_dict_llama_index.pkl",
)
custom_retriever_openai_cookbooks = setup_database(
"chroma-db-openai_cookbooks",
"document_dict_openai_cookbooks.pkl",
)

# Constants
CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))
MONGODB_URI = os.getenv("MONGODB_URI")

AVAILABLE_SOURCES_UI = [
"HF Transformers",
"PEFT",
"TRL",
"Transformers Docs",
"PEFT Docs",
"TRL Docs",
"LlamaIndex Docs",
"OpenAI Cookbooks",
# "Towards AI Blog",
# "RAG Course",
]

AVAILABLE_SOURCES = [
"HF_Transformers",
"PEFT",
"TRL",
"LlamaIndex",
"transformers",
"peft",
"trl",
"llama_index",
"openai_cookbooks",
# "towards_ai_blog",
# "rag_course",
]
Expand All @@ -103,10 +109,11 @@ def setup_database(db_collection, dict_file_name):
# )

__all__ = [
"custom_retriever_tf",
"custom_retriever_transformers",
"custom_retriever_peft",
"custom_retriever_trl",
"custom_retriever_llamaindex",
"custom_retriever_llama_index",
"custom_retriever_openai_cookbooks",
"CONCURRENCY_COUNT",
"MONGODB_URI",
"AVAILABLE_SOURCES_UI",
Expand Down

0 comments on commit 139a897

Please sign in to comment.