From 15a621bf61ca8991a6e31946f1562c7d321d9227 Mon Sep 17 00:00:00 2001 From: savini98 Date: Mon, 29 Jul 2024 21:46:08 +0530 Subject: [PATCH] update formatting --- examples/canonirag/final_impl/app.jac | 138 ++++++++++++-------------- examples/canonirag/final_impl/rag.jac | 92 ++++++++--------- 2 files changed, 107 insertions(+), 123 deletions(-) diff --git a/examples/canonirag/final_impl/app.jac b/examples/canonirag/final_impl/app.jac index 092fdde03..cb33d6bc6 100644 --- a/examples/canonirag/final_impl/app.jac +++ b/examples/canonirag/final_impl/app.jac @@ -1,13 +1,14 @@ import:py json; import:py random; -import:py from mtllm.llms {OpenAI} +import:py from mtllm.llms { OpenAI } import:jac gui; -import:jac from rag {rag_engine} +import:jac from rag { rag_engine } glob llm1 = OpenAI(model_name='gpt-3.5-turbo'); + glob llm2 = OpenAI(model_name='gpt-4'); -glob RagEngine:rag_engine = rag_engine(); +glob RagEngine: rag_engine = rag_engine(); enum PR_states { init_PR = 'initial run', @@ -20,57 +21,52 @@ glob state = PR_states.init_PR; ''' User node has user_name attribute. ''' -node user{ - has user_name:string="user"; +node user { + has user_name: string = "user"; } ''' Chat session of a user. This node contains the session_id, user_data and todo_list. This should also include the chat history. Can have multiple chat sessions. ''' -node session{ - has session_id : string = 100; - has user_data : dict = {}; # User data such as habits, heart rate etc. - has todo_list : list = []; # List of things to do by the user. - has messages : list = []; # Chat history of the user. +node session { + has session_id: string = 100; + has user_data: dict = {}; # User data such as habits, heart rate etc. + has todo_list: list = []; # List of things to do by the user. + has messages: list = []; # Chat history of the user. } ''' Consists of user data such as age, pressure, married status. ''' -node data{ - has user_data:dict = { - "age" : 0, - "Pressure" : (0,0), - "Married" : False - }; +node data { + has user_data: dict = {"age": 0, "Pressure": (0, 0), "Married": False}; } ''' List of things to do by the user. ''' -node todo{ - has todo_list:list = []; +node todo { + has todo_list: list = []; } - ''' This is the chat walker which will be used to chat with the user. The create_session ability: - - gather the user data and todo list from connected nodes using filters. - - Creates a new session with the user data and todo list. - - Spawns the chat session with the session id. +- gather the user data and todo list from connected nodes using filters. +- Creates a new session with the user data and todo list. +- Spawns the chat session with the session id. ''' -walker chat{ +walker chat { can create_session with user entry; } ''' This is where we create the graph. ''' -walker create_graph{ - has user_data:dict = {}; - has todo_list:list = []; +walker create_graph { + has user_data: dict = {}; + has todo_list: list = []; can generate_graph with `root entry; } @@ -78,26 +74,26 @@ walker create_graph{ ''' This is the query walker which will be used to query a user input and generate a response. The init_query_engine ability: - - Gathers the user data and todo list from the user node. - - Asks the user for the routing method. - - Creates a router node and connects it to the RAG_chat, todo_chat and user_qa_chat nodes. - - Visits the router node. -''' -walker query{ - has query:str = ''; - has user_session_id:str=''; - has session_chat_history:list=[]; - has user_data: 'data about the health status of the user' :dict={}; - has todo_list: 'The tasks to be done by the user in the future.' :list=[]; - has messages:list = []; +- Gathers the user data and todo list from the user node. +- Asks the user for the routing method. +- Creates a router node and connects it to the RAG_chat, todo_chat and user_qa_chat nodes. +- Visits the router node. +''' +walker query { + has query: str = ''; + has user_session_id: str = ''; + has session_chat_history: list = []; + has user_data: 'data about the health status of the user': dict = {}; + has todo_list: 'The tasks to be done by the user in the future.': list = []; + has messages: list = []; can init_query_engine with session entry; } enum task_type { - RAG_TYPE : 'Need to use Retrivable information in specific medical articles' = "RAG", - QA_TYPE : 'Need to use given user information' = "user_qa", - TODO_TYPE : 'Need to use the todo list of the user' = "todo" + RAG_TYPE: 'Need to use Retrivable information in specific medical articles' = "RAG", + QA_TYPE: 'Need to use given user information' = "user_qa", + TODO_TYPE: 'Need to use the todo list of the user' = "todo" } glob router_examples: 'Examples of routing of a query': dict[str, task_type] = { @@ -112,10 +108,11 @@ This is the router node which will be used to route the user query to the approp node router { can route with query entry; can 'route the query to the appropriate task type' - router_with_llm(query:'Query from the user to be routed.':str, - todo_list:'The tasks to be done by the user in the future.':list, - user_data:'data about the health status of the user.':dict) -> task_type - by llm2(method="Reason", temperature=0.0, incl_info=(router_examples)); + router_with_llm(query: 'Query from the user to be routed.': str, todo_list: 'The tasks to be done by the user in the future.': list, user_data: 'data about the health status of the user.': dict) -> task_type by llm2( + method="Reason", + temperature=0.0, + incl_info=(router_examples) + ); } ''' @@ -125,11 +122,7 @@ TODO:Not yet implemented. node RAG_chat { can run_RAG with query entry; can 'You are a warmly Health Assistant named JacCare. Give a response based on the query using todo list. and user data' - chat_llm(todo_list:'The tasks to be done by the user in the future.':list, - user_data:'data about the health status of the user.':dict, - query:'Question from the user to be answered.':str, - retrived_context : 'Retrived information from expert articles':list, - chat_history:'Previous Conversation with the user':list) -> 'response':str by llm1(temperature =0.5); + chat_llm(todo_list: 'The tasks to be done by the user in the future.': list, user_data: 'data about the health status of the user.': dict, query: 'Question from the user to be answered.': str, retrived_context: 'Retrived information from expert articles': list, chat_history: 'Previous Conversation with the user': list) -> 'response': str by llm1(temperature=0.5); } ''' @@ -138,10 +131,7 @@ This is the TODO chat node which will be used to chat with the user using the to node todo_chat { can run_todo with query entry; can 'You are a warmly Health Assistant named JacCare. Give a response based on the query using todo list. and user data' - chat_llm(todo_list:'The tasks to be done by the user in the future.':list, - user_data:'data about the health status of the user.':dict, - query:'Question from the user to be answered.':str, - chat_history:'Previous Conversation with the user':list) -> 'response':str by llm1(temperature =0.5); + chat_llm(todo_list: 'The tasks to be done by the user in the future.': list, user_data: 'data about the health status of the user.': dict, query: 'Question from the user to be answered.': str, chat_history: 'Previous Conversation with the user': list) -> 'response': str by llm1(temperature=0.5); } ''' @@ -151,13 +141,12 @@ TODO:Not yet implemented. node user_qa_chat { can run_user_qa with query entry; can 'You are a warmly Health Assistant named JacCare. Give a response based on the query, using user data' - chat_llm(user_data:'data about the health status of the user.':dict, - query:'Question form the user.':str, - chat_history:'Previous Conversation with the user':list) -> 'response':str by llm1(temperature =0.5); + chat_llm(user_data: 'data about the health status of the user.': dict, query: 'Question form the user.': str, chat_history: 'Previous Conversation with the user': list) -> 'response': str by llm1(temperature=0.5); } -walker save_data{ - has messages:list = []; +walker save_data { + has messages: list = []; + can go_to_router with todo_chat | user_qa_chat | RAG_chat entry { visit [<--]; } @@ -165,6 +154,7 @@ walker save_data{ can go_to_session with router entry { visit [<--]; } + can save_data with session entry { here.messages = self.messages; disengage; @@ -172,33 +162,33 @@ walker save_data{ } can main { - :g: state; - :g: RagEngine; - + :g: state ; + + :g: RagEngine ; + gui.page_build(); - if state == PR_states.init_PR { # Import the data from JSON file - with open('user_info.json', 'r') as f{ + with open('user_info.json', 'r') as f { imported_data = json.load(f); } - - create_graph( user_data = imported_data['user_data'], - todo_list = imported_data['todo_list']) spawn root; - print('Graph created'); - state = PR_states.chat_create; - query() spawn [[root -->](`?user)[0] -->](`?session)[0]; + create_graph( + user_data=imported_data['user_data'], + todo_list=imported_data['todo_list'] + ) spawn root; + print('Graph created'); + state = PR_states.chat_create; + query() spawn [[root-->](`?user)[0]-->](`?session)[0]; } elif state == PR_states.chat_create { - query() spawn [[root -->](`?user)[0] -->](`?session)[0]; + query() spawn [[root-->](`?user)[0]-->](`?session)[0]; print('Chatting with user'); } else { print('Invalid state'); } - } with entry { - if __name__ == "__main__"{ + if __name__ == "__main__" { main(); } -} \ No newline at end of file +} diff --git a/examples/canonirag/final_impl/rag.jac b/examples/canonirag/final_impl/rag.jac index 0a3c5234f..2d331e97d 100644 --- a/examples/canonirag/final_impl/rag.jac +++ b/examples/canonirag/final_impl/rag.jac @@ -1,34 +1,36 @@ -import: py from langchain_community.document_loaders {PyPDFDirectoryLoader} -import: py from langchain_text_splitters {RecursiveCharacterTextSplitter} -import: py from langchain.schema.document {Document} -import: py from langchain_community.embeddings.ollama {OllamaEmbeddings} -import: py from langchain_community.vectorstores.chroma {Chroma} -import: py os; +import:py from langchain_community.document_loaders { PyPDFDirectoryLoader } +import:py from langchain_text_splitters { RecursiveCharacterTextSplitter } +import:py from langchain.schema.document { Document } +import:py from langchain_community.embeddings.ollama { OllamaEmbeddings } +import:py from langchain_community.vectorstores.chroma { Chroma } +import:py os; obj rag_engine { - has file_path:str="books"; - has chroma_path:str = "chroma"; + has file_path: str = "books"; + has chroma_path: str = "chroma"; can postinit { - documents:list = self.load_documents(); - chunks:list = self.split_documents(documents); + documents: list = self.load_documents(); + chunks: list = self.split_documents(documents); self.add_to_chroma(chunks); } - can load_documents{ + can load_documents { document_loader = PyPDFDirectoryLoader(self.file_path); return document_loader.load(); } - can split_documents(documents: list[Document]){ - text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, - chunk_overlap=80, - length_function=len, - is_separator_regex=False); + can split_documents(documents: list[Document]) { + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=800, + chunk_overlap=80, + length_function=len, + is_separator_regex=False + ); return text_splitter.split_documents(documents); } - can get_embedding_function{ + can get_embedding_function { embeddings = OllamaEmbeddings(model='nomic-embed-text'); return embeddings; } @@ -38,74 +40,66 @@ obj rag_engine { Example: "books/Oxford Handbook.pdf:100:2: Template: "Page Source:Page Number:Chunk Index" ''' - can add_chunk_id(chunks:str){ + can add_chunk_id(chunks: str) { last_page_id = None; current_chunk_index = 0; - - for chunk in chunks{ + + for chunk in chunks { source = chunk.metadata.get('source'); page = chunk.metadata.get('page'); current_page_id = f'{source}:{page}'; - - if current_page_id == last_page_id{ - current_chunk_index +=1; - } - - else{ + if current_page_id == last_page_id { + current_chunk_index += 1; + } else { current_chunk_index = 0; } - chunk_id = f'{current_page_id}:{current_chunk_index}'; last_page_id = current_page_id; - chunk.metadata['id'] = chunk_id; } - return chunks; } ''' Creating/ Updating the Vector Database ''' - can add_to_chroma(chunks: list[Document]){ + can add_to_chroma(chunks: list[Document]) { #Loading the existing database - db = Chroma(persist_directory=self.chroma_path, embedding_function=self.get_embedding_function()); - chunks_with_ids = self.add_chunk_id(chunks); #Chunk ID Calculation - + db = Chroma( + persist_directory=self.chroma_path, + embedding_function=self.get_embedding_function() + ); + chunks_with_ids = self.add_chunk_id(chunks);#Chunk ID Calculation + #Adding and Updating the documents existing_items = db.get(include=[]); existing_ids = set(existing_items['ids']); - + #Only adding new documents that does not exist in the database new_chunks = []; - for chunk in chunks_with_ids{ - if chunk.metadata['id'] not in existing_ids{ + for chunk in chunks_with_ids { + if chunk.metadata['id'] not in existing_ids { new_chunks.append(chunk); } - } - - if len(new_chunks){ + if len(new_chunks) { print('adding new documents'); new_chunk_ids = [chunk.metadata['id'] for chunk in new_chunks]; db.add_documents(new_chunks, ids=new_chunk_ids); - } - - else{ + } else { print('no new documents to add'); } - } ''' Retreive the relevent chunks upon a query. ''' - can get_from_chroma(query:str,chunck_nos:int=5){ - db = Chroma(persist_directory=self.chroma_path, embedding_function=self.get_embedding_function()); - results = db.similarity_search_with_score(query,k=chunck_nos); + can get_from_chroma(query: str, chunck_nos: int=5) { + db = Chroma( + persist_directory=self.chroma_path, + embedding_function=self.get_embedding_function() + ); + results = db.similarity_search_with_score(query, k=chunck_nos); return results; } - } - -