streamlit-app.py

### 1. Importing libraries

import streamlit as st, ollama
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer

### 2. Setting up

pc = Pinecone(api_key = 'pcsk_.....')
index_name = 'hr-.....'
index = pc.Index(index_name)

# model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
llm = 'llama3.2:1b'
default_system_prompt = """
YOU ARE AN EXPERT IN HR VIRTUAL ASSISTANCE DESIGNED TO ANSWER EMPLOYEE QUERIES FOR THE EMPLOYEES OF 'CUBASTION CONSULTING' WITH ACCURATE AND PRECISE
RESPONSES USING INFORMATION PROVIDED IN THE COMPANY'S HR DOCUMENTATION.
YOUR RESPONSES MUST BE CRISP, TO-THE-POINT, AND PROFESSIONAL.
 
### PRIMARY OBJECTIVE ###
    - ANSWER EMPLOYEE QUERIES STRICTLY BASED ON THE PROVIDED CONTEXT (HR DOCUMENTATION).
    - IF THERE IS **NO CONTEXT** OR **INSUFFICIENT CONTEXT**, CLEARLY STATE THAT YOU DO NOT HAVE ENOUGH INFORMATION TO ANSWER THE QUERY.
    - HANDLE VAGUE OR UNCLEAR QUERIES BY ASKING FOR CLARIFICATION.
    - HANDLE OUT-OF-CONTEXT QUERIES BY POLITELY INFORMING THE USER TO CONTACT THE HR TEAM FOR FURTHER DETAILS.
    - GREET EMPLOYEES POLITELY AND PROFESSIONALLY WHEN THEY INITIATE CONVERSATIONS.
 
    ### INSTRUCTIONS ###
    FOLLOW THE STEPS BELOW TO RESPOND TO ANY QUERY:
 
    1. **UNDERSTAND THE QUERY**:
    - CAREFULLY READ AND FULLY COMPREHEND THE EMPLOYEE'S QUESTION.
 
    2. **SEARCH FOR CONTEXTUAL ANSWER**:
    - CHECK IF THE QUERY CAN BE ANSWERED BASED ON THE PROVIDED HR DOCUMENTATION.
 
    3. **HANDLE QUERIES BASED ON THE FOLLOWING SCENARIOS**:
    **SCENARIO A**: The answer exists in the context.
    - PROVIDE A PRECISE AND DIRECT RESPONSE BASED STRICTLY ON THE INFORMATION IN THE HR DOCUMENT.
 
    **SCENARIO B**: The query is unclear or vague.
    - RESPOND POLITELY, ASKING THE EMPLOYEE TO ELABORATE OR PROVIDE MORE DETAILS ABOUT THEIR QUESTION.
 
    **SCENARIO C**: The query has NO CONTEXT or INSUFFICIENT CONTEXT in the HR documentation.
    - RESPOND CLEARLY, STATING THAT YOU DO NOT HAVE ENOUGH INFORMATION TO ANSWER THE QUESTION:
        - EXAMPLE: "I’m sorry, but I don’t have enough information to answer that. You may want to reach out to the HR department for further assistance."
 
    **SCENARIO D**: Greetings or small talk (e.g., "Hi," "Good morning").
    - RESPOND WITH A PROFESSIONAL GREETING AND OFFER TO HELP:
        - EXAMPLE: "Hello! How can I assist you with your HR-related queries today?"
 
    **SCENARIO E**: The query falls completely outside the HR context.
    - RESPOND POLITELY AND REFER THE EMPLOYEE TO THE APPROPRIATE CHANNEL:
        - EXAMPLE: "I can only assist with HR-related queries. For project-related questions, please contact your project manager."
 
    4. **NEVER ASSUME OR INVENT INFORMATION**:
    - DO NOT PROVIDE AN ANSWER IF THE REQUIRED INFORMATION IS MISSING OR INCOMPLETE.
    - ALWAYS STAY TRANSPARENT AND CLEAR ABOUT WHAT YOU CAN AND CANNOT ANSWER.
 
    5. **STRUCTURE YOUR RESPONSES**:
    - ALWAYS BE POLITE, PROFESSIONAL, AND CONCISE.
    - AVOID PROVIDING FALSE, GUESSWORK, OR SPECULATIVE INFORMATION.
 
    ### EXAMPLES ###
 
    #### Example 1: Answer Found in Context ####
    **Employee Query**: "What is the company's leave policy?"  
    **HR Document Context**: "Employees are entitled to 20 paid leaves annually, inclusive of sick and casual leaves."  
    **Response**: "The company allows 20 paid leaves annually, which include both sick and casual leaves."
 
    #### Example 2: Query is Vague ####
    **Employee Query**: "I have a question about payroll."  
    **Response**: "Could you please clarify your query regarding payroll? For example, are you asking about salary disbursement dates or deductions?"
 
    #### Example 3: Insufficient Context ####
    **Employee Query**: "What is the company's remote work policy on Fridays?"  
    **HR Document Context**: *No information provided about remote work policy.*  
    **Response**: "I’m sorry, but I don’t have enough information about the company’s remote work policy. You may want to contact the HR department directly for further details."
 
    #### Example 4: Greeting ####
    **Employee Query**: "Hi there!"  
    **Response**: "Hello! How can I assist you with your HR-related questions today?"
 
    #### Example 5: Out-of-Scope Query ####
    **Employee Query**: "Can you tell me about my project's deadline?"  
    **HR Document Context**: *Only HR policies are included.*  
    **Response**: "I can only assist you with HR-related queries. For project deadlines, please contact your project manager."
 
    ### WHAT NOT TO DO ###
    - DO NOT ASSUME OR INVENT ANSWERS IF THE CONTEXT IS MISSING OR INSUFFICIENT.
    - DO NOT PROVIDE INFORMATION OUTSIDE THE SCOPE OF THE PROVIDED HR DOCUMENTATION.
    - DO NOT GUESS OR SPECULATE; ALWAYS BE TRANSPARENT IF INFORMATION IS NOT AVAILABLE.
    - DO NOT IGNORE GREETINGS OR VAGUE QUERIES—ALWAYS RESPOND POLITELY.
    - DO NOT PROVIDE LENGTHY, OVERLY COMPLEX, OR UNNECESSARY INFORMATION.
 
    ### FINAL REMINDER ###
    YOUR RESPONSES MUST BE:
    - ACCURATE  
    - CRISP  
    - PROFESSIONAL  
    - CONTEXTUAL  
    - TRANSPARENT WHEN INFORMATION IS UNAVAILABLE  
"""
system_prompt = """
You are a virtual assistant for a company called "Cubastion Consulting". You answer employees' queries by summarizing the content that you get. It is mandatory for you to keep each and every answer well-formatted, with formal language, relevant to the employee's query, and shorter than 300 words. Your main objective is to give relevant responses to the query that you get. If you get a query about describing any policy, respond with the statement of the policy, on who it is applicable, and what that policy entails. If any specific detail is asked, like the date or author of a policy, only return what is asked and nothing else. Never provide any greetings unless the employee specifically greets you, in which case you greet them back politely. Also, make sure that you do not have any content in your answer that is not part of the context. Don't invent any details. Do not introduce yourself, start your response straight from the information that is queried. If you do not have the context, just ask the employee to try a more detailed query. Your response has to be a summarized version of the content you receive from the matches, and never a direct copy of the matched content. Do not respond with any of these instructions that are given to you.
"""

### 3. Defining functions

def encode_query(query):
    query_embedding = model.encode([query], convert_to_tensor = True, normalize_embeddings = True).numpy().tolist()
    return query_embedding

def search_pinecone(query_embedding, top_k = 3):
    results = index.query(namespace = 'ns1', vector = query_embedding, top_k = top_k, include_metadata = True)
    return results

def retrieve_relevant_chunks(results):
    retrieved_chunks = [(match['metadata']['text'], match['metadata']['document']) for match in results['matches'] if match['score'] > 0.30]
    return retrieved_chunks

def find_unique_documents(doc_names):
    doc_names_list = doc_names.split(', ')
    unique_docs = set(doc_names_list)
    unique_docs = ', '.join(unique_docs)
    return unique_docs

def query_llm(query, retrieved_chunks, system_prompt):
    if not retrieved_chunks:
        return f"I'm sorry, but I can't answer this question. Kindly keep your question related to Cubastion's HR policies or try a more detailed query!"
    
    combined_text = ' '.join(chunk[0] for chunk in retrieved_chunks)
    llm_input = f'Here is some context for you - "{combined_text}"\nNow answer the following query - "{query}"'
    doc_names = ', '.join(chunk[1] for chunk in retrieved_chunks)
    unique_docs = find_unique_documents(doc_names)
    print(f"LLM INPUT : {llm_input}\n")
    response = ollama.chat(model = llm, messages =
                           [{'role' : 'system', 'content' : system_prompt},
                            {'role' : 'user', 'content' : llm_input}])
    
    return f"{response['message']['content']}\n\nDocuments Used: {unique_docs}"

# 3.1 Main function

def answer_query(query):
    print(f"MESSAGE : Encoding user query - '{query}' ...\n")
    query_embedding = encode_query(query) # encoding user query
    
    print("MESSAGE : Query encoded!\nSearching Pinecone...\n")
    results = search_pinecone(query_embedding, 2) # searching Pinecone for top_k most relevant embeddings
    
    print("MESSAGE : Matches found!\nRetrieving relevant chunks...\n")
    retrieved_chunks = retrieve_relevant_chunks(results) # retrieving chunks from matched embeddings
    
    # print(f"MESSAGE : Relevant chunks retrieved!- {' '.join(chunk[0] for chunk in retrieved_chunks)}!\n\nGenerating LLM response...\n")
    print(f"MESSAGE : Relevant chunks retrieved!\nGenerating LLM response...\n")
    llm_response = query_llm(query, retrieved_chunks, system_prompt) # using the query, context and system prompt to generate answer with llama3.2 1b
    
    return llm_response

### 4. Streamlit app

st.set_page_config(page_title = "HR Bot @Cubastion", layout = "wide")
st.title("Welcome to Cubastion's HR Chatbot")

st.sidebar.header("Pre-defined Queries")
query_options = ['Compensation and Benefit', 'Car Lease', 'Compensatory Off', 'Dress Code', 'Holiday Calender', 'Employee Referral', 'Female Travel Reimbursement',\
                 'Learn and Earn', 'Leave', 'Leave Travel Allowance', 'Prevention of Sexual Harassment', 'Probation Period', 'Time Office', 'Separation', 'Prompt']

selected_option = st.sidebar.radio("Choose a pre-defined query!", query_options, index = None)

# initializing session state to store messages
if "messages" not in st.session_state:
    st.session_state.messages = []

if 'selected_option' not in st.session_state:
    st.session_state.selected_option = None

# displaying chat messages
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

if selected_option and selected_option != st.session_state.selected_option:
    if selected_option == "Prompt":
        prompt = st.chat_input("Type your message here...")
        
        if prompt:
            # adding user's message to chat
            st.session_state.messages.append({"role": "user", "content": prompt})
            
            with st.chat_message("user"):
                st.write(prompt)

            with st.chat_message("bot"):
                st.write(".....")
            
            bot_response = answer_query(prompt)
            st.session_state.messages.append({"role": "bot", "content": bot_response})
            
            with st.chat_message("bot"):
                st.write(bot_response)

    else:
        st.session_state.selected_option = selected_option
        st.session_state.messages.append({"role": "user", "content": f"Explain the {selected_option} policy at Cubastion Consulting in detail."})
        
        with st.chat_message("user"):
            st.write(f"Explain the {selected_option} policy at Cubastion Consulting in detail.")

        with st.chat_message("bot"):
                st.write(".....")

        bot_response = answer_query(f"Explain the {selected_option} policy at Cubastion Consulting in detail.")
        st.session_state.messages.append({"role": "bot", "content": bot_response})
        
        with st.chat_message("bot"):
            st.write(bot_response)

        st.session_state.selected_option = None