Refined prompt templates to improve model behavior

Updated the prompt templates and improved their alignment with the expected outputs. Conducted stress testing across various leaf nodes to ensure accuracy and relevance.

Signed-off-by: Oindrilla Chatterjee <[email protected]>
Co-authored-by: Aakanksha Duggal <[email protected]>
Co-authored-by: Shiv <[email protected]>
instructlab · Jul 2, 2024 · 1f89b1a · 1f89b1a
1 parent 8c26333
commit 1f89b1a
Show file tree

Hide file tree

Showing 8 changed files with 27 additions and 24 deletions.
diff --git a/scripts/test_freeform_skills.py b/scripts/test_freeform_skills.py
@@ -49,7 +49,7 @@
 
 ds = Dataset.from_list(samples)
 
-skills_flow = SynthSkillsFlow(client, teacher_model).get_flow()
+skills_flow = SynthSkillsFlow(client, "mixtral", teacher_model, 1).get_flow()
 skills_pipe = Pipeline(skills_flow)
 
 sdg = SDG([skills_pipe])

diff --git a/scripts/test_grounded_skills.py b/scripts/test_grounded_skills.py
@@ -22,7 +22,7 @@
 
 samples = [
     {
-        'seed_context': """*Ms. Thompson:* Good morning, everyone. Today, we''re here to discuss
+        "seed_context": """*Ms. Thompson:* Good morning, everyone. Today, we''re here to discuss
     our customer journey mapping and analysis. I believe this is crucial to understanding
     our customers'' experiences and improving our services.
 
@@ -64,9 +64,9 @@
 
     *Mr. Patel:* Sounds good. We''ll reconvene in a week to share our findings and
     discuss next steps.""",
-        'seed_question': """Generate a company wide email based on the given meeting transcript""", 
-        'task_description': 'Writing emails based on minutes of meeting', 
-        'seed_response': """Subject: Recap and Next Steps - Customer Journey Mapping and Analysis\n\
+        "seed_question": """Generate a company wide email based on the given meeting transcript""",
+        "task_description": "Writing emails based on minutes of meeting",
+        "seed_response": """Subject: Recap and Next Steps - Customer Journey Mapping and Analysis\n\
     \nDear [Company Name] Team,\n\nI hope this email finds you well. Yesterday, we\
     \ gathered to discuss our customer journey mapping and analysis, aiming to understand\
     \ our customers' experiences and identify opportunities for improvement. The discussions\
@@ -97,7 +97,7 @@
 
 ds = Dataset.from_list(samples)
 
-skills_flow = SynthGroundedSkillsFlow(client, teacher_model).get_flow()
+skills_flow = SynthGroundedSkillsFlow(client, "mixtral", teacher_model, 10).get_flow()
 skills_pipe = Pipeline(skills_flow)
 
 sdg = SDG([skills_pipe])

diff --git a/scripts/test_knowledge.py b/scripts/test_knowledge.py
@@ -38,8 +38,8 @@
 
 ds = Dataset.from_list(samples)
 
-mmlu_flow = MMLUBenchFlow(client, teacher_model).get_flow()
-knowledge_flow = SynthKnowledgeFlow(client, teacher_model).get_flow()
+mmlu_flow = MMLUBenchFlow(client, "mixtral", teacher_model, 1).get_flow()
+knowledge_flow = SynthKnowledgeFlow(client, "mixtral", teacher_model, 1).get_flow()
 knowledge_pipe = Pipeline(knowledge_flow)
 mmlu_pipe = Pipeline(mmlu_flow)
 

diff --git a/src/instructlab/sdg/configs/skills/evaluate_grounded_pair.yaml b/src/instructlab/sdg/configs/skills/evaluate_grounded_pair.yaml
@@ -31,6 +31,7 @@ examples: |
    [End of Score]
 
 generation: |
+  Begin your evaluation by providing a short explanation. Be as objective as possible. After providing your explanation, you must rate the answer on a scale of 1 to 3 as mentioned above. 
   Here's the context, question and the answer you need to evaluate:
   
   [Start of Context]
@@ -45,7 +46,6 @@ generation: |
   {answer}
   [End of Answer]
 
-  Begin your evaluation by providing a short explanation. Be as objective as possible. After providing your explanation, you must rate the answer on a scale of 1 to 3 as mentioned above. 
   * Return the evaluation between [Start of Evaluation] and [End of Evaluation] tags.
   * Return the score between [Start of Score] and [End of Score] tags.
 

diff --git a/src/instructlab/sdg/configs/skills/evaluate_grounded_questions.yaml b/src/instructlab/sdg/configs/skills/evaluate_grounded_questions.yaml
@@ -34,7 +34,7 @@ examples: |
    [End of Score]
 
 generation: |   
-   Here's the context and question you need to evaluate:
+   Here's the context and question you need to evaluate. Return the evaluation between [Start of Evaluation] and [End of Evaluation] tags.
 
    [Start of Context]
    {context}

diff --git a/src/instructlab/sdg/configs/skills/freeform_responses.yaml b/src/instructlab/sdg/configs/skills/freeform_responses.yaml
@@ -21,13 +21,13 @@ examples: |
   [End of Response]
 
 generation: |
-  Now generate a response to the following prompt. 
+  Now generate a response to the following prompt. Remember to use the same style and format as the example above. 
 
   [Start of Question]
   {question}
   [End of Question]
 
-  Remember to use the same style and format as the example above. Return the response between [Start of Response] and [End of Response] tags.
+  Return the response between [Start of Response] and [End of Response] tags.
 
 start_tags: ["[Start of Response]"]
 end_tags: ["[End of Response]"]
diff --git a/src/instructlab/sdg/configs/skills/grounded_responses.yaml b/src/instructlab/sdg/configs/skills/grounded_responses.yaml
@@ -26,7 +26,8 @@ examples: |
   [End of Response]
 
 generation: |
-  Now generate a response to the following prompt. Remember to use the same style and format as the example above. Return the response between [Start of Response] and [End of Response] tags.
+  Now generate a response to the following prompt. Remember to use the same style and format as the example above. 
+  Return the response between [Start of Response] and [End of Response] tags.
 
   [Start of Context]
   {context}
@@ -35,6 +36,8 @@ generation: |
   {question}
   [End of Question]
 
+  Return the response between [Start of Response] and [End of Response] tags.
+
 
 start_tags: ["[Start of Response]"]
 end_tags: ["[End of Response]"]
diff --git a/src/instructlab/sdg/default_flows.py b/src/instructlab/sdg/default_flows.py
@@ -226,7 +226,7 @@ def get_flow(self) -> list:
                 "block_config": {
                     "block_name": "filter_relevancy",
                     "filter_column": "score",
-                    "filter_value": "2.0",
+                    "filter_value": 2.0,
                     "operation": operator.eq,
                     "convert_dtype": float,
                     "batch_kwargs": {
@@ -260,7 +260,7 @@ def get_flow(self) -> list:
                 "block_config": {
                     "block_name": "filter_verify_question",
                     "filter_column": "rating",
-                    "filter_value": "1.0",
+                    "filter_value": 1.0,
                     "operation": operator.eq,
                     "convert_dtype": float,
                     "batch_kwargs": {
@@ -485,15 +485,15 @@ def get_flow(self) -> list:
                 },
             },
             {
-                'block_type': CombineColumnsBlock,
-                'block_config': {
-                    'block_name': 'combine_question_and_context',
-                    'columns': ['context', 'question'],
-                    'output_col': 'question',
-                    'batch_kwargs': {
-                        'num_procs': 8,
-                        'batched': True,
+                "block_type": CombineColumnsBlock,
+                "block_config": {
+                    "block_name": "combine_question_and_context",
+                    "columns": ["context", "question"],
+                    "output_col": "question",
+                    "batch_kwargs": {
+                        "num_procs": 8,
+                        "batched": True,
                     },
                 },
-            }
+            },
         ]