diff --git a/scripts/test_freeform_skills.py b/scripts/test_freeform_skills.py index 01232e27..a8612c09 100644 --- a/scripts/test_freeform_skills.py +++ b/scripts/test_freeform_skills.py @@ -49,7 +49,7 @@ ds = Dataset.from_list(samples) -skills_flow = SynthSkillsFlow(client, teacher_model).get_flow() +skills_flow = SynthSkillsFlow(client, "mixtral", teacher_model, 1).get_flow() skills_pipe = Pipeline(skills_flow) sdg = SDG([skills_pipe]) diff --git a/scripts/test_grounded_skills.py b/scripts/test_grounded_skills.py index 98dc739d..338edb6c 100644 --- a/scripts/test_grounded_skills.py +++ b/scripts/test_grounded_skills.py @@ -22,7 +22,7 @@ samples = [ { - 'seed_context': """*Ms. Thompson:* Good morning, everyone. Today, we''re here to discuss + "seed_context": """*Ms. Thompson:* Good morning, everyone. Today, we''re here to discuss our customer journey mapping and analysis. I believe this is crucial to understanding our customers'' experiences and improving our services. @@ -64,9 +64,9 @@ *Mr. Patel:* Sounds good. We''ll reconvene in a week to share our findings and discuss next steps.""", - 'seed_question': """Generate a company wide email based on the given meeting transcript""", - 'task_description': 'Writing emails based on minutes of meeting', - 'seed_response': """Subject: Recap and Next Steps - Customer Journey Mapping and Analysis\n\ + "seed_question": """Generate a company wide email based on the given meeting transcript""", + "task_description": "Writing emails based on minutes of meeting", + "seed_response": """Subject: Recap and Next Steps - Customer Journey Mapping and Analysis\n\ \nDear [Company Name] Team,\n\nI hope this email finds you well. Yesterday, we\ \ gathered to discuss our customer journey mapping and analysis, aiming to understand\ \ our customers' experiences and identify opportunities for improvement. The discussions\ @@ -97,7 +97,7 @@ ds = Dataset.from_list(samples) -skills_flow = SynthGroundedSkillsFlow(client, teacher_model).get_flow() +skills_flow = SynthGroundedSkillsFlow(client, "mixtral", teacher_model, 10).get_flow() skills_pipe = Pipeline(skills_flow) sdg = SDG([skills_pipe]) diff --git a/scripts/test_knowledge.py b/scripts/test_knowledge.py index d777c8c3..aeedcf59 100644 --- a/scripts/test_knowledge.py +++ b/scripts/test_knowledge.py @@ -38,8 +38,8 @@ ds = Dataset.from_list(samples) -mmlu_flow = MMLUBenchFlow(client, teacher_model).get_flow() -knowledge_flow = SynthKnowledgeFlow(client, teacher_model).get_flow() +mmlu_flow = MMLUBenchFlow(client, "mixtral", teacher_model, 1).get_flow() +knowledge_flow = SynthKnowledgeFlow(client, "mixtral", teacher_model, 1).get_flow() knowledge_pipe = Pipeline(knowledge_flow) mmlu_pipe = Pipeline(mmlu_flow) diff --git a/src/instructlab/sdg/configs/skills/evaluate_grounded_pair.yaml b/src/instructlab/sdg/configs/skills/evaluate_grounded_pair.yaml index 3f40a6fd..45580d3b 100644 --- a/src/instructlab/sdg/configs/skills/evaluate_grounded_pair.yaml +++ b/src/instructlab/sdg/configs/skills/evaluate_grounded_pair.yaml @@ -31,6 +31,7 @@ examples: | [End of Score] generation: | + Begin your evaluation by providing a short explanation. Be as objective as possible. After providing your explanation, you must rate the answer on a scale of 1 to 3 as mentioned above. Here's the context, question and the answer you need to evaluate: [Start of Context] @@ -45,7 +46,6 @@ generation: | {answer} [End of Answer] - Begin your evaluation by providing a short explanation. Be as objective as possible. After providing your explanation, you must rate the answer on a scale of 1 to 3 as mentioned above. * Return the evaluation between [Start of Evaluation] and [End of Evaluation] tags. * Return the score between [Start of Score] and [End of Score] tags. diff --git a/src/instructlab/sdg/configs/skills/evaluate_grounded_questions.yaml b/src/instructlab/sdg/configs/skills/evaluate_grounded_questions.yaml index 70f6feb9..6999987f 100644 --- a/src/instructlab/sdg/configs/skills/evaluate_grounded_questions.yaml +++ b/src/instructlab/sdg/configs/skills/evaluate_grounded_questions.yaml @@ -34,7 +34,7 @@ examples: | [End of Score] generation: | - Here's the context and question you need to evaluate: + Here's the context and question you need to evaluate. Return the evaluation between [Start of Evaluation] and [End of Evaluation] tags. [Start of Context] {context} diff --git a/src/instructlab/sdg/configs/skills/freeform_responses.yaml b/src/instructlab/sdg/configs/skills/freeform_responses.yaml index 0b0eda38..cf7ff177 100644 --- a/src/instructlab/sdg/configs/skills/freeform_responses.yaml +++ b/src/instructlab/sdg/configs/skills/freeform_responses.yaml @@ -21,13 +21,13 @@ examples: | [End of Response] generation: | - Now generate a response to the following prompt. + Now generate a response to the following prompt. Remember to use the same style and format as the example above. [Start of Question] {question} [End of Question] - Remember to use the same style and format as the example above. Return the response between [Start of Response] and [End of Response] tags. + Return the response between [Start of Response] and [End of Response] tags. start_tags: ["[Start of Response]"] end_tags: ["[End of Response]"] diff --git a/src/instructlab/sdg/configs/skills/grounded_responses.yaml b/src/instructlab/sdg/configs/skills/grounded_responses.yaml index 87429b9a..bacd5c10 100644 --- a/src/instructlab/sdg/configs/skills/grounded_responses.yaml +++ b/src/instructlab/sdg/configs/skills/grounded_responses.yaml @@ -26,7 +26,8 @@ examples: | [End of Response] generation: | - Now generate a response to the following prompt. Remember to use the same style and format as the example above. Return the response between [Start of Response] and [End of Response] tags. + Now generate a response to the following prompt. Remember to use the same style and format as the example above. + Return the response between [Start of Response] and [End of Response] tags. [Start of Context] {context} @@ -35,6 +36,8 @@ generation: | {question} [End of Question] + Return the response between [Start of Response] and [End of Response] tags. + start_tags: ["[Start of Response]"] end_tags: ["[End of Response]"] \ No newline at end of file diff --git a/src/instructlab/sdg/default_flows.py b/src/instructlab/sdg/default_flows.py index d4a4ec03..31edd3d6 100644 --- a/src/instructlab/sdg/default_flows.py +++ b/src/instructlab/sdg/default_flows.py @@ -226,7 +226,7 @@ def get_flow(self) -> list: "block_config": { "block_name": "filter_relevancy", "filter_column": "score", - "filter_value": "2.0", + "filter_value": 2.0, "operation": operator.eq, "convert_dtype": float, "batch_kwargs": { @@ -260,7 +260,7 @@ def get_flow(self) -> list: "block_config": { "block_name": "filter_verify_question", "filter_column": "rating", - "filter_value": "1.0", + "filter_value": 1.0, "operation": operator.eq, "convert_dtype": float, "batch_kwargs": { @@ -485,15 +485,15 @@ def get_flow(self) -> list: }, }, { - 'block_type': CombineColumnsBlock, - 'block_config': { - 'block_name': 'combine_question_and_context', - 'columns': ['context', 'question'], - 'output_col': 'question', - 'batch_kwargs': { - 'num_procs': 8, - 'batched': True, + "block_type": CombineColumnsBlock, + "block_config": { + "block_name": "combine_question_and_context", + "columns": ["context", "question"], + "output_col": "question", + "batch_kwargs": { + "num_procs": 8, + "batched": True, }, }, - } + }, ]