diff --git a/v2/compositional_skills.json b/v2/compositional_skills.json index 9042c26..2df9f61 100644 --- a/v2/compositional_skills.json +++ b/v2/compositional_skills.json @@ -16,9 +16,14 @@ "minLength": 1 }, "task_description": { - "description": "A description of the skill.", + "description": "A description of the task, which is used in prompts to the teacher model during synthetic data generation. The description should be detailed and prescriptive to improve the teacher model's responses.", "type": "string", - "minLength": 1 + "minLength": 1, + "examples": [ + "Extracting content from a financial report and providing it in bulleted format", + "Providing engaging explanations for common questions across diverse topics at a primary school level", + "Assuming the roles of historical figures and providing engaging explanations for common questions across diverse topics" + ] }, "seed_examples": { "description": "An array of seed examples for synthetic data generation.", @@ -34,7 +39,7 @@ "unevaluatedProperties": false, "properties": { "context": { - "description": "Information that the model is expected to take into account during processing. This is different from knowledge, where the model is expected to gain facts and background knowledge from the tuning process.", + "description": "Information that the teacher model is expected to take into account during processing. This is different from knowledge, where the model is expected to gain facts and background knowledge from the tuning process.", "type": "string", "minLength": 1 }, diff --git a/v2/knowledge.json b/v2/knowledge.json index 45cb76e..01ae940 100644 --- a/v2/knowledge.json +++ b/v2/knowledge.json @@ -18,14 +18,23 @@ "minLength": 1 }, "domain": { - "description": "The knowledge domain.", + "description": "The knowledge domain, which is used in prompts to the teacher model during synthetic data generation. 
The domain should be brief, such as the title of a textbook chapter or section.", "type": "string", - "minLength": 1 + "minLength": 1, + "examples": [ + "Chemistry", + "History", + "Pop culture" + ] }, "task_description": { - "description": "A description of the skill.", + "description": "A description of the task, which is used in prompts to the teacher model during synthetic data generation. The description should be detailed and prescriptive to improve the teacher model's responses.", "type": "string", - "minLength": 1 + "minLength": 1, + "examples": [ + "To teach a language model about softball history", + "To teach a language model about tabby cats" + ] }, "seed_examples": { "description": "An array of seed examples for synthetic data generation.", @@ -68,7 +77,7 @@ "type": "string", "minLength": 1, "examples": [ - "https://github.com/instructlab/instructlab" + "https://github.com/instructlab/instructlab.git" ] }, "commit": { @@ -76,7 +85,7 @@ "type": "string", "minLength": 1, "examples": [ - "951999a" + "951999afdc59c46d325493568193b40bd5439c9e" ] }, "patterns": {