Skip to content

Commit

Permalink
Change question/response to icl_query/icl_response
Browse files Browse the repository at this point in the history
PR instructlab#50 changed the format used in the full knowledge pipeline. Change
the simple pipelines to match.

Part of issue instructlab#55.

Signed-off-by: Russell Bryant <[email protected]>
  • Loading branch information
russellb committed Jun 30, 2024
1 parent 8112123 commit 15ae2b9
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 48 deletions.
12 changes: 6 additions & 6 deletions src/instructlab/sdg/configs/knowledge/simple_generate_qa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ Here are the requirements:
examples: |
Here are some examples to help you understand the type of questions that are asked for this document:
{question_1}
{response_1}
{icl_query_1}
{icl_response_1}
{question_2}
{response_2}
{icl_query_2}
{icl_response_2}
{question_3}
{response_3}
{icl_query_3}
{icl_response_3}
Here is the document:
{document}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ examples: |
Here are some examples to help you understand the type of questions that are asked for:
{question_1}
{response_1}
{icl_query_1}
{icl_response_1}
{question_2}
{response_2}
{icl_query_2}
{icl_response_2}
{question_3}
{response_3}
{icl_query_3}
{icl_response_3}
generation: |
Provide a single question and answer pair based on the examples.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ examples: |
Here are some examples to help you understand the type of questions that are asked for:
{question_1}
{response_1}
{icl_query_1}
{icl_response_1}
{question_2}
{response_2}
{icl_query_2}
{icl_response_2}
{question_3}
{response_3}
{icl_query_3}
{icl_response_3}
generation: |
Provide a single question and answer pair based on the examples.
Expand Down
60 changes: 30 additions & 30 deletions src/instructlab/sdg/utils/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,28 +443,28 @@ def _knowledge_leaf_node_to_samples(leaf_node, server_ctx_size, chunk_word_count
raise utils.GenerateException(
"Error: No domain provided for knowledge document in leaf node"
)
if "question_3" in samples[-1]:
if "icl_query_3" in samples[-1]:
samples.append({})
if "question_1" not in samples[-1]:
samples[-1]["question_1"] = leaf_node[i]["instruction"]
samples[-1]["response_1"] = leaf_node[i]["output"]
elif "question_2" not in samples[-1]:
samples[-1]["question_2"] = leaf_node[i]["instruction"]
samples[-1]["response_2"] = leaf_node[i]["output"]
if "icl_query_1" not in samples[-1]:
samples[-1]["icl_query_1"] = leaf_node[i]["instruction"]
samples[-1]["icl_response_1"] = leaf_node[i]["output"]
elif "icl_query_2" not in samples[-1]:
samples[-1]["icl_query_2"] = leaf_node[i]["instruction"]
samples[-1]["icl_response_2"] = leaf_node[i]["output"]
else:
samples[-1]["question_3"] = leaf_node[i]["instruction"]
samples[-1]["response_3"] = leaf_node[i]["output"]
samples[-1]["icl_query_3"] = leaf_node[i]["instruction"]
samples[-1]["icl_response_3"] = leaf_node[i]["output"]

# wrap back around to the beginning if the number of examples was not
# evenly divisible by 3
if "question_2" not in samples[-1]:
samples[-1]["question_2"] = leaf_node[0]["instruction"]
samples[-1]["response_2"] = leaf_node[0]["output"]
if "question_3" not in samples[-1]:
samples[-1]["question_3"] = leaf_node[1 if len(leaf_node) > 1 else 0][
if "icl_query_2" not in samples[-1]:
samples[-1]["icl_query_2"] = leaf_node[0]["instruction"]
samples[-1]["icl_response_2"] = leaf_node[0]["output"]
if "icl_query_3" not in samples[-1]:
samples[-1]["icl_query_3"] = leaf_node[1 if len(leaf_node) > 1 else 0][
"instruction"
]
samples[-1]["response_3"] = leaf_node[1 if len(leaf_node) > 1 else 0][
samples[-1]["icl_response_3"] = leaf_node[1 if len(leaf_node) > 1 else 0][
"output"
]

Expand All @@ -479,28 +479,28 @@ def _skill_leaf_node_to_samples(leaf_node):
samples[-1].setdefault("task_description", leaf_node[i]["task_description"])
if leaf_node[i].get("input"):
samples[-1].setdefault("context", leaf_node[i]["input"])
if "question_3" in samples[-1]:
if "icl_query_3" in samples[-1]:
samples.append({})
if "question_1" not in samples[-1]:
samples[-1]["question_1"] = leaf_node[i]["instruction"]
samples[-1]["response_1"] = leaf_node[i]["output"]
elif "question_2" not in samples[-1]:
samples[-1]["question_2"] = leaf_node[i]["instruction"]
samples[-1]["response_2"] = leaf_node[i]["output"]
if "icl_query_1" not in samples[-1]:
samples[-1]["icl_query_1"] = leaf_node[i]["instruction"]
samples[-1]["icl_response_1"] = leaf_node[i]["output"]
elif "icl_query_2" not in samples[-1]:
samples[-1]["icl_query_2"] = leaf_node[i]["instruction"]
samples[-1]["icl_response_2"] = leaf_node[i]["output"]
else:
samples[-1]["question_3"] = leaf_node[i]["instruction"]
samples[-1]["response_3"] = leaf_node[i]["output"]
samples[-1]["icl_query_3"] = leaf_node[i]["instruction"]
samples[-1]["icl_response_3"] = leaf_node[i]["output"]

# wrap back around to the beginning if the number of examples was not
# evenly divisible by 3
if "question_2" not in samples[-1]:
samples[-1]["question_2"] = leaf_node[0]["instruction"]
samples[-1]["response_2"] = leaf_node[0]["output"]
if "question_3" not in samples[-1]:
samples[-1]["question_3"] = leaf_node[1 if len(leaf_node) > 1 else 0][
if "icl_query_2" not in samples[-1]:
samples[-1]["icl_query_2"] = leaf_node[0]["instruction"]
samples[-1]["icl_response_2"] = leaf_node[0]["output"]
if "icl_query_3" not in samples[-1]:
samples[-1]["icl_query_3"] = leaf_node[1 if len(leaf_node) > 1 else 0][
"instruction"
]
samples[-1]["response_3"] = leaf_node[1 if len(leaf_node) > 1 else 0]["output"]
samples[-1]["icl_response_3"] = leaf_node[1 if len(leaf_node) > 1 else 0]["output"]

return samples

Expand Down

0 comments on commit 15ae2b9

Please sign in to comment.