Skip to content

Commit

Permalink
Drop remaining import from main instructlab package
Browse files Browse the repository at this point in the history
This was the last remaining import from the main `instructlab` package.
The imported `get_sysprompt()` helper only returned a string constant, so
copy that constant into this module instead.

Closes instructlab#11

Signed-off-by: Russell Bryant <[email protected]>
  • Loading branch information
russellb committed Jul 6, 2024
1 parent 6251693 commit 502b4b8
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions src/instructlab/sdg/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# Third Party
# instructlab - All of these need to go away (other than sdg) - issue #6
from datasets import Dataset
from instructlab.utils import get_sysprompt
import httpx
import openai

Expand All @@ -36,6 +35,8 @@
read_taxonomy_leaf_nodes,
)

_SYS_PROMPT = "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."


def _unescape(s):
return bytes(s, "utf-8").decode("utf-8")
Expand Down Expand Up @@ -86,7 +87,7 @@ def _gen_train_data(logger, machine_instruction_data, output_file_train):
user += "\n" + synth_example["context"]
train_data.append(
{
"system": get_sysprompt(),
"system": _SYS_PROMPT,
"user": _unescape(user),
"assistant": _unescape(_get_response(logger, synth_example)),
}
Expand All @@ -112,7 +113,7 @@ def _gen_test_data(

test_data.append(
{
"system": get_sysprompt(),
"system": _SYS_PROMPT,
"user": _unescape(user),
"assistant": _unescape(seed_example["output"]), # answer
}
Expand Down

0 comments on commit 502b4b8

Please sign in to comment.