Skip to content

Commit

Permalink
Drop remaining import from main instructlab package (#89)
Browse files Browse the repository at this point in the history
This was the last import from the main `instructlab` package to
remove. All it did was return this string constant, so just copy it
over.

Closes #11

Signed-off-by: Russell Bryant <[email protected]>
  • Loading branch information
russellb authored Jul 8, 2024
1 parent e2f3bbc commit 08938f3
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions src/instructlab/sdg/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# Third Party
# instructlab - All of these need to go away (other than sdg) - issue #6
from datasets import Dataset
from instructlab.utils import get_sysprompt
import httpx
import openai

Expand All @@ -36,6 +35,8 @@
read_taxonomy_leaf_nodes,
)

_SYS_PROMPT = "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."


def _unescape(s):
return bytes(s, "utf-8").decode("utf-8")
Expand Down Expand Up @@ -86,7 +87,7 @@ def _gen_train_data(logger, machine_instruction_data, output_file_train):
user += "\n" + synth_example["context"]
train_data.append(
{
"system": get_sysprompt(),
"system": _SYS_PROMPT,
"user": _unescape(user),
"assistant": _unescape(_get_response(logger, synth_example)),
}
Expand All @@ -112,7 +113,7 @@ def _gen_test_data(

test_data.append(
{
"system": get_sysprompt(),
"system": _SYS_PROMPT,
"user": _unescape(user),
"assistant": _unescape(seed_example["output"]), # answer
}
Expand Down

0 comments on commit 08938f3

Please sign in to comment.