Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
Signed-off-by: minmingzhu <[email protected]>
  • Loading branch information
minmingzhu committed Jul 2, 2024
1 parent fb3152e commit 007f9c7
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion llm_on_ray/finetune/dpo_finetuing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,22 @@


class DPOFineTuning(Finetuning):
def load_tokenizer(self, config: Dict):
    """Build the tokenizer described by the ``General`` section of *config*.

    The tokenizer identifier is ``General.tokenizer_name`` when that key is
    present and not ``None``; otherwise ``General.base_model`` is used.
    Whatever mapping is stored under ``General.config`` (an empty dict when
    the key is absent) is forwarded as keyword arguments to
    ``transformers.AutoTokenizer.from_pretrained``.

    Args:
        config: Configuration mapping containing a ``"General"`` section.

    Returns:
        The loaded tokenizer, with ``pad_token`` set to ``eos_token`` if the
        tokenizer did not define a pad token.

    Raises:
        KeyError: If ``"General"`` is missing, or if no usable
            ``tokenizer_name`` is given and ``"base_model"`` is missing.
    """
    general = config["General"]

    # Only an explicit, non-None tokenizer_name overrides the base model.
    name = general.get("tokenizer_name")
    if name is None:
        name = general["base_model"]

    extra_kwargs = general.get("config", {})
    tok = transformers.AutoTokenizer.from_pretrained(name, **extra_kwargs)

    # Reuse the EOS token for padding when the tokenizer ships without one.
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
    return tok

def tokenize_dataset(self, config: Dict, tokenizer, dataset):
processor = DPOIntelOrcaProcessor(config, tokenizer)
print(dataset)
for key in dataset:
prompts = processor.make_prompt(dataset[key])
dataset[key] = datasets.Dataset.from_dict(prompts)
Expand Down

0 comments on commit 007f9c7

Please sign in to comment.