diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index d1cc1b910f29..906aa7dead31 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -3917,6 +3917,17 @@ jobs:
           --experiment-dir=/tmp/mixtral_pretrain_results \
           --data-path=/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document
 
+  L2_HF_Transformers_peft_test:
+    needs: [ cicd-test-container-setup ]
+    uses: ./.github/workflows/_test_template.yml
+    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformers_peft_test') || needs.cicd-test-container-setup.outputs.all == 'true'
+    with:
+      RUNNER: self-hosted-azure
+      SCRIPT: |
+        python examples/llm/peft/hf.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10
+      AFTER_SCRIPT: |
+        rm -rf nemo_experiments
+
   L2_NeMo_2_GPT_SFT_TP1PP1_MBS1:
     needs: [cicd-test-container-setup]
     uses: ./.github/workflows/_test_template.yml
@@ -4469,6 +4480,7 @@ jobs:
       - L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1
       - L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1
      - L2_NeMo_2_Mixtral_Pretraining
+      - L2_HF_Transformers_peft_test
       - L2_PTQ_Llama2_FP8
       - L2_Community_LLM_Checkpoints_tests_Llama3
       - L2_Distill_Llama2
diff --git a/examples/llm/peft/hf.py b/examples/llm/peft/hf.py
index 5b24c22ab79d..00e90a52bb0a 100644
--- a/examples/llm/peft/hf.py
+++ b/examples/llm/peft/hf.py
@@ -41,14 +41,15 @@ def formatting_prompts_func(examples):
         ans = tokenizer(text)
         tokens = ans['input_ids']
         return {
-            'tokens': tokens,
+            'input_ids': tokens,
             'labels': tokens[1:] + [tokens[-1]],
         }
 
     from datasets import load_dataset
 
     dataset = load_dataset("rajpurkar/squad", split="train")
-    dataset = dataset.map(formatting_prompts_func, batched=False, batch_size=2)
+    columns_to_remove = list(filter(lambda x: x not in ['input_ids', 'labels'], dataset.features.keys()))
+    dataset = dataset.map(formatting_prompts_func, batched=False, batch_size=2, remove_columns=columns_to_remove)
 
     return dataset
 
diff --git a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
index 6922f38cfb26..d8d1917c4427 100644
--- a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
+++ b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
+
 import lightning.pytorch as pl
 import torch
 import torch.nn.functional as F
@@ -82,41 +84,31 @@ def configure_model(self):
             self.model = AutoModelForCausalLM.from_config(config, trust_remote_code=self.trust_remote_code)
         self.model.train()
 
-    def forward(self, input_ids, attention_mask=None, labels=None, loss_mask=None):
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: torch.Tensor = None,
+        labels: Optional[torch.Tensor] = None,
+        **kwargs,
+    ):
         outputs = self.model(
             input_ids=input_ids.to(self.model.device),
-            attention_mask=attention_mask,
+            attention_mask=attention_mask.to(self.model.device) if attention_mask is not None else attention_mask,
         )
         labels = labels.to(self.model.device)
         if loss_mask is not None:
             loss_mask = loss_mask.to(self.model.device).view(-1)
         n_cls = outputs.logits.shape[-1]
-        outputs.loss = self.loss_fn(outputs.logits.view(-1, n_cls), labels.view(-1), loss_mask)
-        return outputs
+        return self.loss_fn(outputs.logits.view(-1, n_cls), labels.view(-1), loss_mask)
 
     def training_step(self, batch):
-        tokens = batch['tokens']
-        labels = batch['labels']
-        loss_mask = batch.get('loss_mask', None)
-        output = self.forward(
-            input_ids=tokens,
-            labels=labels,
-            loss_mask=loss_mask,
-        )
-
-        loss = output.loss
+        loss = self.forward(**batch)
         self.log('train_log', loss, on_step=True, on_epoch=True, prog_bar=True)
         return loss
 
     def validation_step(self, batch, batch_idx):
-        tokens = batch['tokens']
-        labels = batch['labels']
-        output = self.forward(
-            input_ids=tokens,
-            labels=labels,
-        )
-
-        loss = output.loss
+        loss = self.forward(**batch)
         self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
 
     def save_pretrained(self, path):
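For orientation, here is a minimal sketch of the contract the last two files now share: the dataset map emits only `input_ids` and `labels` (everything else is dropped via `remove_columns`), and `training_step`/`validation_step` pass the whole batch dict straight into `forward(**batch)`, so every remaining key has to match a parameter of the new signature or be absorbed by `**kwargs`. The toy tensors, shapes, and the `masked_cross_entropy` stand-in for `self.loss_fn` below are illustrative assumptions, not code from this diff.

```python
# Illustrative sketch only: shapes, vocab size, and masked_cross_entropy
# are assumptions standing in for the model's real loss_fn and outputs.
import torch
import torch.nn.functional as F


def masked_cross_entropy(logits, labels, loss_mask=None):
    # Per-token cross entropy; optionally zero out masked positions.
    loss = F.cross_entropy(logits, labels, reduction='none')
    if loss_mask is not None:
        loss = loss * loss_mask
    return loss.mean()


# A batch shaped like what the remapped dataset yields: after
# remove_columns, only 'input_ids' and 'labels' remain, so
# self.forward(**batch) lines up with the new forward signature.
batch = {
    'input_ids': torch.randint(0, 100, (2, 8)),
    'labels': torch.randint(0, 100, (2, 8)),
}

logits = torch.randn(2, 8, 100)  # stand-in for outputs.logits
n_cls = logits.shape[-1]
loss = masked_cross_entropy(logits.view(-1, n_cls), batch['labels'].view(-1))
print(loss.item())
```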