From c46598df1b6ed109f33c44bc4918cb54e2eef72f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D0=BB=D0=B0=D1=85=D0=BE=D0=B2=20=D0=90=D0=BB?= =?UTF-8?q?=D0=B5=D0=BA=D1=81=D0=B5=D0=B9=20=D0=9F=D0=B0=D0=B2=D0=BB=D0=BE?= =?UTF-8?q?=D0=B2=D0=B8=D1=87?= Date: Mon, 11 Nov 2024 12:31:42 +0300 Subject: [PATCH] in progress --- turbo_alignment/dataset/chat/chat.py | 2 ++ turbo_alignment/pipelines/inference/chat.py | 2 ++ turbo_alignment/pipelines/train/dpo.py | 33 +++++++++++---------- turbo_alignment/pipelines/train/sft.py | 27 +++++++++-------- turbo_alignment/trainers/dpo.py | 7 +++-- 5 files changed, 39 insertions(+), 32 deletions(-) diff --git a/turbo_alignment/dataset/chat/chat.py b/turbo_alignment/dataset/chat/chat.py index ebdede9..acc8d4c 100755 --- a/turbo_alignment/dataset/chat/chat.py +++ b/turbo_alignment/dataset/chat/chat.py @@ -299,6 +299,8 @@ def _encode( inference=inference, random_cut=random_cut, ) + if len(input_ids) >= 8000: + raise ValueError(f'{len(input_ids)=}, which is >=8000') except ValueError as ex: output.append(None) diff --git a/turbo_alignment/pipelines/inference/chat.py b/turbo_alignment/pipelines/inference/chat.py index 93b975c..aa08fef 100755 --- a/turbo_alignment/pipelines/inference/chat.py +++ b/turbo_alignment/pipelines/inference/chat.py @@ -44,6 +44,8 @@ def _get_single_inference_settings( dtype='bfloat16', tensor_parallel_size=model_inference_settings.tensor_parallel_size, enable_lora=enable_lora, + gpu_memory_utilization=0.9, + disable_custom_all_reduce=True, ) else: diff --git a/turbo_alignment/pipelines/train/dpo.py b/turbo_alignment/pipelines/train/dpo.py index 155f6d3..22cbea3 100755 --- a/turbo_alignment/pipelines/train/dpo.py +++ b/turbo_alignment/pipelines/train/dpo.py @@ -36,22 +36,23 @@ def _get_cherry_pick_callback( tokenizer: PreTrainedTokenizerBase, **kwargs, ) -> ChatCherryPickCallback: - cherry_pick_settings = experiment_settings.cherry_pick_settings - - cherry_pick_datasets = DatasetLoader[InferenceChatDataset](InferenceChatDataset).load_datasets( - cherry_pick_settings.dataset_settings, tokenizer=tokenizer, strategy=DatasetStrategy.INFERENCE - ) - - metrics = [ - Metric.by_name(metric.type)(MetricSettingsRegistry.by_name(metric.type)(**metric.parameters)) - for metric in cherry_pick_settings.metric_settings - ] - - return ChatCherryPickCallback( - cherry_pick_settings=cherry_pick_settings, - datasets=cherry_pick_datasets, - metrics=metrics, - ) + return None + # cherry_pick_settings = experiment_settings.cherry_pick_settings + + # cherry_pick_datasets = DatasetLoader[InferenceChatDataset](InferenceChatDataset).load_datasets( + # cherry_pick_settings.dataset_settings, tokenizer=tokenizer, strategy=DatasetStrategy.INFERENCE + # ) + + # metrics = [ + # Metric.by_name(metric.type)(MetricSettingsRegistry.by_name(metric.type)(**metric.parameters)) + # for metric in cherry_pick_settings.metric_settings + # ] + + # return ChatCherryPickCallback( + # cherry_pick_settings=cherry_pick_settings, + # datasets=cherry_pick_datasets, + # metrics=metrics, + # ) @staticmethod def _get_training_args(experiment_settings: DPOTrainExperimentSettings) -> DPOTrainingArguments: diff --git a/turbo_alignment/pipelines/train/sft.py b/turbo_alignment/pipelines/train/sft.py index a1bddec..800359d 100755 --- a/turbo_alignment/pipelines/train/sft.py +++ b/turbo_alignment/pipelines/train/sft.py @@ -37,22 +37,23 @@ def _get_cherry_pick_callback( tokenizer: PreTrainedTokenizerBase, **kwargs, ) -> ChatCherryPickCallback: - cherry_pick_settings = experiment_settings.cherry_pick_settings + return None + # cherry_pick_settings = experiment_settings.cherry_pick_settings - cherry_pick_datasets = DatasetLoader[InferenceChatDataset](InferenceChatDataset).load_datasets( - cherry_pick_settings.dataset_settings, tokenizer=tokenizer, strategy=DatasetStrategy.INFERENCE - ) + # cherry_pick_datasets = DatasetLoader[InferenceChatDataset](InferenceChatDataset).load_datasets( + # cherry_pick_settings.dataset_settings, tokenizer=tokenizer, strategy=DatasetStrategy.INFERENCE + # ) - metrics = [ - Metric.by_name(metric.type)(MetricSettingsRegistry.by_name(metric.type)(**metric.parameters)) - for metric in cherry_pick_settings.metric_settings - ] + # metrics = [ + # Metric.by_name(metric.type)(MetricSettingsRegistry.by_name(metric.type)(**metric.parameters)) + # for metric in cherry_pick_settings.metric_settings + # ] - return ChatCherryPickCallback( - cherry_pick_settings=cherry_pick_settings, - datasets=cherry_pick_datasets, - metrics=metrics, - ) + # return ChatCherryPickCallback( + # cherry_pick_settings=cherry_pick_settings, + # datasets=cherry_pick_datasets, + # metrics=metrics, + # ) @staticmethod def _get_training_args(experiment_settings: SftTrainExperimentSettings) -> TrainingArguments: diff --git a/turbo_alignment/trainers/dpo.py b/turbo_alignment/trainers/dpo.py index 490e6d1..693baa6 100755 --- a/turbo_alignment/trainers/dpo.py +++ b/turbo_alignment/trainers/dpo.py @@ -749,9 +749,9 @@ def _compute_metrics( metrics[f'{prefix_name}grad_term'] = ( (self.dpo_loss_registry.beta * F.sigmoid(rejected_rewards - chosen_rewards)).detach().cpu().mean().item() ) - metrics[f'{prefix_name}grad_term_std'] = ( - (self.dpo_loss_registry.beta * F.sigmoid(rejected_rewards - chosen_rewards)).detach().cpu().std().item() - ) + # metrics[f'{prefix_name}grad_term_std'] = ( + # (self.dpo_loss_registry.beta * F.sigmoid(rejected_rewards - chosen_rewards)).detach().cpu().std().item() + # ) return metrics @@ -791,6 +791,7 @@ def compute_loss( model: PreTrainedModel | nn.Module, inputs: dict[str, torch.Tensor | Any], return_outputs=False, + num_items_in_batch=None, ) -> torch.Tensor | tuple[torch.Tensor, dict[str, float]]: loss, metrics = self.get_batch_metrics(model, inputs, train_eval='train')