diff --git a/turbo_alignment/dataset/chat/chat.py b/turbo_alignment/dataset/chat/chat.py index ebdede9e..acc8d4ce 100755 --- a/turbo_alignment/dataset/chat/chat.py +++ b/turbo_alignment/dataset/chat/chat.py @@ -299,6 +299,8 @@ def _encode( inference=inference, random_cut=random_cut, ) + if len(input_ids) >= 8000: + raise ValueError(f'{len(input_ids)=}, which is >=8000') except ValueError as ex: output.append(None) diff --git a/turbo_alignment/pipelines/inference/chat.py b/turbo_alignment/pipelines/inference/chat.py index 93b975c6..aa08fefe 100755 --- a/turbo_alignment/pipelines/inference/chat.py +++ b/turbo_alignment/pipelines/inference/chat.py @@ -44,6 +44,8 @@ def _get_single_inference_settings( dtype='bfloat16', tensor_parallel_size=model_inference_settings.tensor_parallel_size, enable_lora=enable_lora, + gpu_memory_utilization=0.9, + disable_custom_all_reduce=True, ) else: diff --git a/turbo_alignment/pipelines/train/dpo.py b/turbo_alignment/pipelines/train/dpo.py index 155f6d3f..22cbea32 100755 --- a/turbo_alignment/pipelines/train/dpo.py +++ b/turbo_alignment/pipelines/train/dpo.py @@ -36,22 +36,23 @@ def _get_cherry_pick_callback( tokenizer: PreTrainedTokenizerBase, **kwargs, ) -> ChatCherryPickCallback: - cherry_pick_settings = experiment_settings.cherry_pick_settings - - cherry_pick_datasets = DatasetLoader[InferenceChatDataset](InferenceChatDataset).load_datasets( - cherry_pick_settings.dataset_settings, tokenizer=tokenizer, strategy=DatasetStrategy.INFERENCE - ) - - metrics = [ - Metric.by_name(metric.type)(MetricSettingsRegistry.by_name(metric.type)(**metric.parameters)) - for metric in cherry_pick_settings.metric_settings - ] - - return ChatCherryPickCallback( - cherry_pick_settings=cherry_pick_settings, - datasets=cherry_pick_datasets, - metrics=metrics, - ) + return None + # cherry_pick_settings = experiment_settings.cherry_pick_settings + + # cherry_pick_datasets = DatasetLoader[InferenceChatDataset](InferenceChatDataset).load_datasets( + # cherry_pick_settings.dataset_settings, tokenizer=tokenizer, strategy=DatasetStrategy.INFERENCE + # ) + + # metrics = [ + # Metric.by_name(metric.type)(MetricSettingsRegistry.by_name(metric.type)(**metric.parameters)) + # for metric in cherry_pick_settings.metric_settings + # ] + + # return ChatCherryPickCallback( + # cherry_pick_settings=cherry_pick_settings, + # datasets=cherry_pick_datasets, + # metrics=metrics, + # ) @staticmethod def _get_training_args(experiment_settings: DPOTrainExperimentSettings) -> DPOTrainingArguments: diff --git a/turbo_alignment/pipelines/train/sft.py b/turbo_alignment/pipelines/train/sft.py index a1bddecc..800359d9 100755 --- a/turbo_alignment/pipelines/train/sft.py +++ b/turbo_alignment/pipelines/train/sft.py @@ -37,22 +37,23 @@ def _get_cherry_pick_callback( tokenizer: PreTrainedTokenizerBase, **kwargs, ) -> ChatCherryPickCallback: - cherry_pick_settings = experiment_settings.cherry_pick_settings + return None + # cherry_pick_settings = experiment_settings.cherry_pick_settings - cherry_pick_datasets = DatasetLoader[InferenceChatDataset](InferenceChatDataset).load_datasets( - cherry_pick_settings.dataset_settings, tokenizer=tokenizer, strategy=DatasetStrategy.INFERENCE - ) + # cherry_pick_datasets = DatasetLoader[InferenceChatDataset](InferenceChatDataset).load_datasets( + # cherry_pick_settings.dataset_settings, tokenizer=tokenizer, strategy=DatasetStrategy.INFERENCE + # ) - metrics = [ - Metric.by_name(metric.type)(MetricSettingsRegistry.by_name(metric.type)(**metric.parameters)) - for metric in cherry_pick_settings.metric_settings - ] + # metrics = [ + # Metric.by_name(metric.type)(MetricSettingsRegistry.by_name(metric.type)(**metric.parameters)) + # for metric in cherry_pick_settings.metric_settings + # ] - return ChatCherryPickCallback( - cherry_pick_settings=cherry_pick_settings, - datasets=cherry_pick_datasets, - metrics=metrics, - ) + # return ChatCherryPickCallback( + # cherry_pick_settings=cherry_pick_settings, + # datasets=cherry_pick_datasets, + # metrics=metrics, + # ) @staticmethod def _get_training_args(experiment_settings: SftTrainExperimentSettings) -> TrainingArguments: diff --git a/turbo_alignment/trainers/dpo.py b/turbo_alignment/trainers/dpo.py index 73df9df8..693baa6b 100755 --- a/turbo_alignment/trainers/dpo.py +++ b/turbo_alignment/trainers/dpo.py @@ -749,9 +749,9 @@ def _compute_metrics( metrics[f'{prefix_name}grad_term'] = ( (self.dpo_loss_registry.beta * F.sigmoid(rejected_rewards - chosen_rewards)).detach().cpu().mean().item() ) - metrics[f'{prefix_name}grad_term_std'] = ( - (self.dpo_loss_registry.beta * F.sigmoid(rejected_rewards - chosen_rewards)).detach().cpu().std().item() - ) + # metrics[f'{prefix_name}grad_term_std'] = ( + # (self.dpo_loss_registry.beta * F.sigmoid(rejected_rewards - chosen_rewards)).detach().cpu().std().item() + # ) return metrics