From fc27f688929c29080b552bf6034c9251de6b567d Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 11 Nov 2024 15:12:52 +0100 Subject: [PATCH 1/4] strengthen protections around trust_remote_code --- garak/buffs/paraphrase.py | 4 +++- garak/generators/huggingface.py | 6 +----- garak/resources/api/huggingface.py | 8 +++++++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index 42d1a8a62..663febec5 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -39,7 +39,9 @@ def _load_model(self): self.para_model = PegasusForConditionalGeneration.from_pretrained( self.para_model_name ).to(self.device) - self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name) + self.tokenizer = PegasusTokenizer.from_pretrained( + self.para_model_name, trust_remote_code=False + ) def _get_response(self, input_text): if self.para_model is None: diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py index cca9b3e0f..1b72b86b0 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -436,15 +436,11 @@ def _load_client(self): if _config.run.seed is not None: transformers.set_seed(_config.run.seed) - trust_remote_code = self.name.startswith("mosaicml/mpt-") - model_kwargs = self._gather_hf_params( hf_constructor=transformers.AutoConfig.from_pretrained ) # will defer to device_map if device map was `auto` may not match self.device - self.config = transformers.AutoConfig.from_pretrained( - self.name, trust_remote_code=trust_remote_code, **model_kwargs - ) + self.config = transformers.AutoConfig.from_pretrained(self.name, **model_kwargs) self._set_hf_context_len(self.config) self.config.init_device = self.device # determined by Pipeline `__init__`` diff --git a/garak/resources/api/huggingface.py b/garak/resources/api/huggingface.py index 6af14a834..67802c217 100644 --- a/garak/resources/api/huggingface.py +++ b/garak/resources/api/huggingface.py @@ -9,7 +9,6 @@ class HFCompatible: - """Mixin class providing private utility methods for using Huggingface transformers within garak""" @@ -79,6 +78,13 @@ def _gather_hf_params(self, hf_constructor: Callable): del args["device"] args["device_map"] = self.device + # trust_remote_code reset to default disabled unless unlocked in garak HF item config + if ( + "trust_remote_code" in params_to_process + and "trust_remote_code" not in params + ): + args["trust_remote_code"] = False + return args def _select_hf_device(self): From 191ccc95bbf36bf25ea8de27ca0599919078a26e Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 12 Nov 2024 11:05:03 +0100 Subject: [PATCH 2/4] inject paraphrase.PegasusT5 trust_remote_code --- garak/buffs/paraphrase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index 663febec5..df80c911d 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -40,7 +40,7 @@ def _load_model(self): self.para_model_name ).to(self.device) self.tokenizer = PegasusTokenizer.from_pretrained( - self.para_model_name, trust_remote_code=False + self.para_model_name, trust_remote_code=self.hf_args["trust_remote_code"] ) def _get_response(self, input_text): From c86a39f2664e755ff6f44d6e8e1a0d57806edde0 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 12 Nov 2024 11:07:34 +0100 Subject: [PATCH 3/4] handle case where trust_remote_code val not prepared for Pegasus model --- garak/buffs/paraphrase.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index df80c911d..93149a27a 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -39,8 +39,13 @@ def _load_model(self): self.para_model = PegasusForConditionalGeneration.from_pretrained( self.para_model_name ).to(self.device) + trust_remote_code = ( + self.hf_args["trust_remote_code"] + if "trust_remote_code" in self.hf_args + else False + ) self.tokenizer = PegasusTokenizer.from_pretrained( - self.para_model_name, trust_remote_code=self.hf_args["trust_remote_code"] + self.para_model_name, trust_remote_code=trust_remote_code ) def _get_response(self, input_text): From dd2258cc63a055c2963a2f5cc79f86f809eb3805 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 12 Nov 2024 17:44:51 +0100 Subject: [PATCH 4/4] move trust_remote_code to pegasus configurable params --- garak/buffs/paraphrase.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index 93149a27a..5f5b1e6dd 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -17,7 +17,8 @@ class PegasusT5(Buff, HFCompatible): DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | { "para_model_name": "garak-llm/pegasus_paraphrase", "hf_args": { - "device": "cpu" + "device": "cpu", + "trust_remote_code": False, }, # torch_dtype doesn't have standard support in Pegasus "max_length": 60, "temperature": 1.5, @@ -39,13 +40,8 @@ def _load_model(self): self.para_model = PegasusForConditionalGeneration.from_pretrained( self.para_model_name ).to(self.device) - trust_remote_code = ( - self.hf_args["trust_remote_code"] - if "trust_remote_code" in self.hf_args - else False - ) self.tokenizer = PegasusTokenizer.from_pretrained( - self.para_model_name, trust_remote_code=trust_remote_code + self.para_model_name, trust_remote_code=self.hf_args["trust_remote_code"] ) def _get_response(self, input_text):