diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py b/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py index d503c27a..a251c586 100644 --- a/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py +++ b/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py @@ -35,6 +35,7 @@ def __init__( self._infinity_tokenizer = AutoTokenizer.from_pretrained( engine_args.model_name_or_path, + revision=engine_args.revision, trust_remote_code=engine_args.trust_remote_code, ) diff --git a/libs/infinity_emb/infinity_emb/transformer/crossencoder/optimum.py b/libs/infinity_emb/infinity_emb/transformer/crossencoder/optimum.py index e0f2cf80..6992d30e 100644 --- a/libs/infinity_emb/infinity_emb/transformer/crossencoder/optimum.py +++ b/libs/infinity_emb/infinity_emb/transformer/crossencoder/optimum.py @@ -40,14 +40,18 @@ def __init__(self, *, engine_args: EngineArgs): file_name=onnx_file.as_posix(), optimize_model=not os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False), model_class=ORTModelForSequenceClassification, + revision=engine_args.revision, + trust_remote_code=engine_args.trust_remote_code, ) self.model.use_io_binding = False self.tokenizer = AutoTokenizer.from_pretrained( engine_args.model_name_or_path, + revision=engine_args.revision, trust_remote_code=engine_args.trust_remote_code, ) self.config = AutoConfig.from_pretrained( engine_args.model_name_or_path, + revision=engine_args.revision, trust_remote_code=engine_args.trust_remote_code, ) self._infinity_tokenizer = copy.deepcopy(self.tokenizer) diff --git a/libs/infinity_emb/infinity_emb/transformer/embedder/neuron.py b/libs/infinity_emb/infinity_emb/transformer/embedder/neuron.py index 95e9e6d7..7781ca25 100644 --- a/libs/infinity_emb/infinity_emb/transformer/embedder/neuron.py +++ b/libs/infinity_emb/infinity_emb/transformer/embedder/neuron.py @@ -82,8 +82,16 @@ def __init__(self, *, engine_args: EngineArgs): else cls_token_pooling ) - self.tokenizer = AutoTokenizer.from_pretrained(engine_args.model_name_or_path) - self.config = AutoConfig.from_pretrained(engine_args.model_name_or_path) + self.tokenizer = AutoTokenizer.from_pretrained( + engine_args.model_name_or_path, + revision=engine_args.revision, + trust_remote_code=engine_args.trust_remote_code, + ) + self.config = AutoConfig.from_pretrained( + engine_args.model_name_or_path, + revision=engine_args.revision, + trust_remote_code=engine_args.trust_remote_code, + ) self._infinity_tokenizer = copy.deepcopy(self.tokenizer) compiler_args = {"num_cores": get_nc_count(), "auto_cast_type": "fp16"} @@ -97,6 +105,8 @@ def __init__(self, *, engine_args: EngineArgs): } self.model = NeuronModelForFeatureExtraction.from_pretrained( model_id=engine_args.model_name_or_path, + revision=engine_args.revision, + trust_remote_code=engine_args.trust_remote_code, export=True, **compiler_args, **input_shapes, diff --git a/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py b/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py index d80d3d52..fdf31d2d 100644 --- a/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py +++ b/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py @@ -50,6 +50,8 @@ def __init__(self, *, engine_args: EngineArgs): self.model = optimize_model( model_name_or_path=engine_args.model_name_or_path, + revision=engine_args.revision, + trust_remote_code=engine_args.trust_remote_code, execution_provider=provider, file_name=onnx_file.as_posix(), optimize_model=not os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False), @@ -59,10 +61,12 @@ def __init__(self, *, engine_args: EngineArgs): self.tokenizer = AutoTokenizer.from_pretrained( engine_args.model_name_or_path, + revision=engine_args.revision, trust_remote_code=engine_args.trust_remote_code, ) self.config = AutoConfig.from_pretrained( engine_args.model_name_or_path, + revision=engine_args.revision, trust_remote_code=engine_args.trust_remote_code, ) self._infinity_tokenizer = copy.deepcopy(self.tokenizer) diff --git a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py index c044339e..4021fd4b 100644 --- a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py +++ b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py @@ -70,6 +70,7 @@ def optimize_model( file_name: str, optimize_model=False, revision: Optional[str] = None, + trust_remote_code: bool = True, ): CHECK_ONNXRUNTIME.mark_required() path_folder = ( @@ -82,6 +83,7 @@ def optimize_model( return model_class.from_pretrained( model_name_or_path, revision=revision, + trust_remote_code=trust_remote_code, provider=execution_provider, file_name=file_name, provider_options={ @@ -100,15 +102,17 @@ def optimize_model( return model_class.from_pretrained( file_optimized.parent.as_posix(), revision=revision, + trust_remote_code=trust_remote_code, provider=execution_provider, file_name=file_optimized.name, ) unoptimized_model_path = model_class.from_pretrained( model_name_or_path, + revision=revision, + trust_remote_code=trust_remote_code, provider=execution_provider, file_name=file_name, - revision=revision, ) if not optimize_model or execution_provider == "TensorrtExecutionProvider": return unoptimized_model_path @@ -137,6 +141,8 @@ def optimize_model( model = model_class.from_pretrained( optimized_model_path, + revision=revision, + trust_remote_code=trust_remote_code, provider=execution_provider, file_name=Path(file_name).name.replace(".onnx", "_optimized.onnx"), )