Merge pull request #224 from chiragjn/cj_add_revision_and_trust_remote_code

Add revision and trust_remote_code to from_pretrained calls
michaelfeil authored May 19, 2024
2 parents 7013d3d + 98fb360 commit 458e404
Showing 5 changed files with 28 additions and 3 deletions.
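
The change follows one pattern across all five files: every `from_pretrained` call now forwards `engine_args.revision` and `engine_args.trust_remote_code`, so the tokenizer, config, and model weights are resolved from the same pinned revision and remote code execution stays an explicit opt-in. A minimal sketch of that pattern, assuming a simplified stand-in for infinity_emb's `EngineArgs` (field defaults here are illustrative, not the library's):

```python
from dataclasses import dataclass
from typing import Optional

from transformers import AutoConfig, AutoTokenizer


@dataclass
class EngineArgs:  # simplified stand-in for infinity_emb's EngineArgs
    model_name_or_path: str
    revision: Optional[str] = None   # branch, tag, or commit hash to pin
    trust_remote_code: bool = False  # opt in to running repo-provided code


def load_tokenizer_and_config(engine_args: EngineArgs):
    # Both kwargs are forwarded on every call so all artifacts come from
    # the same revision of the Hub repository.
    tokenizer = AutoTokenizer.from_pretrained(
        engine_args.model_name_or_path,
        revision=engine_args.revision,
        trust_remote_code=engine_args.trust_remote_code,
    )
    config = AutoConfig.from_pretrained(
        engine_args.model_name_or_path,
        revision=engine_args.revision,
        trust_remote_code=engine_args.trust_remote_code,
    )
    return tokenizer, config
```
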
@@ -35,6 +35,7 @@ def __init__(

self._infinity_tokenizer = AutoTokenizer.from_pretrained(
engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
)

@@ -40,14 +40,18 @@ def __init__(self, *, engine_args: EngineArgs):
file_name=onnx_file.as_posix(),
optimize_model=not os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False),
model_class=ORTModelForSequenceClassification,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
)
self.model.use_io_binding = False
self.tokenizer = AutoTokenizer.from_pretrained(
engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
)
self.config = AutoConfig.from_pretrained(
engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
)
self._infinity_tokenizer = copy.deepcopy(self.tokenizer)
14 changes: 12 additions & 2 deletions libs/infinity_emb/infinity_emb/transformer/embedder/neuron.py
@@ -82,8 +82,16 @@ def __init__(self, *, engine_args: EngineArgs):
else cls_token_pooling
)

self.tokenizer = AutoTokenizer.from_pretrained(engine_args.model_name_or_path)
self.config = AutoConfig.from_pretrained(engine_args.model_name_or_path)
self.tokenizer = AutoTokenizer.from_pretrained(
engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
)
self.config = AutoConfig.from_pretrained(
engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
)
self._infinity_tokenizer = copy.deepcopy(self.tokenizer)

compiler_args = {"num_cores": get_nc_count(), "auto_cast_type": "fp16"}
@@ -97,6 +105,8 @@ def __init__(self, *, engine_args: EngineArgs):
}
self.model = NeuronModelForFeatureExtraction.from_pretrained(
model_id=engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
export=True,
**compiler_args,
**input_shapes,
@@ -50,6 +50,8 @@ def __init__(self, *, engine_args: EngineArgs):

self.model = optimize_model(
model_name_or_path=engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
execution_provider=provider,
file_name=onnx_file.as_posix(),
optimize_model=not os.environ.get("INFINITY_ONNX_DISABLE_OPTIMIZE", False),
@@ -59,10 +61,12 @@ def __init__(self, *, engine_args: EngineArgs):

self.tokenizer = AutoTokenizer.from_pretrained(
engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
)
self.config = AutoConfig.from_pretrained(
engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
)
self._infinity_tokenizer = copy.deepcopy(self.tokenizer)
8 changes: 7 additions & 1 deletion libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
@@ -70,6 +70,7 @@ def optimize_model(
file_name: str,
optimize_model=False,
revision: Optional[str] = None,
trust_remote_code: bool = True,
):
CHECK_ONNXRUNTIME.mark_required()
path_folder = (
@@ -82,6 +83,7 @@
return model_class.from_pretrained(
model_name_or_path,
revision=revision,
trust_remote_code=trust_remote_code,
provider=execution_provider,
file_name=file_name,
provider_options={
@@ -100,15 +102,17 @@ def optimize_model(
return model_class.from_pretrained(
file_optimized.parent.as_posix(),
revision=revision,
trust_remote_code=trust_remote_code,
provider=execution_provider,
file_name=file_optimized.name,
)

unoptimized_model_path = model_class.from_pretrained(
model_name_or_path,
revision=revision,
trust_remote_code=trust_remote_code,
provider=execution_provider,
file_name=file_name,
revision=revision,
)
if not optimize_model or execution_provider == "TensorrtExecutionProvider":
return unoptimized_model_path
@@ -137,6 +141,8 @@ def optimize_model(

model = model_class.from_pretrained(
optimized_model_path,
revision=revision,
trust_remote_code=trust_remote_code,
provider=execution_provider,
file_name=Path(file_name).name.replace(".onnx", "_optimized.onnx"),
)
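
For the shared ONNX helper, `optimize_model()` itself now accepts `revision` and `trust_remote_code` and forwards them to each `model_class.from_pretrained(...)` it performs. A hypothetical call site mirroring the call sites changed above; the model name, provider, and file name are placeholders for illustration and not part of this commit:

```python
from optimum.onnxruntime import ORTModelForFeatureExtraction

from infinity_emb.transformer.utils_optimum import optimize_model

# Placeholder arguments; the real call sites build these from EngineArgs
# and the resolved ONNX file path.
model = optimize_model(
    model_name_or_path="BAAI/bge-small-en-v1.5",
    revision="main",                    # pin a branch, tag, or commit hash
    trust_remote_code=False,            # only enable for repositories you trust
    execution_provider="CPUExecutionProvider",
    file_name="model.onnx",
    optimize_model=False,               # skip the extra ONNX optimization pass
    model_class=ORTModelForFeatureExtraction,
)
```
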
