Commit bc61f1f
fix bettertransformer
michaelfeil committed Mar 14, 2024
1 parent aad32ca commit bc61f1f
Showing 6 changed files with 32 additions and 38 deletions.
1 change: 0 additions & 1 deletion libs/infinity_emb/infinity_emb/_optional_imports.py
@@ -25,7 +25,6 @@ def is_available(self) -> bool:
         lib = self.lib.split(".")
         for i in range(len(lib)):
             module = ".".join(lib[: i + 1])
-            print("checking", module)
             if importlib.util.find_spec(module) is None:
                 return False
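
The check above walks every dotted prefix of a module path with importlib.util.find_spec, which avoids raising when a parent package is missing. A standalone sketch of that pattern (standard library only; the module names in the example calls are arbitrary):

    import importlib.util

    def is_available(lib: str) -> bool:
        # Check "a", then "a.b", then "a.b.c": probing prefixes first means
        # find_spec never sees a submodule of an absent parent package.
        parts = lib.split(".")
        for i in range(len(parts)):
            if importlib.util.find_spec(".".join(parts[: i + 1])) is None:
                return False
        return True

    print(is_available("importlib.util"))          # True
    print(is_available("no_such_package.module"))  # False
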
23 changes: 10 additions & 13 deletions libs/infinity_emb/infinity_emb/transformer/acceleration.py
@@ -6,23 +6,20 @@
     from optimum.bettertransformer import BetterTransformer  # type: ignore
 
 
-def to_bettertransformer(model, logger, force_usage=False):
-    if not force_usage:
-        logger.info("No optimizations via Huggingface optimum. ")
-        return model
+def to_bettertransformer(model, logger):
+    if os.environ.get("INFINITY_DISABLE_OPTIMUM", False):
+        logger.info(
+            "No optimizations via Huggingface optimum,"
+            " it is disabled via env INFINITY_DISABLE_OPTIMUM "
+        )
+        return model
-    if CHECK_OPTIMUM.mark_required():
-        logger.info("Adding optimizations via Huggingface optimum. ")
-        try:
-            model = BetterTransformer.transform(model)
-        except Exception as ex:
-            logger.exception(
-                f"BetterTransformer is not available for model. {ex}."
-                " Continue without bettertransformer modeling code."
-            )
+    CHECK_OPTIMUM.mark_required()
+    logger.info("Adding optimizations via Huggingface optimum. ")
+    try:
+        model = BetterTransformer.transform(model)
+    except Exception as ex:
+        logger.exception(
+            f"BetterTransformer is not available for model. {ex}."
+            " Continue without bettertransformer modeling code."
+        )
     return model
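
The only switch in the rewritten helper is the INFINITY_DISABLE_OPTIMUM environment variable. A minimal standalone sketch of that gate (standard library only; the real function additionally calls BetterTransformer.transform on the model and falls back on failure):

    import os

    def optimum_disabled() -> bool:
        # Mirrors os.environ.get("INFINITY_DISABLE_OPTIMUM", False): any non-empty
        # string counts as "disable", so unset the variable to re-enable optimum.
        return bool(os.environ.get("INFINITY_DISABLE_OPTIMUM", False))

    os.environ["INFINITY_DISABLE_OPTIMUM"] = "1"
    assert optimum_disabled()

    del os.environ["INFINITY_DISABLE_OPTIMUM"]
    assert not optimum_disabled()
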
13 changes: 6 additions & 7 deletions libs/infinity_emb/infinity_emb/transformer/classifier/torch.py
@@ -29,13 +29,12 @@ def __init__(
         if self._pipe.device.type != "cpu":  # and engine_args.dtype == "float16":
             self._pipe.model = self._pipe.model.half()
 
-        self._pipe.model = to_bettertransformer(
-            self._pipe.model,
-            logger,
-            force_usage=(
-                engine_args.device == Device.mps and not engine_args.bettertransformer
-            ),
-        )
+        if not (engine_args.device == Device.mps or not engine_args.bettertransformer):
+            self._pipe.model = to_bettertransformer(
+                self._pipe.model,
+                logger,
+
+            )
 
         self._infinity_tokenizer = AutoTokenizer.from_pretrained(
             engine_args.model_name_or_path,
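
The same guard replaces the old force_usage argument here and at the cross-encoder and embedder call sites below. A plain-Python truth-table check (no infinity_emb imports) that the new condition means "BetterTransformer requested and not running on MPS":

    for device_is_mps in (True, False):
        for bettertransformer_requested in (True, False):
            new_guard = not (device_is_mps or not bettertransformer_requested)
            assert new_guard == (bettertransformer_requested and not device_is_mps)
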
11 changes: 5 additions & 6 deletions libs/infinity_emb/infinity_emb/transformer/crossencoder/torch.py
@@ -45,12 +45,11 @@ def __init__(self, *, engine_args: EngineArgs):
         self._infinity_tokenizer = copy.deepcopy(self.tokenizer)
         self.model.eval()  # type: ignore
 
-        self.model = to_bettertransformer(
-            self.model,  # type: ignore
-            logger,
-            force_usage=self._target_device.type == "mps"
-            and not engine_args.bettertransformer,
-        )
+        if not (self._target_device.type == "mps" or not engine_args.bettertransformer):
+            self.model = to_bettertransformer(
+                self.model,  # type: ignore
+                logger,
+            )
 
         if self._target_device.type == "cuda" and engine_args.dtype in [
             Dtype.auto,
20 changes: 9 additions & 11 deletions

@@ -11,7 +11,7 @@
 )
 from infinity_emb.args import EngineArgs
 from infinity_emb.log_handler import logger
-from infinity_emb.primitives import Dtype, EmbeddingReturnType
+from infinity_emb.primitives import Dtype, EmbeddingReturnType, Device
 from infinity_emb.transformer.abstract import BaseEmbedder
 from infinity_emb.transformer.acceleration import to_bettertransformer
 from infinity_emb.transformer.quantization.interface import quant_interface
@@ -33,7 +33,7 @@ class SentenceTransformer:  # type: ignore
     import torch._dynamo.config
     import torch._inductor.config
 
-    torch._inductor.config.coordinate_descent_tuning = True
+    # torch._inductor.config.coordinate_descent_tuning = True
     torch._inductor.config.triton.unique_kernel_names = True
     torch._inductor.config.fx_graph_cache = True
 
@@ -58,13 +58,11 @@ def __init__(self, *, engine_args=EngineArgs):
         self._infinity_tokenizer = copy.deepcopy(fm.tokenizer)
         self.eval()
 
-        fm.auto_model = to_bettertransformer(
-            fm.auto_model,
-            logger,
-            force_usage=(
-                engine_args.device == "mps" and not engine_args.bettertransformer
-            ),
-        )
+        if not (self.device.type == "mps" or not engine_args.bettertransformer):
+            fm.auto_model = to_bettertransformer(
+                fm.auto_model,
+                logger,
+            )
 
         if self.device.type == "cuda" and engine_args.dtype in [
             Dtype.auto,
@@ -75,12 +73,12 @@ def __init__(self, *, engine_args=EngineArgs):
 
         if engine_args.dtype in (Dtype.int8,):
             fm.auto_model = quant_interface(
-                fm.auto_model, engine_args.dtype, device=engine_args.device
+                fm.auto_model, engine_args.dtype, device=Device[self.device.type]
             )
 
         if engine_args.compile:
             logger.info("using torch.compile()")
-            fm.auto_model = torch.compile(fm.auto_model, dynamic=True, fullgraph=True)
+            fm.auto_model = torch.compile(fm.auto_model, dynamic=True)
 
     def encode_pre(self, sentences) -> Mapping[str, Tensor]:
         features = self.tokenize(sentences)
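
The compile call now passes only dynamic=True and drops fullgraph=True, so a graph break no longer aborts compilation. A self-contained check of that call shape (assumes torch>=2.0; the Linear module stands in for the real auto_model):

    import torch

    model = torch.nn.Linear(8, 4)
    compiled = torch.compile(model, dynamic=True)  # dynamic shapes, partial graphs allowed
    print(compiled(torch.randn(2, 8)).shape)   # torch.Size([2, 4])
    print(compiled(torch.randn(16, 8)).shape)  # torch.Size([16, 4])
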
2 changes: 2 additions & 0 deletions libs/infinity_emb/tests/unit_test/test_engine.py
@@ -122,6 +122,7 @@ async def test_async_api_optimum_crossencoder():
             revision=None,
             device="cpu",
             model_warmup=False,
+            compile=True,
         )
     )
 
@@ -170,6 +171,7 @@ async def test_async_api_torch_usage():
             device=device,
             lengths_via_tokenize=True,
             model_warmup=False,
+            compile=True,
         )
     )
     async with engine:
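
Both tests now set compile=True in their engine arguments, so the torch.compile path above is also exercised by the unit tests. A sketch of that construction (parameter names as in the diff; the model id is a placeholder and the surrounding engine setup is omitted):

    from infinity_emb.args import EngineArgs

    args = EngineArgs(
        model_name_or_path="<any-supported-model-id>",  # placeholder, not from the commit
        device="cpu",
        model_warmup=False,
        compile=True,  # newly enabled in these tests
    )
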
