Skip to content

Commit

Permalink
Support per token measurements through logits processor (#130)
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil authored Feb 20, 2024
1 parent 924a4c7 commit fba6ce2
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 97 deletions.
32 changes: 21 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# List of targets that are not associated with files
.PHONY: quality style install build_docker_cpu build_docker_cuda build_docker_rocm test_cli_cpu_neural_compressor test_cli_cpu_onnxruntime test_cli_cpu_openvino test_cli_cpu_pytorch test_cli_rocm_pytorch test_cli_cuda_pytorch test_api_cpu test_api_cuda test_api_rocm test_api_misc
.PHONY: quality style install

quality:
ruff check .
Expand All @@ -26,7 +26,7 @@ test_cli_cpu_neural_compressor:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,neural-compressor,diffusers,timm] && pytest tests/ -k 'cli and cpu and neural_compressor' -x"

Expand All @@ -35,7 +35,7 @@ test_cli_cpu_onnxruntime:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,onnxruntime,diffusers,timm] && pytest tests/ -k 'cli and cpu and onnxruntime' -x"

Expand All @@ -44,7 +44,7 @@ test_cli_cpu_openvino:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,openvino,diffusers,timm] && pytest tests/ -k 'cli and cpu and openvino' -x"

Expand All @@ -53,7 +53,7 @@ test_cli_cpu_pytorch:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,diffusers,timm] && pytest tests/ -k 'cli and cpu and pytorch' -x"

Expand All @@ -66,7 +66,7 @@ test_cli_rocm_pytorch:
--device /dev/dri/renderD129 \
--group-add video \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-rocm:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"

Expand All @@ -76,16 +76,26 @@ test_cli_cuda_pytorch:
--pid=host \
--gpus '"device=0,1"' \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cuda:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"

# Run the TensorRT-LLM CLI test suite inside the opt-bench-tensorrt-llm image:
# exposes GPUs 0 and 1, mounts the repo at /workspace, installs the package in
# editable mode, then runs pytest filtered to 'cli and tensorrt_llm'.
# NOTE(review): nvidia-ml-py is uninstalled before the tests — the reason is not
# visible in this diff (presumably it conflicts with the version shipped in the
# TRT-LLM image); confirm against the image contents.
# NOTE(review): this target is not in the trimmed .PHONY list introduced by this
# commit (now only `quality style install`) — harmless unless a file named
# `test_cli_tensorrt_llm` ever exists, but confirm the trim was intentional.
test_cli_tensorrt_llm:
docker run \
--rm \
--pid=host \
--gpus '"device=0,1"' \
--entrypoint /bin/bash \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-tensorrt-llm:latest -c "pip install -e .[testing] && pip uninstall -y nvidia-ml-py && pytest tests/ -k 'cli and tensorrt_llm' -x"

test_api_cpu:
docker run \
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cpu' -x"

Expand All @@ -95,7 +105,7 @@ test_api_cuda:
--pid=host \
--gpus '"device=0,1"' \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cuda:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"

Expand All @@ -108,7 +118,7 @@ test_api_rocm:
--device /dev/dri/renderD129 \
--group-add video \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-rocm:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"

Expand All @@ -117,6 +127,6 @@ test_api_misc:
--rm \
--pid=host \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--volume $(shell pwd):/workspace \
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and not (cpu or cuda or rocm or tensorrt)' -x"
8 changes: 5 additions & 3 deletions optimum_benchmark/backends/tensorrt_llm/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ def load_trtmodel_from_pretrained(self) -> None:

def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
return self.pretrained_model.generate(
input_ids=inputs.get("input_ids", None), attention_mask=inputs.get("attention_mask", None), max_new_tokens=1
input_ids=inputs.get("input_ids"),
attention_mask=inputs.get("attention_mask"),
max_new_tokens=1,
)

def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
return self.pretrained_model.generate(
input_ids=inputs.get("inputs", None), # diff names
attention_mask=inputs.get("attention_mask", None),
input_ids=inputs.get("input_ids"),
attention_mask=inputs.get("attention_mask"),
# important for benchmarking
max_new_tokens=kwargs.get("max_new_tokens", -1),
min_length=kwargs.get("min_new_tokens", -1), # why different ?
Expand Down
Loading

0 comments on commit fba6ce2

Please sign in to comment.