model-analyzer profile \
--model-repository /workspace/inference/model-analyzer \
--profile-models text-reg-batch \
--triton-launch-mode=remote \
--output-model-repository-path /workspace/inference/model-analyzer/text-reg-batch/output \
--export-path /workspace/inference/model-analyzer/text-reg-batch/profile_results \
--override-output-model-repository \
--triton-grpc-endpoint localhost:8001 \
--triton-http-endpoint localhost:8000
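
In remote launch mode the server must already be running, so it can help to confirm the HTTP endpoint answers before profiling; a minimal check, assuming the default port mapping used in the docker run command further below:

curl -s -o /dev/null -w "%{http_code}\n" localhost:8000/v2/health/ready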
model-analyzer profile \
--model-repository /workspace/inference/model-analyzer \
--profile-models text-recognition \
--triton-launch-mode=remote \
--output-model-repository-path /workspace/inference/model-analyzer/text-recognition/output \
--export-path /workspace/inference/model-analyzer/text-recognition/profile_results \
--run-config-search-mode quick \
--checkpoint-directory=/workspace/inference/model-analyzer/text-recognition/checkpoints \
--override-output-model-repository \
--run-config-search-max-concurrency 2 \
--run-config-search-max-model-batch-size 2 \
--run-config-search-max-instance-count 2
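
After the profile run finishes, detailed reports for individual generated configurations can be produced with the report subcommand. A sketch, assuming the same checkpoint and export directories; the config variant name text-recognition_config_default is illustrative, so substitute the names listed in the profiling summary:

model-analyzer report \
--report-model-configs text-recognition_config_default \
--export-path /workspace/inference/model-analyzer/text-recognition/profile_results \
--checkpoint-directory=/workspace/inference/model-analyzer/text-recognition/checkpoints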
perf_analyzer -m text-reg-batch -b 2 --shape input.1:1,32,100 --concurrency-range 2:16:2 --percentile=95
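
To keep the measurements for later comparison, the same run can write a latency report file with -f; the file name perf_results.csv is arbitrary:

perf_analyzer -m text-reg-batch -b 2 --shape input.1:1,32,100 --concurrency-range 2:16:2 --percentile=95 -f perf_results.csv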
docker run \
--gpus=all -it \
--shm-size=256m \
--rm -p8000:8000 -p8001:8001 -p8002:8002 \
-v /home/dev/triton:/workspace \
-v /home/dev/triton/model-repository:/models \
nvcr.io/nvidia/tritonserver:24.06-py3 \
tritonserver --model-repository=/models --model-control-mode=explicit --load-model=*
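
The command above assumes /home/dev/triton/model-repository follows Triton's standard repository layout, roughly as sketched below; the model file names and formats (model.onnx here) are illustrative and depend on the backend:

model-repository/
├── text-reg-batch/
│   ├── config.pbtxt
│   └── 1/
│       └── model.onnx
└── text-recognition/
    ├── config.pbtxt
    └── 1/
        └── model.onnx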
curl -X POST localhost:8000/v2/repository/models/text-reg-batch/load
curl -X POST localhost:8000/v2/repository/models/text-reg-batch/unload
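
To confirm which models are currently loaded, the repository index and per-model readiness endpoints can be queried on the same HTTP port:

curl -X POST localhost:8000/v2/repository/index
curl localhost:8000/v2/models/text-reg-batch/ready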