diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 05f9b29b71..2dba63eee3 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -11,7 +11,7 @@ on: jobs: build: runs-on: ubuntu-latest - name: Test changed-files + name: Lint changed files steps: - uses: actions/checkout@v3 with: @@ -20,18 +20,24 @@ jobs: - name: Install lint utilities run: | pip install pre-commit - pre-commit install + pre-commit install - - name: Get specific changed files - id: changed-files-specific - uses: tj-actions/changed-files@v20.1 + - name: Check links in all markdown files + uses: gaurav-nelson/github-action-markdown-link-check@1.0.13 + with: + use-verbose-mode: 'yes' + config-file: "ts_scripts/markdown_link_check_config.json" + + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v29.0.4 with: files: | **/*.py - name: Lint all changed files run: | - for file in ${{ steps.changed-files-specific.outputs.all_changed_files }}; do + for file in ${{ steps.changed-files.outputs.all_changed_files }}; do pre-commit run --files $file done @@ -43,3 +49,33 @@ jobs: echo "cd serve/" echo "pre-commit install" echo "pre-commit will lint your code for you, so git add and commit those new changes and this check should become green" + spellcheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install dependencies + run: | + sudo apt-get install aspell aspell-en + pip install pyspelling + + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v29.0.4 + with: + files: | + **/*.md + + - name: Check spellings + run: | + sources="" + for file in ${{ steps.changed-files.outputs.all_changed_files }}; do + sources+=" -S $file" + done + pyspelling -c $GITHUB_WORKSPACE/ts_scripts/spellcheck_conf/spellcheck.yaml --name Markdown $sources + + - name: In the case of misspellings + if: ${{ failure() }} + run: | + echo "Please fix the misspellings. 
If you are sure about some of them, " + echo "so append those to ts_scripts/spellcheck_conf/wordlist.txt" diff --git a/.gitignore b/.gitignore index c074ac4af2..d7548217d2 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ plugins/.gradle *.pem *.backup docs/sphinx/src/ +ts_scripts/spellcheck_conf/wordlist.dic # Postman files test/artifacts/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 97f6cd6131..a58827ad99 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -35,7 +35,7 @@ Your contributions will fall into two categories: - For running individual test suites refer [code_coverage](docs/code_coverage.md) documentation - If you are updating an existing model make sure that performance hasn't degraded by typing running [benchmarks](https://github.com/pytorch/serve/tree/master/benchmarks) on the master branch and your branch and verify there is no performance regression - Run `ts_scripts/spellcheck.sh` to fix any typos in your documentation - - For large changes make sure to run the [automated benchmark suite](https://github.com/pytorch/serve/tree/master/test/benchmark) which will run the apache bench tests on several configurations of CUDA and EC2 instances + - For large changes make sure to run the [automated benchmark suite](https://github.com/pytorch/serve/tree/master/benchmarks) which will run the apache bench tests on several configurations of CUDA and EC2 instances - If you need more context on a particular issue, please create raise a ticket on [`TorchServe` GH repo](https://github.com/pytorch/serve/issues/new/choose) or connect to [PyTorch's slack channel](https://pytorch.slack.com/) Once you finish implementing a feature or bug-fix, please send a Pull Request to https://github.com/pytorch/serve. diff --git a/README.md b/README.md index 636f9ca783..4279ac4dd1 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ Refer to [torchserve docker](docker/README.md) for details. 
## šŸ† Highlighted Examples * [šŸ¤— HuggingFace Transformers](examples/Huggingface_Transformers) -* [Model parallel inference](examples/Huggingface_Transformers#model-paralellism) +* [Model parallel inference](examples/Huggingface_Transformers#model-parallelism) * [MultiModal models with MMF](https://github.com/pytorch/serve/tree/master/examples/MMF-activity-recognition) combining text, audio and video * [Dual Neural Machine Translation](examples/Workflows/nmt_transformers_pipeline) for a complex workflow DAG @@ -96,7 +96,7 @@ To learn more about how to contribute, see the contributor guide [here](https:// * [Optimize your inference jobs using dynamic batch inference with TorchServe on Amazon SageMaker](https://aws.amazon.com/blogs/machine-learning/optimize-your-inference-jobs-using-dynamic-batch-inference-with-torchserve-on-amazon-sagemaker/) * [Using AI to bring children's drawings to life](https://ai.facebook.com/blog/using-ai-to-bring-childrens-drawings-to-life/) * [šŸŽ„ Model Serving in PyTorch](https://www.youtube.com/watch?v=2A17ZtycsPw) -* [Evolution of Crestaā€™s machine learning architecture: Migration to AWS and PyTorch](https://aws.amazon.com/blogs/machine-learning/evolution-of-crestas-machine-learning-architecture-migration-to-aws-and-pytorch/) +* [Evolution of Cresta's machine learning architecture: Migration to AWS and PyTorch](https://aws.amazon.com/blogs/machine-learning/evolution-of-crestas-machine-learning-architecture-migration-to-aws-and-pytorch/) * [šŸŽ„ Explain Like Iā€™m 5: TorchServe](https://www.youtube.com/watch?v=NEdZbkfHQCk) * [šŸŽ„ How to Serve PyTorch Models with TorchServe](https://www.youtube.com/watch?v=XlO7iQMV3Ik) * [How to deploy PyTorch models on Vertex AI](https://cloud.google.com/blog/topics/developers-practitioners/pytorch-google-cloud-how-deploy-pytorch-models-vertex-ai) diff --git a/benchmarks/README.md b/benchmarks/README.md index 3cf5b14859..b935844de6 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -5,7 +5,7 @@ The benchmarks measure the performance of TorchServe on various models and bench We currently support benchmarking with JMeter & Apache Bench. One can also profile backend code with snakeviz. * [Benchmarking with Apache Bench](#benchmarking-with-apache-bench) -* [Auto Benchmarking with Apache Bench](#Auto-Benchmarking-with-Apache-Bench) +* [Auto Benchmarking with Apache Bench](#auto-benchmarking-with-apache-bench) * [Benchmarking and Profiling with JMeter](jmeter.md) # Benchmarking with Apache Bench @@ -32,7 +32,7 @@ Apache Bench is available on Mac by default. You can test by running ```ab -h``` * Windows - Download apache binaries from [Apache Lounge](https://www.apachelounge.com/download/) - - Extract and place the contents at some location eg: `C:\Program Files\` + - Extract and place the contents at some location e.g.: `C:\Program Files\` - Add this path `C:\Program Files\Apache24\bin`to the environment variable PATH. NOTE - You may need to install Visual C++ Redistributable for Visual Studio 2015-2019. @@ -156,7 +156,7 @@ The reports are generated at location "/tmp/benchmark/" ![](predict_latency.png) # Auto Benchmarking with Apache Bench -`auto_benchmark.py` runs Apache Bench on a set of models and generates an easy to read `report.md` once [Apach bench installation](https://github.com/pytorch/serve/tree/master/benchmarks#installation-1) is done. 
+`auto_benchmark.py` runs Apache Bench on a set of models and generates an easy to read `report.md` once [Apache bench installation](https://github.com/pytorch/serve/tree/master/benchmarks#installation-1) is done.

## How does the auto benchmark script work?
Auto Benchmarking is tool to allow users to run multiple test cases together and generates final report. Internally, the workflow is:
@@ -214,6 +214,6 @@ If you need to run your benchmarks on a specific cloud or hardware infrastructur
The high level approach
1. Create a cloud instance in your favorite cloud provider
2. Configure it so it can talk to github actions by running some shell commands listed here https://docs.github.com/en/actions/hosting-your-own-runners/adding-self-hosted-runners
-3. Tag your instances in https://github.com/pytorch/serve/settings/actions/runners
+3. Tag your instances in the runners tab on GitHub
3. In the `.yml` make sure to use `runs-on [self-hosted, your_tag]`
4. Inspect the results in https://github.com/pytorch/serve/actions and download the artifacts for further analysis
\ No newline at end of file
diff --git a/benchmarks/add_jmeter_test.md b/benchmarks/add_jmeter_test.md
index f51aaf0d95..4f8e790909 100644
--- a/benchmarks/add_jmeter_test.md
+++ b/benchmarks/add_jmeter_test.md
@@ -4,7 +4,7 @@ A new Jmeter test plan for torchserve benchmark can be added as follows:
* Assuming you know how to create a jmeter test plan. If not then please use this jmeter [guide](https://jmeter.apache.org/usermanual/build-test-plan.html)
* Here, we will show you how 'MMS Benchmarking Image Input Model Test Plan' plan can be added.

-This test plan doesn following:
+This test plan does the following:

* Register a model - `default is resnet-18`
* Scale up to add workers for inference
@@ -40,7 +40,7 @@ e.g. 
on macOS, type `jmeter` on commandline ![](img/inference.png) * Right Click on test plan to add `tearDown Thread Group` and configured required details indicated in the following screenshot -![](img/teardn-tg.png) +![](img/teardown-tg.png) * Right Click on `tearDown Thread Group` to add `HTTP Request` and configure `unregister` request per given screenshot ![](img/unregister.png) diff --git a/benchmarks/img/teardn-tg.png b/benchmarks/img/teardown-tg.png similarity index 100% rename from benchmarks/img/teardn-tg.png rename to benchmarks/img/teardown-tg.png diff --git a/benchmarks/sample_report.md b/benchmarks/sample_report.md index c195ed4fdc..9899f030ed 100644 --- a/benchmarks/sample_report.md +++ b/benchmarks/sample_report.md @@ -10,29 +10,29 @@ TorchServe Benchmark on gpu |version|Benchmark|Batch size|Batch delay|Workers|Model|Concurrency|Requests|TS failed requests|TS throughput|TS latency P50|TS latency P90|TS latency P99|TS latency mean|TS error rate|Model_p50|Model_p90|Model_p99|predict_mean|handler_time_mean|waiting_time_mean|worker_thread_mean|cpu_percentage_mean|memory_percentage_mean|gpu_percentage_mean|gpu_memory_percentage_mean|gpu_memory_used_mean| | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | -|master|AB|1|100|4|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|[input](10000)|0|2345.99|4|5|6|4.263|0.0|1.04|1.15|1.53|1.06|1.02|1.93|0.28|0.0|0.0|0.0|0.0|0.0| -|master|AB|2|100|4|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|[input](10000)|0|3261.31|3|4|5|3.066|0.0|1.36|1.91|2.18|1.45|1.41|0.17|0.44|0.0|0.0|0.0|0.0|0.0| -|master|AB|4|100|4|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|[input](10000)|0|2457.64|4|6|7|4.069|0.0|1.89|2.2|2.96|1.97|1.94|0.53|0.59|0.0|0.0|0.0|0.0|0.0| -|master|AB|8|100|4|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|[input](10000)|0|1640.2|5|9|11|6.097|0.0|2.95|3.15|3.43|3.0|2.96|1.06|0.8|0.0|0.0|0.0|0.0|0.0| -|master|AB|1|100|8|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|[input](10000)|0|3444.57|3|3|4|2.903|0.0|1.32|1.68|1.87|1.37|1.34|0.08|0.46|0.0|0.0|0.0|0.0|0.0| -|master|AB|2|100|8|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|[input](10000)|0|3275.88|3|4|5|3.053|0.0|1.61|2.23|2.51|1.72|1.68|0.01|0.55|0.0|0.0|0.0|0.0|0.0| -|master|AB|4|100|8|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|[input](10000)|0|2346.15|4|6|8|4.262|0.0|2.01|2.42|3.19|2.1|2.06|0.57|0.57|0.0|0.0|0.0|0.0|0.0| -|master|AB|8|100|8|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|[input](10000)|0|1572.82|5|9|12|6.358|0.0|3.09|3.39|4.7|3.15|3.11|1.1|0.82|0.0|0.0|0.0|0.0|0.0| +|master|AB|1|100|4|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|10000|0|2345.99|4|5|6|4.263|0.0|1.04|1.15|1.53|1.06|1.02|1.93|0.28|0.0|0.0|0.0|0.0|0.0| +|master|AB|2|100|4|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|10000|0|3261.31|3|4|5|3.066|0.0|1.36|1.91|2.18|1.45|1.41|0.17|0.44|0.0|0.0|0.0|0.0|0.0| +|master|AB|4|100|4|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|10000|0|2457.64|4|6|7|4.069|0.0|1.89|2.2|2.96|1.97|1.94|0.53|0.59|0.0|0.0|0.0|0.0|0.0| +|master|AB|8|100|4|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|10000|0|1640.2|5|9|11|6.097|0.0|2.95|3.15|3.43|3.0|2.96|1.06|0.8|0.0|0.0|0.0|0.0|0.0| 
+|master|AB|1|100|8|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|10000|0|3444.57|3|3|4|2.903|0.0|1.32|1.68|1.87|1.37|1.34|0.08|0.46|0.0|0.0|0.0|0.0|0.0| +|master|AB|2|100|8|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|10000|0|3275.88|3|4|5|3.053|0.0|1.61|2.23|2.51|1.72|1.68|0.01|0.55|0.0|0.0|0.0|0.0|0.0| +|master|AB|4|100|8|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|10000|0|2346.15|4|6|8|4.262|0.0|2.01|2.42|3.19|2.1|2.06|0.57|0.57|0.0|0.0|0.0|0.0|0.0| +|master|AB|8|100|8|[.mar](https://torchserve.pytorch.org/mar_files/mnist_v2.mar)|10|10000|0|1572.82|5|9|12|6.358|0.0|3.09|3.39|4.7|3.15|3.11|1.1|0.82|0.0|0.0|0.0|0.0|0.0| ## eager_mode_vgg16 |version|Benchmark|Batch size|Batch delay|Workers|Model|Concurrency|Requests|TS failed requests|TS throughput|TS latency P50|TS latency P90|TS latency P99|TS latency mean|TS error rate|Model_p50|Model_p90|Model_p99|predict_mean|handler_time_mean|waiting_time_mean|worker_thread_mean|cpu_percentage_mean|memory_percentage_mean|gpu_percentage_mean|gpu_memory_percentage_mean|gpu_memory_used_mean| | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | -|master|AB|1|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16.mar)|100|[input](10000)|0|277.64|353|384|478|360.178|0.0|13.27|14.49|18.55|13.61|13.57|343.11|0.35|69.2|11.3|22.25|12.4|2004.0| -|master|AB|2|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16.mar)|100|[input](10000)|0|284.7|344|377|462|351.248|0.0|25.69|29.79|49.7|26.86|26.82|320.57|0.84|33.3|11.29|16.25|12.39|2002.0| -|master|AB|4|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16.mar)|100|[input](10000)|0|298.66|331|355|386|334.831|0.0|50.61|54.65|72.63|51.69|51.64|278.95|1.33|66.7|11.63|16.0|12.81|2070.0| -|master|AB|8|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16.mar)|100|[input](10000)|0|302.97|321|367|401|330.066|0.0|100.17|108.43|134.97|102.03|101.97|222.5|2.62|0.0|12.1|15.25|13.4|2166.0| +|master|AB|1|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16.mar)|100|10000|0|277.64|353|384|478|360.178|0.0|13.27|14.49|18.55|13.61|13.57|343.11|0.35|69.2|11.3|22.25|12.4|2004.0| +|master|AB|2|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16.mar)|100|10000|0|284.7|344|377|462|351.248|0.0|25.69|29.79|49.7|26.86|26.82|320.57|0.84|33.3|11.29|16.25|12.39|2002.0| +|master|AB|4|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16.mar)|100|10000|0|298.66|331|355|386|334.831|0.0|50.61|54.65|72.63|51.69|51.64|278.95|1.33|66.7|11.63|16.0|12.81|2070.0| +|master|AB|8|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16.mar)|100|10000|0|302.97|321|367|401|330.066|0.0|100.17|108.43|134.97|102.03|101.97|222.5|2.62|0.0|12.1|15.25|13.4|2166.0| ## scripted_mode_vgg16 |version|Benchmark|Batch size|Batch delay|Workers|Model|Concurrency|Requests|TS failed requests|TS throughput|TS latency P50|TS latency P90|TS latency P99|TS latency mean|TS error rate|Model_p50|Model_p90|Model_p99|predict_mean|handler_time_mean|waiting_time_mean|worker_thread_mean|cpu_percentage_mean|memory_percentage_mean|gpu_percentage_mean|gpu_memory_percentage_mean|gpu_memory_used_mean| | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 
-|master|AB|1|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16_scripted.mar)|100|[input](10000)|0|282.06|351|368|430|354.53|0.0|13.18|13.91|18.68|13.41|13.37|337.73|0.33|80.0|11.32|23.25|12.4|2004.0| -|master|AB|2|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16_scripted.mar)|100|[input](10000)|0|288.03|345|363|406|347.18|0.0|25.68|29.08|40.61|26.53|26.49|316.93|0.83|37.5|11.31|16.5|12.39|2002.0| -|master|AB|4|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16_scripted.mar)|100|[input](10000)|0|296.25|332|356|447|337.552|0.0|50.72|55.09|84.0|52.09|52.04|281.21|1.34|0.0|11.63|16.0|12.81|2070.0| -|master|AB|8|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16_scripted.mar)|100|[input](10000)|0|301.07|324|367|407|332.147|0.0|100.49|109.71|136.18|102.69|102.63|223.7|2.59|0.0|0.0|0.0|0.0|0.0| +|master|AB|1|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16_scripted.mar)|100|10000|0|282.06|351|368|430|354.53|0.0|13.18|13.91|18.68|13.41|13.37|337.73|0.33|80.0|11.32|23.25|12.4|2004.0| +|master|AB|2|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16_scripted.mar)|100|10000|0|288.03|345|363|406|347.18|0.0|25.68|29.08|40.61|26.53|26.49|316.93|0.83|37.5|11.31|16.5|12.39|2002.0| +|master|AB|4|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16_scripted.mar)|100|10000|0|296.25|332|356|447|337.552|0.0|50.72|55.09|84.0|52.09|52.04|281.21|1.34|0.0|11.63|16.0|12.81|2070.0| +|master|AB|8|100|4|[.mar](https://torchserve.pytorch.org/mar_files/vgg16_scripted.mar)|100|10000|0|301.07|324|367|407|332.147|0.0|100.49|109.71|136.18|102.69|102.63|223.7|2.59|0.0|0.0|0.0|0.0|0.0| diff --git a/binaries/README.md b/binaries/README.md index 1b5cde4994..f1ab370d99 100644 --- a/binaries/README.md +++ b/binaries/README.md @@ -1,6 +1,6 @@ # Building TorchServe and Torch-Model-Archiver release binaries 1. Make sure all the dependencies are installed - ##### Linux and MacOs: + ##### Linux and macOS: ```bash python ts_scripts/install_dependencies.py --environment=dev ``` @@ -13,7 +13,7 @@ 2. To build a `torchserve` and `torch-model-archiver` wheel execute: - ##### Linux and MacOs: + ##### Linux and macOS: ```bash python binaries/build.py ``` @@ -26,7 +26,7 @@ > For additional info on conda builds refer to [this readme](conda/README.md) 3. Build outputs are located at - ##### Linux and MacOs: + ##### Linux and macOS: - Wheel files `dist/torchserve-*.whl` `model-archiver/dist/torch_model_archiver-*.whl` @@ -44,7 +44,7 @@ # Install torchserve and torch-model-archiver binaries 1. To install torchserve using the newly created binaries execute: - ##### Linux and MacOs: + ##### Linux and macOS: ```bash python binaries/install.py ``` @@ -56,7 +56,7 @@ ``` 2. Alternatively, you can manually install binaries - Using wheel files - ##### Linux and MacOs: + ##### Linux and macOS: ```bash pip install dist/torchserve-*.whl pip install model-archiver/dist/torch_model_archiver-*.whl @@ -70,7 +70,7 @@ pip install .\workflow-archiver\dist\ ``` - Using conda packages - ##### Linux and MacOs: + ##### Linux and macOS: ```bash conda install --channel ./binaries/conda/output -y torchserve torch-model-archiver torch-workflow-archiver ``` @@ -80,7 +80,7 @@ # Building TorchServe, Torch-Model-Archiver & Torch-WorkFlow-Archiver nightly binaries 1. Make sure all the dependencies are installed - ##### Linux and MacOs: + ##### Linux and macOS: ```bash python ts_scripts/install_dependencies.py --environment=dev ``` @@ -93,7 +93,7 @@ 2. 
To build a `torchserve`, `torch-model-archiver` & `torch-workflow-archiver` nightly wheel execute: - ##### Linux and MacOs: + ##### Linux and macOS: ```bash python binaries/build.py --nightly ``` @@ -106,7 +106,7 @@ > For additional info on conda builds refer to [this readme](conda/README.md) 3. Build outputs are located at - ##### Linux and MacOs: + ##### Linux and macOS: - Wheel files `dist/torchserve-*.whl` `model-archiver/dist/torch_model_archiver-*.whl` diff --git a/docker/README.md b/docker/README.md index f316457cf1..51e1e783ca 100644 --- a/docker/README.md +++ b/docker/README.md @@ -29,7 +29,7 @@ cd serve/docker # Create TorchServe docker image Use `build_image.sh` script to build the docker images. The script builds the `production`, `dev` and `codebuild` docker images. -| Parameter | Desciption | +| Parameter | Description | |------|------| |-h, --help|Show script help| |-b, --branch_name|Specify a branch name to use. Default: master | @@ -271,7 +271,7 @@ torch-model-archiver --model-name densenet161 --version 1.0 --model-file /home/m Refer [torch-model-archiver](../model-archiver/README.md) for details. -6. desnet161.mar file should be present at /home/model-server/model-store +6. densenet161.mar file should be present at /home/model-server/model-store # Running TorchServe in a Production Docker Environment. diff --git a/docs/FAQs.md b/docs/FAQs.md index 85e6ab149e..d9631c8b7b 100644 --- a/docs/FAQs.md +++ b/docs/FAQs.md @@ -16,7 +16,7 @@ Torchserve API's are compliant with the [OpenAPI specification 3.0](https://swag ### How to use Torchserve in production? Depending on your use case, you will be able to deploy torchserve in production using following mechanisms. > Standalone deployment. Refer [TorchServe docker documentation](https://github.com/pytorch/serve/tree/master/docker#readme) or [TorchServe documentation](https://github.com/pytorch/serve/tree/master/docs#readme) -> Cloud based deployment. Refer [TorchServe kubernetes documentation](https://github.com/pytorch/serve/tree/master/kubernetes#readme) or [TorchServe cloudformation documentation](../cloudformation/README.md) +> Cloud based deployment. Refer [TorchServe kubernetes documentation](https://github.com/pytorch/serve/tree/master/kubernetes#readme) or [TorchServe cloudformation documentation](https://github.com/pytorch/serve/tree/master/examples/cloudformation/README.md) ### What's difference between Torchserve and a python web app using web frameworks like Flask, Django? @@ -59,7 +59,7 @@ Yes, you can deploy Torchserve in Kubernetes using Helm charts. Refer [Kubernetes deployment ](../kubernetes/README.md) for more details. ### Can I deploy Torchserve with AWS ELB and AWS ASG? -Yes, you can deploy Torchserve on a multi-node ASG AWS EC2 cluster. There is a cloud formation template available [here](https://github.com/pytorch/serve/blob/master/cloudformation/ec2-asg.yaml) for this type of deployment. Refer [ Multi-node EC2 deployment behind Elastic LoadBalancer (ELB)](https://github.com/pytorch/serve/tree/master/cloudformation#multi-node-ec2-deployment-behind-elastic-loadbalancer-elb) more details. +Yes, you can deploy Torchserve on a multi-node ASG AWS EC2 cluster. There is a cloud formation template available [here](https://github.com/pytorch/serve/blob/master/examples/cloudformation/ec2-asg.yaml) for this type of deployment. 
Refer [ Multi-node EC2 deployment behind Elastic LoadBalancer (ELB)](https://github.com/pytorch/serve/tree/master/examples/cloudformation/README.md#multi-node-ec2-deployment-behind-elastic-loadbalancer-elb) more details. ### How can I backup and restore Torchserve state? TorchServe preserves server runtime configuration across sessions such that a TorchServe instance experiencing either a planned or unplanned service stop can restore its state upon restart. These saved runtime configuration files can be used for backup and restore. diff --git a/docs/README.md b/docs/README.md index 748b34fd12..497ae39e1c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -16,7 +16,7 @@ TorchServe is a performant, flexible and easy to use tool for serving PyTorch ea * [Logging](logging.md) - How to configure logging * [Metrics](metrics.md) - How to configure metrics * [Prometheus and Grafana metrics](metrics_api.md) - How to configure metrics API with Prometheus formatted metrics in a Grafana dashboard -* [Captum Explanations](https://github.com/pytorch/serve/blob/master/captum/Captum_visualization_for_bert.ipynb) - Built in support for Captum explanations for both text and images +* [Captum Explanations](https://github.com/pytorch/serve/blob/master/examples/captum/Captum_visualization_for_bert.ipynb) - Built in support for Captum explanations for both text and images * [Batch inference with TorchServe](batch_inference_with_ts.md) - How to create and serve a model with batch inference in TorchServe * [Workflows](workflows.md) - How to create workflows to compose Pytorch models and Python functions in sequential and parallel pipelines @@ -33,7 +33,7 @@ TorchServe is a performant, flexible and easy to use tool for serving PyTorch ea * [HuggingFace Language Model](https://github.com/pytorch/serve/blob/master/examples/Huggingface_Transformers/Transformer_handler_generalized.py) - This handler takes an input sentence and can return sequence classifications, token classifications or Q&A answers * [Multi Modal Framework](https://github.com/pytorch/serve/blob/master/examples/MMF-activity-recognition/handler.py) - Build and deploy a classifier that combines text, audio and video input data -* [Dual Translation Workflow](https://github.com/pytorch/serve/tree/master/examples/Workflows/nmt_tranformers_pipeline) - +* [Dual Translation Workflow](https://github.com/pytorch/serve/tree/master/examples/Workflows/nmt_transformers_pipeline) - * [Model Zoo](model_zoo.md) - List of pre-trained model archives ready to be served for inference with TorchServe. * [Examples](https://github.com/pytorch/serve/tree/master/examples) - Many examples of how to package and deploy models with TorchServe * [Workflow Examples](https://github.com/pytorch/serve/tree/master/examples/Workflows) - Examples of how to compose models in a workflow with TorchServe diff --git a/docs/Troubleshooting.md b/docs/Troubleshooting.md index 5a04fd0793..d406300beb 100644 --- a/docs/Troubleshooting.md +++ b/docs/Troubleshooting.md @@ -8,7 +8,7 @@ Refer to this section for common issues faced while deploying your Pytorch model ### Deployment and config issues -#### "Failed to bind to address: http://127.0.0.1:8080", port 8080/8081 already in use. +#### "Failed to bind to address: `http://127.0.0.1:8080`", port 8080/8081 already in use. Usually, the port number 8080/8081 is already used by some other application or service, it can be verified by using cmd `ss -ntl | grep 8080`. 
There are two ways to troubleshoot this issue either kill the process which is using port 8080/8081 or run Torchserve on different ports other than 8080 & 8081. Refer [configuration.md](https://github.com/pytorch/serve/blob/master/docs/configuration.md) for more details. @@ -88,6 +88,6 @@ Refer [Allow model specific custom python packages](https://github.com/pytorch/s #### Backend worker monitoring thread interrupted or backend worker process died error. -This issue is moslty occurs when the model fails to initialize, which may be due to erroneous code in handler's initialize function. This error is also observed when there is missing package/module. +This issue mostly occurs when the model fails to initialize, which may be due to erroneous code in handler's initialize function. This error is also observed when there is missing package/module. Relevant issues: [[#667](https://github.com/pytorch/serve/issues/667), [#537](https://github.com/pytorch/serve/issues/537)] diff --git a/docs/batch_inference_with_ts.md b/docs/batch_inference_with_ts.md index 5c1d4737f9..28b12d7023 100644 --- a/docs/batch_inference_with_ts.md +++ b/docs/batch_inference_with_ts.md @@ -6,8 +6,8 @@ * [Prerequisites](#prerequisites) * [Batch Inference with TorchServe's default handlers](#batch-inference-with-torchserves-default-handlers) * [Batch Inference with TorchServe using ResNet-152 model](#batch-inference-with-torchserve-using-resnet-152-model) -* [Demo to configure TorchServe ResNet-152 model with batch-supported model](#demo-to-configure-torchServe-resNet-152-model-with-batch-supported-model) -* [Demo to configure TorchServe ResNet-152 model with batch-supported model using Docker](#demo-to-configure-torchServe-resNet-152-model-with-batch-supported-model-using-docker) +* [Demo to configure TorchServe ResNet-152 model with batch-supported model](#demo-to-configure-torchserve-resnet-152-model-with-batch-supported-model) +* [Demo to configure TorchServe ResNet-152 model with batch-supported model using Docker](#demo-to-configure-torchserve-resnet-152-model-with-batch-supported-model-using-docker) ## Introduction @@ -90,7 +90,7 @@ In this section lets bring up model server and launch Resnet-152 model, which us First things first, follow the main [Readme](../README.md) and install all the required packages including `torchserve`. -### Batch inference of Resnet-152 configured with managment API +### Batch inference of Resnet-152 configured with management API * Start the model server. In this example, we are starting the model server to run on inference port 8080 and management port 8081. @@ -258,9 +258,9 @@ curl http://localhost:8081/models/resnet-152-batch_v2 ``` ## Demo to configure TorchServe ResNet-152 model with batch-supported model using Docker -Here, we show how to register a model with batch inference support when serving the model using docker contianers. We set the `batch_size` and `max_batch_delay` in the config.properties similar to the previous section which is being used by [dockered_entrypoint.sh](../docker/dockerd-entrypoint.sh). +Here, we show how to register a model with batch inference support when serving the model using docker containers. We set the `batch_size` and `max_batch_delay` in the config.properties similar to the previous section which is being used by [dockered_entrypoint.sh](../docker/dockerd-entrypoint.sh). 
-### Batch inference of Resnet-152 using docker contianer +### Batch inference of Resnet-152 using docker container * Set the batch `batch_size` and `max_batch_delay` in the config.properties as referenced in the [dockered_entrypoint.sh](../docker/dockerd-entrypoint.sh) diff --git a/docs/configuration.md b/docs/configuration.md index 1e57b44eb0..3e8297aa6c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -78,9 +78,9 @@ TorchServe doesn't support authentication natively. To avoid unauthorized access The inference API is listening on port 8080. The management API is listening on port 8081. Both expect HTTP requests. These are the default ports. See [Enable SSL](#enable-ssl) to configure HTTPS. -* `inference_address`: Inference API binding address. Default: http://127.0.0.1:8080 -* `management_address`: Management API binding address. Default: http://127.0.0.1:8081 -* `metrics_address`: Metrics API binding address. Default: http://127.0.0.1:8082 +* `inference_address`: Inference API binding address. Default: `http://127.0.0.1:8080` +* `management_address`: Management API binding address. Default: `http://127.0.0.1:8081` +* `metrics_address`: Metrics API binding address. Default: `http://127.0.0.1:8082` * To run predictions on models on a public IP address, specify the IP address as `0.0.0.0`. To run predictions on models on a specific IP address, specify the IP address and port. @@ -289,7 +289,7 @@ the backend workers convert "Bytearray to utf-8 string" when the Content-Type of * `max_response_size` : The maximum allowable response size that the Torchserve sends, in bytes. Default: 6553500 * `limit_max_image_pixels` : Default value is true (Use default [PIL.Image.MAX_IMAGE_PIXELS](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS)). If this is set to "false", set PIL.Image.MAX_IMAGE_PIXELS = None in backend default vision handler for large image payload. * `allowed_urls` : Comma separated regex of allowed source URL(s) from where models can be registered. Default: "file://.*|http(s)?://.*" (all URLs and local file system) -eg : To allow base URLs `https://s3.amazonaws.com/` and `https://torchserve.pytorch.org/` use the following regex string `allowed_urls=https://s3.amazonaws.com/.*,https://torchserve.pytorch.org/.*` +e.g. : To allow base URLs `https://s3.amazonaws.com/` and `https://torchserve.pytorch.org/` use the following regex string `allowed_urls=https://s3.amazonaws.com/.*,https://torchserve.pytorch.org/.*` * `workflow_store` : Path of workflow store directory. Defaults to model store directory. **NOTE** @@ -298,7 +298,7 @@ All the above config properties can be set using environment variable as follows - set `enable_envvars_config` to true in config.properties - export environment variable for property as`TS_`. - eg: to set inference_address property run cmd + e.g.: to set inference_address property run cmd `export TS_INFERENCE_ADDRESS="http://127.0.0.1:8082"`. --- diff --git a/docs/custom_service.md b/docs/custom_service.md index 997ea302d6..8418990869 100755 --- a/docs/custom_service.md +++ b/docs/custom_service.md @@ -259,7 +259,7 @@ Torchserve returns the captum explanations for Image Classification, Text Classi The explanations are written as a part of the explain_handle method of base handler. The base handler invokes this explain_handle_method. The arguments that are passed to the explain handle methods are the pre-processed data and the raw data. 
It invokes the get insights function of the custom handler that returns the captum attributions. The user should write his own get_insights functionality to get the explanations -For serving a custom handler the captum algorithm should be initialized in the intialize functions of the handler +For serving a custom handler the captum algorithm should be initialized in the initialize functions of the handler The user can override the explain_handle function in the custom handler. The user should define their get_insights method for custom handler to get Captum Attributions. diff --git a/docs/getting_started.md b/docs/getting_started.md index 50ec2889ec..7c2aa47820 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -24,12 +24,12 @@ #### For Windows - Refer to the documentation [here](docs/torchserve_on_win_native.md). + Refer to the documentation [here](./torchserve_on_win_native.md). 2. Install torchserve, torch-model-archiver and torch-workflow-archiver For [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install) - Note: Conda packages are not supported for Windows. Refer to the documentation [here](docs/torchserve_on_win_native.md). + Note: Conda packages are not supported for Windows. Refer to the documentation [here](./torchserve_on_win_native.md). ``` conda install torchserve torch-model-archiver torch-workflow-archiver -c pytorch ``` @@ -77,7 +77,7 @@ You can also create model stores to store your archived models. torch-model-archiver --model-name densenet161 --version 1.0 --model-file ./serve/examples/image_classifier/densenet_161/model.py --serialized-file densenet161-8d451a50.pth --export-path model_store --extra-files ./serve/examples/image_classifier/index_to_name.json --handler image_classifier ``` -For more information about the model archiver, see [Torch Model archiver for TorchServe](model-archiver/README.md) +For more information about the model archiver, see [Torch Model archiver for TorchServe](https://github.com/pytorch/serve/tree/master/model-archiver/README.md) ### Start TorchServe to serve the model @@ -89,11 +89,11 @@ torchserve --start --ncs --model-store model_store --models densenet161.mar After you execute the `torchserve` command above, TorchServe runs on your host, listening for inference requests. -**Note**: If you specify model(s) when you run TorchServe, it automatically scales backend workers to the number equal to available vCPUs (if you run on a CPU instance) or to the number of available GPUs (if you run on a GPU instance). In case of powerful hosts with a lot of compute resources (vCPUs or GPUs), this start up and autoscaling process might take considerable time. If you want to minimize TorchServe start up time you should avoid registering and scaling the model during start up time and move that to a later point by using corresponding [Management API](docs/management_api.md#register-a-model), which allows finer grain control of the resources that are allocated for any particular model). +**Note**: If you specify model(s) when you run TorchServe, it automatically scales backend workers to the number equal to available vCPUs (if you run on a CPU instance) or to the number of available GPUs (if you run on a GPU instance). In case of powerful hosts with a lot of compute resources (vCPUs or GPUs), this start up and autoscaling process might take considerable time. 
If you want to minimize TorchServe start up time you should avoid registering and scaling the model during start up time and move that to a later point by using corresponding [Management API](./management_api.md#register-a-model), which allows finer grain control of the resources that are allocated for any particular model). ### Get predictions from a model -To test the model server, send a request to the server's `predictions` API. TorchServe supports all [inference](docs/inference_api.md) and [management](docs/management_api.md) api's through both [gRPC](docs/grpc_api.md) and [HTTP/REST](docs/rest_api.md). +To test the model server, send a request to the server's `predictions` API. TorchServe supports all [inference](./inference_api.md) and [management](./management_api.md) apis through both [gRPC](./grpc_api.md) and [HTTP/REST](./rest_api.md). #### Using GRPC APIs through python client @@ -109,7 +109,7 @@ pip install -U grpcio protobuf grpcio-tools python -m grpc_tools.protoc --proto_path=frontend/server/src/main/resources/proto/ --python_out=ts_scripts --grpc_python_out=ts_scripts frontend/server/src/main/resources/proto/inference.proto frontend/server/src/main/resources/proto/management.proto ``` - - Run inference using a sample client [gRPC python client](ts_scripts/torchserve_grpc_client.py) + - Run inference using a sample client [gRPC python client](https://github.com/pytorch/serve/blob/master/ts_scripts/torchserve_grpc_client.py) ```bash python ts_scripts/torchserve_grpc_client.py infer densenet161 examples/image_classifier/kitten.jpg @@ -155,7 +155,7 @@ Which will return the following JSON object All interactions with the endpoint will be logged in the `logs/` directory, so make sure to check it out! -Now you've seen how easy it can be to serve a deep learning model with TorchServe! [Would you like to know more?](docs/server.md) +Now you've seen how easy it can be to serve a deep learning model with TorchServe! [Would you like to know more?](./server.md) ### Stop TorchServe @@ -168,7 +168,7 @@ torchserve --stop ### Inspect the logs All the logs you've seen as output to stdout related to model registration, management, inference are recorded in the `/logs` folder. -High level performance data like Throughput or Percentile Precision can be generated with [Benchmark](benchmark/README.md) and visualized in a report. +High level performance data like Throughput or Percentile Precision can be generated with [Benchmark](https://github.com/pytorch/serve/tree/master/benchmarks/README.md) and visualized in a report. ### Install TorchServe for development @@ -192,8 +192,8 @@ Use `--cuda` flag with `install_dependencies.py` for installing cuda version spe #### For Windows -Refer to the documentation [here](docs/torchserve_on_win_native.md). +Refer to the documentation [here](./torchserve_on_win_native.md). -For information about the model archiver, see [detailed documentation](model-archiver/README.md). +For information about the model archiver, see [detailed documentation](https://github.com/pytorch/serve/tree/master/model-archiver/README.md). 
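The `predictions` endpoint used throughout the getting started guide above can also be called from Python instead of curl. The following is a minimal sketch, assuming a local TorchServe instance with the `densenet161` example model registered as described in that guide; the use of the `requests` library here is an illustration and not part of the documented workflow:

```python
import requests

# Assumes TorchServe is running locally with the densenet161 example model registered,
# as shown in the docs/getting_started.md steps above.
url = "http://127.0.0.1:8080/predictions/densenet161"

with open("examples/image_classifier/kitten.jpg", "rb") as f:
    response = requests.post(url, data=f)  # send the raw image bytes as the request body

response.raise_for_status()
print(response.json())  # class -> probability mapping returned by the image_classifier handler
```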
diff --git a/docs/index.md b/docs/index.md index d6da829cfb..824d7ab259 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,7 +16,7 @@ TorchServe is a performant, flexible and easy to use tool for serving PyTorch ea * [Logging](logging.md) - How to configure logging * [Metrics](metrics.md) - How to configure metrics * [Prometheus and Grafana metrics](metrics_api.md) - How to configure metrics API with Prometheus formatted metrics in a Grafana dashboard -* [Captum Explanations](https://github.com/pytorch/serve/blob/master/captum/Captum_visualization_for_bert.ipynb) - Built in support for Captum explanations for both text and images +* [Captum Explanations](https://github.com/pytorch/serve/blob/master/examples/captum/Captum_visualization_for_bert.ipynb) - Built in support for Captum explanations for both text and images * [Batch inference with TorchServe](batch_inference_with_ts.md) - How to create and serve a model with batch inference in TorchServe * [Workflows](workflows.md) - How to create workflows to compose Pytorch models and Python functions in sequential and parallel pipelines @@ -33,7 +33,7 @@ TorchServe is a performant, flexible and easy to use tool for serving PyTorch ea * [HuggingFace Language Model](https://github.com/pytorch/serve/blob/master/examples/Huggingface_Transformers/Transformer_handler_generalized.py) - This handler takes an input sentence and can return sequence classifications, token classifications or Q&A answers * [Multi Modal Framework](https://github.com/pytorch/serve/blob/master/examples/MMF-activity-recognition/handler.py) - Build and deploy a classifier that combines text, audio and video input data -* [Dual Translation Workflow](https://github.com/pytorch/serve/tree/master/examples/Workflows/nmt_tranformers_pipeline) - +* [Dual Translation Workflow](https://github.com/pytorch/serve/tree/master/examples/Workflows/nmt_transformers_pipeline) - * [Model Zoo](model_zoo.md) - List of pre-trained model archives ready to be served for inference with TorchServe. * [Examples](https://github.com/pytorch/serve/tree/master/examples) - Many examples of how to package and deploy models with TorchServe * [Workflow Examples](https://github.com/pytorch/serve/tree/master/examples/Workflows) - Examples of how to compose models in a workflow with TorchServe @@ -43,7 +43,7 @@ TorchServe is a performant, flexible and easy to use tool for serving PyTorch ea * [Advanced configuration](configuration.md) - Describes advanced TorchServe configurations. * [A/B test models](https://github.com/pytorch/serve/blob/master/docs/use_cases.md#serve-models-for-ab-testing) - A/B test your models for regressions before shipping them to production * [Custom Service](custom_service.md) - Describes how to develop custom inference services. 
-* [Encrypted model serving](management_api.md/encrypted-model-serving) - S3 server side model encryption via KMS +* [Encrypted model serving](management_api.md#encrypted-model-serving) - S3 server side model encryption via KMS * [Snapshot serialization](https://github.com/pytorch/serve/blob/master/plugins/docs/ddb_endpoint.md) - Serialize model artifacts to AWS Dynamo DB * [Benchmarking and Profiling](https://github.com/pytorch/serve/tree/master/benchmarks#torchserve-model-server-benchmarking) - Use JMeter or Apache Bench to benchmark your models and TorchServe itself * [TorchServe on Kubernetes](https://github.com/pytorch/serve/blob/master/kubernetes/README.md#torchserve-on-kubernetes) - Demonstrates a Torchserve deployment in Kubernetes using Helm Chart supported in both Azure Kubernetes Service and Google Kubernetes service diff --git a/docs/internals.md b/docs/internals.md index 5d68d4bf3a..a82003ce9d 100644 --- a/docs/internals.md +++ b/docs/internals.md @@ -27,7 +27,7 @@ TorchServe was designed a multi model inferencing framework. A production grade * `requirements`: requirements.txt * `serving_sdk`: SDK to support TorchServe in sagemaker * `test`: tests -* `ts_scripts`: useful utility filees that don't fit in any other folder +* `ts_scripts`: useful utility files that don't fit in any other folder * `workflow-archiver`: workflow package CLI ## Important files for the core TorchServe engine @@ -78,7 +78,7 @@ https://github.com/pytorch/serve/blob/8903ca1fb059eab3c1e8eccdee1376d4ff52fb67/f https://github.com/pytorch/serve/blob/8903ca1fb059eab3c1e8eccdee1376d4ff52fb67/frontend/server/src/main/java/org/pytorch/serve/wlm/WorkLoadManager.java * Get number of running workers -* Number of workers which is just a concurrent hashmap, bakendgroup, ports etc are all here +* Number of workers which is just a concurrent hashmap, backendgroup, ports etc are all here * Add worker threads by submitting them to a threadpool Executor Service (create a pool of threads and assign tasks or worker threads to it) diff --git a/docs/management_api.md b/docs/management_api.md index 296d4ad457..c7e7af5d9f 100644 --- a/docs/management_api.md +++ b/docs/management_api.md @@ -9,11 +9,12 @@ TorchServe provides the following APIs that allows you to manage models at runti 5. [List registered models](#list-models) 6. [Set default version of a model](#set-default-version) -The Management API listens on port 8081 and is only accessible from localhost by default. To change the default setting, see [TorchServe Configuration](configuration.md). +The Management API listens on port 8081 and is only accessible from localhost by default. To change the default setting, see [TorchServe Configuration](./configuration.md). Similar to the [Inference API](inference_api.md), the Management API provides a [API description](#api-description) to describe management APIs with the OpenAPI 3.0 specification. Alternatively, if you want to use KServe, TorchServe supports both v1 and v2 API. For more details please look into this [kserve documentation](https://github.com/pytorch/serve/tree/master/kubernetes/kserve) + ## Register a model This API follows the [ManagementAPIsService.RegisterModel](https://github.com/pytorch/serve/blob/master/frontend/server/src/main/resources/proto/management.proto) gRPC API. @@ -48,13 +49,14 @@ If you'd like to serve an encrypted model then you need to setup [S3 SSE-KMS](ht And set "s3_sse_kms=true" in HTTP request. 
-For example: model squeezenet1_1 is [encrypted on S3 under your own private account](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html). The model http url on S3 is https://torchserve.pytorch.org/sse-test/squeezenet1_1.mar. -- if torchserve will run on EC2 instance (eg. OS: ubuntu) +For example: model squeezenet1_1 is [encrypted on S3 under your own private account](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html). The model http url on S3 is `https://torchserve.pytorch.org/sse-test/squeezenet1_1.mar`. +- if torchserve will run on EC2 instance (e.g. OS: ubuntu) 1. add an IAM Role (AWSS3ReadOnlyAccess) for the EC2 instance 2. run ts_scripts/get_aws_credential.sh to export AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY 3. export AWS_DEFAULT_REGION=your_s3_bucket_region 4. start torchserve 5. Register encrypted model squeezenet1_1 by setting s3_sse_kms=true in curl command. + ```bash curl -X POST "http://localhost:8081/models?url=https://torchserve.pytorch.org/sse-test/squeezenet1_1.mar&s3_sse_kms=true" @@ -62,7 +64,8 @@ curl -X POST "http://localhost:8081/models?url=https://torchserve.pytorch.org/s "status": "Model \"squeezenet_v1.1\" Version: 1.0 registered with 0 initial workers. Use scale workers API to add workers for the model." } ``` -- if torchserve will run on local (eg. OS: macOS) + +- if torchserve will run on local (e.g. OS: macOS) 1. Find your AWS access key and secret key. You can [reset them](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys_retrieve.html) if you forgot the keys. 2. export AWS_ACCESS_KEY_ID=your_aws_access_key 3. export AWS_SECRET_ACCESS_KEY=your_aws_secret_key @@ -174,7 +177,7 @@ curl -v -X PUT "http://localhost:8081/models/noop/2.0?min_worker=3&synchronous=t ## Describe model -This API follows the [ManagementAPIsService.DescribeModel](../frontend/server/src/main/resources/proto/management.proto) gRPC API. It returns the status of a model in the ModelServer. +This API follows the [ManagementAPIsService.DescribeModel](https://github.com/pytorch/serve/blob/master/frontend/server/src/main/resources/proto/management.proto) gRPC API. It returns the status of a model in the ModelServer. `GET /models/{model_name}` @@ -291,8 +294,9 @@ or `GET /models/{model_name}?customized=true` Use the Describe Model API to get detail runtime status and customized metadata of a version of a model: -* Implement function describe_handle. Eg. -``` +* Implement function describe_handle. E.g. + +```python def describe_handle(self): """Customized describe handler Returns: @@ -306,7 +310,8 @@ Use the Describe Model API to get detail runtime status and customized metadata ``` * Implement function _is_describe if handler is not inherited from BaseHandler. And then, call _is_describe and describe_handle in handle. -``` + +```python def _is_describe(self): if self.context and self.context.get_request_header(0, "describe"): if self.context.get_request_header(0, "describe") == "True": @@ -328,15 +333,16 @@ Use the Describe Model API to get detail runtime status and customized metadata return output ``` -* Call function _is_describe and describe_handle in handle. Eg. -``` +* Call function _is_describe and describe_handle in handle. E.g. + +```python def handle(self, data, context): """Entry point for default handler. It takes the data from the input request and returns the predicted outcome for the input. Args: data (list): The input data that needs to be made a prediction request on. 
context (Context): It is a JSON Object containing information pertaining to - the model artefacts parameters. + the model artifacts parameters. Returns: list : Returns a list of dictionary with the predicted response. """ @@ -368,7 +374,9 @@ def handle(self, data, context): (stop_time - start_time) * 1000, 2), None, 'ms') return output ``` + * Here is an example. "customizedMetadata" shows the metadata from user's model. These metadata can be decoded into a dictionary. + ```bash curl http://localhost:8081/models/noop-customized/1.0?customized=true [ @@ -397,8 +405,10 @@ curl http://localhost:8081/models/noop-customized/1.0?customized=true } ] ``` + * Decode customizedMetadata on client side. For example: -``` + +```python import requests import json diff --git a/docs/metrics.md b/docs/metrics.md index 46262647f5..4cad39111a 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -6,7 +6,7 @@ * [System metrics](#system-metrics) * [Formatting](#formatting) * [Custom Metrics API](#custom-metrics-api) -* [Logging the custom metrics](#logging-the-custom-metrics) +* [Logging the custom metrics](#logging-custom-metrics) ## Introduction @@ -323,7 +323,7 @@ metrics.add_counter('LoopCount', -1, None, dimensions) ``` -### Log custom metrics +## Logging custom metrics Following sample code can be used to log the custom metrics created in the model's custom handler: diff --git a/docs/metrics_api.md b/docs/metrics_api.md index e3b55ef162..edbee43a74 100644 --- a/docs/metrics_api.md +++ b/docs/metrics_api.md @@ -52,13 +52,13 @@ scrape_configs: static_configs: - targets: ['localhost:8082'] #TorchServe metrics endpoint ``` -Navigate to http://localhost:9090/ on a browser to execute queries and create graphs +Navigate to `http://localhost:9090/` on a browser to execute queries and create graphs PrometheusServer #### Grafana -Once you have the Torchserve and Prometheus servers running, you can further [setup](https://prometheus.io/docs/visualization/grafana/) Grafana, point it to Prometheus server and navigate to http://localhost:3000/ to create dashboards and graphs. +Once you have the Torchserve and Prometheus servers running, you can further [setup](https://prometheus.io/docs/visualization/grafana/) Grafana, point it to Prometheus server and navigate to `http://localhost:3000/` to create dashboards and graphs. You can use command given below to start Grafana - `sudo systemctl daemon-reload && sudo systemctl enable grafana-server && sudo systemctl start grafana-server` diff --git a/docs/performance_guide.md b/docs/performance_guide.md index e3a9558a81..59f35773aa 100644 --- a/docs/performance_guide.md +++ b/docs/performance_guide.md @@ -1,7 +1,7 @@ # Performance Guide In case you're interested in optimizing the memory usage, latency or throughput of a PyTorch model served with TorchServe, this is the guide for you. ## Optimizing PyTorch -There are many tricks to optimize PyTorch models for production including but not limited to distillation, quantization, fusion, pruning, setting environment variables and we encourage you to benchmark and see what works best for you. An experimental tool that may make this process is easier is https://github.com/pytorch/serve/tree/master/experimental/torchprep +There are many tricks to optimize PyTorch models for production including but not limited to distillation, quantization, fusion, pruning, setting environment variables and we encourage you to benchmark and see what works best for you. 
An experimental tool that may make this process easier is https://pypi.org/project/torchprep. In general it's hard to optimize models and the easiest approach can be exporting to some runtime like ORT, TensorRT, IPEX, FasterTransformer and we have many examples for how to integrate these runtimes in https://github.com/pytorch/serve/tree/master/examples. If your favorite runtime is not supported please feel free to open a PR. diff --git a/docs/workflows.md b/docs/workflows.md index 6b5755f1f1..b1341c1104 100644 --- a/docs/workflows.md +++ b/docs/workflows.md @@ -70,7 +70,7 @@ User can define following workflow model properties: | max-workers | Number of maximum workers launched for every workflow model | 1 | | batch-size | Batch size used for every workflow model | 1 | | max-batch-delay | Maximum batch delay time TorchServe waits for every workflow model to receive `batch_size` number of requests.| 50 ms | -| retry-attempts | Retry attempts for a specific workflow node incase of a failure | 1 | +| retry-attempts | Retry attempts for a specific workflow node in case of a failure | 1 | | timeout-ms | Timeout in MilliSeconds for a given node | 10000 | These properties can be defined as a global value for every model and can be over-ridden at every model level in workflow specification. Refer the above example for more details. diff --git a/examples/FasterTransformer_HuggingFace_Bert/README.md b/examples/FasterTransformer_HuggingFace_Bert/README.md index 95b609578b..af66501b20 100644 --- a/examples/FasterTransformer_HuggingFace_Bert/README.md +++ b/examples/FasterTransformer_HuggingFace_Bert/README.md @@ -5,10 +5,10 @@ Batch inferencing with Transformers faces two challenges - Large batch sizes suffer from higher latency and small or medium-sized batches this will become kernel latency launch bound. - Padding wastes a lot of compute, (batchsize, seq_length) requires to pad the sequence to (batchsize, max_length) where difference between avg_length and max_length results in a considerable waste of computation, increasing the batch size worsen this situation. -[Faster Transformers](https://github.com/NVIDIA/FasterTransformer/blob/main/sample/pytorch/run_glue.py) (FT) from Nvidia along with [Efficient Transformers](https://github.com/bytedance/effective_transformer) (EFFT) that is built on top of FT address the above two challenges, by fusing the CUDA kernels and dynamically removing padding during computations. The current implementation from [Faster Transformers](https://github.com/NVIDIA/FasterTransformer/blob/main/sample/pytorch/run_glue.py) support BERT like encoder and decoder layers. In this example, we show how to get a Torchsctipted (traced) EFFT variant of Bert models from HuggingFace (HF) for sequence classification and question answering and serve it. +[Faster Transformers](https://github.com/NVIDIA/FasterTransformer/blob/main/examples/pytorch/bert/run_glue.py) (FT) from Nvidia along with [Efficient Transformers](https://github.com/bytedance/effective_transformer) (EFFT) that is built on top of FT address the above two challenges, by fusing the CUDA kernels and dynamically removing padding during computations. The current implementation from [Faster Transformers](https://github.com/NVIDIA/FasterTransformer/blob/main/examples/pytorch/bert/run_glue.py) support BERT like encoder and decoder layers. In this example, we show how to get a Torchscripted (traced) EFFT variant of Bert models from HuggingFace (HF) for sequence classification and question answering and serve it. 
-### How to get a Torchsctipted (Traced) EFFT of HF Bert model and serving it +### How to get a Torchscripted (Traced) EFFT of HF Bert model and serving it **Requirements** @@ -65,14 +65,14 @@ Now we are ready to make the Torchscripted file, as mentioned at the beginning t # Sequence classification, make sure to comment out import set_seed in Download_Transformer_models.py as its not supported in Transformers=2.5.1 python ./examples/Huggingface_Transformers/Download_Transformer_models.py -# This will downlaod the model weights in ../Huggingface_Transformers/Transfomer_model directory +# This will download the model weights in ../Huggingface_Transformers/Transfomer_model directory cd /workspace/FasterTransformer/build/ # This will generate the Traced model "traced_model.pt" # --data_type can be fp16 or fp32 # --model_type being specified to thsext will use the Faster Transformer fusions -# --remove_padding is chosen by default that will make use of efficient padding along with Faster Trasnformer +# --remove_padding is chosen by default that will make use of efficient padding along with Faster Transformer python pytorch/Bert_FT_trace.py --mode sequence_classification --model_name_or_path /workspace/serve/Transformer_model --tokenizer_name "bert-base-uncased" --batch_size 1 --data_type fp16 --model_type thsext @@ -124,7 +124,7 @@ curl -X POST http://127.0.0.1:8080/predictions/my_tc -T ../Huggingface_Transform } python ../Huggingface_Transformers/Download_Transformer_models.py -# This will downlaod the model weights in ../Huggingface_Transformers/Transfomer_model directory +# This will download the model weights in ../Huggingface_Transformers/Transfomer_model directory cd /workspace/FasterTransformer/build/ diff --git a/examples/Huggingface_Transformers/README.md b/examples/Huggingface_Transformers/README.md index e69d55d489..776e69e3a5 100644 --- a/examples/Huggingface_Transformers/README.md +++ b/examples/Huggingface_Transformers/README.md @@ -291,7 +291,7 @@ curl -X POST http://127.0.0.1:8080/predictions/BERTSeqClassification -T ./Seq_c ### Captum Explanations for Visual Insights -The [Captum Explanations for Visual Insights Notebook](../../captum/Captum_visualization_for_bert.ipynb) provides a visual example for how model interpretations can help +The [Captum Explanations for Visual Insights Notebook](https://github.com/pytorch/serve/tree/master/examples/captum/Captum_visualization_for_bert.ipynb) provides a visual example for how model interpretations can help Known issues: * Captum does't work well for batched inputs and may result in timeouts @@ -307,9 +307,9 @@ curl -H "Content-Type: application/json" --data @examples/Huggingface_Transforme When a json file is passed as a request format to the curl, Torchserve unwraps the json file from the request body. This is the reason for specifying service_envelope=body in the config.properties file -## Model Paralellism +## Model Parallelism -[Parallelize] (https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.GPT2Model.parallelize) is a an experimental feature that HuggingFace recently added to support large model inference for some very large models, GPT2 and T5. GPT2 model choices based on their size are gpt2-medium, gpt2-large, gpt2-xl. This feature only supports LMHeadModel that could be used for text generation, other applicaiton such as sequence, token classification and question answering are not supported. 
We have added parallelize support for GPT2 model in the cutom handler in this example that will enable you to perfrom model parallel inference for GPT2 models used for text generation. The same logic in the handler can be extended to T5 and the applications it supports. Make sure that you register your model with one worker using this feature. To run this example, a machine with #gpus > 1 is required. The number of required gpus depends on the size of the model. This feature only supports single node, one machine with multi-gpus. +[Parallelize](https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.GPT2Model.parallelize) is an experimental feature that HuggingFace recently added to support large model inference for some very large models, GPT2 and T5. GPT2 model choices based on their size are gpt2-medium, gpt2-large, gpt2-xl. This feature only supports LMHeadModel that could be used for text generation, other applications such as sequence, token classification and question answering are not supported. We have added parallelize support for GPT2 model in the custom handler in this example that will enable you to perform model parallel inference for GPT2 models used for text generation. The same logic in the handler can be extended to T5 and the applications it supports. Make sure that you register your model with one worker using this feature. To run this example, a machine with #gpus > 1 is required. The number of required gpus depends on the size of the model. This feature only supports single node, one machine with multi-gpus. Change `setup_config.json` to @@ -356,5 +356,5 @@ To get an explanation: `curl -X POST http://127.0.0.1:8080/explanations/Textgene ### Running KServe -[BERT Readme for KServe](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/torchserve/bert/README.md). +[BERT Readme for KServe](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/custom/torchserve/bert-sample/hugging-face-bert-sample.md). [End to End KServe document](https://github.com/pytorch/serve/blob/master/kubernetes/kserve/README.md). diff --git a/examples/MMF-activity-recognition/README.md b/examples/MMF-activity-recognition/README.md index e7bdb5d07c..b9965a0222 100644 --- a/examples/MMF-activity-recognition/README.md +++ b/examples/MMF-activity-recognition/README.md @@ -27,9 +27,9 @@ If you installed using pip, then you need install Pyav : `pip install av` -MMF currenly is using Transformers 3.4.0, in case you have other version installed in your enviroment, this would be the best instead of installing it directly, add the MMF Github and 'av', in the requirements.txt and pass it to the model archiver using -r flag. You can read more about serving models with thrid party dependencies [here](https://github.com/pytorch/serve/tree/master/docs/use_cases.md#serve-custom-models-with-third-party-dependency). +MMF currently uses Transformers 3.4.0; if you have a different version installed in your environment, the best approach is, instead of installing MMF directly, to add the MMF Github repo and 'av' to the requirements.txt and pass it to the model archiver using the -r flag. You can read more about serving models with third party dependencies [here](https://github.com/pytorch/serve/tree/master/docs/use_cases.md#serve-custom-models-with-third-party-dependency).
-***Note, MMF currenly does not support Pytorch 1.10, please make sure you are using Pytorch 1.9*** +***Note, MMF currently does not support Pytorch 1.10, please make sure you are using Pytorch 1.9*** #### Getting started on Serving @@ -104,7 +104,7 @@ In the following we discuss each of the steps in more details. #### New Dataset -In this example Charades dataset has been used which is a video dataset added in the [dataset zoo](https://github.com/facebookresearch/mmf/tree/master/mmf/datasets/builders/charades). We can define a new dataset in MMF by following this [guide](https://mmf.sh/docs/tutorials/dataset). **To add a new dataset**, we need to define a new dataset class which extends the Basedataset class from mmf.datasets.base_dataset, where we need to override three methods, __init__, __getitem__ and __len__. These methods basically define how to initialize ( set the path to the dataset), get each item from the dataset and then provide the length of the dataset. The Charades dataset class can be found [here](https://github.com/facebookresearch/mmf/tree/master/mmf/datasets/builders/charades/dataset.py#L16). Also, we are able to set the [processors](https://github.com/facebookresearch/mmf/tree/master/mmf/configs/datasets/charades/defaults.yaml#L22) in the dataset config file and intialize them in the dataset class. +In this example Charades dataset has been used which is a video dataset added in the [dataset zoo](https://github.com/facebookresearch/mmf/tree/master/mmf/datasets/builders/charades). We can define a new dataset in MMF by following this [guide](https://mmf.sh/docs/tutorials/dataset). **To add a new dataset**, we need to define a new dataset class which extends the Basedataset class from mmf.datasets.base_dataset, where we need to override three methods, __init__, __getitem__ and __len__. These methods basically define how to initialize ( set the path to the dataset), get each item from the dataset and then provide the length of the dataset. The Charades dataset class can be found [here](https://github.com/facebookresearch/mmf/tree/master/mmf/datasets/builders/charades/dataset.py#L16). Also, we are able to set the [processors](https://github.com/facebookresearch/mmf/tree/master/mmf/configs/datasets/charades/defaults.yaml#L22) in the dataset config file and initialize them in the dataset class. The **next step** is to define a dataset builder class which extends the "BaseDatasetBuilder" class from mmf.datasets.base_dataset_builder. In this class essentially we need to override three methods, __init__, __build__ and __load__. Where in the __init __ method, the dataset class name is set (as we defined in the previous step), the __build__ method, is responsible for downloading the dataset and __load__ method is taking care of loading the dataset, builds an object of class inheriting "BaseDataset" which contains your dataset logic and returns it. The dataset builder code is also available [here](https://github.com/facebookresearch/mmf/tree/master/mmf/datasets/builders/charades/builder.py). 
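For orientation, the dataset and builder pair described in the MMF README above reduces to a skeleton like the following. This is a minimal sketch only: it assumes the `mmf` package is installed, and the dataset key `my_video_dataset`, the placeholder samples, and the exact base-class constructor signatures are illustrative and may differ between MMF versions.

```python
from mmf.common.registry import registry
from mmf.datasets.base_dataset import BaseDataset
from mmf.datasets.base_dataset_builder import BaseDatasetBuilder


class MyVideoDataset(BaseDataset):
    def __init__(self, config, dataset_type, *args, **kwargs):
        # Name the dataset and remember the split (train/val/test).
        super().__init__("my_video_dataset", config, dataset_type)
        # A real dataset would index video files on disk here; a small
        # placeholder list keeps the sketch self-contained.
        self.samples = [{"video_id": i, "label": 0} for i in range(10)]

    def __getitem__(self, idx):
        # Return one sample; the processors declared in the dataset config
        # would normally be applied to it here.
        return self.samples[idx]

    def __len__(self):
        # Length of the dataset split.
        return len(self.samples)


@registry.register_builder("my_video_dataset")
class MyVideoDatasetBuilder(BaseDatasetBuilder):
    def __init__(self):
        # Tell MMF which dataset key this builder is responsible for.
        super().__init__("my_video_dataset")

    def build(self, config, dataset_type):
        # Download / extract the raw data if it is not already on disk.
        pass

    def load(self, config, dataset_type, *args, **kwargs):
        # Construct and return the BaseDataset subclass defined above.
        return MyVideoDataset(config, dataset_type)
```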
diff --git a/examples/README.md b/examples/README.md index 53656d9b5e..51bcc1f1f6 100644 --- a/examples/README.md +++ b/examples/README.md @@ -4,17 +4,17 @@ * [Serving torchvision image classification models](#serving-image-classification-models) * [Serving custom model with custom service handler](#serving-custom-model-with-custom-service-handler) * [Serving text classification model](#serving-text-classification-model) -* [Serving text classification model with scriptable tokenizer](#serving-text-classification-model-with-scriptable-tokenzier) +* [Serving text classification model with scriptable tokenizer](#serving-text-classification-model-with-scriptable-tokenizer) * [Serving object detection model](#serving-object-detection-model) * [Serving image segmentation model](#serving-image-segmentation-model) * [Serving huggingface transformers model](#serving-huggingface-transformers) -* [Serving image generator model](#example-to-serve-GAN-model) +* [Serving image generator model](#example-to-serve-gan-model) * [Serving machine translation model](#serving-neural-machine-translation) -* [Serving waveglow text to speech synthesizer model](#serving-wavegolw-text-to-speech-synthesizer) -* [Serving multi modal framework model](#Serving-Multi-modal-model) +* [Serving waveglow text to speech synthesizer model](#serving-waveglow-text-to-speech-synthesizer) +* [Serving multi modal framework model](#serving-multi-modal-model) * [Serving Image Classification Workflow](#serving-image-classification-workflow) * [Serving Neural Machine Translation Workflow](#serving-neural-machine-translation-workflow) -* [Serving Torchrec DLRM (Recommender Model)](#serving-torchrec-dlrm) +* [Serving Torchrec DLRM (Recommender Model)](#serving-torchrec-dlrm-recommender-model) # TorchServe Examples @@ -65,11 +65,11 @@ The following example demonstrates how to create and serve a custom NN model wit ## Serving text classification model -The following example demonstrates how to create and serve a custom text_classification NN model with default text_classifer handler provided by TorchServe : +The following example demonstrates how to create and serve a custom text_classification NN model with default text_classifier handler provided by TorchServe : * [Text classification example](text_classification) -## Serving text classification model with scriptable tokenzier +## Serving text classification model with scriptable tokenizer This example shows how to combine a text classification model with a scriptable tokenizer into a single, scripted artifact to serve with TorchServe. A scriptable tokenizer is a tokenizer compatible with TorchScript. * [Scriptable Tokenizer example with scriptable tokenizer](text_classification_with_scriptable_tokenizer) @@ -105,7 +105,7 @@ The following example demonstrates how to create and serve a neural translation * [Neural machine translation ](nmt_transformer) -## Serving Wavegolw text to speech synthesizer +## Serving Waveglow text to speech synthesizer The following example demonstrates how to create and serve the waveglow text to speech synthesizer diff --git a/examples/custom_metrics/README.md b/examples/custom_metrics/README.md index 13e6e4d988..149a71cf8d 100644 --- a/examples/custom_metrics/README.md +++ b/examples/custom_metrics/README.md @@ -64,7 +64,7 @@ Run the commands given in following steps from the parent directory of the root The inference request logs the time taken for prediction to the model_metrics.log file. 
Mtail parses the file and is served at 3903 port - http://localhost:3903 + `http://localhost:3903` - Step 8: Sart Prometheus with mtailtarget added to scarpe config diff --git a/examples/dcgan_fashiongen/Readme.md b/examples/dcgan_fashiongen/Readme.md index 0ba2843d72..f2c595a0b7 100644 --- a/examples/dcgan_fashiongen/Readme.md +++ b/examples/dcgan_fashiongen/Readme.md @@ -13,7 +13,7 @@ The [create_mar.sh](create_mar.sh) script does the following : - Download a checkpoint file [DCGAN_fashionGen-1d67302.pth](https://dl.fbaipublicfiles.com/gan_zoo/DCGAN_fashionGen-1d67302.pth). (`--serialized-file`) - Provide a custom handler - [dcgan_fashiongen_handler.py](dcgan_fashiongen_handler.py). (`--handler`) -Alterantively, you can directly [download the dcgan_fashiongen.mar](https://torchserve.s3.amazonaws.com/mar_files/dcgan_fashiongen.mar) +Alternatively, you can directly [download the dcgan_fashiongen.mar](https://torchserve.s3.amazonaws.com/mar_files/dcgan_fashiongen.mar) ### 2. Start TorchServe and Register Model ``` diff --git a/examples/image_classifier/mnist/README.md b/examples/image_classifier/mnist/README.md index ae1fc6cfca..2116e593dd 100644 --- a/examples/image_classifier/mnist/README.md +++ b/examples/image_classifier/mnist/README.md @@ -18,7 +18,7 @@ Run the commands given in following steps from the parent directory of the root * Step - 1: Create a new model architecture file which contains model class extended from torch.nn.modules. In this example we have created [mnist model file](mnist.py). * Step - 2: Train a MNIST digit recognition model using https://github.com/pytorch/examples/blob/master/mnist/main.py and save the state dict of model. We have added the pre-created [state dict](mnist_cnn.pt) of this model. - * Step - 3: Write a custom handler to run the inference on your model. In this example, we have added a [custom_handler](mnist_handler.py) which runs the inference on the input greyscale images using the above model and recognizes the digit in the image. + * Step - 3: Write a custom handler to run the inference on your model. In this example, we have added a [custom_handler](mnist_handler.py) which runs the inference on the input grayscale images using the above model and recognizes the digit in the image. * Step - 4: Create a torch model archive using the torch-model-archiver utility to archive the above files. ```bash diff --git a/examples/intel_extension_for_pytorch/README.md b/examples/intel_extension_for_pytorch/README.md index 0a04a5a0e2..fc5b896222 100644 --- a/examples/intel_extension_for_pytorch/README.md +++ b/examples/intel_extension_for_pytorch/README.md @@ -6,12 +6,12 @@ Here we show how to use TorchServe with Intel® Extension for PyTorch*. 1. While Intel® Extension for PyTorch* benefits all platforms, platforms with AVX512 benefit the most.
## Contents of this Document -* [Install Intel® Extension for PyTorch*](#install-intel-extension-for-pytorch) -* [Serving model with Intel® Extension for PyTorch*](#serving-model-with-intel-extension-for-pytorch) +* [Install Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#install-intel-extension-for-pytorch) +* [Serving model with Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#serving-model-with-intel-extension-for-pytorch) * [TorchServe with Launcher](#torchserve-with-launcher) -* [Creating and Exporting INT8 model for Intel® Extension for PyTorch*](#creating-and-exporting-int8-model-for-intel-extension-for-pytorch) +* [Creating and Exporting INT8 model for Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#creating-and-exporting-int8-model-for-intel-extension-for-pytorch) * [Benchmarking with Launcher](#benchmarking-with-launcher) -* [Performance Boost with Intel® Extension for PyTorch* and Launcher](#performance-boost-with-intel-extension-for-pytorch-and-launcher) +* [Performance Boost with Intel® Extension for PyTorch* and Launcher](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#performance-boost-with-intel-extension-for-pytorch-and-launcher) ## Install Intel® Extension for PyTorch* @@ -25,7 +25,7 @@ ipex_enable=true Once Intel® Extension for PyTorch* is enabled, deploying PyTorch model follows the same procedure shown [here](https://pytorch.org/serve/use_cases.html). TorchServe with Intel® Extension for PyTorch* can deploy any model and do inference. ## TorchServe with Launcher -Launcher is a script to automate the process of tunining configuration setting on Intel hardware to boost performance. Tuning configurations such as OMP_NUM_THREADS, thread affinity, memory allocator can have a dramatic effect on performance. Refer to [Performance Tuning Guide](https://github.com/intel/intel-extension-for-pytorch/blob/master/docs/tutorials/performance_tuning/tuning_guide.md) and [Launch Script Usage Guide](https://github.com/intel/intel-extension-for-pytorch/blob/master/docs/tutorials/performance_tuning/launch_script.md) for details on performance tuning with launcher. +Launcher is a script to automate the process of tuning configuration settings on Intel hardware to boost performance. Tuning configurations such as OMP_NUM_THREADS, thread affinity, memory allocator can have a dramatic effect on performance. Refer to [Performance Tuning Guide](https://github.com/intel/intel-extension-for-pytorch/blob/master/docs/tutorials/performance_tuning/tuning_guide.md) and [Launch Script Usage Guide](https://github.com/intel/intel-extension-for-pytorch/blob/master/docs/tutorials/performance_tuning/launch_script.md) for details on performance tuning with launcher. All it needs to use TorchServe with launcher is to set its configuration in `config.properties`. @@ -67,7 +67,7 @@ Below are some useful `cpu_launcher_args` to note. Italic values are default if Refer to [Launch Script Usage Guide](https://github.com/intel/intel-extension-for-pytorch/blob/master/docs/tutorials/performance_tuning/launch_script.md) for a full list of tunable configuration of launcher. And refer to [Performance Tuning Guide](https://github.com/intel/intel-extension-for-pytorch/blob/master/docs/tutorials/performance_tuning/tuning_guide.md) for more details.
### Launcher Core Pinning to Boost Performance of TorchServe Multi Worker Inference -When running [multi-worker inference](https://pytorch.org/serve/management_api.html#scale-workers) with Torchserve, launcher pin cores to workers to boost performance. Internally, launcher equally divides the number of cores by the number of workers such that each worker is pinned to assigned cores. Doing so avoids core overlap among workers which can signficantly boost performance for TorchServe multi-worker inference. For example, assume running 4 workers on a machine with Intel(R) Xeon(R) Platinum 8180 CPU, 2 sockets, 28 cores per socket, 2 threads per core. Launcher will bind worker 0 to cores 0-13, worker 1 to cores 14-27, worker 2 to cores 28-41, and worker 3 to cores 42-55. +When running [multi-worker inference](https://pytorch.org/serve/management_api.html#scale-workers) with Torchserve, launcher pins cores to workers to boost performance. Internally, launcher equally divides the number of cores by the number of workers such that each worker is pinned to assigned cores. Doing so avoids core overlap among workers which can significantly boost performance for TorchServe multi-worker inference. For example, assume running 4 workers on a machine with Intel(R) Xeon(R) Platinum 8180 CPU, 2 sockets, 28 cores per socket, 2 threads per core. Launcher will bind worker 0 to cores 0-13, worker 1 to cores 14-27, worker 2 to cores 28-41, and worker 3 to cores 42-55. CPU usage is shown below. 4 main worker threads were launched, each launching 14 threads affinitized to the assigned physical cores. ![26](https://user-images.githubusercontent.com/93151422/170373651-fd8a0363-febf-4528-bbae-e1ddef119358.gif) @@ -171,7 +171,7 @@ torch.jit.save(model, 'rn50_int8_jit.pt') ``` ### 2. Creating a Model Archive -Once the serialized file ( `.pt`) is created, it can be used with `torch-model-archiver` as ususal. +Once the serialized file ( `.pt`) is created, it can be used with `torch-model-archiver` as usual. Use the following command to package `rn50_int8_jit.pt` into `rn50_ipex_int8.mar`. ``` @@ -237,7 +237,7 @@ $ cat logs/model_log.log ``` ### Benchmarking with Launcher Core Pinning -As described previously in [TorchServe with Launcher](#torchserve-with-launcher), launcher core pinning boosts performance of multi-worker inference. We'll demonstrate launcher core pinning with TorchServe benchmark, but keep in mind that launcher core pinning is a generic feature applicable to any TorchServe multi-worker inference use casese. +As described previously in [TorchServe with Launcher](#torchserve-with-launcher), launcher core pinning boosts performance of multi-worker inference. We'll demonstrate launcher core pinning with TorchServe benchmark, but keep in mind that launcher core pinning is a generic feature applicable to any TorchServe multi-worker inference use case. For example, assume running 4 workers ``` @@ -309,12 +309,12 @@ Use the following command to reproduce the results. python benchmark-ab.py --url {modelUrl} --input {inputPath} --concurrency 1 ``` -For example, run the following command to reproduce latency performance of ResNet50 with data type of Intel® Extension for PyTorch* int8 and batch size of 1. Refer to [Creating and Exporting INT8 model for Intel® Extension for PyTorch*](#creating-and-exporting-int8-model-for-intel-extension-for-pytorch) for steps to creating ```rn50_ipex_int8.mar``` file for ResNet50 with Intel® Extension for PyTorch* int8 data type.
+For example, run the following command to reproduce latency performance of ResNet50 with data type of Intel® Extension for PyTorch* int8 and batch size of 1. Refer to [Creating and Exporting INT8 model for Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#creating-and-exporting-int8-model-for-intel-extension-for-pytorch) for steps to create the ```rn50_ipex_int8.mar``` file for ResNet50 with Intel® Extension for PyTorch* int8 data type. ``` python benchmark-ab.py --url 'file:///model_store/rn50_ipex_int8.mar' --concurrency 1 ``` -For example, run the following command to reproduce latency performance of BERT with data type of Intel® Extension for PyTorch* int8 and batch size of 1. Refer to [Creating and Exporting INT8 model for Intel® Extension for PyTorch*](#creating-and-exporting-int8-model-for-intel-extension-for-pytorch) for steps to creating ```bert_ipex_int8.mar``` file for BERT with Intel® Extension for PyTorch* int8 data type. +For example, run the following command to reproduce latency performance of BERT with data type of Intel® Extension for PyTorch* int8 and batch size of 1. Refer to [Creating and Exporting INT8 model for Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#creating-and-exporting-int8-model-for-intel-extension-for-pytorch) for steps to create the ```bert_ipex_int8.mar``` file for BERT with Intel® Extension for PyTorch* int8 data type. ``` python benchmark-ab.py --url 'file:///model_store/bert_ipex_int8.mar' --input '../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text_captum_input.txt' --concurrency 1 ``` diff --git a/examples/text_classification/README.md b/examples/text_classification/README.md index 65aa826134..47a0e489e7 100644 --- a/examples/text_classification/README.md +++ b/examples/text_classification/README.md @@ -1,4 +1,4 @@ -# Text Classfication using TorchServe's default text_classifier handler +# Text Classification using TorchServe's default text_classifier handler This is an example to create a text classification dataset and train a sentiment model. We have used the following torchtext example to train the model. diff --git a/examples/text_classification_with_scriptable_tokenizer/README.md b/examples/text_classification_with_scriptable_tokenizer/README.md index e6acaffb1a..a4a58840ec 100644 --- a/examples/text_classification_with_scriptable_tokenizer/README.md +++ b/examples/text_classification_with_scriptable_tokenizer/README.md @@ -1,4 +1,4 @@ -# Text Classfication using a Scriptable Tokenizer +# Text Classification using a Scriptable Tokenizer TorchScript is a way to serialize and optimize your PyTorch models. A scriptable tokenizer is a special tokenizer which is compatible with [TorchScript's compiler](https://pytorch.org/docs/stable/jit.html) so that it can be jointly serialized with a PyTorch model.
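The point of that example is simply that the tokenizer and the classifier end up in one TorchScript graph, so a single `.pt` artifact can be archived and served. A minimal sketch of the pattern, using a toy whitespace tokenizer in place of the torchtext one (the class names, vocabulary, and embedding size below are made up for illustration):

```python
from typing import Dict, List

import torch
import torch.nn as nn


class ToyScriptableTokenizer(nn.Module):
    """Whitespace tokenizer written only with TorchScript-supported ops."""

    def __init__(self, vocab: Dict[str, int]):
        super().__init__()
        self.vocab = vocab  # token -> id mapping, usable inside TorchScript

    def forward(self, text: str) -> List[int]:
        ids: List[int] = []
        for token in text.lower().split(" "):
            if token in self.vocab:
                ids.append(self.vocab[token])
            else:
                ids.append(0)  # unknown-token id
        return ids


class TokenizerAndClassifier(nn.Module):
    """Bundles tokenization and classification into one scriptable module."""

    def __init__(self, tokenizer: nn.Module, vocab_size: int, num_classes: int):
        super().__init__()
        self.tokenizer = tokenizer
        self.embedding = nn.EmbeddingBag(vocab_size, 32)
        self.fc = nn.Linear(32, num_classes)

    def forward(self, text: str) -> torch.Tensor:
        ids = torch.tensor(self.tokenizer(text), dtype=torch.long)
        offsets = torch.zeros(1, dtype=torch.long)
        return self.fc(self.embedding(ids, offsets))


vocab = {"<unk>": 0, "torchserve": 1, "rocks": 2}
pipeline = TokenizerAndClassifier(ToyScriptableTokenizer(vocab), vocab_size=len(vocab), num_classes=2)
scripted = torch.jit.script(pipeline)    # tokenizer + model compiled together
scripted.save("model_and_tokenizer.pt")  # single artifact to archive
```

The saved `model_and_tokenizer.pt` would then be handed to `torch-model-archiver` as the serialized file, so the handler no longer needs its own tokenization step.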
@@ -21,7 +21,7 @@ This can be achieved with pip by running: pip install torchdata ``` -Or conda by runnning: +Or conda by running: ``` conda install -c pytorch torchdata diff --git a/frontend/.gitignore b/frontend/.gitignore index 0ad7978582..0a4642caf8 100644 --- a/frontend/.gitignore +++ b/frontend/.gitignore @@ -5,4 +5,4 @@ build libs archive/bin -server/bin +server/bin \ No newline at end of file diff --git a/kubernetes/AKS/README.md b/kubernetes/AKS/README.md index fb67658609..c2179e8cef 100644 --- a/kubernetes/AKS/README.md +++ b/kubernetes/AKS/README.md @@ -78,7 +78,7 @@ Your output should look similar to ```persistentvolumeclaim/model-store-claim created``` -Verify that the PVC / PV is created by excuting. +Verify that the PVC / PV is created by executing. ```kubectl get pvc,pv``` @@ -102,7 +102,7 @@ Your output should look similar to ```pod/model-store-pod created``` -Verify that the pod is created by excuting. +Verify that the pod is created by executing. ```kubectl get po``` @@ -180,9 +180,9 @@ torchserve-75f5b67469-5hnsn 1/1 Running 0 2m36s ### 3 Test Torchserve Installation -#### 3.1 Fetch the Load Balancer Extenal IP +#### 3.1 Fetch the Load Balancer External IP -Fetch the Load Balancer Extenal IP by executing. +Fetch the Load Balancer External IP by executing. ```kubectl get svc``` @@ -302,7 +302,7 @@ az group delete --name myResourceGroup --yes --no-wait **Troubleshooting Azure resource for AKS cluster creation** - * Check AKS availble region, https://azure.microsoft.com/en-us/global-infrastructure/services/?products=kubernetes-service + * Check AKS available region, https://azure.microsoft.com/en-us/global-infrastructure/services/?products=kubernetes-service * Check AKS quota and VM size limitation, https://docs.microsoft.com/en-us/azure/aks/quotas-skus-regions * Check whether your subscription has enough quota to create AKS cluster, https://docs.microsoft.com/en-us/azure/networking/check-usage-against-limits diff --git a/kubernetes/GKE/README.md b/kubernetes/GKE/README.md index ff3e2949c6..fccc532068 100644 --- a/kubernetes/GKE/README.md +++ b/kubernetes/GKE/README.md @@ -51,7 +51,7 @@ ts us-west1 1.16.13-gke.401 34.83.140.167 n1-standard-4 1.16.13-gke.401 For running a cluster with GPU accelerator use `--accelerator type=,count=` -The below command creates a cluster with a single n1-standard-4 node with and nvidia-testla-t4 GPU accelerator. +The below command creates a cluster with a single n1-standard-4 node with an nvidia-tesla-t4 GPU accelerator. ```bash gcloud container clusters create torchserve --machine-type n1-standard-4 --accelerator type=nvidia-tesla-t4,count=1 --num-nodes 1 @@ -93,13 +93,13 @@ git clone https://github.com/pytorch/serve.git cd serve/kubernetes/GKE ``` -**_NOTE:_** By default the helm chart installs GPU version of torchserve. Follow steps in section [2.2](####-2.2-For-CPU-setup) for running in a CPU only cluster. For GPU setup section [2.2](####-2.2-For-CPU-setup) can be skipped. +**_NOTE:_** By default the helm chart installs GPU version of torchserve. Follow steps in section [2.2](#22-for-cpu-setup) for running in a CPU only cluster. For GPU setup section [2.2](#22-for-cpu-setup) can be skipped.
#### 2.2 For CPU setup * Change torchserve image in Helm/values.yaml to the CPU version * Set `n_gpu` to `0` in Helm/values.yaml -* Skip NVIDIA plugin installation in section [2.3](#####-2.3-Install-NVIDIA-device-plugin) +* Skip NVIDIA plugin installation in section [2.3](#23-install-nvidia-device-plugin) #### 2.3 Install NVIDIA device plugin @@ -148,7 +148,7 @@ cd GKE helm install mynfs ./nfs-provisioner/ ``` -```kubectl get pods``` should show something similiar to: +```kubectl get pods``` should show something similar to: ```bash NAME READY STATUS RESTARTS AGE @@ -169,7 +169,7 @@ Replace storage size and server IP in pv_pvc.yaml with the server IP got from ab kubectl apply -f templates/pv_pvc.yaml -n default ``` -Verify that the PVC / PV is created by excuting. +Verify that the PVC / PV is created by executing. ```bash kubectl get pvc,pv -n default @@ -199,7 +199,7 @@ Your output should look similar to pod/model-store-pod created ``` -Verify that the pod is created by excuting. +Verify that the pod is created by executing. ```bash kubectl get po @@ -275,13 +275,13 @@ Possible errors in this step may be a result of * IAM limits. * Quota restrictions during cluster creation - [GKE Quotas](https://cloud.google.com/compute/quotas) -You should able be able to find the following resources at the end of this step in the respective Gcoud consoles +You should be able to find the following resources at the end of this step in the respective Gcloud consoles -* GKE -> Cluser in the Gcloud Console +* GKE -> Cluster in the Gcloud Console -#### 4.2 Troubleshooting NFS Persitant Volume Creation +#### 4.2 Troubleshooting NFS Persistent Volume Creation -Possible error in this step may be a result of one of the following. Your pod my be struck in *Init / Creating* forever / persitant volume claim may be in *Pending* forever. +Possible error in this step may be a result of one of the following. Your pod may be stuck in *Init / Creating* forever / persistent volume claim may be in *Pending* forever. * Storage disk not created / wrong storage disk name. diff --git a/kubernetes/README.md b/kubernetes/README.md index 1b58a15fca..f94f15f937 100644 --- a/kubernetes/README.md +++ b/kubernetes/README.md @@ -25,7 +25,7 @@ The following table describes all the parameters for the Helm Chart. | `image` | Torchserve Serving image | `pytorch/torchserve:latest-gpu` | | `inference_port` | TS Inference port | `8080` | | `management_port` | TS Management port | `8081` | -| `metrics_port` | TS Mertics port | `8082` | +| `metrics_port` | TS Metrics port | `8082` | | `replicas` | K8S deployment replicas | `1` | | `model-store` | EFS mountpath | `/home/model-server/shared/` | | `persistence.size` | Storage size to request | `1Gi` | @@ -78,7 +78,7 @@ REVISION: 1 TEST SUITE: None ``` -Verify that torchserve has succesfully started by executing ```kubectl exec pod/torchserve-fff -- cat logs/ts_log.log``` on your torchserve pod. You can get this id by lookingup `kubectl get po --all-namespaces` +Verify that torchserve has successfully started by executing ```kubectl exec pod/torchserve-fff -- cat logs/ts_log.log``` on your torchserve pod.
You can get this id by looking up `kubectl get po --all-namespaces` Your output should should look similar to @@ -93,7 +93,7 @@ Current directory: /home/model-server ## Test Torchserve Installation -Fetch the Load Balancer Extenal IP by executing +Fetch the Load Balancer External IP by executing ```bash kubectl get svc @@ -272,7 +272,7 @@ Follow the link for log aggregation with EFK Stack.\ * You may inspect the values by running ``helm list`` and `helm get all ts` to verify if the values used for the installation. * You can uninstall / reinstall the helm chart by executing `helm uninstall ts` and `helm install ts .` * `helm install ts .` fails with `Error: create: failed to create: Request entity too large: limit is 3145728` or `invalid: data: Too long: must have at most 1048576 characters`. - * Ensure that you dont have any stale files in your kubernetes directory where you are executing the command. If so, move them out of the directory or add them to .helmignore file. + * Ensure that you don't have any stale files in your kubernetes directory where you are executing the command. If so, move them out of the directory or add them to .helmignore file. * `kubectl get svc` does't show my torchserve service * Try reinstalling the helm chart by executing `helm uninstall ts` and `helm install ts .` * "Error: unable to build kubernetes objects from release manifest: unable to recognize “”: no matches for kind “ClusterConfig” in version “eksctl.io/v1alpha5”" diff --git a/kubernetes/autoscale.md b/kubernetes/autoscale.md index 923be6b1e0..e631d8c8fd 100644 --- a/kubernetes/autoscale.md +++ b/kubernetes/autoscale.md @@ -1,6 +1,6 @@ # Autoscaler -Setup Kubernetes HPA(Horizontal Pod Autoscaler) for Torchserve, tuned for torchserve metrics. This uses Prometheus as metrics collector and Prometheus Adapter as mertrics server, serving Torchserve metrics for HPA. +Setup Kubernetes HPA(Horizontal Pod Autoscaler) for Torchserve, tuned for torchserve metrics. This uses Prometheus as metrics collector and Prometheus Adapter as metrics server, serving Torchserve metrics for HPA. ## Steps @@ -31,7 +31,7 @@ prometheus-server.default.svc.cluster.local ... ``` -### 3. Install Prometheus Adapater +### 3. Install Prometheus Adapter - Update Prometheus url and port in adapter.yaml. Use the url given in prometheus installation output. diff --git a/kubernetes/examples/FasterTransformer_HuggingFace_Bert.md b/kubernetes/examples/FasterTransformer_HuggingFace_Bert.md index e08b7feaec..7d1b696e0b 100644 --- a/kubernetes/examples/FasterTransformer_HuggingFace_Bert.md +++ b/kubernetes/examples/FasterTransformer_HuggingFace_Bert.md @@ -2,7 +2,7 @@ ## Overview -This documnet demonstrates, running fast transformers HuggingFace BERT example with Torchserve in kubernetes setup. +This document demonstrates running the FasterTransformer HuggingFace BERT example with Torchserve in a kubernetes setup. Refer: [FasterTransformer_HuggingFace_Bert](../../examples/FasterTransformer_HuggingFace_Bert/README.md#faster-transformer) @@ -22,7 +22,7 @@ Once the cluster and the PVCs are ready, we can generate MAR file.
## Generate Mar file -[Follow steps from here to generate MAR file](../../examples/FasterTransformer_HuggingFace_Bert/README.md#how-to-get-a-torchsctipted-traced-efft-of-hf-bert-model-and-serving-it) +[Follow steps from here to generate MAR file](../../examples/FasterTransformer_HuggingFace_Bert/README.md#how-to-get-a-torchscripted-traced-efft-of-hf-bert-model-and-serving-it) ## Copy Mar file from container to local path diff --git a/kubernetes/kserve/README.md b/kubernetes/kserve/README.md index 3cc731bb15..d040ed7d63 100644 --- a/kubernetes/kserve/README.md +++ b/kubernetes/kserve/README.md @@ -171,7 +171,7 @@ kubectl cp mnist.mar model-store-pod:/pv/model-store/ -c model-store -n kserve-t kubectl cp config.properties model-store-pod:/pv/config/ -c model-store -n kserve-test ``` -Refer link for other [storage options](https://github.com/kserve/kserve/tree/master/docs/samples/storagehttps://github.com/kserve/kserve/tree/master/docs/samples/storage) +Refer link for other [storage options](https://github.com/kserve/kserve/tree/master/docs/samples/storage) - Step - 5 : Create the Inference Service @@ -228,10 +228,10 @@ For v2 protocol curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/mnist-kf/explain -d ./kf_request_json/v2/mnist/mnist_v2_bytes.json ``` -Refer the individual Readmes for KServe : +Refer the individual readmes for KServe : -- [BERT](https://github.com/kserve/kserve/tree/master/docs/samples/v1beta1/torchserve/bert#readme) -- [MNIST](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/torchserve/README.md) +* [BERT](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/custom/torchserve/bert-sample/hugging-face-bert-sample.md) +* [MNIST](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/torchserve/README.md) Sample input JSON file for v1 and v2 protocols diff --git a/kubernetes/kserve/developer_guide.md b/kubernetes/kserve/developer_guide.md index 6f669dde84..623ca65c1c 100644 --- a/kubernetes/kserve/developer_guide.md +++ b/kubernetes/kserve/developer_guide.md @@ -12,7 +12,7 @@ Below are the prerequisites should be met. ## Steps to run Torchserve with Kserve -### Generating marfile and config.propertiest file +### Generating marfile and config.properties file Navigate to the cloned serve repo and run @@ -20,7 +20,7 @@ Navigate to the cloned serve repo and run torch-model-archiver --model-name mnist_kf --version 1.0 --model-file examples/image_classifier/mnist/mnist.py --serialized-file examples/image_classifier/mnist/mnist_cnn.pt --handler examples/image_classifier/mnist/mnist_handler.py ``` -The above command genereates mnist_kf.mar +The above command generates mnist_kf.mar Copy the below contents to config.properties and change the model_store path diff --git a/kubernetes/kserve/kf_request_json/v2/bert/README.md b/kubernetes/kserve/kf_request_json/v2/bert/README.md index 47e205ea7d..8f157f57ec 100644 --- a/kubernetes/kserve/kf_request_json/v2/bert/README.md +++ b/kubernetes/kserve/kf_request_json/v2/bert/README.md @@ -7,7 +7,7 @@ model locally using kserve. Clone [pytorch/serve](https://github.com/pytorch/serve) repository. 
-Copy the [Transformer_kserve_handler.py](Transformer_kserve_handler.py) handler file to `examples/Huggingface_Transformers` folder +Copy the [Transformer_kserve_handler.py](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/torchserve/v2/bert/sequence_classification/Transformer_kserve_handler.py) handler file to `examples/Huggingface_Transformers` folder Navigate to `examples/Huggingface_Transformers` diff --git a/kubernetes/kserve/kserve_wrapper/README.md b/kubernetes/kserve/kserve_wrapper/README.md index 89f98bc58e..b5c8a328a8 100644 --- a/kubernetes/kserve/kserve_wrapper/README.md +++ b/kubernetes/kserve/kserve_wrapper/README.md @@ -12,13 +12,13 @@ The KServe wrapper files were created to enable the Torchserve integration with 2. The TorchserveModel.py file contains the methods to handle the request and response that comes from the Torchserve side and passes it on to the KServe side. -3. TSModelRepository.py file contains the intialize method for the parameters that gets passed on to the Torchservemodel.py. +3. TSModelRepository.py file contains the initialize method for the parameters that gets passed on to the Torchservemodel.py. ## The Local Testing of KServe Wrapper for MNIST Run KFServer locally to test it before creating a docker image. Torchserve makes use of port 8085 and the kfserver runs at port 8080 -We will hit kfserve , which inturn hit torch serve for inference and explanations request. +We will hit kfserve , which in turn hits torch serve for inference and explanations request. Follow the below steps to serve the MNIST Model : - Step 1 : Install python3.6.9 @@ -202,7 +202,7 @@ Response: - Step 1: Follow the same steps from to 10 as what was done for MNIST. -- Step 2: Use this config.properties- Change the mode_snaphot to bert +- Step 2: Use this config.properties- Change the mode_snapshot to bert ```bash inference_address=http://0.0.0.0:8085 diff --git a/plugins/docs/README.md b/plugins/docs/README.md index c69a7a37dc..5f921e25fa 100644 --- a/plugins/docs/README.md +++ b/plugins/docs/README.md @@ -12,7 +12,7 @@ of following type using plugin. e.g. [endpoints plugin](../endpoints/) here is [AWS DynamoDB snapshot serializer](../DDBEndPoint). This enables torchserve to serialize snapshots to DynamoDB. ### How to use plugins with torchserve. -There are following two ways to inluce plugin jars to torchserve. +There are following two ways to include plugin jars to torchserve. 1. Using config. property - `plugins_path` e.g. diff --git a/plugins/docs/ddb_endpoint.md b/plugins/docs/ddb_endpoint.md index 176cd4d8ea..8dac6bd62a 100644 --- a/plugins/docs/ddb_endpoint.md +++ b/plugins/docs/ddb_endpoint.md @@ -50,4 +50,4 @@ You can change snapshot serializer by using a DDBEndPoint plugin as follow from `2020-10-26 15:06:06,705 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Trying to load snapshot serializer via plugin.... 2020-10-26 15:06:06,896 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Snapshot serializer via plugin has been loaded successfully 2020-10-26 15:06:06,897 [INFO ] main DDBSnapshotSerializer - Fetching last snapshot from DDB... -2020-10-26 15:06:09,894 [ERROR] main DDBSnapshotSerializer - Failed to get last snpahost from DDB. Torchserve will start with default or given configuration.` +2020-10-26 15:06:09,894 [ERROR] main DDBSnapshotSerializer - Failed to get last snapshot from DDB. 
Torchserve will start with default or given configuration.` diff --git a/test/README.md b/test/README.md index 9eadae424c..d1e7e081b4 100644 --- a/test/README.md +++ b/test/README.md @@ -1,6 +1,6 @@ # TorchServe Regression Tests -This folder contains nightly regression tests execututed against TorchServe master.These tests use [POSTMAN](https://www.postman.com/downloads/) for exercising all the Management & Inference APIs. +This folder contains nightly regression tests executed against TorchServe master. These tests use [POSTMAN](https://www.postman.com/downloads/) for exercising all the Management & Inference APIs. ### Latest Test Run Status @@ -16,7 +16,7 @@ For example update 2020-05-20 in https://torchserve-regression-test.s3.amazonaws ### Running the test manually. -Clone Torch Serve Repo & Build the Docker Image for the execition env. +Clone Torch Serve Repo & Build the Docker Image for the execution env. ``` git clone https://github.com/pytorch/serve diff --git a/test/data_file_config.md b/test/data_file_config.md index 1cd4ca9cc4..b0ac3db31a 100644 --- a/test/data_file_config.md +++ b/test/data_file_config.md @@ -2,7 +2,7 @@ Use following properties to add inference test case in inference_data.json Mandatory properties ---- -**url:** Model url or a varialbe {{mar_path_xxxx}} defined in environment.json. xxxx is replaced with model name. This model's mar file is generated by ts_scripts/marsgen.py via ts_scripts/mar_config.json. +**url:** Model url or a variable {{mar_path_xxxx}} defined in environment.json. xxxx is replaced with model name. This model's mar file is generated by ts_scripts/marsgen.py via ts_scripts/mar_config.json. **model_name:** Model name of the model url. @@ -34,7 +34,7 @@ For "application/json", content-type response json is compared with expected jso "image_classification" is a custom comparator for json structure given in following section. In this case, prediction scores may vary hence tolerance is used while comparing scores. **Note:** -If expected output from your model's inference request is json with a different structure (compared to image classfication above] then you will have to add a custom comparator with name validate_ and add entry for :validate_ in `validators` +If expected output from your model's inference request is json with a different structure (compared to image classification above) then you will have to add a custom comparator with name validate_ and add entry for :validate_ in `validators` objects in `inference_api_test_collection.json`. **expected:** Expected string or json object based on content-type. @@ -64,7 +64,7 @@ Sample expected output for "image_classification" in json.
} ] ``` -For above image classifiction inference response, here is the test case - +For above image classification inference response, here is the test case - ```json [{ "url":"https://torchserve.pytorch.org/mar_files/squeezenet1_1.mar", diff --git a/ts_scripts/markdown_link_check_config.json b/ts_scripts/markdown_link_check_config.json new file mode 100644 index 0000000000..fe5f4af935 --- /dev/null +++ b/ts_scripts/markdown_link_check_config.json @@ -0,0 +1,13 @@ +{ + "retryOn429": true, + "retryCount": 5, + "fallbackRetryDelay": "10s", + "httpHeaders": [ + { + "urls": ["https://docs.github.com/", "https://help.github.com/"], + "headers": { + "Accept-Encoding": "zstd, br, gzip, deflate" + } + } + ] +} \ No newline at end of file diff --git a/ts_scripts/spellcheck.sh b/ts_scripts/spellcheck.sh old mode 100644 new mode 100755 index 378ddbfe68..86b4e2bf19 --- a/ts_scripts/spellcheck.sh +++ b/ts_scripts/spellcheck.sh @@ -1,4 +1,15 @@ # Source: https://github.com/pytorch/torchx/blob/main/scripts/spellcheck.sh set -ex sudo apt-get install aspell -pyspelling -c ts_scripts/spellcheck_conf/spellcheck.yaml \ No newline at end of file + +if [[ -z "$@" ]]; then + sources=$(find -name '*.md') +else + sources=$@ +fi + +sources_arg="" +for src in $sources ;do + sources_arg+=" -S $src" +done +pyspelling -c ts_scripts/spellcheck_conf/spellcheck.yaml --name Markdown $sources_arg \ No newline at end of file diff --git a/ts_scripts/spellcheck_conf/spellcheck.yaml b/ts_scripts/spellcheck_conf/spellcheck.yaml index ce101966e2..03187482ce 100644 --- a/ts_scripts/spellcheck_conf/spellcheck.yaml +++ b/ts_scripts/spellcheck_conf/spellcheck.yaml @@ -9,9 +9,14 @@ matrix: output: ts_scripts/spellcheck_conf/wordlist.dic encoding: utf-8 pipeline: + - pyspelling.filters.context: + context_visible_first: true + delimiters: + - open: '(?s)^ *(?P`{3,})[a-z0-9]*?$' + close: '^(?P=open)$' + - open: '' + content: 'https?://[-a-zA-Z0-9.]+?\.[a-z]{2,6}[-?=&%.0-9a-zA-Z/_#]*' + close: '' - pyspelling.filters.markdown: markdown_extensions: - markdown.extensions.extra: - sources: - - 'docs/*.md' - - 'examples/*/*.md' diff --git a/ts_scripts/spellcheck_conf/wordlist.dic b/ts_scripts/spellcheck_conf/wordlist.dic deleted file mode 100644 index dd70a4eb9b..0000000000 Binary files a/ts_scripts/spellcheck_conf/wordlist.dic and /dev/null differ diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt index 7c60210b15..8b5bf90a88 100644 --- a/ts_scripts/spellcheck_conf/wordlist.txt +++ b/ts_scripts/spellcheck_conf/wordlist.txt @@ -26,6 +26,7 @@ html microsoft ol openjdk +OpenJDK pre psutil sentencepiece @@ -101,7 +102,6 @@ cloudformation cmd dev dir -ec io issuecomment lxning @@ -125,7 +125,6 @@ prebuilt smi stackoverflow util -whl AlexNet DeepLabV Densenet @@ -148,13 +147,11 @@ checkstyle cov gradlew htmlcov -integ node.js pylint pylintrc pytest rcfile -rn tcort ut localhost @@ -255,7 +252,6 @@ isfile isinstance jit kwargs -lig os param pred @@ -272,7 +268,6 @@ LJO MiB cv dockerd -dockered entrypoint gpuId gpuUsage @@ -346,9 +341,7 @@ ServiceUnavailableException lang mb ntl -ss PrometheusServer -dfb globoff noopversioned systemctl @@ -361,17 +354,7 @@ ManagementAPIsService ReadOnlyAccess UserGuide UsingKMSEncryption -aa acknowledgement -adc -ae -ccd -cce -cfe -de -ebcb -ecd -fbf macOS sse fairseq @@ -469,20 +452,14 @@ BaseDataset BaseDatasetBuilder BaseModel FNSio -IwAR -JnhUwpNcz MMFTransformer MultiModal -OZi OmegaConfing Pyav REU TextCaps TextVQA Tochserve -VQA -XCCUv -Ysq csv datasets facebook @@ 
-493,14 +470,6 @@ lables len mc mmfartifacts -qM -qkD -rQaxO -thrid -uwLxwvvvr -wpnNYEZEmWOQ -zccSXqNt -zzRl EmbeddingBag TextHandler overriden @@ -514,12 +483,10 @@ Transfomer bytedance cmake cp -dn geforce libpyt nvcr oauthtoken -thsext turing volta xlarge @@ -566,7 +533,6 @@ Torchserve's asg aws elb - readme sdk apis @@ -590,9 +556,6 @@ init waveglow hostname statsd -ef -fae -fc grafana kms userguide @@ -606,7 +569,7 @@ mmf multimodal preprocessed batchsize -downlaod +download fastertransformer ngc deeplearningexamples @@ -615,4 +578,402 @@ scarpe NVidia WaveGlow huggingface -torchServe \ No newline at end of file +torchServe +CProfile +KSERVE +apachelounge +args +jmeter +kserve +latencies +snakeviz +codec +loadbalancer +torchserves +xml +Conda +autoscaling +conda +GPUMemoryUsed +GPUMemoryUtilization +GPUUtilization +JSONPatternLayout +MXNetModelServer +QLog +QLogLayout +QLogsetupModelDependencies +abc +dda +patternlayout +qlog +IPEX +ORT +PROFILER +TensorRT +ValueToSet +kineto +profiler +pypi +runtimes +torchprep +GPT +KServe +LMHeadModel +Parallelize +Textgeneration +gpt +kserve +parallelize +tx +xl +DCGAN +DLRM +GAN +NN +Recommender +ScriptModule +Scriptable +TorchRec +TorchScript +Torchrec +dcgan +dlrm +fashiongen +FashionGen +fashionGen +gan +nn +scriptable +torchrec +AVX +Allocator +BLOCKTIME +BertModel +CONDA +JeMalloc +KMP +LD +NUMA +Numa +OMP +OpenMP +PRELOAD +PTMalloc +TCMalloc +Xeon +afeeb +affinitized +allocator +args +eval +gif +hyperthreaded +hyperthreading +inplace +inputPath +intel +iomp +ipex +iter +jemalloc +libiomp +libtcmalloc +numa +numactl +pdt +qconfig +randint +randn +tcmalloc +tunable +unix +unutilized +usr +CONTAINERD +DaemonSet +GKE +Gcloud +Gi +GoogleCloudPlatform +Ki +NFS +PV +PersistentVolume +RWX +STORAGECLASS +VPC +allocatable +auth +autoupgrade +bcc +cidr +clusterIP +creationTimestamp +daemonset +drwx +drwxr +fsSL +gcloud +ggc +gke +googleapis +ip +ipv +jsonpath +kubeconfig +kubectl +lR +mynfs +namespaces +nfs +nodePools +persistentvolume +persistentvolumeclaim +po +preloaded +provisioner +pv +pvc +quickstart +rw +svc +tesla +tty +unformatted +AAAAAElFTkSuQmCC +Autoscaler +BUILDKIT +GOR +InferenceService +Knative +Rollout +inferenceservice +ingressgateway +istio +kfs +knative +loadBalancer +mnt +modelCount +readmes +rollout +serverless +recommender +HandlerTime +customizedMetadata +environ +ContentType +kservev +tobytes +CustomHandler +GH +OSS +PRs +ctx +onnx +ClusterConfig +EBS +EFS +EKS +apiVersion +desiredCapacity +efs +eks +eksctl +instanceTypes +instancesDistribution +maxSize +minSize +namespace +ng +nodeGroups +onDemandBaseCapacity +onDemandPercentageAboveBaseCapacity +pvpod +spotInstancePools +storagehttps +subnet +subnets +vpc +MMS +commandline +filepath +jmx +rampup +requestdefaults +scaleup +tearDown +testplan +JProfiler +JProfiler's +SqueezeNet +TSBenchmark +apos +cProfile +dockerhub +filesystem +filterresults +gradle +homebrew +imageFilePath +jpgc +linuxbrew +mergeresults +modelN +perfmon +urlN +Arg +KFserving +arg +authn +authz +dicts +dockerfiles +enum +eventloop +hashmap +lifecycles +sagemaker +startServer +threadpool +mGPU +socio +gridfs +NLP +TorchScript's +Meta's +criteo +personalization +NMTBackTranslate +NMTDualTranslate +nlp +DogCatBreed +DogCatBreedClassification +CloudWatch +LogGroup +TorchServeInferenceURL +TorchServeManagementURL +cloudwatch +keypair +spinup +ReactApp +logdir +tensorboard +DenseNet +pytorchbot +Validator +comparator +validator +validators +Datafile +UI +buildspec +cmds +AKS +PVCs +DockerHub +jq +HPA +HPG 
+targetValue +totensor +KFServer +TSModelRepository +TorchserveModel +Torchservemodel +kfserve +kfserver +KFModel +marfile +AKS +Balancer +EFK +Liveness +autoscale +datasource +helmignore +lookingup +mountpath +Az +VM +aks +az +ds +eastus +myAKSCluster +myResourceGroup +sc +vm +CODEBUILD +CodeBuild +Dockerfiles +bt +buildtype +codebuild +cudaversion +cudnn +memlock +shm +ulimit +Cresta's +DAGs +Dynabench +Dynaboard +MLFlow +MLOps +MLflow +Operationalize +Sagemaker +Streamlit +Inferentia +opensource +operationalising +Wadhwani +modelarchive +eagermode +AttributeName +AttributeType +DDBEndPoint +DDBSnapshotSerializer +DefaultCredentialsProvider +FS +IndexName +KeySchema +KeyType +PluginsManager +ProjectionType +ProvisionedThroughput +ReadCapacityUnits +SDKs +WriteCapacityUnits +createdOn +createdOnMonth +dynamodb +impl +serializer +servingsdk +snapshotName +behaviour +teardown +tg +udv +dataN +backendgroup +sexualized +ecbe +grayscale +bz +marsgen +efft +envvar +Roadmap +fff +pvd +whl +ss +dn +rn +De +ec +VQA +xxxx \ No newline at end of file