From 8e5ca65d1f4df00cc12c84eb53bc71de2ce46798 Mon Sep 17 00:00:00 2001 From: Noam Gat Date: Wed, 18 Oct 2023 21:18:05 +0300 Subject: [PATCH] vLLM notebook updates --- README.md | 2 +- samples/colab_vllm_integration.ipynb | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 6315a28..a374bc3 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ print(result) ## Capabilities / Advantages -- Works with any Python language model and tokenizer. Already supports transformers, [LangChain](https://docs.langchain.com/docs/) and [vLLM](https://github.com/vllm-project/vllm). Can be adapted to others. +- Works with any Python language model and tokenizer. Already supports transformers, [LangChain](https://docs.langchain.com/docs/) and [vLLM](https://github.com/noamgat/lm-format-enforcer/blob/main/samples/colab_vllm_integration.ipynb). Can be adapted to others. - Supports batched generation and beam searches - each input / beam can have different tokens filtered at every timestep - Supports both JSON Schema and Regular Expression formats - Supports both required and optional fields in JSON schemas diff --git a/samples/colab_vllm_integration.ipynb b/samples/colab_vllm_integration.ipynb index 872363d..36b1462 100644 --- a/samples/colab_vllm_integration.ipynb +++ b/samples/colab_vllm_integration.ipynb @@ -36,16 +36,16 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install vllm lm-format-enforcer\n", - "# !huggingface-cli login\n", + "!pip install vllm lm-format-enforcer\n", + "!huggingface-cli login\n", "\n", "# When running from source / developing the library, use this instead\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "import sys\n", - "import os\n", - "sys.path.append(os.path.abspath('..'))\n", - "# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'" + "# %load_ext autoreload\n", + "# %autoreload 2\n", + "# import sys\n", + "# import os\n", + "# sys.path.append(os.path.abspath('..'))\n", + "## os.environ['CUDA_LAUNCH_BLOCKING'] = '1'" ] }, {