Skip to content
This repository has been archived by the owner on May 10, 2024. It is now read-only.

Commit

Permalink
fix serving mounts for finetuned models
Browse files Browse the repository at this point in the history
  • Loading branch information
asaiacai committed Oct 26, 2023
1 parent 3ebc9b7 commit d44d198
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 23 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pytest-smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ jobs:
python-version: ["3.10"]
test-path:
- tests/test_cli.py::test_hf_serve
- tests/test_cli.py::test_llmatc_serve
- tests/test_launch.py
- tests/test_runtracker.py
- tests/test_serve.py
Expand Down
8 changes: 4 additions & 4 deletions docs/source/quickstart/serving.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ Deployment
----------

Model deployments are referenced by their HuggingFace modelhub name. Finetuned models trained through LLM-ATC are referenced
by using the :code:`llm-atc/` prefix.
by passing :code:`--name llm-atc`.

.. code-block:: console
# serve an llm-atc finetuned model, requires `llm-atc/` prefix and grabs model checkpoint from object store
$ llm-atc serve --name llm-atc/myvicuna --source s3://my-bucket/my_vicuna/ --accelerator A100:1 -c servecluster --cloud gcp --region asia-southeast1 --envs "HF_TOKEN=<HuggingFace_token>"
# serve an llm-atc finetuned model: requires `--name llm-atc` plus a `--source` object store path, from which the model checkpoint is mounted
$ llm-atc serve --name llm-atc --source s3://my-bucket/my_vicuna/ --accelerator A100:1 -c servecluster --cloud gcp --region asia-southeast1 --envs "HF_TOKEN=<HuggingFace_token>"
# serve a HuggingFace model, e.g. `lmsys/vicuna-13b-v1.3`
$ llm-atc serve --name lmsys/vicuna-13b-v1.3 --accelerator A100:1 -c servecluster --cloud gcp --region asia-southeast1 --envs "HF_TOKEN=<HuggingFace_token>"
Expand All @@ -33,7 +33,7 @@ from your laptop.
.. code-block:: console
# get the ip address of the OpenAI API endpoint
$ ip=$(grep -A1 "Host servecluster" ~/.ssh/config | grep "HostName" | awk '{print $2}')
$ ip=$(sky status --ip servecluster)
# test which models are available
$ curl http://$ip:8000/v1/models
Expand Down
12 changes: 0 additions & 12 deletions llm_atc/config/serve/serve.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,6 @@ setup: |
pip install git+https://github.com/huggingface/transformers.git
sudo apt update
sudo apt install -y rclone
# copy files from object store onto disk
if [[ $MODEL_NAME == llm-atc/* ]];
then
CHECKPOINT="/$MODEL_NAME/"
LOCAL_CHKPT="./$MODEL_NAME/"
mkdir -p $LOCAL_CHKPT
rclone sync --progress --exclude "train*" $CHECKPOINT $LOCAL_CHKPT
fi
run: |
master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1`
Expand Down
10 changes: 4 additions & 6 deletions llm_atc/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ def serve_route(model_name: str, source: Optional[str] = None, **serve_kwargs):
raise ValueError(
"Attempting to use a finetuned model without a corresponding object store location"
)
elif not source is None and not model_name.startswith("llm-atc/"):
logging.warning(
"Specified object store mount but model is not an llm-atc model. Skipping mounting."
)
return Serve(model_name, source, **serve_kwargs).serve()


Expand Down Expand Up @@ -69,6 +65,10 @@ def default_serve_task(self) -> sky.Task:
def serve(self) -> sky.Task:
"""Deploy fastchat.serve.openai_api_server with vllm_worker"""
serve_task = self.default_serve_task
if self.source and self.names == "llm-atc":
logging.info(f"Using a fine tuned model at {self.source}")
serve_task.update_file_mounts({"/llm-atc": self.source})
self.names = "/llm-atc"
self.envs["MODEL_NAME"] = self.names
if "HF_TOKEN" not in self.envs:
logging.warning(
Expand All @@ -80,6 +80,4 @@ def serve(self) -> sky.Task:
resource._cloud = sky.clouds.CLOUD_REGISTRY.from_str(self.cloud)
resource._set_region_zone(self.region, self.zone)
serve_task.set_resources(resource)
if self.source and self.names.startswith("llm-atc/"):
serve_task.update_file_mounts({"/" + self.names: self.source})
return serve_task
23 changes: 22 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,28 @@ def test_hf_serve():
+ """awk '{print $2}'); echo $ip; curl http://"$ip":8000/v1/models | grep vicuna""",
],
f"sky stop -y {name} ; sleep 300 ; sky down --purge -y {name}",
timeout=45 * 60,
timeout=30 * 60,
)
run_one_test(test)

@pytest.mark.cli
def test_llmatc_serve():
    """
    Tests serving an llm-atc fine tuned model: launches a serve cluster with
    `--name llm-atc` and a `--source` checkpoint, then checks the model is
    registered with the OpenAI-compatible endpoint.
    """

    # Cluster names must not contain spaces: `name` is interpolated unquoted
    # into shell commands below (`-c {name}`, `sky stop -y {name}`), so a
    # space would split it into multiple CLI arguments.
    name = "test-fine-tune"
    ssh_config = os.path.expanduser("~/.ssh/config")
    test = Test(
        "serve_llmatc",
        [
            # `--name llm-atc` triggers mounting of the finetuned checkpoint
            # given by `--source` (see llm_atc/serve.py).
            f"llm-atc serve --detach_run --name llm-atc --source s3://my-trainy-bucket/mymistral --accelerator V100:1 -c {name} --cloud aws --region us-east-2",
            # Give the server time to come up before probing the API.
            "sleep 300",
            # Resolve the head-node IP from the SSH config entry, then verify
            # the finetuned model appears in /v1/models. The model is served
            # under the "llm-atc" mount name, not "vicuna", so grep for that.
            f"""ip=$(grep -A1 "Host {name}" {ssh_config} | grep "HostName" | """
            + """awk '{print $2}'); echo $ip; curl http://"$ip":8000/v1/models | grep llm-atc""",
        ],
        f"sky stop -y {name} ; sleep 300 ; sky down --purge -y {name}",
        timeout=30 * 60,
    )
    run_one_test(test)

Expand Down

0 comments on commit d44d198

Please sign in to comment.