Merge pull request #406 from rhatdan/quadlet
Make quadlets work with OCI images
ericcurtin authored Nov 4, 2024
2 parents 5791050 + 7f34d4f commit e2da6ef
Showing 9 changed files with 213 additions and 48 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -9,3 +9,6 @@ ramalama/*.patch
dist
.#*
venv/
*.container
*.image
*.volume
66 changes: 57 additions & 9 deletions docs/ramalama-serve.1.md
@@ -17,6 +17,9 @@ For REST API endpoint documentation, see: [https://github.com/ggerganov/llama.cp

## OPTIONS

#### **--authfile**=*path*
path of the authentication file for OCI registries

#### **--detach**, **-d**
Run the container in the background and print the new container ID.
The default is true. The --nocontainer option forces this option to false.
@@ -40,14 +43,17 @@ Name of the container to run the Model in.
#### **--port**, **-p**
port for AI Model server to listen on

#### **--tls-verify**=*true*
require HTTPS and verify certificates when contacting OCI registries

## EXAMPLES
### Run two AI Models at the same time. Notice both are running within Podman containers.
```
$ ramalama serve -p 8080 --name mymodel ollama://tiny-llm:latest
$ ramalama serve -d -p 8080 --name mymodel ollama://tiny-llm:latest
09b0e0d26ed28a8418fb5cd0da641376a08c435063317e89cf8f5336baf35cfa
$ ramalama serve -n example --port 8081 oci://quay.io/mmortari/gguf-py-example/v1/example.gguf
$ ramalama serve -d -n example --port 8081 oci://quay.io/mmortari/gguf-py-example/v1/example.gguf
3f64927f11a5da5ded7048b226fbe1362ee399021f5e8058c73949a677b6ac9c
$ podman ps
@@ -56,11 +62,12 @@ CONTAINER ID IMAGE COMMAND CREATED
3f64927f11a5 quay.io/ramalama/ramalama:latest /usr/bin/ramalama... 17 seconds ago Up 17 seconds 0.0.0.0:8082->8082/tcp ramalama_YMPQvJxN97
```
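
Once a model is serving, any HTTP client can exercise it. The following is a minimal sketch, assuming the llama.cpp server's OpenAI-compatible `/v1/chat/completions` route (see the REST API documentation linked above) and the first container's published port 8080:

```
import json
import urllib.request

# Assumes the llama.cpp server's OpenAI-compatible chat endpoint; adjust the
# port to whatever was passed to `ramalama serve -p`.
payload = {"messages": [{"role": "user", "content": "Say hello."}]}
req = urllib.request.Request(
    "http://127.0.0.1:8080/v1/chat/completions",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])
```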

### Generate a quadlet for running the AI Model service
### Generate a quadlet service from a HuggingFace granite Model
```
$ ramalama serve --name MyGraniteServer --generate=quadlet granite > $HOME/.config/containers/systemd/MyGraniteServer.container
$ cat $HOME/.config/containers/systemd/MyGraniteServer.container
$ ramalama serve --name MyGraniteServer --generate=quadlet granite
Generating quadlet file: MyGraniteServer.container
$ cat MyGraniteServer.container
[Unit]
Description=RamaLama granite AI Model Service
After=local-fs.target
@@ -74,26 +81,67 @@ Volume=/home/dwalsh/.local/share/ramalama/models/huggingface/instructlab/granite
ContainerName=MyGraniteServer
PublishPort=8080
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
$ mv MyGraniteServer.container $HOME/.config/containers/systemd/
$ systemctl --user daemon-reload
$ systemctl start --user MyGraniteServer
$ systemctl status --user MyGraniteServer
● MyGraniteServer.service - RamaLama granite AI Model Service
     Loaded: loaded (/home/dwalsh/.config/containers/systemd/MyGraniteServer.container; generated)
    Drop-In: /usr/lib/systemd/user/service.d
             └─10-timeout-abort.conf
     Active: active (running) since Fri 2024-09-27 06:54:17 EDT; 3min 3s ago
   Main PID: 3706287 (conmon)
      Tasks: 20 (limit: 76808)
     Memory: 1.0G (peak: 1.0G)
...
$ podman ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
7bb35b97a0fe quay.io/ramalama/ramalama:latest llama-server --po... 3 minutes ago Up 3 minutes 0.0.0.0:43869->8080/tcp MyGraniteServer
```

### Generate a quadlet service from a tiny OCI Model
```
$ ramalama --runtime=vllm serve --name tiny --generate=quadlet oci://quay.io/rhatdan/tiny:latest
Downloading quay.io/rhatdan/tiny:latest...
Trying to pull quay.io/rhatdan/tiny:latest...
Getting image source signatures
Copying blob 65ba8d40e14a skipped: already exists
Copying blob e942a1bf9187 skipped: already exists
Copying config d8e0b28ee6 done |
Writing manifest to image destination
Generating quadlet file: tiny.container
Generating quadlet file: tiny.image
Generating quadlet file: tiny.volume
$ cat tiny.container
[Unit]
Description=RamaLama /run/model/model.file AI Model Service
After=local-fs.target
[Container]
AddDevice=-/dev/dri
AddDevice=-/dev/kfd
Exec=vllm serve --port 8080 /run/model/model.file
Image=quay.io/ramalama/ramalama:latest
Mount=type=volume,source=tiny.volume,dest=/mnt/models,ro
ContainerName=tiny
PublishPort=8080
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
$ cat tiny.volume
[Volume]
Driver=image
Image=tiny.image
$ cat tiny.image
[Image]
Image=quay.io/rhatdan/tiny:latest
```
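
The three generated files form a chain: the `.container` unit mounts an image-backed volume defined by the `.volume` unit, which in turn references the `.image` unit that pulls the OCI artifact. A small sketch (a hypothetical helper, not part of RamaLama) that follows the chain by reading the key=value lines of the generated files:

```
# Hypothetical helper (not part of RamaLama): collect the key=value entries
# of a quadlet file, keeping duplicates such as repeated AddDevice lines.
def quadlet_keys(path):
    keys = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if "=" in line and not line.startswith(("#", "[")):
                k, _, v = line.partition("=")
                keys.setdefault(k.strip(), []).append(v.strip())
    return keys

container = quadlet_keys("tiny.container")
# Mount=type=volume,source=tiny.volume,dest=/mnt/models,ro
mount = dict(p.split("=", 1) for p in container["Mount"][0].split(",") if "=" in p)
volume = quadlet_keys(mount["source"])        # tiny.volume
image = quadlet_keys(volume["Image"][0])      # tiny.image
print(image["Image"][0])                      # quay.io/rhatdan/tiny:latest
```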

### Generate a Kubernetes YAML file named tini
```
$ ramalama serve --name tini --generate kube tiny
2 changes: 1 addition & 1 deletion install.sh
@@ -80,7 +80,7 @@ setup_ramalama() {
  install -m755 "$to_file" "$ramalama_bin"

  local python_files=("cli.py" "huggingface.py" "model.py" "ollama.py" "common.py" "__init__.py" \
                      "oci.py" "version.py" "shortnames.py" "toml_parser.py")
                      "quadlet.py" "oci.py" "version.py" "shortnames.py" "toml_parser.py")

  for i in "${python_files[@]}"; do
    url="${host}/containers/ramalama/${branch}/ramalama/${i}"
7 changes: 7 additions & 0 deletions ramalama/cli.py
@@ -605,9 +605,16 @@ def run_cli(args):

def serve_parser(subparsers):
    parser = subparsers.add_parser("serve", help="serve REST API on specified AI Model")
    parser.add_argument("--authfile", help="path of the authentication file")
    parser.add_argument("-d", "--detach", action="store_true", dest="detach", help="run the container in detached mode")
    parser.add_argument("-n", "--name", dest="name", help="name of container in which the Model will be run")
    parser.add_argument("-p", "--port", default="8080", help="port for AI Model server to listen on")
    parser.add_argument(
        "--tls-verify",
        dest="tlsverify",
        default=True,
        help="require HTTPS and verify certificates when contacting registries",
    )
    parser.add_argument(
        "--generate",
        choices=["quadlet", "kube"],
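One detail worth noting: `--tls-verify` is declared without a `type=`, so a value supplied on the command line arrives as the string `"false"` while the unset default remains the Python bool `True`. The string comparison in `ramalama/oci.py` below copes with both cases. A minimal standalone sketch of that behavior:

```
import argparse

# Minimal sketch: why oci.py normalizes with str(...).lower() instead of
# testing the value directly.
parser = argparse.ArgumentParser()
parser.add_argument("--tls-verify", dest="tlsverify", default=True)

args = parser.parse_args(["--tls-verify=false"])
assert args.tlsverify == "false"                # a string, not a bool
assert bool(args.tlsverify) is True             # truthy: naive `if args.tlsverify` misleads
assert str(args.tlsverify).lower() == "false"   # the normalization used in oci.py

args = parser.parse_args([])
assert args.tlsverify is True                   # the default keeps its bool type
```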
34 changes: 3 additions & 31 deletions ramalama/model.py
@@ -12,6 +12,7 @@
    run_cmd,
)
from ramalama.version import version
from ramalama.quadlet import Quadlet


file_not_found = """\
@@ -310,37 +311,8 @@ def serve(self, args):
            raise NotImplementedError(file_not_found % (exec_args[0], exec_args[0], exec_args[0], str(e).strip("'")))

    def quadlet(self, model, args, exec_args):
        port_string = ""
        if hasattr(args, "port"):
            port_string = f"PublishPort={args.port}"

        name_string = ""
        if hasattr(args, "name") and args.name:
            name_string = f"ContainerName={args.name}"

        if hasattr(args, "MODEL"):
            model = args.MODEL

        print(
            f"""
[Unit]
Description=RamaLama {model} AI Model Service
After=local-fs.target
[Container]
AddDevice=-/dev/dri
AddDevice=-/dev/kfd
Exec={" ".join(exec_args)}
Image={default_image()}
Volume={model}:/mnt/models:ro,z
{name_string}
{port_string}
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
"""
        )
        quadlet = Quadlet(model, args, exec_args)
        quadlet.gen_container()

    def _gen_ports(self, args):
        if not hasattr(args, "port"):
10 changes: 8 additions & 2 deletions ramalama/oci.py
@@ -153,8 +153,14 @@ def pull(self, args):
print(f"Downloading {self.model}...")
if args.engine:
try:
run_cmd([args.engine, "pull", self.model], debug=args.debug)
return "/mnt/models/model.file"
conman_args = [args.engine, "pull"]
if str(args.tlsverify).lower() == "false":
conman_args.extend([f"--tls-verify={args.tlsverify}"])
if args.authfile:
conman_args.extend([f"--authfile={args.authfile}"])
conman_args.extend([self.model])
run_cmd(conman_args, debug=args.debug)
return "/run/model/model.file"
except subprocess.CalledProcessError:
pass
return self._pull_omlmd(args)
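The new flags are appended only when they deviate from the defaults, so the common case remains a plain `podman pull`. A small sketch (a hypothetical standalone function, not RamaLama's API) of the command lines this assembly produces:

```
# Hypothetical standalone rendering of the argument assembly above.
def build_pull_args(engine, model, tlsverify=True, authfile=None):
    conman_args = [engine, "pull"]
    if str(tlsverify).lower() == "false":
        conman_args.append(f"--tls-verify={tlsverify}")
    if authfile:
        conman_args.append(f"--authfile={authfile}")
    conman_args.append(model)
    return conman_args

assert build_pull_args("podman", "quay.io/rhatdan/tiny:latest") == [
    "podman", "pull", "quay.io/rhatdan/tiny:latest"
]
assert build_pull_args(
    "podman", "quay.io/rhatdan/tiny:latest", tlsverify="false", authfile="auth.json"
) == [
    "podman", "pull", "--tls-verify=false", "--authfile=auth.json",
    "quay.io/rhatdan/tiny:latest",
]
```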
82 changes: 82 additions & 0 deletions ramalama/quadlet.py
@@ -0,0 +1,82 @@
import os

from ramalama.common import default_image


class Quadlet:
    def __init__(self, model, args, exec_args):
        self.ai_image = model
        if hasattr(args, "MODEL"):
            self.ai_image = args.MODEL
        self.ai_image = self.ai_image.removeprefix("oci://")
        if args.name:
            self.name = args.name
        else:
            self.name = os.path.basename(self.ai_image)

        self.model = model.removeprefix("oci://")
        self.args = args
        self.exec_args = exec_args

    def gen_container(self):
        port_string = ""
        if hasattr(self.args, "port"):
            port_string = f"PublishPort={self.args.port}"

        name_string = ""
        if hasattr(self.args, "name") and self.args.name:
            name_string = f"ContainerName={self.args.name}"

        outfile = self.name + ".container"
        print(f"Generating quadlet file: {outfile}")
        volume = self.gen_volume()
        with open(outfile, 'w') as c:
            c.write(
                f"""\
[Unit]
Description=RamaLama {self.model} AI Model Service
After=local-fs.target
[Container]
AddDevice=-/dev/dri
AddDevice=-/dev/kfd
Exec={" ".join(self.exec_args)}
Image={default_image()}
{volume}
{name_string}
{port_string}
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
"""
            )

    def gen_volume(self):
        if os.path.exists(self.model):
            return f"Volume={self.model}:/mnt/models/model.file:ro,Z"

        outfile = self.name + ".volume"

        self.gen_image()
        print(f"Generating quadlet file: {outfile}")
        with open(outfile, 'w') as c:
            c.write(
                f"""\
[Volume]
Driver=image
Image={self.name}.image
"""
            )
        return f"Mount=type=volume,source={self.name}.volume,dest=/mnt/models,ro"

    def gen_image(self):
        outfile = self.name + ".image"
        print(f"Generating quadlet file: {outfile}")
        with open(outfile, 'w') as c:
            c.write(
                f"""\
[Image]
Image={self.ai_image}
"""
            )
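For reference, a usage sketch (an assumed invocation mirroring the `model.py` change above, with `types.SimpleNamespace` standing in for the parsed argparse namespace) that generates the three quadlet files for an OCI model:

```
from types import SimpleNamespace

from ramalama.quadlet import Quadlet

# Stand-in for the argparse namespace that serve_parser() would produce.
args = SimpleNamespace(MODEL="oci://quay.io/rhatdan/tiny:latest",
                       name="tiny", port="8080")
exec_args = ["llama-server", "--port", "8080", "-m", "/run/model/model.file"]

quadlet = Quadlet(args.MODEL, args, exec_args)
# Writes tiny.container into the current directory, plus tiny.volume and
# tiny.image when the model path does not exist locally.
quadlet.gen_container()
```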
4 changes: 2 additions & 2 deletions setup.py
@@ -62,8 +62,8 @@ def find_package_modules(self, package, package_dir):


setuptools.setup(
    name = "ramalama",
    version = "0.0.20",
    name="ramalama",
    version="0.0.20",
    packages=find_packages(),
    cmdclass={"build_py": build_py},
    scripts=["bin/ramalama"],
53 changes: 50 additions & 3 deletions test/system/040-serve.bats
@@ -1,6 +1,8 @@
#!/usr/bin/env bats

load helpers
load helpers.registry
load setup_suite

verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name"

@@ -117,14 +119,59 @@ verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name"
    model=tiny
    name=c_$(safename)
    run_ramalama pull ${model}
    run_ramalama serve --name=${name} --port 1234 --generate=quadlet ${model}
    run_ramalama serve --port 1234 --generate=quadlet ${model}
    is "$output" "Generating quadlet file: tinyllama.container" "generate tinyllama.container"

    run cat tinyllama.container
    is "$output" ".*PublishPort=1234" "PublishPort should match"
    is "$output" ".*Name=${name}" "Quadlet should have name field"
    is "$output" ".*Exec=llama-server --port 1234 -m .*" "Exec line should be correct"
    run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus ${model}
    is "$output" ".*Volume=.*ollama/tinyllama" "Volume line should be correct"

    rm tinyllama.container
    run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus tiny
    is "$output" ".*error: argument --generate: invalid choice: 'bogus' (choose from 'quadlet', 'kube')" "Should fail"
}

@test "ramalama serve --generate=quadlet with OCI" {
skip_if_darwin
skip_if_docker
local registry=localhost:${PODMAN_LOGIN_REGISTRY_PORT}
local authfile=$RAMALAMA_TMPDIR/authfile.json

name=c_$(safename)
start_registry
run_ramalama login --authfile=$authfile \
--tls-verify=false \
--username ${PODMAN_LOGIN_USER} \
--password ${PODMAN_LOGIN_PASS} \
oci://$registry
run_ramalama pull tiny
run_ramalama push --authfile=$authfile --tls-verify=false tiny oci://$registry/tiny
run_ramalama serve --authfile=$authfile --tls-verify=false --name=${name} --port 1234 --generate=quadlet oci://$registry/tiny
is "$output" ".*Generating quadlet file: ${name}.container" "generate .container file"
is "$output" ".*Generating quadlet file: ${name}.volume" "generate .volume file"
is "$output" ".*Generating quadlet file: ${name}.image" "generate .image file"

run cat $name.container
is "$output" ".*PublishPort=1234" "PublishPort should match"
is "$output" ".*ContainerName=${name}" "Quadlet should have ContainerName field"
is "$output" ".*Exec=llama-server --port 1234 -m .*" "Exec line should be correct"
is "$output" ".*Mount=type=volume,source=${name}.volume,dest=/mnt/models,ro" "Volume line should be correct"

run cat $name.volume
is "$output" ".*Driver=image" "Driver Image"
is "$output" ".*Image=$name.image" "Image should exist"

run cat $name.image
is "$output" ".*Image=$registry/tiny" "Image should match"

rm $name.container
rm $name.volume
rm $name.image
stop_registry
}


@test "ramalama serve --generate=kube" {
    model=tiny
    name=c_$(safename)
