Merge pull request #406 from rhatdan/quadlet
Make quadlets work with OCI images
ericcurtin authored Nov 4, 2024
2 parents 5791050 + 7f34d4f commit e2da6ef
Showing 9 changed files with 213 additions and 48 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -9,3 +9,6 @@ ramalama/*.patch
dist
.#*
venv/
*.container
*.image
*.volume
66 changes: 57 additions & 9 deletions docs/ramalama-serve.1.md
@@ -17,6 +17,9 @@ For REST API endpoint documentation, see: [https://github.com/ggerganov/llama.cp

## OPTIONS

#### **--authfile**=*path*
path of the authentication file for OCI registries

#### **--detach**, **-d**
Run the container in the background and print the new container ID.
The default is true. The --nocontainer option forces this option to false.
@@ -40,14 +43,17 @@ Name of the container to run the Model in.
#### **--port**, **-p**
port for AI Model server to listen on

#### **--tls-verify**=*true*
require HTTPS and verify certificates when contacting OCI registries

## EXAMPLES
### Run two AI Models at the same time. Notice both are running within Podman containers.
```
$ ramalama serve -p 8080 --name mymodel ollama://tiny-llm:latest
$ ramalama serve -d -p 8080 --name mymodel ollama://tiny-llm:latest
09b0e0d26ed28a8418fb5cd0da641376a08c435063317e89cf8f5336baf35cfa
$ ramalama serve -n example --port 8081 oci://quay.io/mmortari/gguf-py-example/v1/example.gguf
$ ramalama serve -d -n example --port 8081 oci://quay.io/mmortari/gguf-py-example/v1/example.gguf
3f64927f11a5da5ded7048b226fbe1362ee399021f5e8058c73949a677b6ac9c
$ podman ps
@@ -56,11 +62,12 @@ CONTAINER ID IMAGE COMMAND CREATED
3f64927f11a5 quay.io/ramalama/ramalama:latest /usr/bin/ramalama... 17 seconds ago Up 17 seconds 0.0.0.0:8082->8082/tcp ramalama_YMPQvJxN97
```
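
Once a model is serving, any HTTP client can exercise it. The following is a minimal sketch, assuming the llama.cpp server's OpenAI-compatible `/v1/chat/completions` route (see the REST API documentation linked above) and the first container's published port 8080:

```
import json
import urllib.request

# Assumes the llama.cpp server's OpenAI-compatible chat endpoint; adjust the
# port to whatever was passed to `ramalama serve -p`.
payload = {"messages": [{"role": "user", "content": "Say hello."}]}
req = urllib.request.Request(
    "http://127.0.0.1:8080/v1/chat/completions",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])
```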

### Generate a quadlet for running the AI Model service
### Generate a quadlet service from a HuggingFace granite Model
```
$ ramalama serve --name MyGraniteServer --generate=quadlet granite > $HOME/.config/containers/systemd/MyGraniteServer.container
$ cat $HOME/.config/containers/systemd/MyGraniteServer.container
$ ramalama serve --name MyGraniteServer --generate=quadlet granite
Generating quadlet file: MyGraniteServer.container
$ cat MyGraniteServer.container
[Unit]
Description=RamaLama granite AI Model Service
After=local-fs.target
@@ -74,26 +81,67 @@ Volume=/home/dwalsh/.local/share/ramalama/models/huggingface/instructlab/granite
ContainerName=MyGraniteServer
PublishPort=8080
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
$ mv MyGraniteServer.container $HOME/.config/containers/systemd/
$ systemctl --user daemon-reload
$ systemctl start --user MyGraniteServer
$ systemctl status --user MyGraniteServer
● MyGraniteServer.service - RamaLama granite AI Model Service
     Loaded: loaded (/home/dwalsh/.config/containers/systemd/MyGraniteServer.container; generated)
    Drop-In: /usr/lib/systemd/user/service.d
             └─10-timeout-abort.conf
     Active: active (running) since Fri 2024-09-27 06:54:17 EDT; 3min 3s ago
   Main PID: 3706287 (conmon)
      Tasks: 20 (limit: 76808)
     Memory: 1.0G (peak: 1.0G)
...
$ podman ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
7bb35b97a0fe quay.io/ramalama/ramalama:latest llama-server --po... 3 minutes ago Up 3 minutes 0.0.0.0:43869->8080/tcp MyGraniteServer
```

### Generate a quadlet service from a tiny OCI Model
```
$ ramalama --runtime=vllm serve --name tiny --generate=quadlet oci://quay.io/rhatdan/tiny:latest
Downloading quay.io/rhatdan/tiny:latest...
Trying to pull quay.io/rhatdan/tiny:latest...
Getting image source signatures
Copying blob 65ba8d40e14a skipped: already exists
Copying blob e942a1bf9187 skipped: already exists
Copying config d8e0b28ee6 done |
Writing manifest to image destination
Generating quadlet file: tiny.container
Generating quadlet file: tiny.image
Generating quadlet file: tiny.volume
$ cat tiny.container
[Unit]
Description=RamaLama /run/model/model.file AI Model Service
After=local-fs.target
[Container]
AddDevice=-/dev/dri
AddDevice=-/dev/kfd
Exec=vllm serve --port 8080 /run/model/model.file
Image=quay.io/ramalama/ramalama:latest
Mount=type=volume,source=tiny.volume,dest=/mnt/models,ro
ContainerName=tiny
PublishPort=8080
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
$ cat tiny.volume
[Volume]
Driver=image
Image=tiny.image
$ cat tiny.image
[Image]
Image=quay.io/rhatdan/tiny:latest
```
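
The three generated files form a chain: the `.container` unit mounts an image-backed volume defined by the `.volume` unit, which in turn references the `.image` unit that pulls the OCI artifact. A small sketch (a hypothetical helper, not part of RamaLama) that follows the chain by reading the key=value lines of the generated files:

```
# Hypothetical helper (not part of RamaLama): collect the key=value entries
# of a quadlet file, keeping duplicates such as repeated AddDevice lines.
def quadlet_keys(path):
    keys = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if "=" in line and not line.startswith(("#", "[")):
                k, _, v = line.partition("=")
                keys.setdefault(k.strip(), []).append(v.strip())
    return keys

container = quadlet_keys("tiny.container")
# Mount=type=volume,source=tiny.volume,dest=/mnt/models,ro
mount = dict(p.split("=", 1) for p in container["Mount"][0].split(",") if "=" in p)
volume = quadlet_keys(mount["source"])        # tiny.volume
image = quadlet_keys(volume["Image"][0])      # tiny.image
print(image["Image"][0])                      # quay.io/rhatdan/tiny:latest
```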

### Generate a Kubernetes YAML file named tini
```
$ ramalama serve --name tini --generate kube tiny
2 changes: 1 addition & 1 deletion install.sh
@@ -80,7 +80,7 @@ setup_ramalama() {
  install -m755 "$to_file" "$ramalama_bin"

  local python_files=("cli.py" "huggingface.py" "model.py" "ollama.py" "common.py" "__init__.py" \
                      "oci.py" "version.py" "shortnames.py" "toml_parser.py")
                      "quadlet.py" "oci.py" "version.py" "shortnames.py" "toml_parser.py")

  for i in "${python_files[@]}"; do
    url="${host}/containers/ramalama/${branch}/ramalama/${i}"
7 changes: 7 additions & 0 deletions ramalama/cli.py
@@ -605,9 +605,16 @@ def run_cli(args):

def serve_parser(subparsers):
    parser = subparsers.add_parser("serve", help="serve REST API on specified AI Model")
    parser.add_argument("--authfile", help="path of the authentication file")
    parser.add_argument("-d", "--detach", action="store_true", dest="detach", help="run the container in detached mode")
    parser.add_argument("-n", "--name", dest="name", help="name of container in which the Model will be run")
    parser.add_argument("-p", "--port", default="8080", help="port for AI Model server to listen on")
    parser.add_argument(
        "--tls-verify",
        dest="tlsverify",
        default=True,
        help="require HTTPS and verify certificates when contacting registries",
    )
    parser.add_argument(
        "--generate",
        choices=["quadlet", "kube"],
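One detail worth noting: `--tls-verify` is declared without a `type=`, so a value supplied on the command line arrives as the string `"false"` while the unset default remains the Python bool `True`. The string comparison in `ramalama/oci.py` below copes with both cases. A minimal standalone sketch of that behavior:

```
import argparse

# Minimal sketch: why oci.py normalizes with str(...).lower() instead of
# testing the value directly.
parser = argparse.ArgumentParser()
parser.add_argument("--tls-verify", dest="tlsverify", default=True)

args = parser.parse_args(["--tls-verify=false"])
assert args.tlsverify == "false"                # a string, not a bool
assert bool(args.tlsverify) is True             # truthy: naive `if args.tlsverify` misleads
assert str(args.tlsverify).lower() == "false"   # the normalization used in oci.py

args = parser.parse_args([])
assert args.tlsverify is True                   # the default keeps its bool type
```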
34 changes: 3 additions & 31 deletions ramalama/model.py
@@ -12,6 +12,7 @@
    run_cmd,
)
from ramalama.version import version
from ramalama.quadlet import Quadlet


file_not_found = """\
@@ -310,37 +311,8 @@ def serve(self, args):
            raise NotImplementedError(file_not_found % (exec_args[0], exec_args[0], exec_args[0], str(e).strip("'")))

    def quadlet(self, model, args, exec_args):
        port_string = ""
        if hasattr(args, "port"):
            port_string = f"PublishPort={args.port}"

        name_string = ""
        if hasattr(args, "name") and args.name:
            name_string = f"ContainerName={args.name}"

        if hasattr(args, "MODEL"):
            model = args.MODEL

        print(
            f"""
[Unit]
Description=RamaLama {model} AI Model Service
After=local-fs.target
[Container]
AddDevice=-/dev/dri
AddDevice=-/dev/kfd
Exec={" ".join(exec_args)}
Image={default_image()}
Volume={model}:/mnt/models:ro,z
{name_string}
{port_string}
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
"""
        )
        quadlet = Quadlet(model, args, exec_args)
        quadlet.gen_container()

    def _gen_ports(self, args):
        if not hasattr(args, "port"):
10 changes: 8 additions & 2 deletions ramalama/oci.py
@@ -153,8 +153,14 @@ def pull(self, args):
print(f"Downloading {self.model}...")
if args.engine:
try:
run_cmd([args.engine, "pull", self.model], debug=args.debug)
return "/mnt/models/model.file"
conman_args = [args.engine, "pull"]
if str(args.tlsverify).lower() == "false":
conman_args.extend([f"--tls-verify={args.tlsverify}"])
if args.authfile:
conman_args.extend([f"--authfile={args.authfile}"])
conman_args.extend([self.model])
run_cmd(conman_args, debug=args.debug)
return "/run/model/model.file"
except subprocess.CalledProcessError:
pass
return self._pull_omlmd(args)
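The new flags are appended only when they deviate from the defaults, so the common case remains a plain `podman pull`. A small sketch (a hypothetical standalone function, not RamaLama's API) of the command lines this assembly produces:

```
# Hypothetical standalone rendering of the argument assembly above.
def build_pull_args(engine, model, tlsverify=True, authfile=None):
    conman_args = [engine, "pull"]
    if str(tlsverify).lower() == "false":
        conman_args.append(f"--tls-verify={tlsverify}")
    if authfile:
        conman_args.append(f"--authfile={authfile}")
    conman_args.append(model)
    return conman_args

assert build_pull_args("podman", "quay.io/rhatdan/tiny:latest") == [
    "podman", "pull", "quay.io/rhatdan/tiny:latest"
]
assert build_pull_args(
    "podman", "quay.io/rhatdan/tiny:latest", tlsverify="false", authfile="auth.json"
) == [
    "podman", "pull", "--tls-verify=false", "--authfile=auth.json",
    "quay.io/rhatdan/tiny:latest",
]
```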
82 changes: 82 additions & 0 deletions ramalama/quadlet.py
@@ -0,0 +1,82 @@
import os

from ramalama.common import default_image


class Quadlet:
    def __init__(self, model, args, exec_args):
        self.ai_image = model
        if hasattr(args, "MODEL"):
            self.ai_image = args.MODEL
        self.ai_image = self.ai_image.removeprefix("oci://")
        if args.name:
            self.name = args.name
        else:
            self.name = os.path.basename(self.ai_image)

        self.model = model.removeprefix("oci://")
        self.args = args
        self.exec_args = exec_args

    def gen_container(self):
        port_string = ""
        if hasattr(self.args, "port"):
            port_string = f"PublishPort={self.args.port}"

        name_string = ""
        if hasattr(self.args, "name") and self.args.name:
            name_string = f"ContainerName={self.args.name}"

        outfile = self.name + ".container"
        print(f"Generating quadlet file: {outfile}")
        volume = self.gen_volume()
        with open(outfile, 'w') as c:
            c.write(
                f"""\
[Unit]
Description=RamaLama {self.model} AI Model Service
After=local-fs.target
[Container]
AddDevice=-/dev/dri
AddDevice=-/dev/kfd
Exec={" ".join(self.exec_args)}
Image={default_image()}
{volume}
{name_string}
{port_string}
[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
"""
            )

    def gen_volume(self):
        if os.path.exists(self.model):
            return f"Volume={self.model}:/mnt/models/model.file:ro,Z"

        outfile = self.name + ".volume"

        self.gen_image()
        print(f"Generating quadlet file: {outfile}")
        with open(outfile, 'w') as c:
            c.write(
                f"""\
[Volume]
Driver=image
Image={self.name}.image
"""
            )
        return f"Mount=type=volume,source={self.name}.volume,dest=/mnt/models,ro"

    def gen_image(self):
        outfile = self.name + ".image"
        print(f"Generating quadlet file: {outfile}")
        with open(outfile, 'w') as c:
            c.write(
                f"""\
[Image]
Image={self.ai_image}
"""
            )
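For reference, a usage sketch (an assumed invocation mirroring the `model.py` change above, with `types.SimpleNamespace` standing in for the parsed argparse namespace) that generates the three quadlet files for an OCI model:

```
from types import SimpleNamespace

from ramalama.quadlet import Quadlet

# Stand-in for the argparse namespace that serve_parser() would produce.
args = SimpleNamespace(MODEL="oci://quay.io/rhatdan/tiny:latest",
                       name="tiny", port="8080")
exec_args = ["llama-server", "--port", "8080", "-m", "/run/model/model.file"]

quadlet = Quadlet(args.MODEL, args, exec_args)
# Writes tiny.container into the current directory, plus tiny.volume and
# tiny.image when the model path does not exist locally.
quadlet.gen_container()
```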
4 changes: 2 additions & 2 deletions setup.py
@@ -62,8 +62,8 @@ def find_package_modules(self, package, package_dir):


setuptools.setup(
    name = "ramalama",
    version = "0.0.20",
    name="ramalama",
    version="0.0.20",
    packages=find_packages(),
    cmdclass={"build_py": build_py},
    scripts=["bin/ramalama"],
53 changes: 50 additions & 3 deletions test/system/040-serve.bats
@@ -1,6 +1,8 @@
#!/usr/bin/env bats

load helpers
load helpers.registry
load setup_suite

verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name"

@@ -117,14 +119,59 @@ verify_begin=".*run --rm -i --label RAMALAMA --security-opt=label=disable --name"
    model=tiny
    name=c_$(safename)
    run_ramalama pull ${model}
    run_ramalama serve --name=${name} --port 1234 --generate=quadlet ${model}
    run_ramalama serve --port 1234 --generate=quadlet ${model}
    is "$output" "Generating quadlet file: tinyllama.container" "generate tinyllama.container"

    run cat tinyllama.container
    is "$output" ".*PublishPort=1234" "PublishPort should match"
    is "$output" ".*Name=${name}" "Quadlet should have name field"
    is "$output" ".*Exec=llama-server --port 1234 -m .*" "Exec line should be correct"
    run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus ${model}
    is "$output" ".*Volume=.*ollama/tinyllama" "Volume line should be correct"

    rm tinyllama.container
    run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus tiny
    is "$output" ".*error: argument --generate: invalid choice: 'bogus' (choose from 'quadlet', 'kube')" "Should fail"
}

@test "ramalama serve --generate=quadlet with OCI" {
skip_if_darwin
skip_if_docker
local registry=localhost:${PODMAN_LOGIN_REGISTRY_PORT}
local authfile=$RAMALAMA_TMPDIR/authfile.json

name=c_$(safename)
start_registry
run_ramalama login --authfile=$authfile \
--tls-verify=false \
--username ${PODMAN_LOGIN_USER} \
--password ${PODMAN_LOGIN_PASS} \
oci://$registry
run_ramalama pull tiny
run_ramalama push --authfile=$authfile --tls-verify=false tiny oci://$registry/tiny
run_ramalama serve --authfile=$authfile --tls-verify=false --name=${name} --port 1234 --generate=quadlet oci://$registry/tiny
is "$output" ".*Generating quadlet file: ${name}.container" "generate .container file"
is "$output" ".*Generating quadlet file: ${name}.volume" "generate .volume file"
is "$output" ".*Generating quadlet file: ${name}.image" "generate .image file"

run cat $name.container
is "$output" ".*PublishPort=1234" "PublishPort should match"
is "$output" ".*ContainerName=${name}" "Quadlet should have ContainerName field"
is "$output" ".*Exec=llama-server --port 1234 -m .*" "Exec line should be correct"
is "$output" ".*Mount=type=volume,source=${name}.volume,dest=/mnt/models,ro" "Volume line should be correct"

run cat $name.volume
is "$output" ".*Driver=image" "Driver Image"
is "$output" ".*Image=$name.image" "Image should exist"

run cat $name.image
is "$output" ".*Image=$registry/tiny" "Image should match"

rm $name.container
rm $name.volume
rm $name.image
stop_registry
}


@test "ramalama serve --generate=kube" {
    model=tiny
    name=c_$(safename)
