diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md index 8b11531c..6f575d92 100644 --- a/docs/ramalama.1.md +++ b/docs/ramalama.1.md @@ -19,7 +19,7 @@ Running in containers eliminates the need for users to configure the host system RamaLama pulls AI Models from model registries. Starting a chatbot or a rest API service from a simple single command. Models are treated similarly to how Podman and Docker treat container images. -When both Podman and Docker are installed, RamaLama defaults to Podman, The `RAMALAMA_CONTAINER_ENGINE=docker` environment variable can override this behavior. When neither are installed RamaLama attempts to run the model with software on the local system. +When both Podman and Docker are installed, RamaLama defaults to Podman, The `RAMALAMA_CONTAINER_ENGINE=docker` environment variable can override this behaviour. When neither are installed RamaLama attempts to run the model with software on the local system. Note: diff --git a/ramalama/annotations.py b/ramalama/annotations.py new file mode 100644 index 00000000..f35c561f --- /dev/null +++ b/ramalama/annotations.py @@ -0,0 +1,112 @@ +# These annotations are based off the proposed standard: +# https://github.com/CloudNativeAI/model-spec + +# ArtifactTypeModelManifest specifies the media type for a model manifest. +ArtifactTypeModelManifest = "application/vnd.cnai.model.manifest.v1+json" + +# ArtifactTypeModelLayer is the media type used for layers referenced by the +# manifest. +ArtifactTypeModelLayer = "application/vnd.cnai.model.layer.v1.tar" + +# ArtifactTypeModelLayerGzip is the media type used for gzipped layers +# referenced by the manifest. +ArtifactTypeModelLayerGzip = "application/vnd.cnai.model.layer.v1.tar+gzip" + +# AnnotationCreated is the annotation key for the date and time on which the +# model was built (date-time string as defined by RFC 3339). 
+AnnotationCreated = "org.cnai.model.created" + +# AnnotationAuthors is the annotation key for the contact details of the people +# or organization responsible for the model (freeform string). +AnnotationAuthors = "org.cnai.model.authors" + +# AnnotationURL is the annotation key for the URL to find more information on +# the model. +AnnotationURL = "org.cnai.model.url" + +# AnnotationDocumentation is the annotation key for the URL to get documentation +# on the model. +AnnotationDocumentation = "org.cnai.model.documentation" + +# AnnotationSource is the annotation key for the URL to get source code for +# building the model. +AnnotationSource = "org.cnai.model.source" + +# AnnotationVersion is the annotation key for the version of the packaged +# software. +# The version MAY match a label or tag in the source code repository. +# The version MAY be Semantic versioning-compatible. +AnnotationVersion = "org.cnai.model.version" + +# AnnotationRevision is the annotation key for the source control revision +# identifier for the packaged software. +AnnotationRevision = "org.cnai.model.revision" + +# AnnotationVendor is the annotation key for the name of the distributing +# entity, organization or individual. +AnnotationVendor = "org.cnai.model.vendor" + +# AnnotationLicenses is the annotation key for the license(s) under which +# contained software is distributed as an SPDX License Expression. +AnnotationLicenses = "org.cnai.model.licenses" + +# AnnotationRefName is the annotation key for the name of the reference for a +# target. +# SHOULD only be considered valid when on descriptors on `index.json` within +# model layout. +AnnotationRefName = "org.cnai.model.ref.name" + +# AnnotationTitle is the annotation key for the human-readable title of the +# model. +AnnotationTitle = "org.cnai.model.title" + +# AnnotationDescription is the annotation key for the human-readable description +# of the software packaged in the model. 
+AnnotationDescription = "org.cnai.model.description" + +# AnnotationArchitecture is the annotation key for the model architecture, such +# as `transformer`, `cnn`, `rnn`, etc. +AnnotationArchitecture = "org.cnai.model.architecture" + +# AnnotationFamily is the annotation key for the model family, such as +# `llama3`, `gpt2`, `qwen2`, etc. +AnnotationFamily = "org.cnai.model.family" + +# AnnotationName is the annotation key for the model name, such as +# `llama3-8b-instruct`, `gpt2-xl`, `qwen2-vl-72b-instruct`, etc. +AnnotationName = "org.cnai.model.name" + +# AnnotationFormat is the annotation key for the model format, such as +# `onnx`, `tensorflow`, `pytorch`, etc. +AnnotationFormat = "org.cnai.model.format" + +# AnnotationParamSize is the annotation key for the size of the model +# parameters. +AnnotationParamSize = "org.cnai.model.param.size" + +# AnnotationPrecision is the annotation key for the model precision, such as +# `bf16`, `fp16`, `int8`, etc. +AnnotationPrecision = "org.cnai.model.precision" + +# AnnotationQuantization is the annotation key for the model quantization, +# such as `awq`, `gptq`, etc. +AnnotationQuantization = "org.cnai.model.quantization" + +# AnnotationReadme is the annotation key for the layer is a README.md file +# (boolean), such as `true` or `false`. +AnnotationReadme = "org.cnai.model.readme" + +# AnnotationLicense is the annotation key for the layer is a license file +# (boolean), such as `true` or `false`. +AnnotationLicense = "org.cnai.model.license" + +# AnnotationConfig is the annotation key for the layer is a configuration file +# (boolean), such as `true` or `false`. +AnnotationConfig = "org.cnai.model.config" + +# AnnotationModel is the annotation key for the layer is a model file (boolean), +# such as `true` or `false`. +AnnotationModel = "org.cnai.model.model" + +# AnnotationFilepath is the annotation key for the file path of the layer. 
+AnnotationFilepath = "org.cnai.model.filepath" diff --git a/ramalama/cli.py b/ramalama/cli.py index 41b3a4da..12f139fe 100644 --- a/ramalama/cli.py +++ b/ramalama/cli.py @@ -602,14 +602,14 @@ def push_cli(args): try: model = New(tgt, args) model.push(source, args) - except KeyError as e: + except NotImplementedError as e: for mtype in model_types: - if model.startswith(mtype + "://"): + if tgt.startswith(mtype + "://"): raise e try: # attempt to push as a container image - m = OCI(model, config.get('engine', container_manager())) - m.push(args) + m = OCI(tgt, config.get('engine', container_manager())) + m.push(source, args) except Exception: raise e diff --git a/ramalama/common.py b/ramalama/common.py index aa97bfac..88c65fb8 100644 --- a/ramalama/common.py +++ b/ramalama/common.py @@ -221,3 +221,9 @@ def download_file(url, dest_path, headers=None, show_progress=True): print(f"File {url} already fully downloaded.") else: raise e + + +def engine_version(engine): + # Return the container engine's client version string + cmd_args = [engine, "version", "--format", "{{ .Client.Version }}"] + return run_cmd(cmd_args).stdout.decode("utf-8").strip() diff --git a/ramalama/oci.py b/ramalama/oci.py index 82ab2d9b..0bbc5463 100644 --- a/ramalama/oci.py +++ b/ramalama/oci.py @@ -4,8 +4,16 @@ import sys import tempfile +import ramalama.annotations as annotations from ramalama.model import Model -from ramalama.common import run_cmd, exec_cmd, perror, available, mnt_file +from ramalama.common import ( + available, + engine_version, + exec_cmd, + mnt_file, + perror, + run_cmd, +) prefix = "oci://" @@ -14,6 +22,63 @@ ociimage_car = "org.containers.type=ai.image.model.car" +def engine_supports_manifest_attributes(engine): + if not engine or engine == "" or engine == "docker": + return False + if engine == "podman" and engine_version(engine) < "5": + return False + return True + + +def list_manifests(args): + conman_args = [ + args.engine, + "images", + "--filter", + "manifest=true", + 
"--format", + '{"name":"oci://{{ .Repository }}:{{ .Tag }}","modified":"{{ .Created }}",\ + "size":"{{ .Size }}", "ID":"{{ .ID }}"},', + ] + output = run_cmd(conman_args, debug=args.debug).stdout.decode("utf-8").strip() + if output == "": + return [] + + manifests = json.loads("[" + output[:-1] + "]") + if not engine_supports_manifest_attributes(args.engine): + return manifests + + models = [] + for manifest in manifests: + conman_args = [ + args.engine, + "manifest", + "inspect", + manifest["ID"], + ] + output = run_cmd(conman_args, debug=args.debug).stdout.decode("utf-8").strip() + + if output == "": + continue + inspect = json.loads(output) + if 'manifests' not in inspect: + continue + if not inspect['manifests']: + continue + img = inspect['manifests'][0] + if 'annotations' not in img: + continue + if annotations.AnnotationModel in img['annotations']: + models += [ + { + "name": manifest["name"], + "modified": manifest["modified"], + "size": manifest["size"], + } + ] + return models + + def list_models(args): conman = args.engine if conman is None: @@ -30,7 +95,9 @@ def list_models(args): output = run_cmd(conman_args, debug=args.debug).stdout.decode("utf-8").strip() if output == "": return [] - return json.loads("[" + output[:-1] + "]") + models = json.loads("[" + output[:-1] + "]") + models += list_manifests(args) + return models class OCI(Model): @@ -116,37 +183,79 @@ def _build(self, source, target, args): c.write(model_car) else: c.write(model_raw) - run_cmd( - [self.conman, "build", "-t", target, "-f", containerfile.name, contextdir], stdout=None, debug=args.debug + imageid = ( + run_cmd([self.conman, "build", "--no-cache", "-q", "-f", containerfile.name, contextdir], debug=args.debug) + .stdout.decode("utf-8") + .strip() ) + return imageid + + def _create_manifest_without_attributes(self, target, imageid, args): + # Create manifest list for target with imageid + cmd_args = [ + self.conman, + "manifest", + "create", + target, + imageid, + ] + 
       run_cmd(cmd_args, debug=args.debug) + + def _create_manifest(self, target, imageid, args): + if not engine_supports_manifest_attributes(args.engine): + return self._create_manifest_without_attributes(target, imageid, args) + + # Create manifest list for target with imageid + cmd_args = [ + self.conman, + "manifest", + "create", + target, + imageid, + ] + run_cmd(cmd_args, debug=args.debug) + + # Annotate manifest list + cmd_args = [ + self.conman, + "manifest", + "annotate", + "--annotation", + f"{annotations.AnnotationModel}=true", + "--annotation", + f"{ocilabeltype}=''", + "--annotation", + f"{annotations.AnnotationTitle}={args.SOURCE}", + target, + imageid, + ] + run_cmd(cmd_args, stdout=None, debug=args.debug) + + def _convert(self, source, target, args): + print(f"Converting {source} to {target}...") + try: + run_cmd([self.conman, "manifest", "rm", target], ignore_stderr=True, stdout=None, debug=args.debug) + except subprocess.CalledProcessError: + pass + imageid = self._build(source, target, args) + self._create_manifest(target, imageid, args) def push(self, source, args): target = self.model.removeprefix(prefix) source = source.removeprefix(prefix) + print(f"Pushing {target}...") conman_args = [self.conman, "push"] if args.authfile: conman_args.extend([f"--authfile={args.authfile}"]) if str(args.tlsverify).lower() == "false": conman_args.extend([f"--tls-verify={args.tlsverify}"]) - - print(f"Pushing {target}...") + conman_args.extend([target]) if source != target: - try: - self._build(source, target, args) - try: - conman_args.extend([target]) - run_cmd(conman_args, debug=args.debug) - return - except subprocess.CalledProcessError as e: - perror(f"Failed to push {source} model to OCI: {e}") - raise e - except subprocess.CalledProcessError: - pass + self._convert(source, target, args) try: - conman_args.extend([source, target]) run_cmd(conman_args, debug=args.debug) except subprocess.CalledProcessError as e: - perror(f"Failed to push {source} model to 
OCI {target}: {e}") + perror(f"Failed to push OCI {target} : {e}") raise e def pull(self, args): @@ -218,8 +327,12 @@ def remove(self, args, ignore_stderr=False): if self.conman is None: raise NotImplementedError("OCI Images require a container engine") - conman_args = [self.conman, "rmi", f"--force={args.ignore}", self.model] - run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr) + try: + conman_args = [self.conman, "manifest", "rm", self.model] + run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr) + except subprocess.CalledProcessError: + conman_args = [self.conman, "rmi", f"--force={args.ignore}", self.model] + run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr) def exists(self, args): try: