From 82b70fa096b010a00464615b4b4fbd650dac272f Mon Sep 17 00:00:00 2001 From: Shawn Rushefsky Date: Tue, 14 Jan 2025 11:44:00 -0500 Subject: [PATCH] 1.7.1 - Auto-restart, non-fatal file conversion errors (#16) * version bump, support frames for batchSize * handle case of local image paths * fix installation of sharp * animatediff works * zod hates to base64 validate huge stuff. fair enough * start outlining tests * ignore model files * scaffolding tests * controlnet frames * ignore model directory * rip out filewatcher, just use comfy apis * first passing test * fixes based on testing * sync tests passing * remove dependency on recipes repo * create test utils, do webhook tests * sd1.5 tests * ltx video tests * fix webhook test issue * flux tests * sd3.5 tests * sdxl tests * workaround testing race condition * document required models * remove unused * hunyuan video * add hunyuan to supported models * more testing instructions * make body size configurable, little cleanup in server * remove commented out code in workflow loader * shorten that test * document location of all models used for testing * cogvideo works * cogvideox 2b works * configurable restart policy * document config change * no longer treat image conversion as a fatal error, just send the unconverted output * fix * mochi support * remove .only * build scripts for docker images for ghcr * move more env into base * specify mochi video in readme * document cogvideox and mochi model sources * version bump * comments * await workflow generation. allows for async prompt generations * label the event in the webhook * readme note about timeouts * update workflows * nah, didn't like that * more readme update --- .github/workflows/create-release.yml | 2 + .github/workflows/pr-build.yml | 2 - README.md | 35 ++++-- docker-compose.yml | 2 +- docker/api.dockerfile | 14 +++ docker/build-api-images | 23 ++++ docker/build-comfy-base-images | 16 +++ docker/comfyui.dockerfile | 32 ++++++ docker/push-all | 17 +++ package-lock.json | 4 +- package.json | 2 +- src/config.ts | 10 +- src/server.ts | 112 ++++++++++++------- src/utils.ts | 2 +- test/cogvideox.spec.ts | 52 +++++++++ test/docker-image/Dockerfile | 27 +---- test/mochi.spec.ts | 52 +++++++++ test/workflows/cogvideox-txt2video.json | 137 ++++++++++++++++++++++++ test/workflows/mochi.json | 116 ++++++++++++++++++++ 19 files changed, 581 insertions(+), 76 deletions(-) create mode 100644 docker/api.dockerfile create mode 100755 docker/build-api-images create mode 100755 docker/build-comfy-base-images create mode 100644 docker/comfyui.dockerfile create mode 100755 docker/push-all create mode 100644 test/cogvideox.spec.ts create mode 100644 test/mochi.spec.ts create mode 100644 test/workflows/cogvideox-txt2video.json create mode 100644 test/workflows/mochi.json diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index e38dfab..b07111e 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -39,6 +39,8 @@ jobs: - name: Get the PR that was merged into main id: pr-output + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | info=$(gh pr list --state merged --limit 1 --json title --json body) echo "title=$(echo $info | jq -r '.title')" >> $GITHUB_OUTPUT diff --git a/.github/workflows/pr-build.yml b/.github/workflows/pr-build.yml index cfd088f..e4f3d28 100644 --- a/.github/workflows/pr-build.yml +++ b/.github/workflows/pr-build.yml @@ -5,12 +5,10 @@ on: branches: - main paths-ignore: - - ".github/**" - "**.md" - 
"**.png" - ".gitignore" - "generate-workflow" - - "**.yml" - "test/**" - "example-workflows/**" diff --git a/README.md b/README.md index edd4b68..c74e45a 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ A simple wrapper that facilitates using ComfyUI as a stateless API, either by re - [Generating New Workflow Endpoints](#generating-new-workflow-endpoints) - [Automating with Claude 3.5 Sonnet](#automating-with-claude-35-sonnet) - [Prebuilt Docker Images](#prebuilt-docker-images) + - [Considerations for Running on SaladCloud](#considerations-for-running-on-saladcloud) - [Contributing](#contributing) - [Testing](#testing) - [Required Models](#required-models) @@ -20,11 +21,18 @@ A simple wrapper that facilitates using ComfyUI as a stateless API, either by re ## Download and Usage -Download the latest version from the release page, and copy it into your existing ComfyUI dockerfile. Then, you can use it like this: +Either use a [pre-built Docker image](#prebuilt-docker-images), or build your own. + +Download the latest version from the release page, and copy it into your existing ComfyUI dockerfile. +You can find good base dockerfiles in the [docker](./docker) directory. +There are also example dockerfiles for popular models in the [SaladCloud Recipes Repo](https://github.com/SaladTechnologies/salad-recipes/tree/master/src). + +If you have your own ComfyUI dockerfile, you can add the comfyui-api server to it like so: ```dockerfile # Change this to the version you want to use -ARG api_version=1.7.0 +ARG api_version=1.7.1 + # Download the comfyui-api binary, and make it executable ADD https://github.com/SaladTechnologies/comfyui-api/releases/download/${api_version}/comfyui-api . @@ -41,7 +49,7 @@ The server hosts swagger docs at `/docs`, which can be used to interact with the ## Features - **Full Power Of ComfyUI**: The server supports the full ComfyUI /prompt API, and can be used to execute any ComfyUI workflow. -- **Verified Model/Workflow Support**: Stable Diffusion 1.5, Stable Diffusion XL, Stable Diffusion 3.5, Flux, AnimateDiff, LTX Video, Hunyuan Video. My assumption is more model types are supported, but these are the ones I have verified. +- **Verified Model/Workflow Support**: Stable Diffusion 1.5, Stable Diffusion XL, Stable Diffusion 3.5, Flux, AnimateDiff, LTX Video, Hunyuan Video, CogVideoX, Mochi Video. My assumption is more model types are supported, but these are the ones I have verified. - **Stateless API**: The server is stateless, and can be scaled horizontally to handle more requests. - **Swagger Docs**: The server hosts swagger docs at `/docs`, which can be used to interact with the API. - **"Synchronous" Support**: The server will return base64-encoded images directly in the response, if no webhook is provided. @@ -60,8 +68,8 @@ The server hosts swagger docs at `/docs`, which can be used to interact with the The server has two probes, `/health` and `/ready`. -- The `/health` probe will return a 200 status code once the warmup workflow has complete. -- The `/ready` probe will also return a 200 status code once the warmup workflow has completed, and the server is ready to accept requests. +- The `/health` probe will return a 200 status code once the warmup workflow has completed. It will stay healthy as long as the server is running, even if ComfyUI crashes. +- The `/ready` probe will also return a 200 status code once the warmup workflow has completed. 
It will return a 503 status code if ComfyUI is not running, such as when it has crashed and is being automatically restarted.

Here's a markdown guide to configuring the application based on the provided config.ts file:

@@ -91,6 +99,7 @@ The default values mostly assume this will run on top of an [ai-dock](https://gi
| WARMUP_PROMPT_FILE | (not set) | Path to warmup prompt file (optional) |
| WORKFLOW_DIR | "/workflows" | Directory for workflow files |
| BASE | "ai-dock" | There are different ways to load the comfyui environment for determining config values that vary with the base image. Currently only "ai-dock" has preset values. Set to empty string to not use this. |
+| ALWAYS_RESTART_COMFYUI | "false" | If set to "true", the ComfyUI process will be automatically restarted if it exits. Otherwise, the API server will exit when ComfyUI exits. |

### Configuration Details

@@ -155,10 +164,13 @@ const ComfyNodeSchema = z.object({
});

type ComfyNode = z.infer<typeof ComfyNodeSchema>;
+type ComfyPrompt = Record<string, ComfyNode>;

interface Workflow {
  RequestSchema: z.ZodObject<any, any>;
-  generateWorkflow: (input: any) => ComfyPrompt;
+  generateWorkflow: (input: any) => Promise<ComfyPrompt> | ComfyPrompt;
+  description?: string;
+  summary?: string;
}

// This defaults the checkpoint to whatever was used in the warmup workflow
@@ -374,6 +386,11 @@ The tag pattern is `saladtechnologies/comfyui:comfy<comfy-version>-api<api-version>-<model>`, where `<comfy-version>` is the version of ComfyUI, `<api-version>` is the version of the comfyui-api server, and `<model>` is the model used. There is a `base` tag for an image that contains ComfyUI and the comfyui-api server, but no models. There are also tags for specific models, like `sdxl-with-refiner` or `flux-schnell-fp8`.

+## Considerations for Running on SaladCloud
+
+- **SaladCloud's Container Gateway has a 100s timeout.** It is possible to construct very long-running workflows with ComfyUI, such as for video generation, that would exceed this timeout. In this scenario, you will need to either use a webhook to receive the results, or integrate with SaladCloud's [Job Queues](https://docs.salad.com/products/sce/job-queues/job-queues#job-queues) to handle long-running workflows.
+- **SaladCloud's maximum container image size is 35 GB (compressed).** The base [comfyui-api image](https://hub.docker.com/r/saladtechnologies/comfyui/tags) is around 3.25 GB (compressed), so any models and extensions must fit in the remaining space.
+
## Contributing

Contributions are welcome! Please open an issue or a pull request if you have any suggestions or improvements. 
@@ -402,6 +419,9 @@ Automated tests for this project require model files to be present in the `./tes
- `llava_llama3_fp8_scaled.safetensors` - https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/text_encoders
- `hunyuan_video_vae_bf16.safetensors` - https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files/vae
- `vae-ft-mse-840000-ema-pruned.ckpt` - https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.ckpt
+- `THUDM/CogVideoX-2b` - https://huggingface.co/THUDM/CogVideoX-2b
+- `mochi_preview_fp8_scaled.safetensors` - https://huggingface.co/Comfy-Org/mochi_preview_repackaged/blob/main/all_in_one/mochi_preview_fp8_scaled.safetensors
+

They should be in the correct comfyui directory structure, like so:

@@ -413,6 +433,7 @@ They should be in the correct comfyui directory structure, like so:
│ ├── dreamshaper_8.safetensors
│ ├── flux1-schnell-fp8.safetensors
│ ├── ltx-video-2b-v0.9.1.safetensors
+│ ├── mochi_preview_fp8_scaled.safetensors
│ ├── sd3.5_medium.safetensors
│ ├── sd_xl_base_1.0.safetensors
│ └── sd_xl_refiner_1.0.safetensors
@@ -421,6 +442,8 @@ They should be in the correct comfyui directory structure, like so:
│ ├── clip_l.safetensors
│ ├── t5xxl_fp16.safetensors
│ └── t5xxl_fp8_e4m3fn.safetensors
+├── CogVideo
+│ └── CogVideo2B/
├── controlnet
│ ├── openpose-sd1.5-1.1.safetensors
├── diffusion_models
diff --git a/docker-compose.yml b/docker-compose.yml
index 213ebfd..7117a23 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,6 @@
services:
  comfyui:
-    image: saladtechnologies/comfyui:comfy0.3.10-test-image
+    image: ghcr.io/saladtechnologies/comfyui-api:comfy0.3.10-test-image
    volumes:
      - ./bin:/app/bin
      - ./test/docker-image/models:/opt/ComfyUI/models
diff --git a/docker/api.dockerfile b/docker/api.dockerfile
new file mode 100644
index 0000000..5809a31
--- /dev/null
+++ b/docker/api.dockerfile
@@ -0,0 +1,14 @@
+ARG base=runtime
+ARG comfy_version=0.3.10
+ARG pytorch_version=2.5.0
+ARG cuda_version=12.1
+FROM ghcr.io/saladtechnologies/comfyui-api:comfy${comfy_version}-torch${pytorch_version}-cuda${cuda_version}-${base}
+
+ENV WORKFLOW_DIR=/workflows
+ENV STARTUP_CHECK_MAX_TRIES=30
+
+ARG api_version=1.7.1
+ADD https://github.com/SaladTechnologies/comfyui-api/releases/download/${api_version}/comfyui-api .
+RUN chmod +x comfyui-api
+
+CMD ["./comfyui-api"]
\ No newline at end of file
diff --git a/docker/build-api-images b/docker/build-api-images
new file mode 100755
index 0000000..6135c4c
--- /dev/null
+++ b/docker/build-api-images
@@ -0,0 +1,23 @@
+#! /usr/bin/bash
+
+usage="Usage: $0 [comfy_version] [torch_version] [cuda_version] [api_version]"
+
+comfy_version=${1:-0.3.10}
+torch_version=${2:-2.5.0}
+cuda_version=${3:-12.1}
+
+current_api_version=$(cat ../package.json | jq -r '.version')
+api_version=${4:-$current_api_version}
+
+bases=("runtime" "devel")
+
+for base in "${bases[@]}"; do
+  docker build -t ghcr.io/saladtechnologies/comfyui-api:comfy$comfy_version-api$api_version-torch$torch_version-cuda$cuda_version-$base \
+    -f api.dockerfile \
+    --build-arg comfy_version=$comfy_version \
+    --build-arg base=$base \
+    --build-arg pytorch_version=$torch_version \
+    --build-arg cuda_version=$cuda_version \
+    --build-arg api_version=$api_version \
+    .
+done
\ No newline at end of file
diff --git a/docker/build-comfy-base-images b/docker/build-comfy-base-images
new file mode 100755
index 0000000..8a9715d
--- /dev/null
+++ b/docker/build-comfy-base-images
@@ -0,0 +1,16 @@
+#! 
/usr/bin/bash
+
+comfy_version=${1:-0.3.10}
+torch_version=${2:-2.5.0}
+cuda_version=${3:-12.1}
+bases=("runtime" "devel")
+
+for base in "${bases[@]}"; do
+  docker build -t ghcr.io/saladtechnologies/comfyui-api:comfy$comfy_version-torch$torch_version-cuda$cuda_version-$base \
+    -f comfyui.dockerfile \
+    --build-arg comfy_version=$comfy_version \
+    --build-arg base=$base \
+    --build-arg pytorch_version=$torch_version \
+    --build-arg cuda_version=$cuda_version \
+    .
+done
diff --git a/docker/comfyui.dockerfile b/docker/comfyui.dockerfile
new file mode 100644
index 0000000..b6944f4
--- /dev/null
+++ b/docker/comfyui.dockerfile
@@ -0,0 +1,33 @@
+ARG base=runtime
+ARG pytorch_version=2.5.0
+ARG cuda_version=12.1
+FROM pytorch/pytorch:${pytorch_version}-cuda${cuda_version}-cudnn9-${base}
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+  curl \
+  git \
+  unzip \
+  wget \
+  && rm -rf /var/lib/apt/lists/*
+
+# Install comfy-cli, which makes it easy to install custom nodes and other comfy specific functionality.
+RUN pip install --upgrade pip
+RUN pip install comfy-cli
+WORKDIR /opt
+ARG comfy_version=0.3.10
+RUN git clone --depth 1 --branch v${comfy_version} https://github.com/comfyanonymous/ComfyUI.git
+WORKDIR /opt/ComfyUI
+RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu121
+RUN pip install -r requirements.txt
+ENV COMFY_HOME=/opt/ComfyUI
+RUN comfy --skip-prompt tracking disable
+RUN comfy --skip-prompt set-default ${COMFY_HOME}
+ENV MODEL_DIR=${COMFY_HOME}/models
+ENV OUTPUT_DIR=${COMFY_HOME}/output
+ENV INPUT_DIR=${COMFY_HOME}/input
+ENV CMD="comfy --workspace ${COMFY_HOME} launch -- --listen *"
+ENV BASE=""
+
+# Exec-form CMD does not expand environment variables, so the workspace path is written out literally here.
+CMD ["comfy", "--workspace", "/opt/ComfyUI", "launch", "--", "--listen", "*"]
\ No newline at end of file
diff --git a/docker/push-all b/docker/push-all
new file mode 100755
index 0000000..0576ee6
--- /dev/null
+++ b/docker/push-all
@@ -0,0 +1,17 @@
+#! 
/usr/bin/bash + +usage="Usage: $0 [comfy_version] [torch_version] [cuda_version] [api_version]" + +comfy_version=${1:-0.3.10} +torch_version=${2:-2.5.0} +cuda_version=${3:-12.1} + +current_api_version=$(cat package.json | jq -r '.version') +api_version=${4:-$current_api_version} + +bases=("runtime" "devel") + +for base in "${bases[@]}"; do + docker push ghcr.io/saladtechnologies/comfyui-api:comfy$comfy_version-torch$torch_version-cuda$cuda_version-$base + docker push ghcr.io/saladtechnologies/comfyui-api:comfy$comfy_version-api$api_version-torch$torch_version-cuda$cuda_version-$base +done \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 6f81d3d..894fe5e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "comfyui-api", - "version": "1.7.0", + "version": "1.7.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "comfyui-api", - "version": "1.7.0", + "version": "1.7.1", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index fb8f3c1..ebe39e1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "comfyui-api", - "version": "1.7.0", + "version": "1.7.1", "description": "Wraps comfyui to make it easier to use as a stateless web service", "main": "dist/src/index.js", "scripts": { diff --git a/src/config.ts b/src/config.ts index ce99bd0..c820f38 100644 --- a/src/config.ts +++ b/src/config.ts @@ -22,6 +22,7 @@ const { MARKDOWN_SCHEMA_DESCRIPTIONS = "true", BASE = "ai-dock", MAX_BODY_SIZE_MB = "100", + ALWAYS_RESTART_COMFYUI = "false", } = process.env; fs.mkdirSync(WORKFLOW_DIR, { recursive: true }); @@ -34,8 +35,7 @@ const port = parseInt(PORT, 10); const startupCheckInterval = parseInt(STARTUP_CHECK_INTERVAL_S, 10) * 1000; const startupCheckMaxTries = parseInt(STARTUP_CHECK_MAX_TRIES, 10); const maxBodySize = parseInt(MAX_BODY_SIZE_MB, 10) * 1024 * 1024; - -// type for {string: string} +const alwaysRestartComfyUI = ALWAYS_RESTART_COMFYUI.toLowerCase() === "true"; const loadEnvCommand: Record = { "ai-dock": `source /opt/ai-dock/etc/environment.sh \ @@ -71,6 +71,11 @@ interface ComfyDescription { schedulers: string[]; } +/** + * This function uses python to import some of the ComfyUI code and get the + * description of the samplers and schedulers. 
+ * @returns ComfyDescription
+ */
function getComfyUIDescription(): ComfyDescription {
  const temptComfyFilePath = path.join(comfyDir, "temp_comfy_description.json");
  const pythonCode = `
@@ -131,6 +136,7 @@ const config = {
  comfyHost: DIRECT_ADDRESS,
  comfyPort: COMFYUI_PORT_HOST,
  comfyURL,
+  alwaysRestartComfyUI,
  wsClientId,
  comfyWSURL,
  startupCheckInterval,
diff --git a/src/server.ts b/src/server.ts
index e355e47..8446053 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -55,6 +55,9 @@ for (const modelType in config.models) {
  modelResponse[modelType] = config.models[modelType].all;
}

+let warm = false;
+let wasEverWarm = false;
+
server.register(fastifySwagger, {
  openapi: {
    openapi: "3.0.0",
@@ -107,7 +110,7 @@ server.after(() => {
    },
    async (request, reply) => {
      // 200 if ready, 500 if not
-      if (warm) {
+      if (wasEverWarm) {
        return reply.code(200).send({ version, status: "healthy" });
      }
      return reply.code(500).send({ version, status: "not healthy" });
@@ -125,7 +128,7 @@ server.after(() => {
          version: z.literal(version),
          status: z.literal("ready"),
        }),
-        500: z.object({
+        503: z.object({
          version: z.literal(version),
          status: z.literal("not ready"),
        }),
@@ -136,7 +139,7 @@ server.after(() => {
      if (warm) {
        return reply.code(200).send({ version, status: "ready" });
      }
-      return reply.code(500).send({ version, status: "not ready" });
+      return reply.code(503).send({ version, status: "not ready" });
    }
  );

@@ -157,6 +160,15 @@ server.after(() => {
    }
  );

+  /**
+   * This route is the primary wrapper around the ComfyUI /prompt endpoint.
+   * It shares the same schema as the ComfyUI /prompt endpoint, but adds the
+   * ability to convert the output image to a different format, and to either
+   * send the output image to a webhook or return it in the response.
+   *
+   * If your application has its own ID scheme, you can provide the ID in the
+   * request body. If you don't provide an ID, one will be generated for you.
+   */
  app.post<{
    Body: PromptRequest;
  }>(
@@ -279,18 +291,22 @@
            let filename = originalFilename;
            let fileBuffer = outputs[filename];
            if (convert_output) {
-              fileBuffer = await convertImageBuffer(
-                fileBuffer,
-                convert_output
-              );
-
-              /**
-               * If the user has provided an output format, we need to update the filename
-               */
-              filename = originalFilename.replace(
-                /\.[^/.]+$/,
-                `.${convert_output.format}`
-              );
+              try {
+                fileBuffer = await convertImageBuffer(
+                  fileBuffer,
+                  convert_output
+                );
+
+                /**
+                 * If the user has provided an output format, we need to update the filename
+                 */
+                filename = originalFilename.replace(
+                  /\.[^/.]+$/,
+                  `.${convert_output.format}`
+                );
+              } catch (e: any) {
+                app.log.warn(`Failed to convert image: ${e.message}`);
+              }
            }
            const base64File = fileBuffer.toString("base64");
            app.log.info(`Sending image ${filename} to webhook: ${webhook}`);
@@ -300,6 +316,7 @@
                "Content-Type": "application/json",
              },
              body: JSON.stringify({
+                event: "output.complete",
                image: base64File,
                id,
                filename,
@@ -346,14 +363,18 @@
        let filename = originalFilename;
        if (convert_output) {
-          fileBuffer = await convertImageBuffer(fileBuffer, convert_output);
-          /**
-           * If the user has provided an output format, we need to update the filename
-           */
-          filename = originalFilename.replace(
-            /\.[^/.]+$/,
-            `.${convert_output.format}`
-          );
+          try {
+            fileBuffer = await convertImageBuffer(fileBuffer, convert_output);
+            /**
+             * If the user has provided an output format, we need to update the filename
+             */
+            filename = originalFilename.replace(
+              /\.[^/.]+$/,
+              `.${convert_output.format}`
+            );
+          } catch (e: any) {
+            app.log.warn(`Failed to convert image: ${e.message}`);
+          }
        }

        const base64File = fileBuffer.toString("base64");
@@ -398,6 +419,11 @@
          summary = node.summary;
        }

+      /**
+       * Workflow endpoints expose a simpler API to users and perform the
+       * transformation to a ComfyUI prompt behind the scenes. Each endpoint
+       * simply calls the /prompt endpoint with the appropriate parameters.
+ */ app.post<{ Body: BodyType; }>( @@ -415,7 +441,7 @@ server.after(() => { }, async (request, reply) => { const { id, input, webhook, convert_output } = request.body; - const prompt = node.generateWorkflow(input); + const prompt = await node.generateWorkflow(input); const resp = await fetch( `http://localhost:${config.wrapperPort}/prompt`, @@ -448,29 +474,43 @@ server.after(() => { walk(workflows); }); -let warm = false; - process.on("SIGINT", async () => { server.log.info("Received SIGINT, interrupting process"); shutdownComfyUI(); process.exit(0); }); -export async function start() { - try { - const start = Date.now(); - // Start the command - launchComfyUI(); - await waitForComfyUIToStart(server.log); - +async function launchComfyUIAndAPIServerAndWaitForWarmup() { + warm = false; + launchComfyUI().catch((err: any) => { + server.log.error(err.message); + if (config.alwaysRestartComfyUI) { + server.log.info("Restarting ComfyUI"); + launchComfyUIAndAPIServerAndWaitForWarmup(); + } else { + server.log.info("Exiting"); + process.exit(1); + } + }); + await waitForComfyUIToStart(server.log); + if (!wasEverWarm) { await server.ready(); server.swagger(); - // Start the server await server.listen({ port: config.wrapperPort, host: config.wrapperHost }); server.log.info(`ComfyUI API ${version} started.`); - await warmupComfyUI(); - warm = true; + } + await warmupComfyUI(); + wasEverWarm = true; + warm = true; +} + +export async function start() { + try { + const start = Date.now(); + // Start ComfyUI + await launchComfyUIAndAPIServerAndWaitForWarmup(); + const warmupTime = Date.now() - start; server.log.info(`Warmup took ${warmupTime / 1000}s`); } catch (err: any) { diff --git a/src/utils.ts b/src/utils.ts index 0ef2f8c..e80bf32 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -20,7 +20,7 @@ export function launchComfyUI() { const cmdAndArgs = config.comfyLaunchCmd.split(" "); const cmd = cmdAndArgs[0]; const args = cmdAndArgs.slice(1); - commandExecutor.execute(cmd, args, { + return commandExecutor.execute(cmd, args, { DIRECT_ADDRESS: config.comfyHost, COMFYUI_PORT_HOST: config.comfyPort, WEB_ENABLE_AUTH: "false", diff --git a/test/cogvideox.spec.ts b/test/cogvideox.spec.ts new file mode 100644 index 0000000..4dd389c --- /dev/null +++ b/test/cogvideox.spec.ts @@ -0,0 +1,52 @@ +import { expect } from "earl"; +import path from "path"; +import fs from "fs"; +import { + sleep, + createWebhookListener, + submitPrompt, + checkImage, + waitForServerToStart, +} from "./test-utils"; +import txt2Video from "./workflows/cogvideox-txt2video.json"; + +const text2VideoOptions = { + webpFrames: txt2Video["35"].inputs.num_frames - 1, + width: txt2Video["37"].inputs.width, + height: txt2Video["37"].inputs.height, +}; + +describe("CogVideoX", () => { + before(async () => { + await waitForServerToStart(); + }); + describe("Return content in response", () => { + it("text2video works", async () => { + const respBody = await submitPrompt(txt2Video); + expect(respBody.filenames.length).toEqual(1); + expect(respBody.images.length).toEqual(1); + await checkImage( + respBody.filenames[0], + respBody.images[0], + text2VideoOptions + ); + }); + }); + + describe("Return content in webhook", () => { + it("text2video works", async () => { + let expected = 1; + const webhook = await createWebhookListener(async (body) => { + expected--; + const { id, filename, image } = body; + expect(id).toEqual(reqId); + await checkImage(filename, image, text2VideoOptions); + }); + const { id: reqId } = await submitPrompt(txt2Video, true); + 
while (expected > 0) { + await sleep(100); + } + await webhook.close(); + }); + }); +}); diff --git a/test/docker-image/Dockerfile b/test/docker-image/Dockerfile index 1a8fb1e..57a303c 100644 --- a/test/docker-image/Dockerfile +++ b/test/docker-image/Dockerfile @@ -1,39 +1,16 @@ -FROM pytorch/pytorch:2.5.0-cuda12.1-cudnn9-devel -ENV DEBIAN_FRONTEND=noninteractive +FROM ghcr.io/saladtechnologies/comfyui-api:comfy0.3.10-torch2.5.0-cuda12.1-devel RUN apt-get update && apt-get install -y \ - curl \ - git \ - unzip \ - wget \ libgl1 \ libgl1-mesa-glx \ libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* -# Install comfy-cli, which makes it easy to install custom nodes and other comfy specific functionality. -RUN pip install --upgrade pip -RUN pip install comfy-cli -WORKDIR /opt -ARG comfy_version=0.3.10 -RUN git clone --depth 1 --branch v${comfy_version} https://github.com/comfyanonymous/ComfyUI.git -WORKDIR /opt/ComfyUI -RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu121 -RUN pip install -r requirements.txt -ENV COMFY_HOME=/opt/ComfyUI -RUN comfy --skip-prompt tracking disable -RUN comfy --skip-prompt set-default ${COMFY_HOME} -ENV CMD="comfy --workspace ${COMFY_HOME} launch -- --listen *" -ENV BASE="" -ENV MODEL_DIR=${COMFY_HOME}/models -ENV OUTPUT_DIR=${COMFY_HOME}/output -ENV INPUT_DIR=${COMFY_HOME}/input -ENV WORKFLOW_DIR=/workflows - RUN comfy node registry-install comfyui-videohelpersuite RUN comfy node registry-install comfyui-animatediff-evolved RUN comfy node registry-install efficiency-nodes-comfyui RUN comfy node registry-install comfyui-advanced-controlnet +RUN comfy node registry-install comfyui-cogvideoxwrapper COPY poses ${INPUT_DIR}/poses ENV STARTUP_CHECK_MAX_TRIES=100 \ No newline at end of file diff --git a/test/mochi.spec.ts b/test/mochi.spec.ts new file mode 100644 index 0000000..c7ed190 --- /dev/null +++ b/test/mochi.spec.ts @@ -0,0 +1,52 @@ +import { expect } from "earl"; +import path from "path"; +import fs from "fs"; +import { + sleep, + createWebhookListener, + submitPrompt, + checkImage, + waitForServerToStart, +} from "./test-utils"; +import txt2Video from "./workflows/mochi.json"; + +const text2VideoOptions = { + webpFrames: txt2Video["21"].inputs.length, + width: txt2Video["21"].inputs.width, + height: txt2Video["21"].inputs.height, +}; + +describe("Mochi Video", () => { + before(async () => { + await waitForServerToStart(); + }); + describe("Return content in response", () => { + it("text2video works", async () => { + const respBody = await submitPrompt(txt2Video); + expect(respBody.filenames.length).toEqual(1); + expect(respBody.images.length).toEqual(1); + await checkImage( + respBody.filenames[0], + respBody.images[0], + text2VideoOptions + ); + }); + }); + + describe("Return content in webhook", () => { + it("text2video works", async () => { + let expected = 1; + const webhook = await createWebhookListener(async (body) => { + expected--; + const { id, filename, image } = body; + expect(id).toEqual(reqId); + await checkImage(filename, image, text2VideoOptions); + }); + const { id: reqId } = await submitPrompt(txt2Video, true); + while (expected > 0) { + await sleep(100); + } + await webhook.close(); + }); + }); +}); diff --git a/test/workflows/cogvideox-txt2video.json b/test/workflows/cogvideox-txt2video.json new file mode 100644 index 0000000..347afc0 --- /dev/null +++ b/test/workflows/cogvideox-txt2video.json @@ -0,0 +1,137 @@ +{ + "11": { + "inputs": { + "enable_vae_tiling": true, + "tile_sample_min_height": 240, 
+ "tile_sample_min_width": 360, + "tile_overlap_factor_height": 0.2, + "tile_overlap_factor_width": 0.2, + "auto_tile_size": true, + "vae": [ + "36", + 1 + ], + "samples": [ + "35", + 0 + ] + }, + "class_type": "CogVideoDecode", + "_meta": { + "title": "CogVideo Decode" + } + }, + "20": { + "inputs": { + "clip_name": "t5xxl_fp8_e4m3fn.safetensors", + "type": "sd3" + }, + "class_type": "CLIPLoader", + "_meta": { + "title": "Load CLIP" + } + }, + "30": { + "inputs": { + "prompt": "A golden retriever, sporting sleek black sunglasses, with its lengthy fur flowing in the breeze, sprints playfully across a rooftop terrace, recently refreshed by a light rain. The scene unfolds from a distance, the dog's energetic bounds growing larger as it approaches the camera, its tail wagging with unrestrained joy, while droplets of water glisten on the concrete behind it. The overcast sky provides a dramatic backdrop, emphasizing the vibrant golden coat of the canine as it dashes towards the viewer.\n\n", + "strength": 1, + "force_offload": false, + "clip": [ + "20", + 0 + ] + }, + "class_type": "CogVideoTextEncode", + "_meta": { + "title": "CogVideo TextEncode" + } + }, + "31": { + "inputs": { + "prompt": "", + "strength": 1, + "force_offload": true, + "clip": [ + "30", + 1 + ] + }, + "class_type": "CogVideoTextEncode", + "_meta": { + "title": "CogVideo TextEncode" + } + }, + "33": { + "inputs": { + "frame_rate": 8, + "loop_count": 0, + "filename_prefix": "CogVideoX5B-T2V", + "format": "image/webp", + "pingpong": false, + "save_output": true, + "images": [ + "11", + 0 + ] + }, + "class_type": "VHS_VideoCombine", + "_meta": { + "title": "Video Combine 🎥🅥🅗🅢" + } + }, + "35": { + "inputs": { + "num_frames": 6, + "steps": 50, + "cfg": 6, + "seed": 0, + "scheduler": "CogVideoXDDIM", + "denoise_strength": 1, + "model": [ + "36", + 0 + ], + "positive": [ + "30", + 0 + ], + "negative": [ + "31", + 0 + ], + "samples": [ + "37", + 0 + ] + }, + "class_type": "CogVideoSampler", + "_meta": { + "title": "CogVideo Sampler" + } + }, + "36": { + "inputs": { + "model": "THUDM/CogVideoX-2b", + "precision": "bf16", + "quantization": "fp8_e4m3fn", + "enable_sequential_cpu_offload": true, + "attention_mode": "sdpa", + "load_device": "main_device" + }, + "class_type": "DownloadAndLoadCogVideoModel", + "_meta": { + "title": "(Down)load CogVideo Model" + } + }, + "37": { + "inputs": { + "width": 720, + "height": 480, + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + } +} \ No newline at end of file diff --git a/test/workflows/mochi.json b/test/workflows/mochi.json new file mode 100644 index 0000000..0138726 --- /dev/null +++ b/test/workflows/mochi.json @@ -0,0 +1,116 @@ +{ + "3": { + "inputs": { + "seed": 531883064500892, + "steps": 30, + "cfg": 4.5, + "sampler_name": "euler", + "scheduler": "simple", + "denoise": 1, + "model": [ + "40", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "7", + 0 + ], + "latent_image": [ + "21", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "6": { + "inputs": { + "text": "A husky puppy playing in fresh snow. 
he is so happy", + "clip": [ + "40", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "7": { + "inputs": { + "text": "", + "clip": [ + "40", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "21": { + "inputs": { + "width": 848, + "height": 480, + "length": 13, + "batch_size": 1 + }, + "class_type": "EmptyMochiLatentVideo", + "_meta": { + "title": "EmptyMochiLatentVideo" + } + }, + "28": { + "inputs": { + "filename_prefix": "ComfyUI", + "fps": 24, + "lossless": false, + "quality": 90, + "method": "default", + "images": [ + "42", + 0 + ] + }, + "class_type": "SaveAnimatedWEBP", + "_meta": { + "title": "SaveAnimatedWEBP" + } + }, + "40": { + "inputs": { + "ckpt_name": "mochi_preview_fp8_scaled.safetensors" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + }, + "42": { + "inputs": { + "tile_size": 256, + "overlap": 64, + "temporal_size": 8, + "temporal_overlap": 4, + "samples": [ + "3", + 0 + ], + "vae": [ + "40", + 2 + ] + }, + "class_type": "VAEDecodeTiled", + "_meta": { + "title": "VAE Decode (Tiled)" + } + } +} \ No newline at end of file