From 8a8f3552192b47d0fd16aeeef007d23b451fe240 Mon Sep 17 00:00:00 2001
From: Matteo Collina <hello@matteocollina.com>
Date: Wed, 8 May 2024 13:00:04 +0200
Subject: [PATCH 1/6] Expose routes over OpenAPI

Signed-off-by: Matteo Collina <hello@matteocollina.com>
---
 .npmrc                          |  1 +
 index.ts                        | 11 ++---
 package.json                    |  4 +-
 plugins/api.ts                  |  2 +-
 tests/e2e/api.test.ts           | 63 +++++++++++++++++++++++++-
 tests/e2e/auth.test.ts          |  2 +
 tests/e2e/index.ts              |  6 ---
 tests/e2e/rate-limiting.test.ts | 78 +++++++++++++++++++++++++++------
 tests/unit/ai-providers.test.ts |  2 +
 tests/unit/generator.test.ts    |  2 +
 tests/unit/index.ts             |  5 ---
 tests/utils/stackable.ts        |  7 +--
 12 files changed, 147 insertions(+), 36 deletions(-)
 create mode 100644 .npmrc
 delete mode 100644 tests/e2e/index.ts
 delete mode 100644 tests/unit/index.ts
diff --git a/.npmrc b/.npmrc
new file mode 100644
index 0000000..cafe685
--- /dev/null
+++ b/.npmrc
@@ -0,0 +1 @@
+package-lock=true
diff --git a/index.ts b/index.ts
index 771657e..7486761 100644
--- a/index.ts
+++ b/index.ts
@@ -16,11 +16,6 @@ const stackable: Stackable<AiWarpConfig> = async function (fastify, opts) {
   await fastify.register(fastifyUser as any, config.auth)
   await fastify.register(authPlugin, opts)
 
-  await fastify.register(warpPlugin, opts) // needs to be registered here for fastify.ai to be decorated
-
-  await fastify.register(rateLimitPlugin, opts)
-  await fastify.register(apiPlugin, opts)
-
   if (config.showAiWarpHomepage !== undefined && config.showAiWarpHomepage) {
     await fastify.register(fastifyStatic, {
       root: join(import.meta.dirname, 'static')
@@ -28,6 +23,12 @@ const stackable: Stackable<AiWarpConfig> = async function (fastify, opts) {
   }
 
   await fastify.register(platformaticService, opts)
+
+  await fastify.register(warpPlugin, opts) // needs to be registered here for fastify.ai to be decorated
+
+  await fastify.register(rateLimitPlugin, opts)
+  await fastify.register(apiPlugin, opts)
+
 }
 
 stackable.configType = 'ai-warp-app'
diff --git a/package.json b/package.json
index e61c2c4..2ce79fa 100644
--- a/package.json
+++ b/package.json
@@ -18,8 +18,8 @@
     "lint-md": "markdownlint-cli2 .",
     "lint-md:fix": "markdownlint-cli2 --fix .",
     "test": "npm run test:unit && npm run test:e2e && npm run test:types",
-    "test:unit": "node --test --test-reporter=@reporters/github --test-reporter-destination=stdout --test-reporter=spec --test-reporter-destination=stdout --import=tsx ./tests/unit/index.ts",
-    "test:e2e": "node --test --test-reporter=@reporters/github --test-reporter-destination=stdout --test-reporter=spec --test-reporter-destination=stdout --import=tsx ./tests/e2e/index.ts",
+    "test:unit": "node --test --test-reporter=@reporters/github --test-reporter-destination=stdout --test-reporter=spec --test-reporter-destination=stdout --import=tsx --test-concurrency=1 ./tests/unit/*",
+    "test:e2e": "node --test --test-reporter=@reporters/github --test-reporter-destination=stdout --test-reporter=spec --test-reporter-destination=stdout --import=tsx --test-concurrency=1 ./tests/e2e/*",
     "test:types": "tsd"
   },
   "engines": {
diff --git a/plugins/api.ts b/plugins/api.ts
index 2de4b91..54080dd 100644
--- a/plugins/api.ts
+++ b/plugins/api.ts
@@ -23,7 +23,7 @@ const plugin: FastifyPluginAsyncTypebox = async (fastify) => {
           response: Type.String()
         }),
         default: Type.Object({
-          code: Type.String(),
+          code: Type.Optional(Type.String()),
           message: Type.String()
         })
       }
diff --git a/tests/e2e/api.test.ts b/tests/e2e/api.test.ts
index d7cfd7a..3844d95 100644
--- a/tests/e2e/api.test.ts
+++ b/tests/e2e/api.test.ts
@@ -1,4 +1,5 @@
-/* eslint-disable @typescript-eslint/no-floating-promises */
+/* eslint-disable @typescript
+* eslint/no-floating-promises */
 import { before, after, describe, it } from 'node:test'
 import assert from 'node:assert'
 import { FastifyInstance } from 'fastify'
@@ -8,9 +9,12 @@ import { buildAiWarpApp } from '../utils/stackable.js'
 import { AZURE_DEPLOYMENT_NAME, AZURE_MOCK_HOST } from '../utils/mocks/azure.js'
 import { MOCK_CONTENT_RESPONSE, buildExpectedStreamBodyString } from '../utils/mocks/base.js'
 import { OLLAMA_MOCK_HOST } from '../utils/mocks/ollama.js'
+import { mockAllProviders } from '../utils/mocks/index.js'
+mockAllProviders()
 
 const expectedStreamBody = buildExpectedStreamBodyString()
 
+
 interface Provider {
   name: string
   config: AiWarpConfig['aiProvider']
@@ -108,6 +112,7 @@ for (const { name, config } of providers) {
           prompt: 'asd'
         })
       })
+
       assert.strictEqual(res.headers.get('content-type'), 'text/event-stream')
 
       assert.strictEqual(chunkCallbackCalled, true)
@@ -170,3 +175,59 @@ it('calls the preResponseCallback', async () => {
 
   await app.close()
 })
+
+it('provides all paths in OpenAPI', async () => {
+  const [app, port] = await buildAiWarpApp({
+    aiProvider: {
+      openai: {
+        model: 'gpt-3.5-turbo',
+        apiKey: ''
+      }
+    }
+  })
+
+  await app.start()
+
+  const res = await fetch(`http://localhost:${port}/documentation/json`)
+  const body = await res.json()
+
+  assert.deepStrictEqual(Object.keys(body.paths), [
+    '/api/v1/prompt',
+    '/api/v1/stream'
+  ])
+
+  await app.close()
+})
+
+it('prompt with wrong JSON', async () => {
+  const [app, port] = await buildAiWarpApp({
+    aiProvider: {
+      openai: {
+        model: 'gpt-3.5-turbo',
+        apiKey: ''
+      }
+    }
+  })
+
+  await app.start()
+
+  const res = await fetch(`http://localhost:${port}/api/v1/prompt`, {
+    method: 'POST',
+    headers: {
+      'content-type': 'application/json'
+    },
+    body: JSON.stringify({
+      prompt: 'asd'
+    }).slice(0, 10)
+  })
+
+  assert.strictEqual(res.status, 400)
+
+  const body = await res.json()
+
+  assert.deepStrictEqual(body, {
+    message: 'Unexpected end of JSON input'
+  })
+
+  await app.close()
+})
diff --git a/tests/e2e/auth.test.ts b/tests/e2e/auth.test.ts
index e099bd3..0970c59 100644
--- a/tests/e2e/auth.test.ts
+++ b/tests/e2e/auth.test.ts
@@ -4,6 +4,8 @@ import assert from 'node:assert'
 import { buildAiWarpApp } from '../utils/stackable.js'
 import { AiWarpConfig } from '../../config.js'
 import { authConfig, createToken } from '../utils/auth.js'
+import { mockAllProviders } from '../utils/mocks/index.js'
+mockAllProviders()
 
 const aiProvider: AiWarpConfig['aiProvider'] = {
   openai: {
diff --git a/tests/e2e/index.ts b/tests/e2e/index.ts
deleted file mode 100644
index b675db9..0000000
--- a/tests/e2e/index.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-import './api.test'
-import './rate-limiting.test'
-import './auth.test'
-import { mockAllProviders } from '../utils/mocks/index.js'
-
-mockAllProviders()
diff --git a/tests/e2e/rate-limiting.test.ts b/tests/e2e/rate-limiting.test.ts
index da0408e..5aa007a 100644
--- a/tests/e2e/rate-limiting.test.ts
+++ b/tests/e2e/rate-limiting.test.ts
@@ -5,6 +5,8 @@ import fastifyPlugin from 'fastify-plugin'
 import { AiWarpConfig } from '../../config.js'
 import { buildAiWarpApp } from '../utils/stackable.js'
 import { authConfig, createToken } from '../utils/auth.js'
+import { mockAllProviders } from '../utils/mocks/index.js'
+mockAllProviders()
 
 const aiProvider: AiWarpConfig['aiProvider'] = {
   openai: {
@@ -28,7 +30,15 @@ it('calls ai.rateLimiting.max callback', async () => {
 
     await app.start()
 
-    const res = await fetch(`http://localhost:${port}`)
+    const res = await fetch(`http://localhost:${port}/api/v1/prompt`, {
+      method: 'POST',
+      headers: {
+        'content-type': 'application/json'
+      },
+      body: JSON.stringify({
+        prompt: 'asd'
+      })
+    })
     assert.strictEqual(callbackCalled, true)
     assert.strictEqual(res.headers.get('x-ratelimit-limit'), `${expectedMax}`)
   } finally {
@@ -50,7 +60,15 @@ it('calls ai.rateLimiting.allowList callback', async () => {
 
     await app.start()
 
-    await fetch(`http://localhost:${port}`)
+    await fetch(`http://localhost:${port}/api/v1/prompt`, {
+      method: 'POST',
+      headers: {
+        'content-type': 'application/json'
+      },
+      body: JSON.stringify({
+        prompt: 'asd'
+      })
+    })
     assert.strictEqual(callbackCalled, true)
   } finally {
     await app.close()
@@ -76,13 +94,21 @@ it('calls ai.rateLimiting.onBanReach callback', async () => {
 
       app.ai.rateLimiting.errorResponseBuilder = () => {
         errorResponseBuilderCalled = true
-        return { error: 'rate limited' }
+        return { message: 'rate limited' }
       }
     }))
 
     await app.start()
 
-    await fetch(`http://localhost:${port}`)
+    await fetch(`http://localhost:${port}/api/v1/prompt`, {
+      method: 'POST',
+      headers: {
+        'content-type': 'application/json'
+      },
+      body: JSON.stringify({
+        prompt: 'asd'
+      })
+    })
     assert.strictEqual(onBanReachCalled, true)
     assert.strictEqual(errorResponseBuilderCalled, true)
   } finally {
@@ -104,7 +130,15 @@ it('calls ai.rateLimiting.keyGenerator callback', async () => {
 
     await app.start()
 
-    await fetch(`http://localhost:${port}`)
+    await fetch(`http://localhost:${port}/api/v1/prompt`, {
+      method: 'POST',
+      headers: {
+        'content-type': 'application/json'
+      },
+      body: JSON.stringify({
+        prompt: 'asd'
+      })
+    })
     assert.strictEqual(callbackCalled, true)
   } finally {
     await app.close()
@@ -120,13 +154,21 @@ it('calls ai.rateLimiting.errorResponseBuilder callback', async () => {
       app.ai.rateLimiting.max = () => 0
       app.ai.rateLimiting.errorResponseBuilder = () => {
         callbackCalled = true
-        return { error: 'rate limited' }
+        return { message: 'rate limited' }
       }
     }))
 
     await app.start()
 
-    await fetch(`http://localhost:${port}`)
+    await fetch(`http://localhost:${port}/api/v1/prompt`, {
+      method: 'POST',
+      headers: {
+        'content-type': 'application/json'
+      },
+      body: JSON.stringify({
+        prompt: 'asd'
+      })
+    })
     assert.strictEqual(callbackCalled, true)
   } finally {
     await app.close()
@@ -156,17 +198,27 @@ it('uses the max for a specific claim', async () => {
   try {
     await app.start()
 
-    let res = await fetch(`http://localhost:${port}`, {
+    let res = await fetch(`http://localhost:${port}/api/v1/prompt`, {
+      method: 'POST',
       headers: {
-        Authorization: `Bearer ${createToken({ rateLimitMax: '10' })}`
-      }
+        Authorization: `Bearer ${createToken({ rateLimitMax: '10' })}`,
+        'content-type': 'application/json'
+      },
+      body: JSON.stringify({
+        prompt: 'asd'
+      })
     })
     assert.strictEqual(res.headers.get('x-ratelimit-limit'), '10')
 
-    res = await fetch(`http://localhost:${port}`, {
+    res = await fetch(`http://localhost:${port}/api/v1/prompt`, {
+      method: 'POST',
       headers: {
-        Authorization: `Bearer ${createToken({ rateLimitMax: '100' })}`
-      }
+        Authorization: `Bearer ${createToken({ rateLimitMax: '100' })}`,
+        'content-type': 'application/json'
+      },
+      body: JSON.stringify({
+        prompt: 'asd'
+      })
     })
     assert.strictEqual(res.headers.get('x-ratelimit-limit'), '100')
   } finally {
diff --git a/tests/unit/ai-providers.test.ts b/tests/unit/ai-providers.test.ts
index 3f59c73..48d0d67 100644
--- a/tests/unit/ai-providers.test.ts
+++ b/tests/unit/ai-providers.test.ts
@@ -10,6 +10,8 @@ import { MOCK_CONTENT_RESPONSE, buildExpectedStreamBodyString } from '../utils/m
 import { OLLAMA_MOCK_HOST } from '../utils/mocks/ollama.js'
 import { AZURE_DEPLOYMENT_NAME, AZURE_MOCK_HOST } from '../utils/mocks/azure.js'
 import { mockLlama2 } from '../utils/mocks/llama2.js'
+import { mockAllProviders } from '../utils/mocks/index.js'
+mockAllProviders()
 
 const expectedStreamBody = buildExpectedStreamBodyString()
 
diff --git a/tests/unit/generator.test.ts b/tests/unit/generator.test.ts
index ee18121..ad39487 100644
--- a/tests/unit/generator.test.ts
+++ b/tests/unit/generator.test.ts
@@ -7,6 +7,8 @@ import { join } from 'node:path'
 import AiWarpGenerator from '../../lib/generator.js'
 import { generateGlobalTypesFile } from '../../lib/templates/types.js'
 import { generatePluginWithTypesSupport } from '@platformatic/generators/lib/create-plugin.js'
+import { mockAllProviders } from '../utils/mocks/index.js'
+mockAllProviders()
 
 const tempDirBase = join(import.meta.dirname, 'tmp')
 
diff --git a/tests/unit/index.ts b/tests/unit/index.ts
deleted file mode 100644
index 7f8249b..0000000
--- a/tests/unit/index.ts
+++ /dev/null
@@ -1,5 +0,0 @@
-import './generator.test'
-import './ai-providers.test'
-import { mockAllProviders } from '../utils/mocks/index.js'
-
-mockAllProviders()
diff --git a/tests/utils/stackable.ts b/tests/utils/stackable.ts
index 7c87fd9..6466fae 100644
--- a/tests/utils/stackable.ts
+++ b/tests/utils/stackable.ts
@@ -21,13 +21,14 @@ export async function buildAiWarpApp (config: AiWarpConfig): Promise<[FastifyIns
     server: {
       port,
       forceCloseConnections: true,
-      healthCheck: {
-        enabled: false
-      },
+      healthCheck: false,
       logger: {
         level: 'silent'
       }
     },
+    service: {
+      openapi: true
+    },
     ...config
   }, stackable)
 

From 45c259dbaf4ff2a28b20a7d681ad5885b63ce341 Mon Sep 17 00:00:00 2001
From: Matteo Collina <hello@matteocollina.com>
Date: Wed, 8 May 2024 13:01:55 +0200
Subject: [PATCH 2/6] fixup

Signed-off-by: Matteo Collina <hello@matteocollina.com>
---
 index.ts              | 1 -
 tests/e2e/api.test.ts | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/index.ts b/index.ts
index 7486761..ab02bd1 100644
--- a/index.ts
+++ b/index.ts
@@ -28,7 +28,6 @@ const stackable: Stackable<AiWarpConfig> = async function (fastify, opts) {
 
   await fastify.register(rateLimitPlugin, opts)
   await fastify.register(apiPlugin, opts)
-
 }
 
 stackable.configType = 'ai-warp-app'
diff --git a/tests/e2e/api.test.ts b/tests/e2e/api.test.ts
index 3844d95..99a6b90 100644
--- a/tests/e2e/api.test.ts
+++ b/tests/e2e/api.test.ts
@@ -1,5 +1,4 @@
-/* eslint-disable @typescript
-* eslint/no-floating-promises */
+/* eslint-disable @typescript-eslint/no-floating-promises */
 import { before, after, describe, it } from 'node:test'
 import assert from 'node:assert'
 import { FastifyInstance } from 'fastify'
@@ -14,7 +13,6 @@ mockAllProviders()
 
 const expectedStreamBody = buildExpectedStreamBodyString()
 
-
 interface Provider {
   name: string
   config: AiWarpConfig['aiProvider']

From ea9b7e692862e515e9704d1e6bde3e976db4dc01 Mon Sep 17 00:00:00 2001
From: Matteo Collina <hello@matteocollina.com>
Date: Wed, 8 May 2024 13:05:49 +0200
Subject: [PATCH 3/6] Expand contributing guide: start command and llama2 setup

Signed-off-by: Matteo Collina <hello@matteocollina.com>
---
 CONTRIBUTING.md | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0224565..fc28406 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -38,12 +38,30 @@ Steps for downloading and setting up AI Warp for local development.
  is located at `ai-warp-app/platformatic.json`. **Note: this will be overwrited
  every time you generate the test app.**
 
- 8. Start the test app.
+ 8. Start the test app. From the `ai-warp-app` folder, run:
 
     ```bash
-    npm start
+    node ../dist/cli/start.js
     ```
 
+### Testing a local model with llama2
+
+To test a local model with with llama2, you can use the following to downloaded a tested model:
+
+```bash
+curl -L -O https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q8_0.gguf
+```
+
+Then, in your `platformatic.json` file, add:
+
+```
+  "aiProvider": {
+    "llama2": {
+      "modelPath": "./mistral-7b-instruct-v0.2.Q8_0.gguf"
+    }
+  },
+```
+
 ## Important Notes
 
 * AI Warp needs to be rebuilt for any code change to take affect in your test

From d667943c540c130056672f7da6e9379c9d3eaf4f Mon Sep 17 00:00:00 2001
From: Matteo Collina <hello@matteocollina.com>
Date: Wed, 8 May 2024 14:03:04 +0200
Subject: [PATCH 4/6] fixup

Signed-off-by: Matteo Collina <hello@matteocollina.com>
---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index fc28406..d219ea3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -54,7 +54,7 @@ curl -L -O https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve
 
 Then, in your `platformatic.json` file, add:
 
-```
+```json
   "aiProvider": {
     "llama2": {
       "modelPath": "./mistral-7b-instruct-v0.2.Q8_0.gguf"

From fdc793ffd8f9cb45890a8566244e8625f5a021e7 Mon Sep 17 00:00:00 2001
From: Matteo Collina <hello@matteocollina.com>
Date: Wed, 8 May 2024 14:03:34 +0200
Subject: [PATCH 5/6] fixup

Signed-off-by: Matteo Collina <hello@matteocollina.com>
---
 .github/workflows/lint-md.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/lint-md.yml b/.github/workflows/lint-md.yml
index 6214ba0..8c31500 100644
--- a/.github/workflows/lint-md.yml
+++ b/.github/workflows/lint-md.yml
@@ -30,7 +30,8 @@ jobs:
   lint-md:
     name: Linting Markdown
     runs-on: ubuntu-latest
-    needs: setup-node-modulessteps:
+    needs: setup-node-modules
+    steps:
       - name: Git Checkout
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
 

From 103e8602aa76ea3c01cab65b5da0a52331aee852 Mon Sep 17 00:00:00 2001
From: Matteo Collina <hello@matteocollina.com>
Date: Wed, 8 May 2024 14:14:40 +0200
Subject: [PATCH 6/6] fixup

Signed-off-by: Matteo Collina <hello@matteocollina.com>
---
 CONTRIBUTING.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d219ea3..0b1b4e6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -46,7 +46,8 @@ Steps for downloading and setting up AI Warp for local development.
 
 ### Testing a local model with llama2
 
-To test a local model with with llama2, you can use the following to downloaded a tested model:
+To test a local model with llama2, you can use the following to
+download the model we used for testing:
 
 ```bash
 curl -L -O https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q8_0.gguf