diff --git a/docs/add-ai-provider.md b/docs/add-ai-provider.md new file mode 100644 index 0000000..9be471d --- /dev/null +++ b/docs/add-ai-provider.md @@ -0,0 +1,33 @@ +# Adding a new AI Provider +Documentation on how to support a new AI provider. + +## Steps + +### Add it to the Config Schema +The `aiProvider` property in the config schema ([lib/schema.ts](../lib/schema.ts)) needs to be updated to allow for inputting any necessary information for this AI provider (e.g. model name, api key). Don't forget to rebuild the config! + +### Creating the Provider class + +Implement the `Provider` interface ([ai-providers/provider.ts](../ai-providers/provider.ts)) in a file also under [ai-providers/](../ai-providers/) (e.g. [ai-providers/open-ai.ts](../ai-providers/open-ai.ts)) + +Ensure that the `askStream` response returns a Node.js-builtin `ReadableStream` that outputs the expected format defined in the [REST API docs](./rest-api.md). + +### Add the provider to the `build` function in the `warp` plugin + +See [plugins/warp.ts](https://github.com/platformatic/ai-warp/blob/b9cddeedf8609d1c2ce3efcfdd84a739150a1e91/plugins/warp.ts#L12) `build()` + +### Add the provider to the generator code + +See [lib/generator.ts](https://github.com/platformatic/ai-warp/blob/b9cddeedf8609d1c2ce3efcfdd84a739150a1e91/lib/generator.ts#L64-L88) + +### Unit Tests + +Add provider to [tests/unit/ai-providers.test.ts](https://github.com/platformatic/ai-warp/blob/b9cddeedf8609d1c2ce3efcfdd84a739150a1e91/tests/unit/ai-providers.test.ts#L11) + +### E2E Tests + +Add provider config to [tests/e2e/api.test.ts](https://github.com/platformatic/ai-warp/blob/b9cddeedf8609d1c2ce3efcfdd84a739150a1e91/tests/e2e/api.test.ts#L17-L36) + +### Type Tests + +Add the provider config to the schema tests [tests/types/schema.test-d.ts](https://github.com/platformatic/ai-warp/blob/main/tests/types/schema.test-d.ts) diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..b3a70da --- /dev/null +++ 
b/docs/api.md @@ -0,0 +1,90 @@ +# AI Warp API + +Documentation on the methods and properties available to plugins added onto this stackable. + +All of these exist under the `fastify.ai` object. + +## `fastify.ai.warp()` + +Send a prompt to the AI provider and receive the full response. + +Takes in: + + * `request` (`FastifyRequest`) The request object + * `prompt` (`string`) The prompt to send to the AI provider + +Returns: + + * `string` - Full response from the AI provider + +
+ Example usage + +```typescript +const response: string = await fastify.ai.warp(request, "What's 1+1?") +fastify.log.info(response) +``` +
+ +## `fastify.ai.warpStream` + +Send a prompt to the AI provider and receive a streamed response. See [here](./rest-api.md#post-apiv1stream) for more information on the contents of the stream. + +Takes in: + + * `request` (`FastifyRequest`) The request object + * `prompt` (`string`) The prompt to send to the AI provider + +Returns: + + * `ReadableStream` - Streamed response chunks from the AI provider + +## `fastify.ai.preResponseCallback` + +A function to be called before [fastify.ai.warp](#fastifyaiwarp) returns its result. It can modify the response and can be synchronous or asynchronous. + +## `fastify.ai.preResponseChunkCallback` + +A function to be called on each chunk present in the `ReadableStream` returned by [fastify.ai.warpStream](#fastifyaiwarpstream). It can modify each individual chunk and can be synchronous or asynchronous. + +## `fastify.ai.rateLimiting.max` + +Callback for determining the max amount of requests a client can send before they are rate limited. If the `rateLimiting.max` property is defined in the Platformatic config, this method will not be called. + +See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info. + +## `fastify.ai.rateLimiting.allowList` + +Callback for determining the clients excluded from rate limiting. If the `rateLimiting.allowList` property is defined in the Platformatic config, this method will not be called. + +See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info. + +## `fastify.ai.rateLimiting.onBanReach` + +Callback executed when a client reaches the ban threshold. + +See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info. + +## `fastify.ai.rateLimiting.keyGenerator` + +Callback for generating the unique rate limiting identifier for each client. 
+ +See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info. + +## `fastify.ai.rateLimiting.errorResponseBuilder` + +Callback for generating custom response objects for rate limiting errors. + +See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info. + +## `fastify.ai.rateLimiting.onExceeding` + +Callback executed before a client exceeds their request limit. + +See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info. + +## `fastify.ai.rateLimiting.onExceeded` + +Callback executed after a client exceeds their request limit. + +See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info. diff --git a/docs/auth.md b/docs/auth.md new file mode 100644 index 0000000..ac2f253 --- /dev/null +++ b/docs/auth.md @@ -0,0 +1,38 @@ +# Authentication + +Documentation on how to configure and use AI Warp's authentication. + +## Configuring + +Configuring authentication can be done via your Platformatic config file under the `auth` object. E.g. + +```json +// platformatic.json +{ + "auth": { + // ... + } +} +``` + +We utilize [fastify-user](https://github.com/platformatic/fastify-user) to do authentication, so you +can pass in any configuration options for it in the `auth` object. + +AI Warp-specific options: + + * `required` (`boolean`) - If true, any unauthenticated users will receive a 401 status code and body. 
+ +### Example + +This makes authentication required and accepts JWTs signed with the secret `abc123`: + +```json +{ + "auth": { + "required": true, + "jwt": { + "secret": "abc123" + } + } +} +``` diff --git a/docs/dev-setup.md b/docs/dev-setup.md new file mode 100644 index 0000000..397df6a --- /dev/null +++ b/docs/dev-setup.md @@ -0,0 +1,47 @@ +# Development Setup + +Steps for downloading and setting up AI Warp for local development. + +## Steps + + 1. Fork the repository. + + 2. Clone your fork using SSH, GitHub CLI, or HTTPS. + + ```bash + git clone git@github.com:<your-username>/ai-warp.git # SSH + git clone https://github.com/<your-username>/ai-warp.git # HTTPS + gh repo clone <your-username>/ai-warp # GitHub CLI + ``` + + 3. Install [Node.js](https://nodejs.org/). + + 4. Install dependencies. + + ```bash + npm install + ``` + + 5. Build. + + ```bash + npm run build + ``` + + 6. Generate the test app. + + ```bash + ./dist/cli/create.js + ``` + + 7. Configure the test app's `platformatic.json` to your liking. By default, it is located at `ai-warp-app/platformatic.json`. **Note: this will be overwritten every time you generate the test app.** + + 8. Start the test app. + + ```bash + ./dist/cli/start.js + ``` + +## Additional Resources + + * [Use Stackables to build Platformatic applications](https://docs.platformatic.dev/docs/guides/applications-with-stackables) diff --git a/docs/rate-limiting.md b/docs/rate-limiting.md new file mode 100644 index 0000000..f7b88f5 --- /dev/null +++ b/docs/rate-limiting.md @@ -0,0 +1,64 @@ +# Rate Limiting + +Documentation on configuring AI Warp's rate limiting. + +## Configuring + +Configuring rate limiting can be done via your Platformatic config file under the `rateLimiting` object. E.g. + +```json +// platformatic.json +{ + "rateLimiting": { + // ... + } +} +``` + +We utilize the [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit) module for rate limiting. You can +pass in any configuration options from it into the `rateLimiting` object. 
+ +For defining the callbacks allowed by that module, set them in the `fastify.ai.rateLimiting` object. +See the [plugin API docs](./api.md#fastifyairatelimitingmax) for more information. + +## Determining a client's request limit from JWT claims + +AI Warp provides an easy and simple way to decide a client's request limit based off of JWT claims. +This is useful for, say, differentiating between free and premium users, where premium users get a higher +request limit. + +> [!NOTE] +> This requires authentication to be enabled. Documentation for configuring authentication is available [here](./auth.md). + +You can configure this within your Platformatic config under the `rateLimiting.maxByClaims` array: + +```json +{ + "rateLimiting": { + "maxByClaims": [ + { + "claim": "name-of-the-claim", + "claimValue": "value-necessary", + "max": 10 + } + ] + } +} +``` + +So, for differentiating between free and premium users, you could do: + +```json +{ + "rateLimiting": { + "max": 100, // request limit for free users + "maxByClaims": [ + { + "claim": "userType", + "claimValue": "premium", + "max": 1000 + } + ] + } +} +``` diff --git a/docs/rest-api.md b/docs/rest-api.md new file mode 100644 index 0000000..2bad873 --- /dev/null +++ b/docs/rest-api.md @@ -0,0 +1,69 @@ +# REST API Endpoints + +Documentation on AI Warp's REST API. + +For information on authentication, see [here](./auth.md). + +For information on rate limiting, see [here](./rate-limiting.md). + +## Endpoints + +### POST `/api/v1/prompt` + +Prompt the AI Provider and receive the full response. + +
+ Body + +```json +{ "prompt": "What's 1+1?" } +``` +
+ +
+ Response + +```json +{ "response": "..." } +``` +
+ +### POST `/api/v1/stream` + +Prompt the AI Provider and receive a streamed response. This endpoint supports [Server-Sent Events](https://html.spec.whatwg.org/multipage/server-sent-events.html). + +Event types: + + * `content` - Response chunk + * `error` - An error has occurred and the stream is closed. + +
+ Body + +```json +{ "prompt": "What's 1+1?" } +``` +
+ +
+ Success response + +``` +event: content +data: {"response": "..."} + +event: content +data: {"response": "..."} +``` +
+ +
+ Error response + +``` +event: error +data: {"code":"...","message":"..."} +``` +
+ +When there are no more chunks to return or an error occurs, the stream is closed. diff --git a/index.d.ts b/index.d.ts index eccbaf6..abc60c6 100644 --- a/index.d.ts +++ b/index.d.ts @@ -7,20 +7,78 @@ declare module 'fastify' { interface FastifyInstance { platformatic: PlatformaticApp ai: { + /** + * Send a prompt to the AI provider and receive the full response. + */ warp: (request: FastifyRequest, prompt: string) => Promise + + /** + * Send a prompt to the AI provider and receive a streamed response. + */ warpStream: (request: FastifyRequest, prompt: string) => Promise + + /** + * A function to be called before warp() returns its result. It can + * modify the response and can be synchronous or asynchronous. + */ preResponseCallback?: ((request: FastifyRequest, response: string) => string) | ((request: FastifyRequest, response: string) => Promise) + + /** + * A function to be called on each chunk present in the `ReadableStream` + * returned by warpStream(). It can modify each individual chunk and can + * be synchronous or asynchronous. + */ preResponseChunkCallback?: ((request: FastifyRequest, response: string) => string) | ((request: FastifyRequest, response: string) => Promise) + rateLimiting: { + /** + * Callback for determining the max amount of requests a client can + * send before they are rate limited. If the `rateLimiting.max` + * property is defined in the Platformatic config, this method will + * not be called. + * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options + */ max?: ((req: FastifyRequest, key: string) => number) | ((req: FastifyRequest, key: string) => Promise) + + /** + * Callback for determining the clients excluded from rate limiting. If + * the `rateLimiting.allowList` property is defined in the Platformatic + * config, this method will not be called. 
+ * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options + */ allowList?: (req: FastifyRequest, key: string) => boolean | Promise + + /** + * Callback executed when a client reaches the ban threshold. + * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options + */ onBanReach?: (req: FastifyRequest, key: string) => void + + /** + * Callback for generating the unique rate limiting identifier for each client. + * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options + */ keyGenerator?: (req: FastifyRequest) => string | number | Promise + + /** + * Callback for generating custom response objects for rate limiting errors. + * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options + */ errorResponseBuilder?: ( req: FastifyRequest, context: errorResponseBuilderContext ) => object + + /** + * Callback executed before a client exceeds their request limit. + * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options + */ onExceeding?: (req: FastifyRequest, key: string) => void + + /** + * Callback executed after a client exceeds their request limit. + * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options + */ onExceeded?: (req: FastifyRequest, key: string) => void } }