diff --git a/docs/add-ai-provider.md b/docs/add-ai-provider.md
new file mode 100644
index 0000000..9be471d
--- /dev/null
+++ b/docs/add-ai-provider.md
@@ -0,0 +1,33 @@
+# Adding a new AI Provider
+Documentation on how to support a new AI provider.
+
+## Steps
+
+### Add it to the Config Schema
+The `aiProvider` property in the config schema ([lib/schema.ts](../lib/schema.ts)) needs to be updated to allow for inputting any necessary information for this AI provider (e.g. model name, api key). Don't forget to rebuild the config!
+
+### Creating the Provider class
+
+Implement the `Provider` interface ([ai-providers/provider.ts](../ai-providers/provider.ts)) in a file also under [ai-providers/](../ai-providers/) (e.g. [ai-providers/open-ai.ts](../ai-providers/open-ai.ts))
+
+Ensure that the `askStream` response returns a Node.js-builtin `ReadableStream` that outputs the expected format defined in the [REST API docs](./rest-api.md).
+
+### Add the provider to the `build` function in the `warp` plugin
+
+See [plugins/warp.ts](https://github.com/platformatic/ai-warp/blob/b9cddeedf8609d1c2ce3efcfdd84a739150a1e91/plugins/warp.ts#L12) `build()`
+
+### Add the provider to the generator code
+
+See [lib/generator.ts](https://github.com/platformatic/ai-warp/blob/b9cddeedf8609d1c2ce3efcfdd84a739150a1e91/lib/generator.ts#L64-L88)
+
+### Unit Tests
+
+Add provider to [tests/unit/ai-providers.test.ts](https://github.com/platformatic/ai-warp/blob/b9cddeedf8609d1c2ce3efcfdd84a739150a1e91/tests/unit/ai-providers.test.ts#L11)
+
+### E2E Tests
+
+Add provider config to [tests/e2e/api.test.ts](https://github.com/platformatic/ai-warp/blob/b9cddeedf8609d1c2ce3efcfdd84a739150a1e91/tests/e2e/api.test.ts#L17-L36)
+
+### Type Tests
+
+Add the provider config to the schema tests [tests/types/schema.test-d.ts](https://github.com/platformatic/ai-warp/blob/main/tests/types/schema.test-d.ts)
diff --git a/docs/api.md b/docs/api.md
new file mode 100644
index 0000000..b3a70da
--- /dev/null
+++ b/docs/api.md
@@ -0,0 +1,90 @@
+# AI Warp API
+
+Documentation on the methods and properties available to plugins added onto this stackable.
+
+All of these exist under the `fastify.ai` object.
+
+## `fastify.ai.warp()`
+
+Send a prompt to the AI provider and receive the full response.
+
+Takes in:
+
+ * `request` (`FastifyRequest`) The request object
+ * `prompt` (`string`) The prompt to send to the AI provider
+
+Returns:
+
+ * `string` - Full response from the AI provider
+
+<details>
+    <summary>Example usage</summary>
+
+```typescript
+const response: string = await fastify.ai.warp(request, "What's 1+1?")
+fastify.log.info(response)
+```
+</details>
+
+## `fastify.ai.warpStream`
+
+Send a prompt to the AI provider and receive a streamed response. See [here](./rest-api.md#post-apiv1stream) for more information on the contents of the stream.
+
+Takes in:
+
+ * `request` (`FastifyRequest`) The request object
+ * `prompt` (`string`) The prompt to send to the AI provider
+
+Returns:
+
+ * `ReadableStream<Uint8Array>` - Streamed response chunks from the AI provider
+
+## `fastify.ai.preResponseCallback`
+
+A function to be called before [fastify.ai.warp](#fastifyaiwarp) returns its result. It can modify the response and can be synchronous or asynchronous.
+
+## `fastify.ai.preResponseChunkCallback`
+
+A function to be called on each chunk present in the `ReadableStream` returned by [fastify.ai.warpStream](#fastifyaiwarpstream). It can modify each individual chunk and can be synchronous or asynchronous.
+
+## `fastify.ai.rateLimiting.max`
+
+Callback for determining the max amount of requests a client can send before they are rate limited. If the `rateLimiting.max` property is defined in the Platformatic config, this method will not be called.
+
+See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info.
+
+## `fastify.ai.rateLimiting.allowList`
+
+Callback for determining the clients excluded from rate limiting. If the `rateLimiting.allowList` property is defined in the Platformatic config, this method will not be called.
+
+See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info.
+
+## `fastify.ai.rateLimiting.onBanReach`
+
+Callback executed when a client reaches the ban threshold.
+
+See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info.
+
+## `fastify.ai.rateLimiting.keyGenerator`
+
+Callback for generating the unique rate limiting identifier for each client.
+
+See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info.
+
+## `fastify.ai.rateLimiting.errorResponseBuilder`
+
+Callback for generating custom response objects for rate limiting errors.
+
+See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info.
+
+## `fastify.ai.rateLimiting.onExceeding`
+
+Callback executed before a client exceeds their request limit.
+
+See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info.
+
+## `fastify.ai.rateLimiting.onExceeded`
+
+Callback executed after a client exceeds their request limit.
+
+See [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options) options for more info.
diff --git a/docs/auth.md b/docs/auth.md
new file mode 100644
index 0000000..ac2f253
--- /dev/null
+++ b/docs/auth.md
@@ -0,0 +1,38 @@
+# Authentication
+
+Documentation on how to configure and use AI Warp's authentication.
+
+## Configuring
+
+Configuring authentication can be done via your Platformatic config file under the `auth` object. E.g.
+
+```json
+// platformatic.json
+{
+  "auth": {
+    // ...
+  }
+}
+```
+
+We utilize [fastify-user](https://github.com/platformatic/fastify-user) to do authentication, so you
+can pass in any configuration options for it in the `auth` object.
+
+AI Warp-specific options:
+
+ * `required` (`boolean`) - If true, any unauthenticated users will receive a 401 status code and body.
+
+### Example
+
+This makes authentication required and accepts JWTs signed with the secret `abc123`:
+
+```json
+{
+  "auth": {
+    "required": true,
+    "jwt": {
+      "secret": "abc123"
+    }
+  }
+}
+```
diff --git a/docs/dev-setup.md b/docs/dev-setup.md
new file mode 100644
index 0000000..397df6a
--- /dev/null
+++ b/docs/dev-setup.md
@@ -0,0 +1,47 @@
+# Development Setup
+
+Steps for downloading and setting up AI Warp for local development.
+
+## Steps
+
+ 1. Fork the repository.
+ 
+ 2. Clone your fork using SSH, GitHub CLI, or HTTPS.
+
+    ```bash
+    git clone git@github.com:<YOUR_GITHUB_USERNAME>/ai-warp.git # SSH
+    git clone https://github.com/<YOUR_GITHUB_USERNAME>/ai-warp.git # HTTPS
+    gh repo clone <YOUR_GITHUB_USERNAME>/ai-warp # GitHub CLI
+    ```
+
+ 3. Install [Node.js](https://nodejs.org/).
+
+ 4. Install dependencies.
+
+    ```bash
+    npm install
+    ```
+
+ 5. Build.
+
+    ```bash
+    npm run build
+    ```
+
+ 6. Generate the test app.
+
+    ```bash
+    ./dist/cli/create.js
+    ```
+
+ 7. Configure the test app's `platformatic.json` to your liking. By default, it is located at `ai-warp-app/platformatic.json`. **Note: this will be overwritten every time you generate the test app.**
+
+ 8. Start the test app.
+
+    ```bash
+    ./dist/cli/start.js
+    ```
+
+## Additional Resources
+
+ * [Use Stackables to build Platformatic applications](https://docs.platformatic.dev/docs/guides/applications-with-stackables)
diff --git a/docs/rate-limiting.md b/docs/rate-limiting.md
new file mode 100644
index 0000000..f7b88f5
--- /dev/null
+++ b/docs/rate-limiting.md
@@ -0,0 +1,64 @@
+# Rate Limiting
+
+Documentation on configuring AI Warp's rate limiting.
+
+## Configuring
+
+Configuring rate limiting can be done via your Platformatic config file under the `rateLimiting` object. E.g.
+
+```json
+// platformatic.json
+{
+  "rateLimiting": {
+    // ...
+  }
+}
+```
+
+We utilize the [@fastify/rate-limit](https://github.com/fastify/fastify-rate-limit) module for rate limiting. You can
+pass in any configuration options from it into the `rateLimiting` object.
+
+For defining the callbacks allowed by that module, set them in the `fastify.ai.rateLimiting` object.
+See the [plugin API docs](./api.md#fastifyairatelimitingmax) for more information.
+
+## Determining a client's request limit from JWT claims
+
+AI Warp provides an easy and simple way to decide a client's request limit based off of JWT claims.
+This is useful for, say, differentiating between free and premium users, where premium users get a higher
+request limit.
+
+> [!NOTE]
+> This requires authentication to be enabled. Documentation for configuring authentication is available [here](./auth.md).
+
+You can configure this within your Platformatic config under the `rateLimiting.maxByClaims` array:
+
+```json
+{
+  "rateLimiting": {
+    "maxByClaims": [
+      {
+        "claim": "name-of-the-claim",
+        "claimValue": "value-necessary",
+        "max": 10
+      }
+    ]
+  }
+}
+```
+
+So, for differentiating between free and premium users, you could do:
+
+```json
+{
+  "rateLimiting": {
+    "max": 100, // request limit for free users
+    "maxByClaims": [
+      {
+        "claim": "userType",
+        "claimValue": "premium",
+        "max": 1000
+      }
+    ]
+  }
+}
+```
diff --git a/docs/rest-api.md b/docs/rest-api.md
new file mode 100644
index 0000000..2bad873
--- /dev/null
+++ b/docs/rest-api.md
@@ -0,0 +1,69 @@
+# REST API Endpoints
+
+Documentation on AI Warp's REST API.
+
+For information on authentication, see [here](./auth.md).
+
+For information on rate limiting, see [here](./rate-limiting.md).
+
+## Endpoints
+
+### POST `/api/v1/prompt`
+
+Prompt the AI Provider and receive the full response.
+
+<details>
+    <summary>Body</summary>
+
+```json
+{ "prompt": "What's 1+1?" }
+```
+</details>
+
+<details>
+    <summary>Response</summary>
+
+```json
+{ "response": "..." }
+```
+</details>
+
+### POST `/api/v1/stream`
+
+Prompt the AI Provider and receive a streamed response. This endpoint supports [Server-Sent Events](https://html.spec.whatwg.org/multipage/server-sent-events.html).
+
+Event types:
+
+ * `content` - Response chunk
+ * `error` - An error has occurred and the stream is closed.
+
+<details>
+    <summary>Body</summary>
+
+```json
+{ "prompt": "What's 1+1?" }
+```
+</details>
+
+<details>
+    <summary>Success response</summary>
+
+```
+event: content
+data: {"response": "..."}
+
+event: content
+data: {"response": "..."}
+```
+</details>
+
+<details>
+    <summary>Error response</summary>
+
+```
+event: error
+data: {"code":"...","message":"..."}
+```
+</details>
+
+When there are no more chunks to return or an error occurs, the stream is closed.
diff --git a/index.d.ts b/index.d.ts
index eccbaf6..abc60c6 100644
--- a/index.d.ts
+++ b/index.d.ts
@@ -7,20 +7,78 @@ declare module 'fastify' {
   interface FastifyInstance {
     platformatic: PlatformaticApp<AiWarpConfig>
     ai: {
+      /**
+       * Send a prompt to the AI provider and receive the full response.
+       */
       warp: (request: FastifyRequest, prompt: string) => Promise<string>
+
+      /**
+       * Send a prompt to the AI provider and receive a streamed response.
+       */
       warpStream: (request: FastifyRequest, prompt: string) => Promise<ReadableStream>
+
+      /**
+       * A function to be called before warp() returns its result. It can
+       *  modify the response and can be synchronous or asynchronous.
+       */
       preResponseCallback?: ((request: FastifyRequest, response: string) => string) | ((request: FastifyRequest, response: string) => Promise<string>)
+
+      /**
+       * A function to be called on each chunk present in the `ReadableStream`
+       *  returned by warpStream(). It can modify each individual chunk and can
+       *  be synchronous or asynchronous.
+       */
       preResponseChunkCallback?: ((request: FastifyRequest, response: string) => string) | ((request: FastifyRequest, response: string) => Promise<string>)
+
       rateLimiting: {
+        /**
+         * Callback for determining the max amount of requests a client can
+         *  send before they are rate limited. If the `rateLimiting.max`
+         *  property is defined in the Platformatic config, this method will
+         *  not be called.
+         * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options
+         */
         max?: ((req: FastifyRequest, key: string) => number) | ((req: FastifyRequest, key: string) => Promise<number>)
+
+        /**
+         * Callback for determining the clients excluded from rate limiting. If
+         *  the `rateLimiting.allowList` property is defined in the Platformatic
+         *  config, this method will not be called.
+         * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options
+         */
         allowList?: (req: FastifyRequest, key: string) => boolean | Promise<boolean>
+
+        /**
+         * Callback executed when a client reaches the ban threshold.
+         * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options
+         */
         onBanReach?: (req: FastifyRequest, key: string) => void
+
+        /**
+         * Callback for generating the unique rate limiting identifier for each client.
+         * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options
+         */
         keyGenerator?: (req: FastifyRequest) => string | number | Promise<string | number>
+
+        /**
+         * Callback for generating custom response objects for rate limiting errors.
+         * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options
+         */
         errorResponseBuilder?: (
           req: FastifyRequest,
           context: errorResponseBuilderContext
         ) => object
+
+        /**
+         * Callback executed before a client exceeds their request limit.
+         * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options
+         */
         onExceeding?: (req: FastifyRequest, key: string) => void
+
+        /**
+         * Callback executed after a client exceeds their request limit.
+         * @see https://github.com/fastify/fastify-rate-limit?tab=readme-ov-file#options
+         */
         onExceeded?: (req: FastifyRequest, key: string) => void
       }
     }