docs(llms): add a standalone page in the documentation
Ref: #68
Signed-off-by: Tomas Dvorak <[email protected]>

Showing 4 changed files with 499 additions and 1 deletion.

# LLMs (inference)

> [!TIP]
>
> Location for concrete implementations within the framework: `bee-agent-framework/adapters`.
>
> Location for base abstraction within the framework: `bee-agent-framework/llms`.

A Large Language Model (LLM) is an AI designed to understand and generate human-like text.
Trained on extensive text data, LLMs learn language patterns, grammar, context, and basic reasoning to perform tasks like text completion, translation, summarization, and answering questions.

To unify the differences between various APIs, the framework defines a common interface: a set of actions that can be performed with any supported model.
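
In practice, this means every adapter exposes the same handful of methods. The following is a minimal sketch of that shared surface; it assumes a locally running Ollama instance and uses the Ollama chat adapter introduced in the examples below:

```ts
import "dotenv/config.js";
import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";

const llm = new OllamaChatLLM();
const messages = [BaseMessage.of({ role: Role.USER, text: "Hello!" })];

console.info((await llm.meta()).tokenLimit); // model metadata (e.g. context window size)
console.info((await llm.tokenize(messages)).tokensCount); // tokenizer information
console.info((await llm.generate(messages)).getTextContent()); // complete response
for await (const chunk of llm.stream(messages)) {
  console.info(chunk.getTextContent()); // incremental chunks
}
```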

## Providers (adapters)

| Name                                                                      | LLM                        | Chat LLM                                      | Structured output (constrained decoding) |
| ------------------------------------------------------------------------- | -------------------------- | --------------------------------------------- | ---------------------------------------- |
| `WatsonX`                                                                 | ✅                         | ⚠️ (model specific template must be provided) | ❌                                        |
| `Ollama`                                                                  | ✅                         | ✅                                            | ⚠️ (JSON only)                            |
| `OpenAI`                                                                  | ❌                         | ✅                                            | ⚠️ (JSON schema only)                     |
| `LangChain`                                                               | ⚠️ (depends on a provider) | ⚠️ (depends on a provider)                    | ❌                                        |
| `Groq`                                                                    | ❌                         | ✅                                            | ⚠️ (JSON object only)                     |
| `BAM (Internal)`                                                          | ✅                         | ⚠️ (model specific template must be provided) | ✅                                        |
| ➕ [Request](https://github.com/i-am-bee/bee-agent-framework/discussions) |                            |                                               |                                          |

All providers' examples can be found in [examples/llms/providers](/examples/llms/providers).

Are you interested in creating your own adapter? Jump to the [adding a new provider](#adding-a-new-provider-adapter) section.

## Usage

### Plain text generation

<!-- embedme examples/llms/text.ts -->

```ts
import "dotenv/config.js";
import { createConsoleReader } from "examples/helpers/io.js";
import { WatsonXLLM } from "bee-agent-framework/adapters/watsonx/llm";

const llm = new WatsonXLLM({
  modelId: "google/flan-ul2",
  projectId: process.env.WATSONX_PROJECT_ID,
  apiKey: process.env.WATSONX_API_KEY,
  parameters: {
    decoding_method: "greedy",
    max_new_tokens: 50,
  },
});

const reader = createConsoleReader();

const prompt = await reader.prompt();
const response = await llm.generate(prompt);
reader.write(`LLM 🤖 (text) : `, response.getTextContent());
process.exit(0);
```

_Source: [examples/llms/text.ts](/examples/llms/text.ts)_

> [!NOTE]
>
> The `generate` method returns a class that extends the base [`BaseLLMOutput`](/src/llms/base.ts) class.
> This class allows you to retrieve the response as text using the `getTextContent` method and other useful metadata.

> [!TIP]
>
> You can enable streaming communication (internally) by passing `{ stream: true }` as a second parameter to the `generate` method.
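
For illustration, a minimal sketch that reuses the `llm` instance from the example above and opts into internal streaming while still awaiting a single merged result:

```ts
// Reuses the WatsonX `llm` instance created above (a sketch, not part of the embedded example).
// With `stream: true`, the adapter consumes the provider's streaming endpoint internally,
// yet `generate` still resolves once with the complete output.
const streamed = await llm.generate("Describe what an LLM is in one sentence.", { stream: true });
console.info(streamed.getTextContent());
```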

### Chat text generation

<!-- embedme examples/llms/chat.ts -->

```ts
import "dotenv/config.js";
import { createConsoleReader } from "examples/helpers/io.js";
import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";

const llm = new OllamaChatLLM();

const reader = createConsoleReader();

for await (const { prompt } of reader) {
  const response = await llm.generate([
    BaseMessage.of({
      role: Role.USER,
      text: prompt,
    }),
  ]);
  reader.write(`LLM 🤖 (txt) : `, response.getTextContent());
  reader.write(`LLM 🤖 (raw) : `, JSON.stringify(response.finalResult));
}
```

_Source: [examples/llms/chat.ts](/examples/llms/chat.ts)_

> [!NOTE]
>
> The `generate` method returns a class that extends the base [`ChatLLMOutput`](/src/llms/chat.ts) class.
> This class allows you to retrieve the response as text using the `getTextContent` method and other useful metadata.
> To retrieve all messages (chunks), access the `messages` property (getter).

> [!TIP]
>
> You can enable streaming communication (internally) by passing `{ stream: true }` as a second parameter to the `generate` method.
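
For example, assuming the `llm` instance and imports from the chat example above, the merged output and its individual message chunks can be inspected like this (a sketch, not an embedded example):

```ts
// Reuses the OllamaChatLLM `llm` instance from the example above.
const output = await llm.generate(
  [BaseMessage.of({ role: Role.USER, text: "What is the capital of France?" })],
  { stream: true }, // optional: stream internally, but still await the merged result
);
console.info(output.getTextContent()); // concatenated text of all chunks
console.info(output.messages.length); // individual message chunks (see the note above)
```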

#### Streaming

<!-- embedme examples/llms/chatStream.ts -->

```ts
import "dotenv/config.js";
import { createConsoleReader } from "examples/helpers/io.js";
import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";

const llm = new OllamaChatLLM();

const reader = createConsoleReader();

for await (const { prompt } of reader) {
  for await (const chunk of llm.stream([
    BaseMessage.of({
      role: Role.USER,
      text: prompt,
    }),
  ])) {
    reader.write(`LLM 🤖 (txt) : `, chunk.getTextContent());
    reader.write(`LLM 🤖 (raw) : `, JSON.stringify(chunk.finalResult));
  }
}
```

_Source: [examples/llms/chatStream.ts](/examples/llms/chatStream.ts)_

#### Callback (Emitter)

<!-- embedme examples/llms/chatCallback.ts -->

```ts
import "dotenv/config.js";
import { createConsoleReader } from "examples/helpers/io.js";
import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";

const llm = new OllamaChatLLM();

const reader = createConsoleReader();

for await (const { prompt } of reader) {
  const response = await llm
    .generate(
      [
        BaseMessage.of({
          role: Role.USER,
          text: prompt,
        }),
      ],
      {},
    )
    .observe((emitter) =>
      emitter.match("*", (data, event) => {
        reader.write(`LLM 🤖 (event: ${event.name})`, JSON.stringify(data));

        // if you want to close the stream prematurely, just uncomment the following line
        // callbacks.abort()
      }),
    );

  reader.write(`LLM 🤖 (txt) : `, response.getTextContent());
  reader.write(`LLM 🤖 (raw) : `, JSON.stringify(response.finalResult));
}
```

_Source: [examples/llms/chatCallback.ts](/examples/llms/chatCallback.ts)_

### Structured generation

<!-- embedme examples/llms/structured.ts -->

```ts
import "dotenv/config.js";
import { z } from "zod";
import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat";
import { JsonDriver } from "bee-agent-framework/llms/drivers/json";

const llm = new OllamaChatLLM();
const driver = new JsonDriver(llm);
const response = await driver.generate(
  z.union([
    z.object({
      firstName: z.string().min(1),
      lastName: z.string().min(1),
      address: z.string(),
      age: z.number().int().min(1),
      hobby: z.string(),
    }),
    z.object({
      error: z.string(),
    }),
  ]),
  [
    BaseMessage.of({
      role: Role.USER,
      text: "Generate a profile of a citizen of Europe.",
    }),
  ],
);
console.info(response);
process.exit(0);
```

_Source: [examples/llms/structured.ts](/examples/llms/structured.ts)_

## Adding a new provider (adapter)

To use an inference provider that is not listed in the providers table above, feel free to [create a request](https://github.com/i-am-bee/bee-agent-framework/discussions).

If the request is approved and you want to implement the adapter yourself, do the following. Let's assume the name of your provider is `Custom`.

- Base location within the framework: `bee-agent-framework/adapters/custom`
- Text LLM (filename): `llm.ts` ([example implementation](/examples/llms/providers/customProvider.ts))
- Chat LLM (filename): `chat.ts` ([example implementation](/examples/llms/providers/customChatProvider.ts))
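
For orientation, here is a hypothetical usage sketch of such a `Custom` chat adapter; the import path and constructor options are assumptions based on the layout above and on the skeleton linked as the example implementation:

```ts
import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
// Hypothetical path, assuming the adapter lives in `src/adapters/custom/chat.ts`.
import { CustomChatLLM } from "bee-agent-framework/adapters/custom/chat";

const llm = new CustomChatLLM({
  modelId: "my-model",
  parameters: { temperature: 0 }, // assumed provider-specific parameters
});

const response = await llm.generate([BaseMessage.of({ role: Role.USER, text: "Hello!" })]);
console.info(response.getTextContent());
```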

> [!IMPORTANT]
>
> If the target provider offers an SDK, use it.

> [!IMPORTANT]
>
> All provider-related dependencies (if any) must be included in `devDependencies` and `peerDependencies` in the [`package.json`](/package.json).

> [!TIP]
>
> To simplify work with the target REST API, feel free to use the helper [`RestfulClient`](/src/internals/fetcher.ts) class.
> The client usage can be seen in the WatsonX LLM adapter [here](/src/adapters/watsonx/llm.ts).

> [!TIP]
>
> Parsing environment variables should be done via helper functions (`parseEnv` / `hasEnv` / `getEnv`) that can be found [here](/src/internals/env.ts).

`examples/llms/providers/customChatProvider.ts` (new file):

import {
  AsyncStream,
  BaseLLMTokenizeOutput,
  ExecutionOptions,
  GenerateCallbacks,
  GenerateOptions,
  LLMCache,
  LLMMeta,
} from "bee-agent-framework/llms/base";
import { shallowCopy } from "bee-agent-framework/serializer/utils";
import type { GetRunContext } from "bee-agent-framework/context";
import { Emitter } from "bee-agent-framework/emitter/emitter";
import { ChatLLM, ChatLLMOutput } from "bee-agent-framework/llms/chat";
import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message";
import { sum } from "remeda";

// Output container that accumulates streamed message chunks
export class CustomChatLLMOutput extends ChatLLMOutput {
  public readonly chunks: BaseMessage[] = [];

  constructor(chunk: BaseMessage) {
    super();
    this.chunks.push(chunk);
  }

  get messages() {
    return this.chunks;
  }

  merge(other: CustomChatLLMOutput): void {
    this.chunks.push(...other.chunks);
  }

  getTextContent(): string {
    return this.chunks.map((result) => result.text).join("");
  }

  toString(): string {
    return this.getTextContent();
  }

  createSnapshot() {
    return { chunks: shallowCopy(this.chunks) };
  }

  loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>): void {
    Object.assign(this, snapshot);
  }
}

// Feel free to extend if you want to support additional parameters
type CustomGenerateOptions = GenerateOptions;

export interface CustomChatLLMInput {
  modelId: string;
  executionOptions?: ExecutionOptions;
  cache?: LLMCache<CustomChatLLMOutput>;
  parameters?: Record<string, any>;
}

export class CustomChatLLM extends ChatLLM<CustomChatLLMOutput, CustomGenerateOptions> {
  public readonly emitter: Emitter<GenerateCallbacks> = Emitter.root.child({
    namespace: ["custom", "llm"],
    creator: this,
  });

  constructor(protected readonly input: CustomChatLLMInput) {
    super(input.modelId, input.executionOptions, input.cache);
  }

  async meta(): Promise<LLMMeta> {
    // TODO: retrieve data about the current model from the given provider API
    return { tokenLimit: Infinity };
  }

  async tokenize(input: BaseMessage[]): Promise<BaseLLMTokenizeOutput> {
    // TODO: retrieve data about the current model from the given provider API
    return {
      tokensCount: sum(input.map((msg) => Math.ceil(msg.text.length / 4))),
    };
  }

  protected async _generate(
    input: BaseMessage[],
    options: CustomGenerateOptions,
    run: GetRunContext<this>,
  ): Promise<CustomChatLLMOutput> {
    // this method should do a non-streaming request to the API
    // TIP: access inference parameters via `this.input.parameters` and `options`
    // TIP: use the abort signal from `run.signal`
    const result = BaseMessage.of({
      role: Role.ASSISTANT,
      text: "TODO: response retrieved from the API",
      meta: {
        createdAt: new Date(),
      },
    });
    return new CustomChatLLMOutput(result);
  }

  protected async *_stream(
    input: BaseMessage[],
    options: CustomGenerateOptions,
    run: GetRunContext<this>,
  ): AsyncStream<CustomChatLLMOutput, void> {
    // this method should do a streaming request to the API
    // TIP: access inference parameters via `this.input.parameters` and `options`
    // TIP: use the abort signal from `run.signal`
    // emit a few hard-coded chunks to demonstrate the streaming contract
    for await (const chunk of ["Hel", "lo", " world", "!"]) {
      const result = BaseMessage.of({
        role: Role.ASSISTANT,
        text: chunk,
        meta: {
          createdAt: new Date(),
        },
      });
      yield new CustomChatLLMOutput(result);
    }
  }

  createSnapshot() {
    return {
      ...super.createSnapshot(),
      input: shallowCopy(this.input),
    };
  }

  loadSnapshot({ input, ...snapshot }: ReturnType<typeof this.createSnapshot>) {
    super.loadSnapshot(snapshot);
    Object.assign(this, { input });
  }
}