diff --git a/docs/llms.md b/docs/llms.md new file mode 100644 index 00000000..b45a9b44 --- /dev/null +++ b/docs/llms.md @@ -0,0 +1,242 @@ +# LLMs (inference) + +> [!TIP] +> +> Location for concrete implementations within the framework `bee-agent-framework/adapters`. +> +> Location for base abstraction within the framework `bee-agent-framework/llms`. + +A Large Language Model (LLM) is an AI designed to understand and generate human-like text. +Trained on extensive text data, LLMs learn language patterns, grammar, context, and basic reasoning to perform tasks like text completion, translation, summarization, and answering questions. + +To unify differences between various APIs, the framework defines a common interface—a set of actions that can be performed with it. + +## Providers (adapters) + +| Name | LLM | Chat LLM | Structured output (constrained decoding) | +| ------------------------------------------------------------------------- | -------------------------- | --------------------------------------------- | ---------------------------------------- | +| `WatsonX` | ✅ | ⚠️ (model specific template must be provided) | ❌ | +| `Ollama` | ✅ | ✅ | ⚠️ (JSON only) | +| `OpenAI` | ❌ | ✅ | ⚠️ (JSON schema only) | +| `LangChain` | ⚠️ (depends on a provider) | ⚠️ (depends on a provider) | ❌ | +| `Groq` | ❌ | ✅ | ⚠️ (JSON object only) | +| `BAM (Internal)` | ✅ | ⚠️ (model specific template must be provided) | ✅ | +| ➕ [Request](https://github.com/i-am-bee/bee-agent-framework/discussions) | | | | + +All providers' examples can be found in [examples/llms/providers](/examples/llms/providers). + +Are you interested in creating your own adapter? Jump to the [adding a new provider](#adding-a-new-provider-adapter) section. + +## Usage + +### Plain text generation + + + +```ts +import "dotenv/config.js"; +import { createConsoleReader } from "examples/helpers/io.js"; +import { WatsonXLLM } from "bee-agent-framework/adapters/watsonx/llm"; + +const llm = new WatsonXLLM({ + modelId: "google/flan-ul2", + projectId: process.env.WATSONX_PROJECT_ID, + apiKey: process.env.WATSONX_API_KEY, + parameters: { + decoding_method: "greedy", + max_new_tokens: 50, + }, +}); + +const reader = createConsoleReader(); + +const prompt = await reader.prompt(); +const response = await llm.generate(prompt); +reader.write(`LLM 🤖 (text) : `, response.getTextContent()); +process.exit(0); +``` + +_Source: [examples/llms/text.ts](/examples/llms/text.ts)_ + +> [!NOTE] +> +> The `generate` method returns a class that extends the base [`BaseLLMOutput`](/src/llms/base.ts) class. +> This class allows you to retrieve the response as text using the `getTextContent` method and other useful metadata. + +> [!TIP] +> +> You can enable streaming communication (internally) by passing `{ stream: true }` as a second parameter to the `generate` method. 
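+
+For illustration, a minimal sketch of that streaming option, assuming the same `WatsonXLLM` configuration and environment variables as in the example above (the prompt string is arbitrary):
+
+```ts
+import "dotenv/config.js";
+import { WatsonXLLM } from "bee-agent-framework/adapters/watsonx/llm";
+
+const llm = new WatsonXLLM({
+  modelId: "google/flan-ul2",
+  projectId: process.env.WATSONX_PROJECT_ID,
+  apiKey: process.env.WATSONX_API_KEY,
+  parameters: {
+    decoding_method: "greedy",
+    max_new_tokens: 50,
+  },
+});
+
+// Per the tip above: `{ stream: true }` enables streaming communication internally,
+// while `generate` still resolves with a single output object.
+const response = await llm.generate("What is the capital of France?", { stream: true });
+console.info(response.getTextContent());
+```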
+ +### Chat text generation + + + +```ts +import "dotenv/config.js"; +import { createConsoleReader } from "examples/helpers/io.js"; +import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message"; +import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat"; + +const llm = new OllamaChatLLM(); + +const reader = createConsoleReader(); + +for await (const { prompt } of reader) { + const response = await llm.generate([ + BaseMessage.of({ + role: Role.USER, + text: prompt, + }), + ]); + reader.write(`LLM 🤖 (txt) : `, response.getTextContent()); + reader.write(`LLM 🤖 (raw) : `, JSON.stringify(response.finalResult)); +} +``` + +_Source: [examples/llms/chat.ts](/examples/llms/chat.ts)_ + +> [!NOTE] +> +> The `generate` method returns a class that extends the base [`ChatLLMOutput`](/src/llms/chat.ts) class. +> This class allows you to retrieve the response as text using the `getTextContent` method and other useful metadata. +> To retrieve all messages (chunks) access the `messages` property (getter). + +> [!TIP] +> +> You can enable streaming communication (internally) by passing `{ stream: true }` as a second parameter to the `generate` method. + +#### Streaming + + + +```ts +import "dotenv/config.js"; +import { createConsoleReader } from "examples/helpers/io.js"; +import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message"; +import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat"; + +const llm = new OllamaChatLLM(); + +const reader = createConsoleReader(); + +for await (const { prompt } of reader) { + for await (const chunk of llm.stream([ + BaseMessage.of({ + role: Role.USER, + text: prompt, + }), + ])) { + reader.write(`LLM 🤖 (txt) : `, chunk.getTextContent()); + reader.write(`LLM 🤖 (raw) : `, JSON.stringify(chunk.finalResult)); + } +} +``` + +_Source: [examples/llms/chatStream.ts](/examples/llms/chatStream.ts)_ + +#### Callback (Emitter) + + + +```ts +import "dotenv/config.js"; +import { createConsoleReader } from "examples/helpers/io.js"; +import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message"; +import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat"; + +const llm = new OllamaChatLLM(); + +const reader = createConsoleReader(); + +for await (const { prompt } of reader) { + const response = await llm + .generate( + [ + BaseMessage.of({ + role: Role.USER, + text: prompt, + }), + ], + {}, + ) + .observe((emitter) => + emitter.match("*", (data, event) => { + reader.write(`LLM 🤖 (event: ${event.name})`, JSON.stringify(data)); + + // if you want to close the stream prematurely, just uncomment the following line + // callbacks.abort() + }), + ); + + reader.write(`LLM 🤖 (txt) : `, response.getTextContent()); + reader.write(`LLM 🤖 (raw) : `, JSON.stringify(response.finalResult)); +} +``` + +_Source: [examples/llms/chatCallback.ts](/examples/llms/chatCallback.ts)_ + +### Structured generation + + + +```ts +import "dotenv/config.js"; +import { z } from "zod"; +import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message"; +import { OllamaChatLLM } from "bee-agent-framework/adapters/ollama/chat"; +import { JsonDriver } from "bee-agent-framework/llms/drivers/json"; + +const llm = new OllamaChatLLM(); +const driver = new JsonDriver(llm); +const response = await driver.generate( + z.union([ + z.object({ + firstName: z.string().min(1), + lastName: z.string().min(1), + address: z.string(), + age: z.number().int().min(1), + hobby: z.string(), + }), + z.object({ + error: z.string(), + 
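+      // the second branch of the union lets the model return an error message instead of a profile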
}), + ]), + [ + BaseMessage.of({ + role: Role.USER, + text: "Generate a profile of a citizen of Europe.", + }), + ], +); +console.info(response); +process.exit(0); +``` + +_Source: [examples/llms/structured.ts](/examples/llms/structured.ts)_ + +## Adding a new provider (adapter) + +To use an inference provider that is not mentioned in our providers list feel free to [create a request](https://github.com/i-am-bee/bee-agent-framework/discussions). + +If approved and you want to create it on your own, you must do the following things. Let's assume the name of your provider is `Custom.` + +- Base location within the framework: `bee-agent-framework/adapters/custom` + - Text LLM (filename): `llm.ts` ([example implementation](/examples/llms/providers/customProvider.ts)) + - Chat LLM (filename): `chat.ts` ([example implementation](/examples/llms/providers/customChatProvider.ts)) + +> [!IMPORTANT] +> +> If the target provider provides an SDK, use it. + +> [!IMPORTANT] +> +> All provider-related dependencies (if any) must be included in `devDependencies` and `peerDependencies` in the [`package.json`](/package.json). + +> [!TIP] +> +> To simplify work with the target RestAPI feel free to use the helper [`RestfulClient`](/src/internals/fetcher.ts) class. +> The client usage can be seen in the WatsonX LLM Adapter [here](/src/adapters/watsonx/llm.ts). + +> [!TIP] +> +> Parsing environment variables should be done via helper functions (`parseEnv` / `hasEnv` / `getEnv`) that can be found [here](/src/internals/env.ts). diff --git a/examples/llms/chatCallback.ts b/examples/llms/chatCallback.ts index a3c43fd7..6efb9a14 100644 --- a/examples/llms/chatCallback.ts +++ b/examples/llms/chatCallback.ts @@ -22,7 +22,7 @@ for await (const { prompt } of reader) { emitter.match("*", (data, event) => { reader.write(`LLM 🤖 (event: ${event.name})`, JSON.stringify(data)); - // if you want to premature close the stream, just uncomment the following line + // if you want to close the stream prematurely, just uncomment the following line // callbacks.abort() }), ); diff --git a/examples/llms/providers/customChatProvider.ts b/examples/llms/providers/customChatProvider.ts new file mode 100644 index 00000000..33e48af0 --- /dev/null +++ b/examples/llms/providers/customChatProvider.ts @@ -0,0 +1,131 @@ +import { + AsyncStream, + BaseLLMTokenizeOutput, + ExecutionOptions, + GenerateCallbacks, + GenerateOptions, + LLMCache, + LLMMeta, +} from "bee-agent-framework/llms/base"; +import { shallowCopy } from "bee-agent-framework/serializer/utils"; +import type { GetRunContext } from "bee-agent-framework/context"; +import { Emitter } from "bee-agent-framework/emitter/emitter"; +import { ChatLLM, ChatLLMOutput } from "bee-agent-framework/llms/chat"; +import { BaseMessage, Role } from "bee-agent-framework/llms/primitives/message"; +import { sum } from "remeda"; + +export class CustomChatLLMOutput extends ChatLLMOutput { + public readonly chunks: BaseMessage[] = []; + + constructor(chunk: BaseMessage) { + super(); + this.chunks.push(chunk); + } + + get messages() { + return this.chunks; + } + + merge(other: CustomChatLLMOutput): void { + this.chunks.push(...other.chunks); + } + + getTextContent(): string { + return this.chunks.map((result) => result.text).join(""); + } + + toString(): string { + return this.getTextContent(); + } + + createSnapshot() { + return { chunks: shallowCopy(this.chunks) }; + } + + loadSnapshot(snapshot: ReturnType): void { + Object.assign(this, snapshot); + } +} + +// Feel free to extend if you want to support 
additional parameters +type CustomGenerateOptions = GenerateOptions; + +export interface CustomChatLLMInput { + modelId: string; + executionOptions?: ExecutionOptions; + cache?: LLMCache; + parameters?: Record; +} + +export class CustomChatLLM extends ChatLLM { + public readonly emitter: Emitter = Emitter.root.child({ + namespace: ["custom", "llm"], + creator: this, + }); + + constructor(protected readonly input: CustomChatLLMInput) { + super(input.modelId, input.executionOptions, input.cache); + } + + async meta(): Promise { + // TODO: retrieve data about current model from the given provider API + return { tokenLimit: Infinity }; + } + + async tokenize(input: BaseMessage[]): Promise { + // TODO: retrieve data about current model from the given provider API + return { + tokensCount: sum(input.map((msg) => Math.ceil(msg.text.length / 4))), + }; + } + + protected async _generate( + input: BaseMessage[], + options: CustomGenerateOptions, + run: GetRunContext, + ): Promise { + // this method should do non-stream request to the API + // TIP: access inference parameters via `this.input.parameters` and `options` + // TIP: use signal from run.signal + const result = BaseMessage.of({ + role: Role.ASSISTANT, + text: "TODO: response retrieve from the API", + meta: { + createdAt: new Date(), + }, + }); + return new CustomChatLLMOutput(result); + } + + protected async *_stream( + input: BaseMessage[], + options: CustomGenerateOptions, + run: GetRunContext, + ): AsyncStream { + // this method should do stream request to the API + // TIP: access inference parameters via `this.input.parameters` and `options` + // TIP: use signal from run.signal + for await (const chunk of ["Hel", "oo", "world", "!"]) { + const result = BaseMessage.of({ + role: Role.ASSISTANT, + text: chunk, + meta: { + createdAt: new Date(), + }, + }); + yield new CustomChatLLMOutput(result); + } + } + + createSnapshot() { + return { + ...super.createSnapshot(), + input: shallowCopy(this.input), + }; + } + + loadSnapshot({ input, ...snapshot }: ReturnType) { + super.loadSnapshot(snapshot); + Object.assign(this, { input }); + } +} diff --git a/examples/llms/providers/customProvider.ts b/examples/llms/providers/customProvider.ts new file mode 100644 index 00000000..7146190e --- /dev/null +++ b/examples/llms/providers/customProvider.ts @@ -0,0 +1,125 @@ +import { LLM } from "bee-agent-framework/llms/llm"; +import { + AsyncStream, + BaseLLMOutput, + BaseLLMTokenizeOutput, + ExecutionOptions, + GenerateCallbacks, + GenerateOptions, + LLMCache, + LLMMeta, +} from "bee-agent-framework/llms/base"; +import { shallowCopy } from "bee-agent-framework/serializer/utils"; +import type { GetRunContext } from "bee-agent-framework/context"; +import { Emitter } from "bee-agent-framework/emitter/emitter"; + +interface CustomLLMChunk { + text: string; + metadata: Record; +} + +export class CustomLLMOutput extends BaseLLMOutput { + public readonly chunks: CustomLLMChunk[] = []; + + constructor(chunk: CustomLLMChunk) { + super(); + this.chunks.push(chunk); + } + + merge(other: CustomLLMOutput): void { + this.chunks.push(...other.chunks); + } + + getTextContent(): string { + return this.chunks.map((result) => result.text).join(""); + } + + toString(): string { + return this.getTextContent(); + } + + createSnapshot() { + return { chunks: shallowCopy(this.chunks) }; + } + + loadSnapshot(snapshot: ReturnType): void { + Object.assign(this, snapshot); + } +} + +// Feel free to extend if you want to support additional parameters +type CustomGenerateOptions = 
GenerateOptions; + +export interface CustomLLMInput { + modelId: string; + executionOptions?: ExecutionOptions; + cache?: LLMCache; + parameters?: Record; +} + +export class CustomLLM extends LLM { + public readonly emitter: Emitter = Emitter.root.child({ + namespace: ["custom", "llm"], + creator: this, + }); + + constructor(protected readonly input: CustomLLMInput) { + super(input.modelId, input.executionOptions, input.cache); + } + + async meta(): Promise { + // TODO: retrieve data about current model from the given provider API + return { tokenLimit: Infinity }; + } + + async tokenize(input: string): Promise { + // TODO: retrieve data about current model from the given provider API + return { + tokensCount: Math.ceil(input.length / 4), + }; + } + + protected async _generate( + input: string, + options: CustomGenerateOptions, + run: GetRunContext, + ): Promise { + // this method should do non-stream request to the API + // TIP: access inference parameters via `this.input.parameters` and `options` + // TIP: use signal from run.signal + const result: CustomLLMChunk = { + text: "...", + metadata: {}, + }; + return new CustomLLMOutput(result); + } + + protected async *_stream( + input: string, + options: CustomGenerateOptions, + run: GetRunContext, + ): AsyncStream { + // this method should do stream request to the API + // TIP: access inference parameters via `this.input.parameters` and `options` + // TIP: use signal from run.signal + for await (const chunk of ["Hel", "oo", "world", "!"]) { + const result: CustomLLMChunk = { + text: chunk, + metadata: {}, + }; + yield new CustomLLMOutput(result); + } + } + + createSnapshot() { + return { + ...super.createSnapshot(), + input: shallowCopy(this.input), + }; + } + + loadSnapshot({ input, ...snapshot }: ReturnType) { + super.loadSnapshot(snapshot); + Object.assign(this, { input }); + } +}
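+
+// Hypothetical usage sketch (illustration only, not part of the framework): once the
+// adapter above talks to a real provider API, it can be used like any other text LLM.
+//
+// const llm = new CustomLLM({ modelId: "my-model", parameters: { temperature: 0 } });
+// const response = await llm.generate("Hello world!");
+// console.info(response.getTextContent());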