-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Co-authored-by: soloio-bot <[email protected]>
- Loading branch information
1 parent
d675588
commit a58b36d
Showing
15 changed files
with
6,534 additions
and
939 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,389 @@ | ||
syntax = "proto3"; | ||
package ai.options.gloo.solo.io; | ||
option go_package = "github.com/solo-io/solo-apis/pkg/api/gloo.solo.io/v1/enterprise/options/ai"; | ||
|
||
import "github.com/solo-io/solo-kit/api/v1/ref.proto"; | ||
import "google/protobuf/wrappers.proto"; | ||
import "google/protobuf/struct.proto"; | ||
import "google/protobuf/duration.proto"; | ||
import "google/protobuf/empty.proto"; | ||
import "extproto/ext.proto"; | ||
option (extproto.equal_all) = true; | ||
option (extproto.hash_all) = true; | ||
option (extproto.clone_all) = true; | ||
|
||
message SingleAuthToken { | ||
oneof auth_token_source { | ||
// Provide easy inline way to specify a token | ||
string inline = 1; | ||
// Reference to a secret in the same namespace as the Upstream | ||
core.solo.io.ResourceRef secret_ref = 2; | ||
} | ||
} | ||
|
||
/* | ||
The AI UpstreamSpec represents a logical LLM provider backend. | ||
The purpose of this spec is a way to configure which backend to use | ||
as well as how to authenticate with the backend. | ||
Currently the options are: | ||
- OpenAI | ||
* Default Host: api.openai.com | ||
* Default Port: 443 | ||
* Auth Token: Bearer token to use for the OpenAI API | ||
- Mistral | ||
* Default Host: api.mistral.com | ||
* Default Port: 443 | ||
* Auth Token: Bearer token to use for the Mistral API | ||
- Anthropic | ||
* Default Host: api.anthropic.com | ||
* Default Port: 443 | ||
* Auth Token: x-api-key to use for the Anthropic API | ||
* Version: Optional version header to pass to the Anthropic API | ||
All of the above backends can be configured to use a custom host and port. | ||
This option is meant to allow users to proxy the request, or to use a different | ||
backend altogether which is API compliant with the upstream version. | ||
Examples: | ||
OpenAI with inline auth token: | ||
``` | ||
ai: | ||
openai: | ||
authToken: | ||
inline: "my_token" | ||
``` | ||
Mistral with secret ref: | ||
``` | ||
ai: | ||
mistral: | ||
authToken: | ||
secretRef: | ||
name: "my-secret" | ||
namespace: "my-ns" | ||
``` | ||
Anthropic with inline token and custom Host: | ||
``` | ||
ai: | ||
anthropic: | ||
authToken: | ||
inline: "my_token" | ||
customHost: | ||
host: "my-anthropic-host.com" | ||
port: 443 # Port is optional and will default to 443 for HTTPS | ||
``` | ||
*/ | ||
message UpstreamSpec { | ||
|
||
message CustomHost { | ||
// Custom host to send the traffic to | ||
string host = 1; | ||
// Custom host to send the traffic to | ||
uint32 port = 2; | ||
} | ||
|
||
// Settings for the OpenAI API | ||
message OpenAI { | ||
// Auth Token to use for the OpenAI API | ||
// This token will be placed into the `Authorization` header | ||
// and prefixed with Bearer if not present | ||
// when sending the request to the upstream. | ||
SingleAuthToken auth_token = 1; | ||
// Optional custom host to send the traffic to | ||
CustomHost custom_host = 2; | ||
} | ||
|
||
// Settings for the Mistral API | ||
message Mistral { | ||
// Auth Token to use for the Mistral API. | ||
// This token will be placed into the `Authorization` header | ||
// and prefixed with Bearer if not present | ||
// when sending the request to the upstream. | ||
SingleAuthToken auth_token = 1; | ||
// Optional custom host to send the traffic to | ||
CustomHost custom_host = 2; | ||
} | ||
|
||
message Anthropic { | ||
// Auth Token to use for the Anthropic API. | ||
// This token will be placed into the `x-api-key` header | ||
// when sending the request to the upstream. | ||
SingleAuthToken auth_token = 1; | ||
|
||
CustomHost custom_host = 2; | ||
// An optional version header to pass to the Anthropic API | ||
// See: https://docs.anthropic.com/en/api/versioning for more details | ||
string version = 3; | ||
} | ||
|
||
oneof llm { | ||
OpenAI openai = 1; | ||
Mistral mistral = 2; | ||
Anthropic anthropic = 3; | ||
} | ||
} | ||
|
||
/* | ||
RouteSettings is a way to configure the behavior of the LLM provider on a per-route basis | ||
This allows users to configure things like: | ||
- Prompt Enrichment | ||
- Retrieval Augmented Generation | ||
- Semantic Caching | ||
- Backup Models | ||
- Defaults to merge with the user input fields | ||
- Guardrails | ||
NOTE: These settings may only be applied to a route which uses an LLMProvider backend! | ||
*/ | ||
message RouteSettings { | ||
|
||
/* | ||
Config used to enrich the prompt. This can only be used with LLMProviders using the CHAT API type. | ||
Prompt enrichment allows you to add additional context to the prompt before sending it to the model. | ||
Unlike RAG or other dynamic context methods, prompt enrichment is static and will be applied to every request. | ||
Note: Some providers, including Anthropic do not support SYSTEM role messages, but rather have a dedicated | ||
system field in the input JSON. In this case, `field_defaults` should be used to set the system field. See the docs | ||
for that field for an example. | ||
Example: | ||
``` | ||
promptEnrichment: | ||
prepend: | ||
- role: SYSTEM | ||
content: "answer all questions in french" | ||
append: | ||
- role: USER | ||
content: "Describe the painting as if you were a famous art critic from the 17th century" | ||
``` | ||
*/ | ||
AIPromptEnrichment prompt_enrichment = 1; | ||
|
||
/* | ||
Guards to apply to the LLM requests on this route. | ||
This can be used to reject requests based on the content of the prompt, as well as | ||
mask responses based on the content of the response. These guards can be also be used | ||
at the same time. | ||
Below is a simple example of a prompt guard that will reject any prompt that contains | ||
the string "credit card" and will mask any credit card numbers in the response. | ||
``` | ||
promptGuard: | ||
request: | ||
customResponseMessage: "Rejected due to inappropriate content" | ||
matches: | ||
- "credit card" | ||
response: | ||
matches: | ||
# Mastercard | ||
- '(?:^|\D)(5[1-5][0-9]{2}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4})(?:\D|$)' | ||
```` | ||
*/ | ||
AIPromptGaurd prompt_guard = 2; | ||
|
||
/* | ||
Retrieval Augmented Generation. https://research.ibm.com/blog/retrieval-augmented-generation-RAG | ||
Retrieval Augmented Generation is a process by which you "augment" the information | ||
a model has access to by providing it with a set of documents to use as context. | ||
This can be used to improve the quality of the generated text. | ||
Important Note: The same embedding mechanism must be used for the prompt | ||
which was used for the initial creation of the context documents. | ||
Example using postgres for storage and OpenAI for embedding: | ||
``` | ||
rag: | ||
datastore: | ||
postgres: | ||
connectionString: postgresql+psycopg://gloo:[email protected]:6024/gloo | ||
collectionName: default | ||
embedding: | ||
openai: | ||
authToken: | ||
secretRef: | ||
name: openai-secret | ||
namespace: gloo-system | ||
``` | ||
*/ | ||
RAG rag = 3; | ||
/* | ||
Semantic caching configuration | ||
Semantic caching allows you to cache previous model responses in order to provide | ||
faster responses to similar requests in the future. | ||
Results will vary depending on the embedding mechanism used, as well | ||
as the similarity threshold set. | ||
Example using Redis for storage and OpenAI for embedding: | ||
``` | ||
semanticCache: | ||
datastore: | ||
redis: | ||
connectionString: redis://172.17.0.1:6379 | ||
embedding: | ||
openai: | ||
authToken: | ||
secretRef: | ||
name: openai-secret | ||
namespace: gloo-system | ||
``` | ||
*/ | ||
SemanticCache semantic_cache = 4; | ||
|
||
// Backup models to use in case of a failure with the primary model | ||
// passed in the request. By default each model will be tried 2 times | ||
// before moving on to the next model in the list. If all requests fail then | ||
// the final response will be returned to the client. | ||
repeated string backup_models = 5; | ||
|
||
/* | ||
A list of defaults to be merged with the user input fields. | ||
These will NOT override the user input fields unless override is explicitly set to true. | ||
Some examples include setting the temperature, max_tokens, etc. | ||
Example overriding system field for Anthropic: | ||
``` | ||
# Anthropic doesn't support a system chat type | ||
defaults: | ||
- field: "system" | ||
value: "answer all questions in french" | ||
``` | ||
Example setting the temperature and max_tokens, overriding max_tokens: | ||
``` | ||
defaults: | ||
- field: "temperature" | ||
value: 0.5 | ||
- field: "max_tokens" | ||
value: 100 | ||
``` | ||
*/ | ||
repeated FieldDefault defaults = 6; | ||
} | ||
|
||
message FieldDefault { | ||
// Field name | ||
string field = 1; | ||
// Field Value, this can be any valid JSON value | ||
google.protobuf.Value value = 2; | ||
// Whether or not to override the field if it already exists | ||
bool override = 3; | ||
} | ||
|
||
|
||
message Postgres { | ||
// Connection string to the Postgres database | ||
string connection_string = 1; | ||
// Name of the table to use | ||
string collection_name = 2; | ||
} | ||
|
||
|
||
|
||
message Embedding { | ||
message OpenAI { | ||
oneof auth_token_source { | ||
SingleAuthToken auth_token = 1; | ||
// re-use the token from the backend | ||
// google.protobuf.Empty inherit_backend_token = 3; | ||
} | ||
} | ||
|
||
oneof embedding { | ||
OpenAI openai = 1; | ||
} | ||
} | ||
|
||
message SemanticCache { | ||
message Redis { | ||
// Connection string to the Redis database | ||
string connection_string = 1; | ||
} | ||
// Data store from which to cache the request/response pairs | ||
message DataStore { | ||
oneof datastore { | ||
Redis redis = 1; | ||
} | ||
} | ||
// Which data store to use | ||
DataStore datastore = 1; | ||
// Model to use to get embeddings for prompt | ||
Embedding embedding = 2; | ||
// Time before data in the cache is considered expired | ||
uint32 ttl = 3; | ||
} | ||
|
||
message RAG { | ||
message DataStore { | ||
oneof datastore { | ||
Postgres postgres = 1; | ||
} | ||
} | ||
// Data store from which to fetch the embeddings | ||
DataStore datastore = 1; | ||
// Model to use to get embeddings for prompt | ||
Embedding embedding = 2; | ||
// Template to use to embed the returned context | ||
string prompt_template = 3; | ||
} | ||
|
||
message RateLimiting { | ||
// List of rate_limit configs to apply | ||
repeated string rate_limit_configs = 1; | ||
} | ||
|
||
message AIPromptEnrichment { | ||
message Message { | ||
// Role of the message. | ||
// The available roles depend on the backend model being used, | ||
// please consult the documentation for more information. | ||
string role = 1; | ||
// String content of the message | ||
string content = 2; | ||
} | ||
// A list of messages to be prepended to the prompt sent by the client | ||
repeated Message prepend = 2; | ||
// A list of messages to be appended to the prompt sent by the client | ||
repeated Message append = 3; | ||
|
||
} | ||
|
||
message AIPromptGaurd { | ||
message Request { | ||
// A list of Regex patterns to match against the prompt. | ||
// Each one will be checked against the prompt and if any match | ||
// the request will be rejected. | ||
repeated string matches = 1; | ||
// Custom response message to send back to the client. | ||
// If not specified, the following default message will be used: | ||
// "The request was rejected due to inappropriate content" | ||
string custom_response_message = 2; | ||
} | ||
|
||
message Response { | ||
enum BuiltIn { | ||
// Default REGEX for Social Security Numbers | ||
SSN = 0; | ||
// Default REGEX for Credit Card Numbers | ||
CREDIT_CARD = 1; | ||
// Default REGEX for Email Addresses | ||
EMAIL = 2; | ||
// Default REGEX for Phone Numbers | ||
PHONE_NUMBER = 3; | ||
} | ||
// A list of Regex patterns to match against the response. | ||
// All matches will be masked before being sent back to the client. | ||
// matches and builtins are additive. | ||
repeated string matches = 1; | ||
// A list of built-in regexes to mask in the response. | ||
// matches and builtins are additive. | ||
repeated BuiltIn builtins = 2; | ||
} | ||
// Guards for the prompt request | ||
Request request = 2; | ||
// Guards for the LLM response | ||
Response response = 3; | ||
} |
Oops, something went wrong.