Sync APIs. @tag-name=gloo-v1.18.0-beta9 (#1254)
Co-authored-by: soloio-bot <[email protected]>
soloio-bot authored Jul 19, 2024
1 parent d675588 commit a58b36d
Showing 15 changed files with 6,534 additions and 939 deletions.
389 changes: 389 additions & 0 deletions api/gloo/gloo/v1/enterprise/options/ai/ai.proto
@@ -0,0 +1,389 @@
syntax = "proto3";
package ai.options.gloo.solo.io;
option go_package = "github.com/solo-io/solo-apis/pkg/api/gloo.solo.io/v1/enterprise/options/ai";

import "github.com/solo-io/solo-kit/api/v1/ref.proto";
import "google/protobuf/wrappers.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "extproto/ext.proto";
option (extproto.equal_all) = true;
option (extproto.hash_all) = true;
option (extproto.clone_all) = true;

message SingleAuthToken {
  oneof auth_token_source {
    // Provides an easy way to specify the token inline
    string inline = 1;
    // Reference to a secret in the same namespace as the Upstream
    core.solo.io.ResourceRef secret_ref = 2;
  }
}
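/*
When `secret_ref` is used, the token is read from a Kubernetes secret in the
same namespace as the Upstream. For illustration, such a secret could look like
the following (the exact key the gateway expects may differ; see the Gloo AI
Gateway docs):
```
apiVersion: v1
kind: Secret
metadata:
  name: openai-secret
  namespace: gloo-system
type: Opaque
stringData:
  Authorization: "Bearer <my_token>"
```
*/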

/*
The AI UpstreamSpec represents a logical LLM provider backend.
This spec configures which backend to use as well as how to
authenticate with it.
Currently the options are:
- OpenAI
  * Default Host: api.openai.com
  * Default Port: 443
  * Auth Token: Bearer token to use for the OpenAI API
- Mistral
  * Default Host: api.mistral.com
  * Default Port: 443
  * Auth Token: Bearer token to use for the Mistral API
- Anthropic
  * Default Host: api.anthropic.com
  * Default Port: 443
  * Auth Token: x-api-key to use for the Anthropic API
  * Version: Optional version header to pass to the Anthropic API
All of the above backends can be configured to use a custom host and port.
This option is meant to allow users to proxy the request, or to use a
different backend altogether that is API-compliant with the upstream version.

Examples:

OpenAI with an inline auth token:
```
ai:
  openai:
    authToken:
      inline: "my_token"
```

Mistral with a secret ref:
```
ai:
  mistral:
    authToken:
      secretRef:
        name: "my-secret"
        namespace: "my-ns"
```

Anthropic with an inline token and a custom host:
```
ai:
  anthropic:
    authToken:
      inline: "my_token"
    customHost:
      host: "my-anthropic-host.com"
      port: 443 # Port is optional and defaults to 443 for HTTPS
```
*/
message UpstreamSpec {

  message CustomHost {
    // Custom host to send the traffic to
    string host = 1;
    // Custom port to send the traffic to
    uint32 port = 2;
  }

  // Settings for the OpenAI API
  message OpenAI {
    // Auth Token to use for the OpenAI API.
    // This token will be placed into the `Authorization` header
    // and prefixed with Bearer if not present
    // when sending the request to the upstream.
    SingleAuthToken auth_token = 1;
    // Optional custom host to send the traffic to
    CustomHost custom_host = 2;
  }

  // Settings for the Mistral API
  message Mistral {
    // Auth Token to use for the Mistral API.
    // This token will be placed into the `Authorization` header
    // and prefixed with Bearer if not present
    // when sending the request to the upstream.
    SingleAuthToken auth_token = 1;
    // Optional custom host to send the traffic to
    CustomHost custom_host = 2;
  }

  // Settings for the Anthropic API
  message Anthropic {
    // Auth Token to use for the Anthropic API.
    // This token will be placed into the `x-api-key` header
    // when sending the request to the upstream.
    SingleAuthToken auth_token = 1;
    // Optional custom host to send the traffic to
    CustomHost custom_host = 2;
    // An optional version header to pass to the Anthropic API.
    // See https://docs.anthropic.com/en/api/versioning for more details.
    string version = 3;
  }

  oneof llm {
    OpenAI openai = 1;
    Mistral mistral = 2;
    Anthropic anthropic = 3;
  }
}
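/*
For illustration, a complete Upstream resource using this spec might look like
the following (names and namespace are examples only):
```
apiVersion: gloo.solo.io/v1
kind: Upstream
metadata:
  name: openai
  namespace: gloo-system
spec:
  ai:
    openai:
      authToken:
        secretRef:
          name: openai-secret
          namespace: gloo-system
```
*/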

/*
RouteSettings configures the behavior of the LLM provider on a per-route basis.
This allows users to configure things like:
- Prompt Enrichment
- Retrieval Augmented Generation
- Semantic Caching
- Backup Models
- Defaults to merge with the user input fields
- Guardrails
NOTE: These settings may only be applied to a route which uses an LLMProvider backend!
*/
message RouteSettings {

  /*
  Config used to enrich the prompt. This can only be used with LLMProviders using the CHAT API type.
  Prompt enrichment allows you to add additional context to the prompt before sending it to the model.
  Unlike RAG or other dynamic context methods, prompt enrichment is static and is applied to every request.
  Note: Some providers, including Anthropic, do not support SYSTEM role messages, and instead have a dedicated
  system field in the input JSON. In that case, `defaults` should be used to set the system field. See the docs
  for that field for an example.
  Example:
  ```
  promptEnrichment:
    prepend:
    - role: SYSTEM
      content: "answer all questions in french"
    append:
    - role: USER
      content: "Describe the painting as if you were a famous art critic from the 17th century"
  ```
  */
  AIPromptEnrichment prompt_enrichment = 1;

  /*
  Guards to apply to the LLM requests on this route.
  These can be used to reject requests based on the content of the prompt, as well as
  to mask responses based on the content of the response. Both guards can be used
  at the same time.
  Below is a simple example of a prompt guard that will reject any prompt that contains
  the string "credit card" and will mask any credit card numbers in the response.
  ```
  promptGuard:
    request:
      customResponseMessage: "Rejected due to inappropriate content"
      matches:
      - "credit card"
    response:
      matches:
      # Mastercard
      - '(?:^|\D)(5[1-5][0-9]{2}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4})(?:\D|$)'
  ```
  */
  AIPromptGaurd prompt_guard = 2;

  /*
  Retrieval Augmented Generation (RAG). See https://research.ibm.com/blog/retrieval-augmented-generation-RAG
  Retrieval Augmented Generation is a process by which you "augment" the information
  a model has access to by providing it with a set of documents to use as context.
  This can be used to improve the quality of the generated text.
  Important note: the prompt must be embedded using the same embedding mechanism
  that was used to create the context documents.
  Example using Postgres for storage and OpenAI for embedding:
  ```
  rag:
    datastore:
      postgres:
        connectionString: postgresql+psycopg://gloo:[email protected]:6024/gloo
        collectionName: default
    embedding:
      openai:
        authToken:
          secretRef:
            name: openai-secret
            namespace: gloo-system
  ```
  */
  RAG rag = 3;
  /*
  Semantic caching configuration.
  Semantic caching allows you to cache previous model responses in order to provide
  faster responses to similar requests in the future.
  Results will vary depending on the embedding mechanism used, as well
  as the similarity threshold set.
  Example using Redis for storage and OpenAI for embedding:
  ```
  semanticCache:
    datastore:
      redis:
        connectionString: redis://172.17.0.1:6379
    embedding:
      openai:
        authToken:
          secretRef:
            name: openai-secret
            namespace: gloo-system
  ```
  */
  SemanticCache semantic_cache = 4;

  // Backup models to use in case of a failure with the primary model
  // passed in the request. By default each model will be tried 2 times
  // before moving on to the next model in the list. If all models fail,
  // the final error response will be returned to the client.
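  // For example (the model names below are illustrative):
  // ```
  // backupModels:
  // - "gpt-4o"
  // - "gpt-3.5-turbo"
  // ```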
  repeated string backup_models = 5;

  /*
  A list of defaults to be merged with the user input fields.
  These will NOT override the user input fields unless `override` is explicitly set to true.
  Some examples include setting the temperature, max_tokens, etc.
  Example overriding the system field for Anthropic:
  ```
  # Anthropic doesn't support a system chat type
  defaults:
  - field: "system"
    value: "answer all questions in french"
  ```
  Example setting the temperature and max_tokens, overriding max_tokens:
  ```
  defaults:
  - field: "temperature"
    value: 0.5
  - field: "max_tokens"
    value: 100
    override: true
  ```
  */
  repeated FieldDefault defaults = 6;
}

message FieldDefault {
  // Field name
  string field = 1;
  // Field value; this can be any valid JSON value
  google.protobuf.Value value = 2;
  // Whether or not to override the field if it already exists
  bool override = 3;
}

message Postgres {
  // Connection string to the Postgres database
  string connection_string = 1;
  // Name of the table to use
  string collection_name = 2;
}

message Embedding {
  message OpenAI {
    oneof auth_token_source {
      SingleAuthToken auth_token = 1;
      // Re-use the token from the backend:
      // google.protobuf.Empty inherit_backend_token = 3;
    }
  }

  oneof embedding {
    OpenAI openai = 1;
  }
}

message SemanticCache {
  message Redis {
    // Connection string to the Redis database
    string connection_string = 1;
  }

  // Data store from which to cache the request/response pairs
  message DataStore {
    oneof datastore {
      Redis redis = 1;
    }
  }

  // Which data store to use
  DataStore datastore = 1;
  // Model to use to get embeddings for the prompt
  Embedding embedding = 2;
  // Time before data in the cache is considered expired
  uint32 ttl = 3;
}

message RAG {
  message DataStore {
    oneof datastore {
      Postgres postgres = 1;
    }
  }

  // Data store from which to fetch the embeddings
  DataStore datastore = 1;
  // Model to use to get embeddings for the prompt
  Embedding embedding = 2;
  // Template to use to embed the returned context
  string prompt_template = 3;
}

message RateLimiting {
  // List of rate_limit configs to apply
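  // For example (the config name below is illustrative):
  // ```
  // rateLimiting:
  //   rateLimitConfigs:
  //   - "per-user-llm-limit"
  // ```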
  repeated string rate_limit_configs = 1;
}

message AIPromptEnrichment {
  message Message {
    // Role of the message.
    // The available roles depend on the backend model being used;
    // please consult the model's documentation for more information.
    string role = 1;
    // String content of the message
    string content = 2;
  }

  // A list of messages to be prepended to the prompt sent by the client
  repeated Message prepend = 2;
  // A list of messages to be appended to the prompt sent by the client
  repeated Message append = 3;
}

message AIPromptGaurd {
  message Request {
    // A list of regex patterns to match against the prompt.
    // Each one will be checked against the prompt, and if any match,
    // the request will be rejected.
    repeated string matches = 1;
    // Custom response message to send back to the client.
    // If not specified, the following default message will be used:
    // "The request was rejected due to inappropriate content"
    string custom_response_message = 2;
  }

  message Response {
    enum BuiltIn {
      // Default regex for Social Security Numbers
      SSN = 0;
      // Default regex for credit card numbers
      CREDIT_CARD = 1;
      // Default regex for email addresses
      EMAIL = 2;
      // Default regex for phone numbers
      PHONE_NUMBER = 3;
    }
    // A list of regex patterns to match against the response.
    // All matches will be masked before being sent back to the client.
    // `matches` and `builtins` are additive.
    repeated string matches = 1;
    // A list of built-in regexes to mask in the response.
    // `matches` and `builtins` are additive.
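    // For example, within a promptGuard's `response`:
    // ```
    // response:
    //   builtins:
    //   - CREDIT_CARD
    //   - SSN
    // ```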
    repeated BuiltIn builtins = 2;
  }

  // Guards for the prompt request
  Request request = 2;
  // Guards for the LLM response
  Response response = 3;
}