Sync APIs. @tag-name=gloo-v1.18.0-beta9 (#1254)

Co-authored-by: soloio-bot <[email protected]>
solo-io · Jul 19, 2024 · a58b36d · a58b36d
1 parent d675588
commit a58b36d
Show file tree

Hide file tree

Showing 15 changed files with 6,534 additions and 939 deletions.
diff --git a/api/gloo/gloo/v1/enterprise/options/ai/ai.proto b/api/gloo/gloo/v1/enterprise/options/ai/ai.proto
@@ -0,0 +1,389 @@
+syntax = "proto3";
+package ai.options.gloo.solo.io;
+option go_package = "github.com/solo-io/solo-apis/pkg/api/gloo.solo.io/v1/enterprise/options/ai";
+
+import "github.com/solo-io/solo-kit/api/v1/ref.proto";
+import "google/protobuf/wrappers.proto";
+import "google/protobuf/struct.proto";
+import "google/protobuf/duration.proto";
+import "google/protobuf/empty.proto";
+import "extproto/ext.proto";
+option (extproto.equal_all) = true;
+option (extproto.hash_all) = true;
+option (extproto.clone_all) = true;
+
+message SingleAuthToken {
+  oneof auth_token_source {
+    // Provide easy inline way to specify a token
+    string inline = 1;
+    // Reference to a secret in the same namespace as the Upstream
+    core.solo.io.ResourceRef secret_ref = 2;
+  }
+}
+
+/*
+  The AI UpstreamSpec represents a logical LLM provider backend.
+  The purpose of this spec is a way to configure which backend to use
+  as well as how to authenticate with the backend.
+
+  Currently the options are:
+  - OpenAI
+    * Default Host: api.openai.com
+    * Default Port: 443
+    * Auth Token: Bearer token to use for the OpenAI API
+  - Mistral
+    * Default Host: api.mistral.com
+    * Default Port: 443
+    * Auth Token: Bearer token to use for the Mistral API
+  - Anthropic
+    * Default Host: api.anthropic.com
+    * Default Port: 443
+    * Auth Token: x-api-key to use for the Anthropic API
+    * Version: Optional version header to pass to the Anthropic API
+  
+  All of the above backends can be configured to use a custom host and port.
+  This option is meant to allow users to proxy the request, or to use a different
+  backend altogether which is API compliant with the upstream version.
+
+  Examples:
+
+  OpenAI with inline auth token:
+  ```
+  ai:
+    openai:
+      authToken:
+        inline: "my_token"
+  ```
+
+  Mistral with secret ref:
+  ```
+  ai:
+    mistral:
+      authToken:
+        secretRef:
+          name: "my-secret"
+          namespace: "my-ns"
+  ```
+
+  Anthropic with inline token and custom Host:
+  ```
+  ai:
+    anthropic:
+      authToken:
+        inline: "my_token"
+      customHost:
+        host: "my-anthropic-host.com"
+        port: 443 # Port is optional and will default to 443 for HTTPS
+  ```
+*/
+message UpstreamSpec {
+
+  message CustomHost {
+    // Custom host to send the traffic to
+    string host = 1;
+    // Custom host to send the traffic to
+    uint32 port = 2;
+  }
+
+  // Settings for the OpenAI API
+  message OpenAI {
+    // Auth Token to use for the OpenAI API
+    // This token will be placed into the `Authorization` header
+    // and prefixed with Bearer if not present
+    // when sending the request to the upstream.
+    SingleAuthToken auth_token = 1;
+    // Optional custom host to send the traffic to
+    CustomHost custom_host = 2;
+  }
+
+  // Settings for the Mistral API
+  message Mistral {
+    // Auth Token to use for the Mistral API.
+    // This token will be placed into the `Authorization` header
+    // and prefixed with Bearer if not present
+    // when sending the request to the upstream.
+    SingleAuthToken auth_token = 1;
+    // Optional custom host to send the traffic to
+    CustomHost custom_host = 2;
+  }
+
+  message Anthropic {
+    // Auth Token to use for the Anthropic API.
+    // This token will be placed into the `x-api-key` header
+    // when sending the request to the upstream.
+    SingleAuthToken auth_token = 1;
+
+    CustomHost custom_host = 2;
+    // An optional version header to pass to the Anthropic API
+    // See: https://docs.anthropic.com/en/api/versioning for more details
+    string version = 3;
+  }
+
+  oneof llm {
+    OpenAI openai = 1;
+    Mistral mistral = 2;
+    Anthropic anthropic = 3;
+  }
+}
+
+/*
+  RouteSettings is a way to configure the behavior of the LLM provider on a per-route basis
+  This allows users to configure things like:
+  - Prompt Enrichment
+  - Retrieval Augmented Generation
+  - Semantic Caching
+  - Backup Models
+  - Defaults to merge with the user input fields
+  - Guardrails
+
+  NOTE: These settings may only be applied to a route which uses an LLMProvider backend!
+*/
+message RouteSettings {
+
+  /*
+    Config used to enrich the prompt. This can only be used with LLMProviders using the CHAT API type.
+
+    Prompt enrichment allows you to add additional context to the prompt before sending it to the model.
+    Unlike RAG or other dynamic context methods, prompt enrichment is static and will be applied to every request.
+
+    Note: Some providers, including Anthropic do not support SYSTEM role messages, but rather have a dedicated
+    system field in the input JSON. In this case, `field_defaults` should be used to set the system field. See the docs
+    for that field for an example.
+
+    Example:
+    ```
+    promptEnrichment:
+      prepend:
+      - role: SYSTEM
+        content: "answer all questions in french"
+      append:
+      - role: USER
+        content: "Describe the painting as if you were a famous art critic from the 17th century"
+    ```
+  */
+  AIPromptEnrichment prompt_enrichment = 1;
+
+  /*
+    Guards to apply to the LLM requests on this route.
+    This can be used to reject requests based on the content of the prompt, as well as 
+    mask responses based on the content of the response. These guards can be also be used
+    at the same time.
+
+    Below is a simple example of a prompt guard that will reject any prompt that contains
+    the string "credit card" and will mask any credit card numbers in the response.
+    
+    ```
+    promptGuard:
+      request:
+        customResponseMessage: "Rejected due to inappropriate content"
+        matches:
+        - "credit card"
+      response:
+        matches:
+        # Mastercard
+        - '(?:^|\D)(5[1-5][0-9]{2}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4}(?:\ |\-|)[0-9]{4})(?:\D|$)'
+    ````
+  */
+  AIPromptGaurd prompt_guard = 2;
+
+  /*
+    Retrieval Augmented Generation. https://research.ibm.com/blog/retrieval-augmented-generation-RAG
+    Retrieval Augmented Generation is a process by which you "augment" the information
+    a model has access to by providing it with a set of documents to use as context.
+    This can be used to improve the quality of the generated text.
+    Important Note: The same embedding mechanism must be used for the prompt
+    which was used for the initial creation of the context documents.
+
+    Example using postgres for storage and OpenAI for embedding:
+    ```
+    rag:
+      datastore:
+        postgres:
+          connectionString: postgresql+psycopg://gloo:[email protected]:6024/gloo
+          collectionName: default
+      embedding:
+        openai:
+          authToken:
+            secretRef:
+              name: openai-secret
+              namespace: gloo-system
+    ```
+  */
+  RAG rag = 3;
+  /*
+    Semantic caching configuration
+    Semantic caching allows you to cache previous model responses in order to provide 
+    faster responses to similar requests in the future.
+    Results will vary depending on the embedding mechanism used, as well
+    as the similarity threshold set.
+
+    Example using Redis for storage and OpenAI for embedding:
+    ```
+    semanticCache:
+      datastore:
+        redis:
+          connectionString: redis://172.17.0.1:6379
+      embedding:
+        openai:
+          authToken:
+            secretRef:
+              name: openai-secret
+              namespace: gloo-system
+    ```
+  */
+  SemanticCache semantic_cache = 4;
+
+  // Backup models to use in case of a failure with the primary model
+  // passed in the request. By default each model will be tried 2 times
+  // before moving on to the next model in the list. If all requests fail then 
+  // the final response will be returned to the client.
+  repeated string backup_models = 5;
+
+  /*
+    A list of defaults to be merged with the user input fields.
+    These will NOT override the user input fields unless override is explicitly set to true.
+    Some examples include setting the temperature, max_tokens, etc.
+
+    Example overriding system field for Anthropic:
+    ```
+    # Anthropic doesn't support a system chat type
+    defaults:
+    - field: "system"
+      value: "answer all questions in french"
+    ```
+
+    Example setting the temperature and max_tokens, overriding max_tokens:
+    ```
+    defaults:
+    - field: "temperature"
+      value: 0.5
+    - field: "max_tokens"
+      value: 100
+    ```
+  */
+  repeated FieldDefault defaults = 6;
+}
+
+message FieldDefault {
+  // Field name
+  string field = 1;
+  // Field Value, this can be any valid JSON value
+  google.protobuf.Value value = 2;
+  // Whether or not to override the field if it already exists
+  bool override = 3;
+}
+
+
+message Postgres {
+  // Connection string to the Postgres database
+  string connection_string = 1;
+  // Name of the table to use
+  string collection_name = 2;
+}
+
+
+
+message Embedding {
+  message OpenAI {
+    oneof auth_token_source {
+      SingleAuthToken auth_token = 1;
+      // re-use the token from the backend
+      // google.protobuf.Empty inherit_backend_token = 3;
+    }
+  }
+
+  oneof embedding {
+    OpenAI openai = 1;
+  }
+}
+
+message SemanticCache {
+  message Redis {
+    // Connection string to the Redis database
+    string connection_string = 1;
+  }
+  // Data store from which to cache the request/response pairs
+  message DataStore {
+    oneof datastore {
+      Redis redis = 1;
+    }
+  }
+  // Which data store to use
+  DataStore datastore = 1;
+  // Model to use to get embeddings for prompt
+  Embedding embedding = 2;
+  // Time before data in the cache is considered expired
+  uint32 ttl = 3;
+}
+
+message RAG {
+  message DataStore {
+    oneof datastore {
+      Postgres postgres = 1;
+    }
+  }
+  // Data store from which to fetch the embeddings
+  DataStore datastore = 1;
+  // Model to use to get embeddings for prompt
+  Embedding embedding = 2;
+  // Template to use to embed the returned context
+  string prompt_template = 3;
+}
+
+message RateLimiting {
+  // List of rate_limit configs to apply
+  repeated string rate_limit_configs = 1;
+}
+
+message AIPromptEnrichment {
+  message Message {
+    // Role of the message.
+    // The available roles depend on the backend model being used,
+    // please consult the documentation for more information.
+    string role = 1;
+    // String content of the message
+    string content = 2;
+  }
+  // A list of messages to be prepended to the prompt sent by the client
+  repeated Message prepend = 2;
+  // A list of messages to be appended to the prompt sent by the client
+  repeated Message append = 3;
+
+}
+
+message AIPromptGaurd {
+  message Request {
+    // A list of Regex patterns to match against the prompt.
+    // Each one will be checked against the prompt and if any match
+    // the request will be rejected.
+    repeated string matches = 1;
+    // Custom response message to send back to the client.
+    // If not specified, the following default message will be used:
+    // "The request was rejected due to inappropriate content"
+    string custom_response_message = 2;
+  }
+
+  message Response {
+      enum BuiltIn {
+        // Default REGEX for Social Security Numbers
+        SSN = 0;
+        // Default REGEX for Credit Card Numbers
+        CREDIT_CARD = 1;
+        // Default REGEX for Email Addresses
+        EMAIL = 2;
+        // Default REGEX for Phone Numbers
+        PHONE_NUMBER = 3;
+      }
+      // A list of Regex patterns to match against the response.
+      // All matches will be masked before being sent back to the client.
+      // matches and builtins are additive.
+      repeated string matches = 1;
+      // A list of built-in regexes to mask in the response.
+      // matches and builtins are additive.
+      repeated BuiltIn builtins = 2;
+  }
+  // Guards for the prompt request
+  Request request = 2;
+  // Guards for the LLM response
+  Response response = 3;
+}