scaleapi · ewkoch · Oct 23, 2024 · Nov 1, 2024 · ewkoch · Oct 23, 2024
diff --git a/charts/model-engine/templates/_helpers.tpl b/charts/model-engine/templates/_helpers.tpl
@@ -341,9 +341,12 @@ env:
     value: "true"
   - name: LAUNCH_SERVICE_TEMPLATE_FOLDER
     value: "/workspace/model-engine/model_engine_server/infra/gateways/resources/templates"
-  {{- if .Values.redis.auth}}
+  {{- if .Values.secrets.kubernetesRedisSecretName }}
   - name: REDIS_AUTH_TOKEN
-    value: {{ .Values.redis.auth }}
+    valueFrom:
+      secretKeyRef:
+        name: {{ .Values.secrets.kubernetesRedisSecretName }}
+        key: auth_token
   {{- end }}
   {{- if .Values.azure}}
   - name: AZURE_IDENTITY_NAME

diff --git a/charts/model-engine/templates/aws_config_map.yaml b/charts/model-engine/templates/aws_config_map.yaml
@@ -20,6 +20,9 @@ data:
     [profile {{ $profileName }}]
     role_arn = {{ index $annotations "eks.amazonaws.com/role-arn" }}
     web_identity_token_file = /var/run/secrets/eks.amazonaws.com/serviceaccount/token
+    [profile {{ $.Values.serviceAccount.sqsProfileName }}]
+    role_arn = {{ index $annotations "eks.amazonaws.com/role-arn" }}
+    web_identity_token_file = /var/run/secrets/eks.amazonaws.com/serviceaccount/token
 ---
 {{- end }}
 {{- end }}

diff --git a/charts/model-engine/templates/inference_framework_config.yaml b/charts/model-engine/templates/inference_framework_config.yaml
@@ -2,17 +2,18 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
   name: {{ include "modelEngine.fullname" . }}-inference-framework-latest-config
+  namespace: {{ .Release.Namespace }}
   labels:
     product: common
     team: infra
   annotations:
-    "helm.sh/hook": pre-install
+    "helm.sh/hook": pre-install,pre-upgrade
     "helm.sh/hook-weight": "-2"
 data:
   deepspeed: "latest"
   text_generation_inference: "latest"
-  vllm: "latest"
-  vllm_batch: "latest"
-  vllm_batch_v2: "latest"
+  vllm: "{{ .Values.vLLM.primaryTag }}"
+  vllm_batch:  "{{ .Values.vLLM.batchTag }}"
+  vllm_batch_v2:  "{{ .Values.vLLM.batchV2Tag }}"
   lightllm: "latest"
   tensorrt_llm: "latest"
diff --git a/charts/model-engine/templates/istio-virtualservice.yaml b/charts/model-engine/templates/istio-virtualservice.yaml
@@ -1,24 +1,20 @@
-{{- if .Values.virtualservice.enabled -}}
+{{- if .values.virtualService.enabled -}}
 {{- $fullName := include "modelEngine.fullname" . -}}
 apiVersion: networking.istio.io/v1alpha3
 kind: VirtualService
 metadata:
   name: {{ $fullName }}
   labels:
     {{- include "modelEngine.labels" . | nindent 4 }}
-  {{- with .Values.virtualservice.annotations }}
+  {{- with .values.virtualService.annotations }}
   annotations:
     {{- toYaml . | nindent 4 }}
   {{- end }}
 spec:
   hosts:
-  {{- range .Values.virtualservice.hostDomains }}
-    - "{{ $fullName }}.{{ . }}"
-  {{- end }}
+    - model-engine.{{ $.Values.global.networking.internalDomain }}
   gateways:
-  {{- range .Values.virtualservice.gateways }}
-    - {{ . | quote }}
-  {{- end }}
+    - {{ $.Values.global.networking.internalGateway }}
   http:
     - route:
         - destination:

diff --git a/charts/model-engine/templates/service_account_inference.yaml b/charts/model-engine/templates/service_account_inference.yaml
@@ -1,4 +1,4 @@
-{{- if and (.Values.serviceTemplate) (.Values.serviceTemplate.createServiceAccount) (.Values.serviceTemplate.serviceAccountAnnotations) (.Values.serviceTemplate.serviceAccountName) (.Values.config.values.launch.endpoint_namespace)}}
+{{- if and (.Values.serviceTemplate) (.Values.serviceTemplate.createInferenceServiceAccount) (.Values.serviceTemplate.serviceAccountAnnotations) (.Values.serviceTemplate.serviceAccountName) (.Values.config.values.launch.endpoint_namespace)}}
 {{- $annotations := .Values.serviceTemplate.serviceAccountAnnotations }}
 {{- $inferenceServiceAccountName := .Values.serviceTemplate.serviceAccountName }}
 {{- $inferenceServiceAccountNamespace := .Values.config.values.launch.endpoint_namespace }}
@@ -22,4 +22,4 @@ imagePullSecrets:
   - name: egp-ecr-regcred
 {{- end }}
 ---
-{{- end }}
+{{- end }}
diff --git a/charts/model-engine/templates/service_config_map.yaml b/charts/model-engine/templates/service_config_map.yaml
@@ -3,6 +3,7 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
   name: {{ include "modelEngine.fullname" . }}-service-config
+  namespace: {{ .Release.Namespace }}
   labels:
     {{- include "modelEngine.labels" . | nindent 4 }}
   annotations:
@@ -11,46 +12,110 @@ metadata:
 data:
   launch_service_config: |-
     dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}
+
+    # Config to know where model-engine is running
     gateway_namespace: {{ .Release.Namespace | quote }}
-    {{- with .Values.config.values.launch }}
-    {{- range $key, $value := . }}
-    {{ $key }}: {{ $value | quote }}
-    {{- end }}
-    {{- end }}
-  infra_service_config: |-
-    env: {{ .Values.context | quote }}
-    {{- with .Values.config.values.infra }}
-    {{- range $key, $value := . }}
-    {{ $key }}: {{ $value | quote }}
-    {{- end }}
-    {{- end }}
 
----
+    # Config for scale-hosted Hosted Model Inference in the prod cluster, plus a bunch of other config-ish notes
+    # NOTE: If you add/change values inside this file that need to apply to all clusters, please make changes in
+    # all service_config_{env}.yaml files as well.
 
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ include "modelEngine.fullname" . }}-service-config
-  namespace: {{ .Values.config.values.launch.endpoint_namespace }}
-  labels:
-    {{- include "modelEngine.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": pre-install,pre-upgrade
-    "helm.sh/hook-weight": "-2"
-data:
-  launch_service_config: |-
-    dd_trace_enabled: {{ .Values.dd_trace_enabled | default false | quote }}
-    gateway_namespace: {{ .Release.Namespace | quote }}
-    {{- with .Values.config.values.launch }}
-    {{- range $key, $value := . }}
-    {{ $key }}: {{ $value | quote }}
-    {{- end }}
-    {{- end }}
+    # Config for scale-hosted Hosted Model Inference in the prod cluster, see `service_config` for more details
+    model_primitive_host: model-server.{{ .Release.Namespace }}.svc.cluster.local
+
+    # # Endpoint config
+    # K8s namespace the endpoints will be created in
+    endpoint_namespace: {{ .Release.Namespace | quote }}
+
+    # Asynchronous endpoints
+    sqs_profile: {{ $.Values.serviceAccount.sqsProfileName }}
+    sqs_queue_policy_template: |-
+      {
+          "Version": "2012-10-17",
+          "Id": "__default_policy_ID",
+          "Statement": [
+          {
+              "Sid": "__owner_statement",
+              "Effect": "Allow",
+              "Principal": {
+              "AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:root"
+              },
+              "Action": "sqs:*",
+              "Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
+          },
+          {
+              "Effect": "Allow",
+              "Principal": {
+              "AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/{{ $.Values.serviceAccount.sqsProfileName }}"
+              },
+              "Action": "sqs:*",
+              "Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
+          },
+          {
+              "Effect": "Allow",
+              "Principal": {
+              "AWS": "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/ml_hosted_model_inference"
+              },
+              "Action": "sqs:*",
+              "Resource": "arn:{{ .Values.aws.partition }}:sqs:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:${queue_name}"
+          }
+          ]
+      }
+
+    sqs_queue_tag_template: |-
+      {
+          "{{ .Values.tagging.organization }}/product": "{{ .Values.tagging.productTag }}",
+          "{{ .Values.tagging.organization }}/team": "${team}",
+          "{{ .Values.tagging.organization }}/contact": "{{ .Values.tagging.contactEmail }}",
+          "{{ .Values.tagging.organization }}/customer": "AllCustomers",
+          "{{ .Values.tagging.organization }}/financialOwner": "{{ .Values.tagging.contactEmail }}",
+          "Launch-Endpoint-Id": "${endpoint_id}",
+          "Launch-Endpoint-Name": "${endpoint_name}",
+          "Launch-Endpoint-Created-By": "${endpoint_created_by}"
+      }
+
+    # Billing
+    billing_queue_arn: arn:aws:events:{{ .Values.aws.region }}:{{ .Values.aws.accountId }}:event-bus/money
+
+    # The below redis URL would not work if we needed auth, which we do, so we have to pull cache_url from the cache_redis_aws_secret_name
+    cache_redis_aws_secret_name: "{{ .Values.secrets.redisAwsSecretName }}"
+
+    cloud_file_llm_fine_tune_repository: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/llm-ft-job-repository/prod"
+
+    dd_trace_enabled: true
+    istio_enabled: true
+    sensitive_log_mode: true
+    tgi_repository: "text-generation-inference"
+    vllm_repository: "vllm"
+    lightllm_repository: "lightllm"
+    tensorrt_llm_repository: "tensorrt-llm"
+    batch_inference_vllm_repository: "llm-engine/batch-infer-vllm"
+    user_inference_base_repository: "launch/inference"
+    user_inference_pytorch_repository: "hosted-model-inference/async-pytorch"
+    user_inference_tensorflow_repository: "hosted-model-inference/async-tensorflow-cpu"
+    docker_image_layer_cache_repository: "kaniko-cache"
+
+    # S3 access
+    hf_user_fine_tuned_weights_prefix: "s3://{{ .Values.aws.s3Bucket }}/hosted-model-inference/fine_tuned_weights"
   infra_service_config: |-
     env: {{ .Values.context | quote }}
-    {{- with .Values.config.values.infra }}
-    {{- range $key, $value := . }}
-    {{ $key }}: {{ $value | quote }}
-    {{- end }}
-    {{- end }}
+    cloud_provider: "aws"
+    env: "prod"
+    k8s_cluster_name: "{{ .Values.clusterName }}"
+    dns_host_domain: "model-engine.{{ $.Values.global.networking.internalDomain }}"
+    default_region: "{{ .Values.aws.region }}"
+    ml_account_id: "{{ .Values.aws.accountId }}"
+    docker_repo_prefix: "{{ .Values.aws.accountId }}.dkr.ecr.{{ .Values.aws.region }}.amazonaws.com"
+    redis_host: "{{ .Values.redis.hostname }}"
+    s3_bucket: "{{ .Values.aws.s3Bucket }}"
+    profile_ml_worker: "ml-worker"
+    profile_ml_inference_worker: "ml-worker"
+    identity_service_url: "{{ .Values.identityServiceUrl }}"
+    firehose_role_arn: "arn:{{ .Values.aws.partition }}:iam::{{ .Values.aws.accountId }}:role/firehose-stream-logging-role"
+    firehose_stream_name: "{{ .Values.firehoseStreamName }}"
+    db_engine_pool_size: 20
+    db_engine_max_overflow: 10
+    db_engine_echo: false
+    db_engine_echo_pool: true
+    db_engine_disconnect_strategy: "pessimistic"
 {{- end }}
diff --git a/charts/model-engine/templates/service_template_config_map.yaml b/charts/model-engine/templates/service_template_config_map.yaml
@@ -486,7 +486,7 @@ data:
           protocol: TCP
           name: http
           ${NODE_PORT_DICT}
-  {{- if .Values.virtualservice.enabled }}
+  {{- if .values.virtualService.enabled }}
   virtual-service.yaml: |-
     apiVersion: networking.istio.io/v1alpha3
     kind: VirtualService
@@ -522,6 +522,7 @@ data:
         loadBalancer:
           simple: LEAST_REQUEST
   {{- end }}
+  {{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.autoscaling.vertical.enabled) }}
   vertical-pod-autoscaler.yaml: |-
     apiVersion: "autoscaling.k8s.io/v1"
     kind: VerticalPodAutoscaler
@@ -548,6 +549,7 @@ data:
               cpu: ${CPUS}
               memory: ${MEMORY}
             controlledResources: ["cpu", "memory"]
+  {{- end }}
   pod-disruption-budget.yaml: |-
     apiVersion: policy/v1
     kind: PodDisruptionBudget