From 552cb1f5c589c1d4b7b3ac5ddf1a29540f6ce8dc Mon Sep 17 00:00:00 2001 From: "Jorge E. Gamboa G." Date: Wed, 3 Jan 2024 17:30:50 +0100 Subject: [PATCH] Creating terraform hugging face and scripts --- generic/huggingface/main.tf | 79 +++++++++++++++++++++++ generic/huggingface/scripts/create_hf.sh | 31 +++++++++ generic/huggingface/scripts/delete_hf.sh | 28 ++++++++ generic/huggingface/scripts/read_hf.sh | 27 ++++++++ generic/huggingface/scripts/update_hf.sh | 33 ++++++++++ generic/huggingface/terraform.tf | 14 ++++ generic/huggingface/variables.tf | 82 ++++++++++++++++++++++++ 7 files changed, 294 insertions(+) create mode 100644 generic/huggingface/main.tf create mode 100755 generic/huggingface/scripts/create_hf.sh create mode 100755 generic/huggingface/scripts/delete_hf.sh create mode 100755 generic/huggingface/scripts/read_hf.sh create mode 100755 generic/huggingface/scripts/update_hf.sh create mode 100644 generic/huggingface/terraform.tf create mode 100644 generic/huggingface/variables.tf diff --git a/generic/huggingface/main.tf b/generic/huggingface/main.tf new file mode 100644 index 00000000..e97e8176 --- /dev/null +++ b/generic/huggingface/main.tf @@ -0,0 +1,79 @@ +locals { + + hf_create = jsonencode( + { + "name" : var.hf_name, + "type" : "protected", + "accountId" : null, + "provider" : { + "vendor" : "aws", + "region" : var.hf_region + }, + "compute" : { + "accelerator" : "gpu", + "instanceType" : var.instance_type, + "instanceSize" : var.instance_size, + "scaling" : { + "minReplica" : var.min_replica, + "maxReplica" : var.max_replica + } + }, + "model" : { + "repository" : var.model_repo, + "task" : var.task, + "framework" : var.framework, + "image" : { + "custom" : { + "url" : var.image_url, + "health_route" : "/health", + "env" : var.env_var + } + } + } + }) + base64_encoded_hf_create = base64encode(local.hf_create) + + hf_update = jsonencode( + { + "compute" : { + "accelerator" : "gpu", + "instanceType" : var.instance_type, + "instanceSize" : 
var.instance_size, + "scaling" : { + "minReplica" : var.min_replica, + "maxReplica" : var.max_replica + } + }, + "model" : { + "framework" : var.framework, + "image" : { + "huggingface" : {} + }, + "repository" : var.model_repo, + "task" : var.task, + } + } + ) + base64_encoded_hf_update = base64encode(local.hf_update) +} + + +locals { + program_create = "${path.module}/scripts/create_hf.sh ${var.namespace} ${var.write_token} ${local.base64_encoded_hf_create}" + program_delete = "${path.module}/scripts/delete_hf.sh ${var.namespace} ${var.write_token} ${var.hf_name}" + program_update = "${path.module}/scripts/update_hf.sh ${var.namespace} ${var.write_token} ${var.hf_name} ${local.base64_encoded_hf_update}" + program_read = "${path.module}/scripts/read_hf.sh ${var.namespace} ${var.read_token} ${var.hf_name}" +} + +resource "shell_script" "invoke_hf" { + lifecycle_commands { + create = local.program_create + delete = local.program_delete + update = local.program_update + read = local.program_read + } + + triggers = { + when_value_changed = "${var.namespace} ${var.write_token} ${local.base64_encoded_hf_create} ${local.base64_encoded_hf_update}" + } +} diff --git a/generic/huggingface/scripts/create_hf.sh b/generic/huggingface/scripts/create_hf.sh new file mode 100755 index 00000000..32966fa3 --- /dev/null +++ b/generic/huggingface/scripts/create_hf.sh @@ -0,0 +1,31 @@ +#!/bin/bash +set -o xtrace + +NAMESPACE=$1 +shift +WRITE_TOKEN=$1 +shift +JSON_CONF_64=$1 + +JSON_CONF=$(echo "$JSON_CONF_64" | base64 --decode) + +URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE" + +CURL_RESULT=$(curl $URL \ + --header 'Content-Type: application/json' \ + --header "Authorization: Bearer $WRITE_TOKEN" \ + --write-out %{http_code} \ + --data-raw "$JSON_CONF") + +# Extract JSON error message +JSON_ERROR_MESSAGE=$(echo "$CURL_RESULT" | sed -n 's/{\(.*\)}.*/{\1}/p') + +# Extract HTTP status code +HTTP_STATUS_CODE=$(echo "$CURL_RESULT" | sed 's/.*\([0-9]\{3\}\)$/\1/') + +# 
Check if HTTP status code is different from 202 and exit with an error code +if [ "$HTTP_STATUS_CODE" -ne 202 ]; then + echo "Error Message: $JSON_ERROR_MESSAGE" + exit 1 +fi +exit 0 diff --git a/generic/huggingface/scripts/delete_hf.sh b/generic/huggingface/scripts/delete_hf.sh new file mode 100755 index 00000000..5cf5fdf1 --- /dev/null +++ b/generic/huggingface/scripts/delete_hf.sh @@ -0,0 +1,28 @@ +#!/bin/bash +set -o xtrace + +NAMESPACE=$1 +shift +WRITE_TOKEN=$1 +shift +DEPL_NAME=$1 + + +URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE/$DEPL_NAME" + +CURL_RESULT=$(curl $URL \ + --request DELETE \ + --header "Authorization: Bearer $WRITE_TOKEN" \ + --write-out %{http_code}) + +# Extract JSON error message +JSON_ERROR_MESSAGE=$(echo "$CURL_RESULT" | sed -n 's/{\(.*\)}.*/{\1}/p') + +# Extract HTTP status code +HTTP_STATUS_CODE=$(echo "$CURL_RESULT" | sed 's/.*\([0-9]\{3\}\)$/\1/') + +# Check if HTTP status code is different from 200 and exit with an error code +if [ "$HTTP_STATUS_CODE" -ne 200 ]; then + echo "Error Message: $JSON_ERROR_MESSAGE" + exit 1 +fi diff --git a/generic/huggingface/scripts/read_hf.sh b/generic/huggingface/scripts/read_hf.sh new file mode 100755 index 00000000..c2382806 --- /dev/null +++ b/generic/huggingface/scripts/read_hf.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -o xtrace + +NAMESPACE=$1 +shift +READ_TOKEN=$1 +shift +DEPL_NAME=$1 + + +URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE/$DEPL_NAME" + +CURL_RESULT=$(curl $URL \ + --header "Authorization: Bearer $READ_TOKEN" \ + --write-out %{http_code}) + +# Extract JSON error message
JSON_ERROR_MESSAGE=$(echo "$CURL_RESULT" | sed -n 's/{\(.*\)}.*/{\1}/p') + +# Extract HTTP status code +HTTP_STATUS_CODE=$(echo "$CURL_RESULT" | sed 's/.*\([0-9]\{3\}\)$/\1/') + +# Check if HTTP status code is different from 200 and exit with an error code +if [ "$HTTP_STATUS_CODE" -ne 200 ]; then + echo "Error Message: $JSON_ERROR_MESSAGE" + exit 1 +fi diff --git 
a/generic/huggingface/scripts/update_hf.sh b/generic/huggingface/scripts/update_hf.sh new file mode 100755 index 00000000..eb77f98a --- /dev/null +++ b/generic/huggingface/scripts/update_hf.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -o xtrace + +NAMESPACE=$1 +shift +WRITE_TOKEN=$1 +shift +DEPL_NAME=$1 +shift +JSON_CONF_64=$1 + +JSON_CONF=$(echo "$JSON_CONF_64" | base64 --decode) + + +URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE/$DEPL_NAME" + +CURL_RESULT=$(curl $URL \ + --request PUT \ + --header "Authorization: Bearer $WRITE_TOKEN" \ + --write-out %{http_code} \ + --data-raw "$JSON_CONF") + +# Extract JSON error message +JSON_ERROR_MESSAGE=$(echo "$CURL_RESULT" | sed -n 's/{\(.*\)}.*/{\1}/p') + +# Extract HTTP status code +HTTP_STATUS_CODE=$(echo "$CURL_RESULT" | sed 's/.*\([0-9]\{3\}\)$/\1/') + +# Check if HTTP status code is different from 200 and exit with an error code +if [ "$HTTP_STATUS_CODE" -ne 200 ]; then + echo "Error Message: $JSON_ERROR_MESSAGE" + exit 1 +fi diff --git a/generic/huggingface/terraform.tf b/generic/huggingface/terraform.tf new file mode 100644 index 00000000..897c4c46 --- /dev/null +++ b/generic/huggingface/terraform.tf @@ -0,0 +1,14 @@ +terraform { + required_version = "1.3.7" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "4.50.0" + } + shell = { + source = "scottwinkler/shell" + version = "1.7.10" + } + } +} diff --git a/generic/huggingface/variables.tf b/generic/huggingface/variables.tf new file mode 100644 index 00000000..5ce0f492 --- /dev/null +++ b/generic/huggingface/variables.tf @@ -0,0 +1,82 @@ +variable "hf_name" { + description = " The name for the instance service." + type = string +} + +variable "hf_region" { + description = "Hugging face region where to create the resource." 
+ type = string + default = "eu-west-1" +} + +variable "namespace" { + description = "Name of the hugging face namespace" + type = string +} + +variable "write_token" { + description = "Token for writing into hugging face" + type = string +} + +variable "read_token" { + description = "Token for reading into hugging face" + type = string +} + +variable "instance_type" { + description = "The type of instance where to deploy the model." + type = string +} + +variable "instance_size" { + description = "The size of the instance where to deploy the model." + type = string +} + +variable "min_replica" { + description = "The minimum number of replicas to deploy." + type = number + default = 0 +} + +variable "max_replica" { + description = "The maximum number of replicas to deploy." + type = number + default = 1 +} + +variable "model_repo" { + description = "Model repository" + type = string +} + +variable "task" { + description = "Task for the model" + type = string + default = "text-generation" +} + +variable "framework" { + description = "Framework for the model" + type = string + default = "pytorch" +} + +variable "image_url" { + description = "Custom image URL" + type = string + default = "ghcr.io/huggingface/text-generation-inference:1.1.0" +} + +variable "env_var" { + description = "Environment variables for the model" + type = map(string) + default = { + MAX_BATCH_PREFILL_TOKENS = "8192", + MAX_INPUT_LENGTH = "7168", + MAX_TOTAL_TOKENS = "8192", + MODEL_ID = "/repository", + QUANTIZE = "awq", + } +}