From 552cb1f5c589c1d4b7b3ac5ddf1a29540f6ce8dc Mon Sep 17 00:00:00 2001 From: "Jorge E. Gamboa G." Date: Wed, 3 Jan 2024 17:30:50 +0100 Subject: [PATCH] Creating terraform hugging face and scripts --- generic/huggingface/main.tf | 79 +++++++++++++++++++++++ generic/huggingface/scripts/create_hf.sh | 31 +++++++++ generic/huggingface/scripts/delete_hf.sh | 28 ++++++++ generic/huggingface/scripts/read_hf.sh | 27 ++++++++ generic/huggingface/scripts/update_hf.sh | 33 ++++++++++ generic/huggingface/terraform.tf | 14 ++++ generic/huggingface/variables.tf | 82 ++++++++++++++++++++++++ 7 files changed, 294 insertions(+) create mode 100644 generic/huggingface/main.tf create mode 100755 generic/huggingface/scripts/create_hf.sh create mode 100755 generic/huggingface/scripts/delete_hf.sh create mode 100755 generic/huggingface/scripts/read_hf.sh create mode 100755 generic/huggingface/scripts/update_hf.sh create mode 100644 generic/huggingface/terraform.tf create mode 100644 generic/huggingface/variables.tf diff --git a/generic/huggingface/main.tf b/generic/huggingface/main.tf new file mode 100644 index 00000000..e97e8176 --- /dev/null +++ b/generic/huggingface/main.tf @@ -0,0 +1,79 @@ +locals { + + hf_create = jsonencode( + { + "name" : var.hf_name, + "type" : "protected", + "accountId" : null, + "provider" : { + "vendor" : "aws", + "region" : var.hf_region + }, + "compute" : { + "accelerator" : "gpu", + "instanceType" : var.instance_type, + "instanceSize" : var.instance_size, + "scaling" : { + "minReplica" : var.min_replica, + "maxReplica" : var.max_replica + } + }, + "model" : { + "repository" : var.model_repo, + "task" : var.task, + "framework" : var.framework, + "image" : { + "custom" : { + "url" : var.image_url, + "health_route" : "/health", + "env" : var.env_var + } + } + } + }) + base64_encoded_hf_create = base64encode(local.hf_create) + + hf_update = jsonencode( + { + "compute" : { + "accelerator" : "gpu", + "instanceType" : var.instance_type, + "instanceSize" : 
var.instance_size, + "scaling" : { + "minReplica" : var.min_replica, + "maxReplica" : var.max_replica + } + }, + "model" : { + "framework" : var.framework, + "image" : { + "huggingface" : {} + }, + "repository" : var.model_repo, + "task" : var.task, + } + } + ) + base64_encoded_hf_update = base64encode(local.hf_update) +} + + +locals { + program_create = "${path.module}/scripts/create_hf.sh ${var.namespace} ${var.write_token} ${local.base64_encoded_hf_create}" + program_delete = "${path.module}/scripts/delete_hf.sh ${var.namespace} ${var.write_token} ${var.hf_name}" + program_update = "${path.module}/scripts/update_hf.sh ${var.namespace} ${var.write_token} ${var.hf_name} ${local.base64_encoded_hf_update}" + program_read = "${path.module}/scripts/read_hf.sh ${var.namespace} ${var.read_token} ${var.hf_name}" +} + +resource "shell_script" "invoke_hf" { + lifecycle_commands { + create = local.program_create + delete = local.program_delete + update = local.program_update + read = local.program_read + } + + triggers = { + when_value_changed = "${var.namespace} ${var.write_token} ${local.base64_encoded_hf_create} ${local.base64_encoded_hf_update}" + } +} diff --git a/generic/huggingface/scripts/create_hf.sh b/generic/huggingface/scripts/create_hf.sh new file mode 100755 index 00000000..32966fa3 --- /dev/null +++ b/generic/huggingface/scripts/create_hf.sh @@ -0,0 +1,31 @@ +#!/bin/bash +set -o xtrace + +NAMESPACE=$1 +shift +WRITE_TOKEN=$1 +shift +JSON_CONF_64=$1 + +JSON_CONF=$(echo "$JSON_CONF_64" | base64 --decode) + +URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE" + +CURL_RESULT=$(curl $URL \ + --header 'Content-Type: application/json' \ + --header "Authorization: Bearer $WRITE_TOKEN" \ + --write-out %{http_code} \ + --data-raw "$JSON_CONF") + +# Extract JSON error message +JSON_ERROR_MESSAGE=$(echo "$CURL_RESULT" | sed -n 's/{\(.*\)}.*/{\1}/p') + +# Extract HTTP status code +HTTP_STATUS_CODE=$(echo "$CURL_RESULT" | sed 's/.*\([0-9]\{3\}\)$/\1/') + +# 
Check if HTTP status code is different from 202 and exit with an error code +if [ "$HTTP_STATUS_CODE" -ne 202 ]; then + echo "Error Message: $JSON_ERROR_MESSAGE" + exit 1 +fi +exit 0 diff --git a/generic/huggingface/scripts/delete_hf.sh b/generic/huggingface/scripts/delete_hf.sh new file mode 100755 index 00000000..5cf5fdf1 --- /dev/null +++ b/generic/huggingface/scripts/delete_hf.sh @@ -0,0 +1,28 @@ +#!/bin/bash +set -o xtrace + +NAMESPACE=$1 +shift +WRITE_TOKEN=$1 +shift +DEPL_NAME=$1 + + +URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE/$DEPL_NAME" + +CURL_RESULT=$(curl $URL \ + --request DELETE \ + --header "Authorization: Bearer $WRITE_TOKEN" \ + --write-out %{http_code}) + +# Extract JSON error message +JSON_ERROR_MESSAGE=$(echo "$CURL_RESULT" | sed -n 's/{\(.*\)}.*/{\1}/p') + +# Extract HTTP status code +HTTP_STATUS_CODE=$(echo "$CURL_RESULT" | sed 's/.*\([0-9]\{3\}\)$/\1/') + +# Check if HTTP status code is different from 200 and exit with an error code +if [ "$HTTP_STATUS_CODE" -ne 200 ]; then + echo "Error Message: $JSON_ERROR_MESSAGE" + exit 1 +fi diff --git a/generic/huggingface/scripts/read_hf.sh b/generic/huggingface/scripts/read_hf.sh new file mode 100755 index 00000000..c2382806 --- /dev/null +++ b/generic/huggingface/scripts/read_hf.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -o xtrace + +NAMESPACE=$1 +shift +READ_TOKEN=$1 +shift +DEPL_NAME=$1 + + +URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE/$DEPL_NAME" + +CURL_RESULT=$(curl $URL \ + --header "Authorization: Bearer $READ_TOKEN" \ + --write-out %{http_code}) + +# Extract JSON error message
JSON_ERROR_MESSAGE=$(echo "$CURL_RESULT" | sed -n 's/{\(.*\)}.*/{\1}/p') + +# Extract HTTP status code +HTTP_STATUS_CODE=$(echo "$CURL_RESULT" | sed 's/.*\([0-9]\{3\}\)$/\1/') + +# Check if HTTP status code is different from 200 and exit with an error code +if [ "$HTTP_STATUS_CODE" -ne 200 ]; then + echo "Error Message: $JSON_ERROR_MESSAGE" + exit 1 +fi diff --git 
a/generic/huggingface/scripts/update_hf.sh b/generic/huggingface/scripts/update_hf.sh new file mode 100755 index 00000000..eb77f98a --- /dev/null +++ b/generic/huggingface/scripts/update_hf.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -o xtrace + +NAMESPACE=$1 +shift +WRITE_TOKEN=$1 +shift +DEPL_NAME=$1 +shift +JSON_CONF_64=$1 + +JSON_CONF=$(echo "$JSON_CONF_64" | base64 --decode) + + +URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE/$DEPL_NAME" + +CURL_RESULT=$(curl $URL \ + --request PUT \ + --header "Authorization: Bearer $WRITE_TOKEN" \ + --write-out %{http_code} \ + --data-raw "$JSON_CONF") + +# Extract JSON error message +JSON_ERROR_MESSAGE=$(echo "$CURL_RESULT" | sed -n 's/{\(.*\)}.*/{\1}/p') + +# Extract HTTP status code +HTTP_STATUS_CODE=$(echo "$CURL_RESULT" | sed 's/.*\([0-9]\{3\}\)$/\1/') + +# Check if HTTP status code is different from 200 and exit with an error code +if [ "$HTTP_STATUS_CODE" -ne 200 ]; then + echo "Error Message: $JSON_ERROR_MESSAGE" + exit 1 +fi diff --git a/generic/huggingface/terraform.tf b/generic/huggingface/terraform.tf new file mode 100644 index 00000000..897c4c46 --- /dev/null +++ b/generic/huggingface/terraform.tf @@ -0,0 +1,14 @@ +terraform { + required_version = "1.3.7" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "4.50.0" + } + shell = { + source = "scottwinkler/shell" + version = "1.7.10" + } + } +} diff --git a/generic/huggingface/variables.tf b/generic/huggingface/variables.tf new file mode 100644 index 00000000..5ce0f492 --- /dev/null +++ b/generic/huggingface/variables.tf @@ -0,0 +1,82 @@ +variable "hf_name" { + description = " The name for the instance service." + type = string +} + +variable "hf_region" { + description = "Hugging face region where to create the resource." 
+ type = string + default = "eu-west-1" +} + +variable "namespace" { + description = "Name of the hugging face namespace" + type = string +} + +variable "write_token" { + description = "Token for writing into hugging face" + type = string +} + +variable "read_token" { + description = "Token for reading into hugging face" + type = string +} + +variable "instance_type" { + description = "The type of instance where to deploy the model." + type = string +} + +variable "instance_size" { + description = "The size of the instance where to deploy the model." + type = string +} + +variable "min_replica" { + description = "The minimum number of replicas to deploy." + type = number + default = 0 +} + +variable "max_replica" { + description = "The maximum number of replicas to deploy." + type = number + default = 1 +} + +variable "model_repo" { + description = "Model repository" + type = string +} + +variable "task" { + description = "Task for the model" + type = string + default = "text-generation" +} + +variable "framework" { + description = "Framework for the model" + type = string + default = "pytorch" +} + +variable "image_url" { + description = "Custom image URL" + type = string + default = "ghcr.io/huggingface/text-generation-inference:1.1.0" +} + +variable "env_var" { + description = "Environment variables for the model" + type = map(string) + default = { + MAX_BATCH_PREFILL_TOKENS = "8192", + MAX_INPUT_LENGTH = "7168", + MAX_TOTAL_TOKENS = "8192", + MODEL_ID = "/repository", + QUANTIZE = "awq", + } +}