-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Creating terraform hugging face and scripts
- Loading branch information
1 parent
b32cf74
commit 552cb1f
Showing
7 changed files
with
294 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Request bodies sent to the Hugging Face Inference Endpoints API.
#
# The create and update payloads share the same compute and model sections so
# that an update can never silently drop the custom image or its environment
# variables (the update body previously hard-coded the default
# "huggingface" image while changes to image_url/env_var still triggered it).
locals {
  # Hardware and autoscaling settings, identical for create and update.
  hf_compute = {
    accelerator  = "gpu"
    instanceType = var.instance_type
    instanceSize = var.instance_size
    scaling = {
      minReplica = var.min_replica
      maxReplica = var.max_replica
    }
  }

  # Model source and serving container, identical for create and update.
  hf_model = {
    repository = var.model_repo
    task       = var.task
    framework  = var.framework
    image = {
      custom = {
        url          = var.image_url
        health_route = "/health"
        env          = var.env_var
      }
    }
  }

  # Body of the create request.
  hf_create = jsonencode({
    name      = var.hf_name
    type      = "protected"
    accountId = null
    provider = {
      vendor = "aws"
      region = var.hf_region
    }
    compute = local.hf_compute
    model   = local.hf_model
  })
  # Base64 keeps the JSON intact when it is passed as one command-line argument.
  base64_encoded_hf_create = base64encode(local.hf_create)

  # Body of the update request: only the mutable compute and model sections.
  hf_update = jsonencode({
    compute = local.hf_compute
    model   = local.hf_model
  })
  base64_encoded_hf_update = base64encode(local.hf_update)
}
|
||
|
||
# Command lines handed to the shell provider, one per lifecycle step.
locals {
  scripts_dir = "${path.module}/scripts"

  # Free-form values are wrapped in single quotes (embedded single quotes are
  # escaped as '\'') so that whitespace or shell metacharacters in a namespace,
  # token or endpoint name cannot split or inject into the command line.
  # The base64 payloads are left bare, exactly as before.
  quoted_args = {
    for key, value in {
      namespace   = var.namespace
      write_token = var.write_token
      read_token  = var.read_token
      hf_name     = var.hf_name
    } : key => "'${replace(value, "'", "'\\''")}'"
  }

  # create_hf.sh NAMESPACE WRITE_TOKEN JSON_CONF_BASE64
  program_create = join(" ", [
    "${local.scripts_dir}/create_hf.sh",
    local.quoted_args.namespace,
    local.quoted_args.write_token,
    local.base64_encoded_hf_create,
  ])

  # delete_hf.sh NAMESPACE WRITE_TOKEN DEPL_NAME
  program_delete = join(" ", [
    "${local.scripts_dir}/delete_hf.sh",
    local.quoted_args.namespace,
    local.quoted_args.write_token,
    local.quoted_args.hf_name,
  ])

  # update_hf.sh NAMESPACE WRITE_TOKEN DEPL_NAME JSON_CONF_BASE64
  program_update = join(" ", [
    "${local.scripts_dir}/update_hf.sh",
    local.quoted_args.namespace,
    local.quoted_args.write_token,
    local.quoted_args.hf_name,
    local.base64_encoded_hf_update,
  ])

  # read_hf.sh NAMESPACE READ_TOKEN DEPL_NAME
  program_read = join(" ", [
    "${local.scripts_dir}/read_hf.sh",
    local.quoted_args.namespace,
    local.quoted_args.read_token,
    local.quoted_args.hf_name,
  ])
}
|
||
# Hugging Face endpoint whose create/read/update/delete steps are each
# delegated to one of the program_* command lines.
resource "shell_script" "invoke_hf" {
  # A single space-separated string built from the namespace, the write token
  # and both encoded payloads; it changes whenever any of them changes.
  # NOTE(review): the write token is part of this value and therefore ends up
  # in the resource's stored attributes - confirm that is acceptable.
  triggers = {
    when_value_changed = join(" ", [
      var.namespace,
      var.write_token,
      local.base64_encoded_hf_create,
      local.base64_encoded_hf_update,
    ])
  }

  lifecycle_commands {
    create = local.program_create
    read   = local.program_read
    update = local.program_update
    delete = local.program_delete
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/bin/bash
#
# Create a Hugging Face Inference Endpoint.
#
# Usage: create_hf.sh NAMESPACE WRITE_TOKEN JSON_CONF_BASE64
#   NAMESPACE         namespace segment appended to the endpoint API URL
#   WRITE_TOKEN       bearer token sent in the Authorization header
#   JSON_CONF_BASE64  base64-encoded JSON document used as the request body
#
# Exit status: 0 when the API answers 202, 1 otherwise. On failure the
# response body is written to stderr, prefixed with "Error Message:".

# xtrace is intentionally not enabled: tracing would print the bearer token.
set -uo pipefail

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

(( $# >= 3 )) || die "Usage: ${0##*/} NAMESPACE WRITE_TOKEN JSON_CONF_BASE64"

NAMESPACE=$1
WRITE_TOKEN=$2
JSON_CONF_64=$3

if ! JSON_CONF=$(printf '%s' "$JSON_CONF_64" | base64 --decode); then
  die "Error Message: request body is not valid base64"
fi

URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE"

# --write-out appends the status code on its own final line, so it can be
# separated from the body without guessing where the body ends.
if ! CURL_RESULT=$(curl "$URL" \
  --silent --show-error \
  --header 'Content-Type: application/json' \
  --header "Authorization: Bearer $WRITE_TOKEN" \
  --write-out '\n%{http_code}' \
  --data-raw "$JSON_CONF"); then
  die "Error Message: request to $URL could not be completed"
fi

# Last line is the HTTP status code; everything before it is the body.
HTTP_STATUS_CODE=${CURL_RESULT##*$'\n'}
RESPONSE_BODY=${CURL_RESULT%$'\n'*}

# 202 is the only status treated as success. The comparison is done on
# strings so an unexpected value can never make the test itself error out
# and fall through to the success path.
if [[ "$HTTP_STATUS_CODE" != "202" ]]; then
  die "Error Message: $RESPONSE_BODY"
fi
exit 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/bash
#
# Delete a Hugging Face Inference Endpoint.
#
# Usage: delete_hf.sh NAMESPACE WRITE_TOKEN DEPL_NAME
#   NAMESPACE    namespace segment of the endpoint API URL
#   WRITE_TOKEN  bearer token sent in the Authorization header
#   DEPL_NAME    name of the endpoint to delete
#
# Exit status: 0 when the API answers 200, 1 otherwise. On failure the
# response body is written to stderr, prefixed with "Error Message:".

# xtrace is intentionally not enabled: tracing would print the bearer token.
set -uo pipefail

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

(( $# >= 3 )) || die "Usage: ${0##*/} NAMESPACE WRITE_TOKEN DEPL_NAME"

NAMESPACE=$1
WRITE_TOKEN=$2
DEPL_NAME=$3

URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE/$DEPL_NAME"

# --write-out appends the status code on its own final line, so it can be
# separated from the body without guessing where the body ends.
if ! CURL_RESULT=$(curl "$URL" \
  --silent --show-error \
  --request DELETE \
  --header "Authorization: Bearer $WRITE_TOKEN" \
  --write-out '\n%{http_code}'); then
  die "Error Message: request to $URL could not be completed"
fi

# Last line is the HTTP status code; everything before it is the body.
HTTP_STATUS_CODE=${CURL_RESULT##*$'\n'}
RESPONSE_BODY=${CURL_RESULT%$'\n'*}

# 200 is the only status treated as success; compared as a string so a
# malformed value cannot make the test error out and pass silently.
if [[ "$HTTP_STATUS_CODE" != "200" ]]; then
  die "Error Message: $RESPONSE_BODY"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/bin/bash
#
# Check that a Hugging Face Inference Endpoint can be fetched.
#
# Usage: read_hf.sh NAMESPACE READ_TOKEN DEPL_NAME
#   NAMESPACE   namespace segment of the endpoint API URL
#   READ_TOKEN  bearer token sent in the Authorization header
#   DEPL_NAME   name of the endpoint to fetch
#
# Exit status: 0 when the API answers 200, 1 otherwise. On failure the
# response body is written to stderr, prefixed with "Error Message:".
#
# NOTE(review): nothing is written to stdout on success (same as before).
# Confirm against the shell provider's documentation whether its read
# command is expected to print state as JSON.

# xtrace is intentionally not enabled: tracing would print the bearer token.
set -uo pipefail

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

(( $# >= 3 )) || die "Usage: ${0##*/} NAMESPACE READ_TOKEN DEPL_NAME"

NAMESPACE=$1
READ_TOKEN=$2
DEPL_NAME=$3

URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE/$DEPL_NAME"

# --write-out appends the status code on its own final line, so it can be
# separated from the body without guessing where the body ends.
if ! CURL_RESULT=$(curl "$URL" \
  --silent --show-error \
  --header "Authorization: Bearer $READ_TOKEN" \
  --write-out '\n%{http_code}'); then
  die "Error Message: request to $URL could not be completed"
fi

# Last line is the HTTP status code; everything before it is the body.
HTTP_STATUS_CODE=${CURL_RESULT##*$'\n'}
RESPONSE_BODY=${CURL_RESULT%$'\n'*}

# 200 is the only status treated as success; compared as a string so a
# malformed value cannot make the test error out and pass silently.
if [[ "$HTTP_STATUS_CODE" != "200" ]]; then
  die "Error Message: $RESPONSE_BODY"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#!/bin/bash
#
# Update an existing Hugging Face Inference Endpoint.
#
# Usage: update_hf.sh NAMESPACE WRITE_TOKEN DEPL_NAME JSON_CONF_BASE64
#   NAMESPACE         namespace segment of the endpoint API URL
#   WRITE_TOKEN       bearer token sent in the Authorization header
#   DEPL_NAME         name of the endpoint to update
#   JSON_CONF_BASE64  base64-encoded JSON document used as the request body
#
# Exit status: 0 when the API answers 200, 1 otherwise. On failure the
# response body is written to stderr, prefixed with "Error Message:".

# xtrace is intentionally not enabled: tracing would print the bearer token.
set -uo pipefail

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

(( $# >= 4 )) \
  || die "Usage: ${0##*/} NAMESPACE WRITE_TOKEN DEPL_NAME JSON_CONF_BASE64"

NAMESPACE=$1
WRITE_TOKEN=$2
DEPL_NAME=$3
JSON_CONF_64=$4

if ! JSON_CONF=$(printf '%s' "$JSON_CONF_64" | base64 --decode); then
  die "Error Message: request body is not valid base64"
fi

URL="https://api.endpoints.huggingface.cloud/v2/endpoint/$NAMESPACE/$DEPL_NAME"

# Content-Type is set explicitly: with --data-raw curl would otherwise label
# the JSON body as application/x-www-form-urlencoded.
# --write-out appends the status code on its own final line, so it can be
# separated from the body without guessing where the body ends.
if ! CURL_RESULT=$(curl "$URL" \
  --silent --show-error \
  --request PUT \
  --header 'Content-Type: application/json' \
  --header "Authorization: Bearer $WRITE_TOKEN" \
  --write-out '\n%{http_code}' \
  --data-raw "$JSON_CONF"); then
  die "Error Message: request to $URL could not be completed"
fi

# Last line is the HTTP status code; everything before it is the body.
HTTP_STATUS_CODE=${CURL_RESULT##*$'\n'}
RESPONSE_BODY=${CURL_RESULT%$'\n'*}

# 200 is the only status treated as success; compared as a string so a
# malformed value cannot make the test error out and pass silently.
if [[ "$HTTP_STATUS_CODE" != "200" ]]; then
  die "Error Message: $RESPONSE_BODY"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Tool and provider version requirements.
#
# Bounded ranges are used instead of exact pins so that patch releases are
# accepted; every version that satisfied the previous exact pins still does.
terraform {
  required_version = ">= 1.3.7, < 2.0.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.50" # >= 4.50.0, < 5.0.0
    }
    shell = {
      source  = "scottwinkler/shell"
      version = "~> 1.7.10" # >= 1.7.10, < 1.8.0
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# Endpoint name. It is used as the "name" field of the create payload and is
# also placed in the API URL path and on the script command lines, so it is
# restricted to characters that are safe in both places.
variable "hf_name" {
  description = "The name for the instance service."
  type        = string

  validation {
    condition     = can(regex("^[A-Za-z0-9][A-Za-z0-9._-]*$", var.hf_name))
    error_message = "The hf_name value must be non-empty and contain only letters, digits, dots, underscores and hyphens."
  }
}
|
||
# Region placed in the "provider.region" field of the create payload.
variable "hf_region" {
  type        = string
  default     = "eu-west-1"
  description = "Hugging face region where to create the resource."
}

# Namespace segment of the endpoint API URL; passed to every script.
variable "namespace" {
  type        = string
  description = "Name of the hugging face namespace"
}
|
||
# Bearer token passed to the create, update and delete scripts.
# Marked sensitive so its value is redacted from plan and apply output.
variable "write_token" {
  description = "Token for writing into hugging face"
  type        = string
  sensitive   = true
}

# Bearer token passed to the read script.
# Marked sensitive so its value is redacted from plan and apply output.
variable "read_token" {
  description = "Token for reading from hugging face"
  type        = string
  sensitive   = true
}
|
||
# Sent as "compute.instanceType" in the create and update payloads.
variable "instance_type" {
  type        = string
  description = "The type of instance where to deploy the model."
}

# Sent as "compute.instanceSize" in the create and update payloads.
variable "instance_size" {
  type        = string
  description = "The size of the instance where to deploy the model."
}
|
||
# Lower bound of the autoscaling range ("compute.scaling.minReplica").
variable "min_replica" {
  description = "The minimum number of replicas to deploy."
  type        = number
  default     = 0

  # Replica counts are whole, non-negative quantities; reject anything else
  # at plan time instead of sending it to the API.
  validation {
    condition     = var.min_replica >= 0 && floor(var.min_replica) == var.min_replica
    error_message = "The min_replica value must be a non-negative whole number."
  }
}

# Upper bound of the autoscaling range ("compute.scaling.maxReplica").
variable "max_replica" {
  description = "The maximum number of replicas to deploy."
  type        = number
  default     = 1

  # Replica counts are whole, non-negative quantities; reject anything else
  # at plan time instead of sending it to the API.
  validation {
    condition     = var.max_replica >= 0 && floor(var.max_replica) == var.max_replica
    error_message = "The max_replica value must be a non-negative whole number."
  }
}
|
||
# Sent as "model.repository" in the create and update payloads.
variable "model_repo" {
  type        = string
  description = "Model repository"
}

# Sent as "model.task" in the create and update payloads.
variable "task" {
  type        = string
  default     = "text-generation"
  description = "Task for the model"
}

# Sent as "model.framework" in the create and update payloads.
variable "framework" {
  type        = string
  default     = "pytorch"
  description = "Framework for the model"
}
|
||
# Container image placed in "model.image.custom.url" of the create payload.
variable "image_url" {
  type        = string
  default     = "ghcr.io/huggingface/text-generation-inference:1.1.0"
  description = "Custom image URL"
}

# Key/value pairs placed in "model.image.custom.env" of the create payload.
variable "env_var" {
  type        = map(string)
  description = "Environment variables for the model"

  default = {
    MAX_BATCH_PREFILL_TOKENS = "8192"
    MAX_INPUT_LENGTH         = "7168"
    MAX_TOTAL_TOKENS         = "8192"
    MODEL_ID                 = "/repository"
    QUANTIZE                 = "awq"
  }
}