From e2c792040c42476655f0019f14d637bda2bd8792 Mon Sep 17 00:00:00 2001 From: bcirh <72753984+bcirh@users.noreply.github.com> Date: Fri, 20 Dec 2024 16:33:11 +0100 Subject: [PATCH] Nomad job example (#4469) * Add nomad job microservices mode example * Remove unused tag from query-frontend * Update nomad example README.md * Add link to nomad mem oversub in README.md file * Remove tags from tempo.hcl. Update README.md --- example/nomad/tempo-distributed/README.md | 44 ++ example/nomad/tempo-distributed/config.yml | 81 ++++ example/nomad/tempo-distributed/tempo.hcl | 493 +++++++++++++++++++++ 3 files changed, 618 insertions(+) create mode 100644 example/nomad/tempo-distributed/README.md create mode 100644 example/nomad/tempo-distributed/config.yml create mode 100644 example/nomad/tempo-distributed/tempo.hcl diff --git a/example/nomad/tempo-distributed/README.md b/example/nomad/tempo-distributed/README.md new file mode 100644 index 00000000000..adb5fc77ab3 --- /dev/null +++ b/example/nomad/tempo-distributed/README.md @@ -0,0 +1,44 @@ +# Microservices mode + +This Nomad job will deploy Tempo in +[microservices mode](https://grafana.com/docs/tempo/latest/setup/deployment/#microservices-mode) using an S3 backend. + +## Usage + +### Prerequisites +- S3 compatible storage +- [Nomad memory oversubscription](https://developer.hashicorp.com/nomad/tutorials/advanced-scheduling/memory-oversubscription). If memory oversubscription is not enabled, remove `memory_max` from tempo.hcl + +Have a look at the job file and Tempo configuration file and change them to suit your environment. (e.g. in `config.yml` change s3 endpoint to your s3 compatible storage, prometheus endpoint, etc...) 
+ +Variables +-------------- + +| Name | Value | Description | +|---|---|---| +| version | Default = "2.3.1" | Tempo version | +| s3_access_key_id | Default = "any" | S3 Access Key ID | +| s3_secret_access_key | Default = "any" | S3 Secret Access Key | + +### Run job + +From the directory containing the job file, run: + +```shell +nomad job run tempo.hcl +``` + +To deploy a different version change `variable.version` default value or +specify from command line: + +```shell +nomad job run -var="version=2.6.1" tempo.hcl +``` + +### Scale Tempo + +Nomad CLI (the last argument is the desired number of instances for the group) + +```shell +nomad job scale tempo distributor 3 +``` diff --git a/example/nomad/tempo-distributed/config.yml b/example/nomad/tempo-distributed/config.yml new file mode 100644 index 00000000000..5f2c643134e --- /dev/null +++ b/example/nomad/tempo-distributed/config.yml @@ -0,0 +1,81 @@ +server: + log_level: info + http_listen_port: {{ env "NOMAD_PORT_http" }} + grpc_listen_port: {{ env "NOMAD_PORT_grpc" }} + +distributor: + ring: + kvstore: + store: consul + prefix: tempo/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + + receivers: # only the OTLP receiver is enabled here, over both HTTP and gRPC. + otlp: + protocols: + http: + grpc: + +ingester: + max_block_duration: 5m + lifecycler: + ring: + kvstore: + store: consul + prefix: tempo/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + replication_factor: 3 + +compactor: + ring: + kvstore: + store: consul + prefix: tempo/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + + compaction: + block_retention: 336h #Duration to keep blocks. Default is 14 days (336h). 
+
+querier:
+  frontend_worker:
+    frontend_address: tempo-query-frontend-grpc.service.consul:9095
+
+metrics_generator:
+  processor:
+    service_graphs:
+      max_items: 10000
+  ring:
+    kvstore:
+      store: consul
+      prefix: tempo/
+      consul:
+        host: {{ env "attr.unique.network.ip-address" }}:8500
+  storage:
+    path: {{ env "NOMAD_ALLOC_DIR" }}/tempo/wal
+    remote_write:
+      - url: http://prometheus.service.consul/api/v1/write
+        send_exemplars: true
+
+storage:
+  trace:
+    backend: s3
+    wal:
+      path: {{ env "NOMAD_ALLOC_DIR" }}/tempo/wal
+    local:
+      path: {{ env "NOMAD_ALLOC_DIR" }}/tempo/blocks
+    s3:
+      bucket: tempo # how to store data in s3
+      endpoint: seaweedfs-s3.service.consul
+      insecure: true
+      access_key: ${S3_ACCESS_KEY_ID}
+      secret_key: ${S3_SECRET_ACCESS_KEY}
+
+overrides:
+  defaults:
+    metrics_generator:
+      processors:
+        - service-graphs
+        - span-metrics
diff --git a/example/nomad/tempo-distributed/tempo.hcl b/example/nomad/tempo-distributed/tempo.hcl
new file mode 100644
index 00000000000..13ca420758b
--- /dev/null
+++ b/example/nomad/tempo-distributed/tempo.hcl
@@ -0,0 +1,493 @@
+variable "version" {
+  type        = string
+  description = "Tempo version"
+  default     = "2.3.1" # image tag used by every grafana/tempo task in this job
+}
+
+variable "s3_access_key_id" {
+  type        = string
+  description = "S3 Access Key ID"
+  default     = "any" # placeholder value; override it for a real S3 backend
+}
+
+variable "s3_secret_access_key" {
+  type        = string
+  description = "S3 Secret Access Key"
+  default     = "any" # placeholder value; override it for a real S3 backend
+}
+
+job "tempo" { # Tempo in microservices mode: one group per component, selected with -target=<component>
+  datacenters = ["*"]
+
+  group "metrics-generator" { # the other groups below follow the same layout as this one
+    count = 1
+
+    network {
+      port "http" {} # dynamic port; config.yml reads it through NOMAD_PORT_http
+      port "grpc" {} # dynamic port; config.yml reads it through NOMAD_PORT_grpc
+    }
+
+    service {
+      name = "tempo-metrics-generator"
+      port = "http"
+      tags = []
+      check {
+        name     = "metrics-generator"
+        port     = "http"
+        type     = "http"
+        path     = "/ready"
+        interval = "20s"
+        timeout  = "1s"
+      }
+    }
+
+    service {
+      name = "tempo-metrics-generator-grpc"
+      port = "grpc"
+      tags = []
+      check {
+        port            = "grpc"
+        type            = "grpc"
+        interval        = "20s"
+        timeout         = "1s"
+        grpc_use_tls    = false # the gRPC health check is made without TLS
+        tls_skip_verify = true
+      }
+    }
+
+    task "metrics-generator" {
+      driver       = "docker"
+      user         = "nobody"
+      kill_timeout = "90s"
+
+      config {
+        image = "grafana/tempo:${var.version}"
+        ports = [
+          "http",
+          "grpc",
+        ]
+
+        args = [
+          "-target=metrics-generator",
+          "-config.file=/local/config.yml",
+          "-config.expand-env=true", # lets config.yml reference ${S3_ACCESS_KEY_ID} / ${S3_SECRET_ACCESS_KEY}
+        ]
+      }
+
+      template {
+        data        = file("config.yml") # the same config.yml is rendered for every component
+        destination = "local/config.yml"
+      }
+
+      template {
+        data = <<-EOH
+        S3_ACCESS_KEY_ID=${var.s3_access_key_id}
+        S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key}
+        EOH
+
+        destination = "secrets/s3.env"
+        env         = true # export the rendered KEY=value pairs into the task environment
+      }
+
+      resources {
+        cpu        = 200
+        memory     = 128
+        memory_max = 1024 # needs Nomad memory oversubscription; the README says to remove it otherwise
+      }
+    }
+  }
+
+  group "query-frontend" {
+    count = 1
+
+    network {
+      port "http" {}
+      port "grpc" { static = 9095 } # fixed port: config.yml points queriers at tempo-query-frontend-grpc.service.consul:9095
+    }
+
+    service {
+      name = "tempo-query-frontend"
+      port = "http"
+      tags = []
+    }
+
+    service {
+      name = "tempo-query-frontend-grpc"
+      port = "grpc"
+      tags = []
+      check {
+        port            = "grpc"
+        type            = "grpc"
+        interval        = "20s"
+        timeout         = "1s"
+        grpc_use_tls    = false
+        tls_skip_verify = true
+      }
+    }
+
+    task "query-frontend" {
+      driver       = "docker"
+      user         = "nobody"
+      kill_timeout = "90s"
+
+      config {
+        image = "grafana/tempo:${var.version}"
+        ports = [
+          "http",
+          "grpc",
+        ]
+
+        args = [
+          "-target=query-frontend",
+          "-config.file=/local/config.yml",
+          "-config.expand-env=true",
+        ]
+      }
+
+      template {
+        data        = file("config.yml")
+        destination = "local/config.yml"
+      }
+
+      template {
+        data = <<-EOH
+        S3_ACCESS_KEY_ID=${var.s3_access_key_id}
+        S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key}
+        EOH
+
+        destination = "secrets/s3.env"
+        env         = true
+      }
+
+      resources {
+        cpu        = 200
+        memory     = 128
+        memory_max = 1024
+      }
+    }
+  }
+
+  group "ingester" {
+    count = 3 # same value as ingester replication_factor in config.yml
+
+    network {
+      port "http" {}
+      port "grpc" {}
+    }
+
+    service {
+      name = "tempo-ingester"
+      port = "http"
+      tags = []
+      check {
+        name     = "Tempo ingester"
+        port     = "http"
+        type     = "http"
+        path     = "/ready"
+        interval = "20s"
+        timeout  = "1s"
+      }
+    }
+
+    service {
+      name = "tempo-ingester-grpc"
+      port = "grpc"
+      tags = []
+      check {
+        port            = "grpc"
+        type            = "grpc"
+        interval        = "20s"
+        timeout         = "1s"
+        grpc_use_tls    = false
+        tls_skip_verify = true
+      }
+    }
+
+    task "ingester" {
+      driver       = "docker"
+      user         = "nobody"
+      kill_timeout = "90s"
+
+      config {
+        image = "grafana/tempo:${var.version}"
+        ports = [
+          "http",
+          "grpc",
+        ]
+
+        args = [
+          "-target=ingester",
+          "-config.file=/local/config.yml",
+          "-config.expand-env=true",
+        ]
+        network_mode = "host" # NOTE(review): only the ingester uses host networking; confirm this is intentional
+      }
+
+      template {
+        data        = file("config.yml")
+        destination = "local/config.yml"
+      }
+
+      template {
+        data = <<-EOH
+        S3_ACCESS_KEY_ID=${var.s3_access_key_id}
+        S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key}
+        EOH
+
+        destination = "secrets/s3.env"
+        env         = true
+      }
+
+      resources {
+        cpu        = 300
+        memory     = 128
+        memory_max = 2048
+      }
+    }
+  }
+
+  group "compactor" {
+    count = 1
+
+    ephemeral_disk {
+      size   = 1000
+      sticky = true
+    }
+
+    network {
+      port "http" {}
+      port "grpc" {}
+    }
+
+    service {
+      name = "tempo-compactor"
+      port = "http"
+      tags = []
+      check {
+        name     = "Tempo compactor"
+        port     = "http"
+        type     = "http"
+        path     = "/ready"
+        interval = "20s"
+        timeout  = "1s"
+      }
+    }
+
+    service {
+      name = "tempo-compactor-grpc"
+      port = "grpc"
+      tags = []
+      check {
+        port            = "grpc"
+        type            = "grpc"
+        interval        = "20s"
+        timeout         = "1s"
+        grpc_use_tls    = false
+        tls_skip_verify = true
+      }
+    }
+
+    task "compactor" {
+      driver       = "docker"
+      user         = "nobody"
+      kill_timeout = "90s"
+
+      config {
+        image = "grafana/tempo:${var.version}"
+        ports = [
+          "http",
+          "grpc",
+        ]
+
+        args = [
+          "-target=compactor",
+          "-config.file=/local/config.yml",
+          "-config.expand-env=true",
+        ]
+      }
+
+      template {
+        data        = file("config.yml")
+        destination = "local/config.yml"
+      }
+
+      template {
+        data = <<-EOH
+        S3_ACCESS_KEY_ID=${var.s3_access_key_id}
+        S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key}
+        EOH
+
+        destination = "secrets/s3.env"
+        env         = true
+      }
+
+      resources {
+        cpu        = 3000 # NOTE(review): ten times the other groups' CPU; confirm this is intended
+        memory     = 256
+        memory_max = 1024
+      }
+    }
+  }
+  group "distributor" {
+    count = 1
+
+    network {
+      port "http" {}
+      port "grpc" {}
+      port "otlp" { to = 4317 } # dynamic host port forwarded to container port 4317 for OTLP ingest
+    }
+
+    service {
+      name = "tempo-distributor"
+      port = "http"
+      tags = []
+      check {
+        name     = "Tempo distributor"
+        port     = "http"
+        type     = "http"
+        path     = "/ready"
+        interval = "20s"
+        timeout  = "1s"
+      }
+    }
+
+    service {
+      name = "tempo-distributor-otlp" # service that trace senders resolve to reach the OTLP port
+      port = "otlp"
+      tags = []
+    }
+
+    service {
+      name = "tempo-distributor-grpc"
+      port = "grpc"
+      tags = []
+      check {
+        port            = "grpc"
+        type            = "grpc"
+        interval        = "20s"
+        timeout         = "1s"
+        grpc_use_tls    = false
+        tls_skip_verify = true
+      }
+    }
+
+    task "distributor" {
+      driver       = "docker"
+      user         = "nobody"
+      kill_timeout = "90s"
+
+      config {
+        image = "grafana/tempo:${var.version}"
+        ports = [
+          "http",
+          "grpc",
+          "otlp",
+        ]
+
+        args = [
+          "-target=distributor",
+          "-config.file=/local/config.yml",
+          "-config.expand-env=true",
+        ]
+      }
+
+      template {
+        data        = file("config.yml")
+        destination = "local/config.yml"
+      }
+
+      template {
+        data = <<-EOH
+        S3_ACCESS_KEY_ID=${var.s3_access_key_id}
+        S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key}
+        EOH
+
+        destination = "secrets/s3.env"
+        env         = true
+      }
+
+      resources {
+        cpu        = 200
+        memory     = 128
+        memory_max = 1024
+      }
+    }
+  }
+  group "querier" {
+    count = 1
+
+    network {
+      port "http" {}
+      port "grpc" {}
+    }
+
+    service {
+      name = "tempo-querier"
+      port = "http"
+      tags = []
+      check {
+        name     = "Tempo querier"
+        port     = "http"
+        type     = "http"
+        path     = "/ready"
+        interval = "50s" # NOTE(review): every other check in this job polls every 20s; confirm 50s is intended
+        timeout  = "1s"
+      }
+    }
+
+    service {
+      name = "tempo-querier-grpc"
+      port = "grpc"
+      tags = []
+      check {
+        port            = "grpc"
+        type            = "grpc"
+        interval        = "20s"
+        timeout         = "1s"
+        grpc_use_tls    = false
+        tls_skip_verify = true
+      }
+    }
+
+    task "querier" {
+      driver       = "docker"
+      user         = "nobody"
+      kill_timeout = "90s"
+
+      config {
+        image = "grafana/tempo:${var.version}"
+        ports = [
+          "http",
+          "grpc",
+        ]
+
+        args = [
+          "-target=querier",
+          "-config.file=/local/config.yml",
+          "-config.expand-env=true",
+        ]
+      }
+
+      template {
+        data        = file("config.yml")
+        destination = "local/config.yml"
+      }
+
+      template {
+        data = <<-EOH
+        S3_ACCESS_KEY_ID=${var.s3_access_key_id}
+        S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key}
+        EOH
+
+        destination = "secrets/s3.env"
+        env         = true
+      }
+
+      resources {
+        cpu        = 200
+        memory     = 128
+        memory_max = 2048
+      }
+    }
+  }
+}