From b678e91684269945ec8ae4ae2ce3d07c094b3940 Mon Sep 17 00:00:00 2001 From: bcirh <72753984+bcirh@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:21:39 +0100 Subject: [PATCH 1/5] Add nomad job microservices mode example --- example/nomad/tempo-distributed/README.md | 31 ++ example/nomad/tempo-distributed/config.yml | 81 +++ example/nomad/tempo-distributed/tempo.hcl | 546 +++++++++++++++++++++ 3 files changed, 658 insertions(+) create mode 100644 example/nomad/tempo-distributed/README.md create mode 100644 example/nomad/tempo-distributed/config.yml create mode 100644 example/nomad/tempo-distributed/tempo.hcl diff --git a/example/nomad/tempo-distributed/README.md b/example/nomad/tempo-distributed/README.md new file mode 100644 index 00000000000..17d041ed6c2 --- /dev/null +++ b/example/nomad/tempo-distributed/README.md @@ -0,0 +1,31 @@ +# Microservices mode + +This Nomad job will deploy Tempo in +[microservices mode](https://grafana.com/docs/tempo/latest/setup/deployment/#microservices-mode) using S3 backend. + +## Usage + +Have a look at the job file and Tempo configuration file and change it to suite your environment. 
+ +### Run job + +Inside directory with job run: + +```shell +nomad job run tempo.hcl +``` + +To deploy a different version change `variable.version` default value or +specify from command line: + +```shell +nomad job run -var="version=2.6.1" tempo.hcl +``` + +### Scale Tempo + +Nomad CLI + +```shell +nomad job scale tempo distributor 3 +``` diff --git a/example/nomad/tempo-distributed/config.yml b/example/nomad/tempo-distributed/config.yml new file mode 100644 index 00000000000..5f2c643134e --- /dev/null +++ b/example/nomad/tempo-distributed/config.yml @@ -0,0 +1,81 @@ +server: + log_level: info + http_listen_port: {{ env "NOMAD_PORT_http" }} + grpc_listen_port: {{ env "NOMAD_PORT_grpc" }} + +distributor: + ring: + kvstore: + store: consul + prefix: tempo/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + + receivers: # only OTLP (HTTP and gRPC) is enabled here; add other receivers if you need them. + otlp: + protocols: + http: + grpc: + +ingester: + max_block_duration: 5m + lifecycler: + ring: + kvstore: + store: consul + prefix: tempo/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + replication_factor: 3 + +compactor: + ring: + kvstore: + store: consul + prefix: tempo/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + + compaction: + block_retention: 336h #Duration to keep blocks. Default is 14 days (336h). 
+ +querier: + frontend_worker: + frontend_address: tempo-query-frontend-grpc.service.consul:9095 + +metrics_generator: + processor: + service_graphs: + max_items: 10000 + ring: + kvstore: + store: consul + prefix: tempo/ + consul: + host: {{ env "attr.unique.network.ip-address" }}:8500 + storage: + path: {{ env "NOMAD_ALLOC_DIR" }}/tempo/wal + remote_write: + - url: http://prometheus.service.consul/api/v1/write + send_exemplars: true + +storage: + trace: + backend: s3 + wal: + path: {{ env "NOMAD_ALLOC_DIR" }}/tempo/wal + local: + path: {{ env "NOMAD_ALLOC_DIR" }}/tempo/blocks + s3: + bucket: tempo # how to store data in s3 + endpoint: seaweedfs-s3.service.consul + insecure: true + access_key: ${S3_ACCESS_KEY_ID} + secret_key: ${S3_SECRET_ACCESS_KEY} + +overrides: + defaults: + metrics_generator: + processors: + - service-graphs + - span-metrics diff --git a/example/nomad/tempo-distributed/tempo.hcl b/example/nomad/tempo-distributed/tempo.hcl new file mode 100644 index 00000000000..b7397a6dc92 --- /dev/null +++ b/example/nomad/tempo-distributed/tempo.hcl @@ -0,0 +1,546 @@ +variable "version" { + type = string + description = "Tempo version" + default = "2.3.1" +} + +variable "s3_access_key_id" { + type = string + description = "S3 Access Key ID" + default = "any" +} + +variable "s3_secret_access_key" { + type = string + description = "S3 Secret Access Key ID" + default = "any" +} + +job "tempo" { + datacenters = ["*"] + + group "metrics-generator" { + count = 1 + + network { + port "http" {} + port "grpc" {} + } + + service { + name = "tempo-metrics-generator" + port = "http" + tags = [ + "traefik.enable=true", + "traefik.http.routers.tempo-metrics-generator.rule=Host(`tempo-metrics-generator.service.consul`)" + ] + check { + name = "metrics-generator" + port = "http" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + } + + service { + name = "tempo-metrics-generator-grpc" + port = "grpc" + tags = [ + "grpc", + "traefik.enable=true", + 
"traefik.http.routers.tempo-metrics-generator-grpc.rule=Host(`tempo-metrics-generator-grpc.service.consul`)", + "traefik.http.services.tempo-metrics-generator-grpc.loadbalancer.server.scheme=h2c" + ] + check { + port = "grpc" + type = "grpc" + interval = "20s" + timeout = "1s" + grpc_use_tls = false + tls_skip_verify = true + } + } + + task "metrics-generator" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/tempo:${var.version}" + ports = [ + "http", + "grpc", + ] + + args = [ + "-target=metrics-generator", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + S3_ACCESS_KEY_ID=${var.s3_access_key_id} + S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key} + EOH + + destination = "secrets/s3.env" + env = true + } + + resources { + cpu = 200 + memory = 128 + memory_max = 1024 + } + } + } + + group "query-frontend" { + count = 1 + + network { + port "http" {} + port "grpc" { static = 9095} + } + + service { + name = "tempo-query-frontend" + port = "http" + tags = [ + "traefik.enable=true", + "traefik.http.routers.tempo-query-frontend.rule=Host(`tempo-query-frontend.service.consul`,`tempo-query-frontend.{{ data_center_name }}.test`)" + ] + } + + service { + name = "tempo-query-frontend-grpc" + port = "grpc" + tags = [ + "grpc", + "traefik.enable=true", + "traefik.http.routers.tempo-query-frontend-grpc.rule=Host(`tempo-query-frontend-grpc.service.consul`)", + "traefik.http.services.tempo-query-frontend-grpc.loadbalancer.server.scheme=h2c" + ] + check { + port = "grpc" + type = "grpc" + interval = "20s" + timeout = "1s" + grpc_use_tls = false + tls_skip_verify = true + } + } + + task "query-frontend" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/tempo:${var.version}" + ports = [ + "http", + "grpc", + ] + + args = [ + "-target=query-frontend", + 
"-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + S3_ACCESS_KEY_ID=${var.s3_access_key_id} + S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key} + EOH + + destination = "secrets/s3.env" + env = true + } + + resources { + cpu = 200 + memory = 128 + memory_max = 1024 + } + } + } + + group "ingester" { + count = 3 + + network { + port "http" {} + port "grpc" {} + } + + service { + name = "tempo-ingester" + port = "http" + tags = [ + "traefik.enable=true", + "traefik.http.routers.tempo-ingester.rule=Host(`tempo-ingester.service.consul`)" + ] + check { + name = "Tempo ingester" + port = "http" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + } + + service { + name = "tempo-ingester-grpc" + port = "grpc" + tags = [ + "grpc", + "traefik.enable=true", + "traefik.http.routers.tempo-ingester-grpc.rule=Host(`tempo-ingester-grpc.service.consul`)", + "traefik.http.services.tempo-ingester-grpc.loadbalancer.server.scheme=h2c" + ] + check { + port = "grpc" + type = "grpc" + interval = "20s" + timeout = "1s" + grpc_use_tls = false + tls_skip_verify = true + } + } + + task "ingester" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/tempo:${var.version}" + ports = [ + "http", + "grpc", + ] + + args = [ + "-target=ingester", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + network_mode = "host" + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + S3_ACCESS_KEY_ID=${var.s3_access_key_id} + S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key} + EOH + + destination = "secrets/s3.env" + env = true + } + + resources { + cpu = 300 + memory = 128 + memory_max = 2048 + } + } + } + + group "compactor" { + count = 1 + + ephemeral_disk { + size = 1000 + sticky = true + } + + network { + port "http" {} + port 
"grpc" {} + } + + service { + name = "tempo-compactor" + port = "http" + tags = [ + "traefik.enable=true", + "traefik.http.routers.tempo-compactor.rule=Host(`tempo-compactor.service.consul`)" + ] + check { + name = "Tempo compactor" + port = "http" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + } + + service { + name = "tempo-compactor-grpc" + port = "grpc" + tags = [ + "grpc", + "traefik.enable=true", + "traefik.http.routers.tempo-compactor-grpc.rule=Host(`tempo-compactor-grpc.service.consul`)", + "traefik.http.services.tempo-compactor-grpc.loadbalancer.server.scheme=h2c" + ] + check { + port = "grpc" + type = "grpc" + interval = "20s" + timeout = "1s" + grpc_use_tls = false + tls_skip_verify = true + } + } + + task "compactor" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/tempo:${var.version}" + ports = [ + "http", + "grpc", + ] + + args = [ + "-target=compactor", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + S3_ACCESS_KEY_ID=${var.s3_access_key_id} + S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key} + EOH + + destination = "secrets/s3.env" + env = true + } + + resources { + cpu = 3000 + memory = 256 + memory_max = 1024 + } + } + } + group "distributor" { + count = 1 + + network { + port "http" {} + port "grpc" {} + port "otpl" { to = 4317 } + } + + service { + name = "tempo-distributor" + port = "http" + tags = [ + "traefik.enable=true", + "traefik.http.routers.tempo-distributor.rule=Host(`tempo-distributor.service.consul`)" + ] + check { + name = "Tempo distributor" + port = "http" + type = "http" + path = "/ready" + interval = "20s" + timeout = "1s" + } + } + + service { + name = "tempo-distributor-otpl" + port = "otpl" + tags = [ + "grpc", + "traefik.enable=true", + 
"traefik.http.routers.tempo-distributor-otpl.rule=Host(`tempo-distributor-otpl.service.consul`)", + "traefik.http.services.tempo-distributor-otpl.loadbalancer.server.scheme=h2c" + ] + } + + service { + name = "tempo-distributor-grpc" + port = "grpc" + tags = [ + "grpc", + "traefik.enable=true", + "traefik.http.routers.tempo-distributor-grpc.rule=Host(`tempo-distributor-grpc.service.consul`)", + "traefik.http.services.tempo-distributor-grpc.loadbalancer.server.scheme=h2c" + ] + check { + port = "grpc" + type = "grpc" + interval = "20s" + timeout = "1s" + grpc_use_tls = false + tls_skip_verify = true + } + } + + task "distributor" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/tempo:${var.version}" + ports = [ + "http", + "grpc", + "otpl", + ] + + args = [ + "-target=distributor", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + S3_ACCESS_KEY_ID=${var.s3_access_key_id} + S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key} + EOH + + destination = "secrets/s3.env" + env = true + } + + resources { + cpu = 200 + memory = 128 + memory_max = 1024 + } + } + } + group "querier" { + count = 1 + + network { + port "http" {} + port "grpc" {} + } + + service { + name = "tempo-querier" + port = "http" + tags = [ + "traefik.enable=true", + "traefik.http.routers.tempo-querier.rule=Host(`tempo-querier.service.consul`)" + ] + check { + name = "Tempo querier" + port = "http" + type = "http" + path = "/ready" + interval = "50s" + timeout = "1s" + } + } + + service { + name = "tempo-querier-grpc" + port = "grpc" + tags = [ + "grpc", + "traefik.enable=true", + "traefik.http.routers.tempo-querier-grpc.rule=Host(`tempo-querier-grpc.service.consul`)", + "traefik.http.services.tempo-querier-grpc.loadbalancer.server.scheme=h2c" + ] + check { + port = "grpc" + type = "grpc" + interval = "20s" + timeout = "1s" + 
grpc_use_tls = false + tls_skip_verify = true + } + } + + task "querier" { + driver = "docker" + user = "nobody" + kill_timeout = "90s" + + config { + image = "grafana/tempo:${var.version}" + ports = [ + "http", + "grpc", + ] + + args = [ + "-target=querier", + "-config.file=/local/config.yml", + "-config.expand-env=true", + ] + } + + template { + data = file("config.yml") + destination = "local/config.yml" + } + + template { + data = <<-EOH + S3_ACCESS_KEY_ID=${var.s3_access_key_id} + S3_SECRET_ACCESS_KEY=${var.s3_secret_access_key} + EOH + + destination = "secrets/s3.env" + env = true + } + + resources { + cpu = 200 + memory = 128 + memory_max = 2048 + } + } + } +} From 8ae5c8c2d9a09990a1682628f4c16adfa1e80085 Mon Sep 17 00:00:00 2001 From: bcirh <72753984+bcirh@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:33:35 +0100 Subject: [PATCH 2/5] Remove unused tag from query-frontend --- example/nomad/tempo-distributed/tempo.hcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/nomad/tempo-distributed/tempo.hcl b/example/nomad/tempo-distributed/tempo.hcl index b7397a6dc92..2e461ceb81b 100644 --- a/example/nomad/tempo-distributed/tempo.hcl +++ b/example/nomad/tempo-distributed/tempo.hcl @@ -118,7 +118,7 @@ job "tempo" { port = "http" tags = [ "traefik.enable=true", - "traefik.http.routers.tempo-query-frontend.rule=Host(`tempo-query-frontend.service.consul`,`tempo-query-frontend.{{ data_center_name }}.test`)" + "traefik.http.routers.tempo-query-frontend.rule=Host(`tempo-query-frontend.service.consul`)" ] } From 84b5db513ccb62d8591200cd8534def8afdf8fde Mon Sep 17 00:00:00 2001 From: bcirh <72753984+bcirh@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:33:55 +0100 Subject: [PATCH 3/5] Update nomad example README.md --- example/nomad/tempo-distributed/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/example/nomad/tempo-distributed/README.md b/example/nomad/tempo-distributed/README.md index 17d041ed6c2..b73bbd1e2a8 100644 --- 
a/example/nomad/tempo-distributed/README.md +++ b/example/nomad/tempo-distributed/README.md @@ -5,6 +5,9 @@ This Nomad job will deploy Tempo in ## Usage +### Prerequisites +- S3 compatible storage + Have a look at the job file and Tempo configuration file and change it to suite your environment. ### Run job From e2d58967a3f3d0dc1fdb6000a46de678a8a048ba Mon Sep 17 00:00:00 2001 From: bcirh <72753984+bcirh@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:36:40 +0100 Subject: [PATCH 4/5] Add link to nomad mem oversub in README.md file --- example/nomad/tempo-distributed/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/example/nomad/tempo-distributed/README.md b/example/nomad/tempo-distributed/README.md index b73bbd1e2a8..feaa1f7ad4d 100644 --- a/example/nomad/tempo-distributed/README.md +++ b/example/nomad/tempo-distributed/README.md @@ -7,6 +7,7 @@ This Nomad job will deploy Tempo in ### Prerequisites - S3 compatible storage +- [Nomad memory oversubscription](https://developer.hashicorp.com/nomad/tutorials/advanced-scheduling/memory-oversubscription) Have a look at the job file and Tempo configuration file and change it to suite your environment. From 536c2447340a5d5fa601bca46a2da4b4f298eb67 Mon Sep 17 00:00:00 2001 From: bcirh <72753984+bcirh@users.noreply.github.com> Date: Fri, 20 Dec 2024 07:03:13 +0100 Subject: [PATCH 5/5] Remove tags from tempo.hcl. 
Update README.md --- example/nomad/tempo-distributed/README.md | 13 +++- example/nomad/tempo-distributed/tempo.hcl | 81 ++++------------------- 2 files changed, 25 insertions(+), 69 deletions(-) diff --git a/example/nomad/tempo-distributed/README.md b/example/nomad/tempo-distributed/README.md index feaa1f7ad4d..adb5fc77ab3 100644 --- a/example/nomad/tempo-distributed/README.md +++ b/example/nomad/tempo-distributed/README.md @@ -7,9 +7,18 @@ This Nomad job will deploy Tempo in ### Prerequisites - S3 compatible storage -- [Nomad memory oversubscription](https://developer.hashicorp.com/nomad/tutorials/advanced-scheduling/memory-oversubscription) +- [Nomad memory oversubscription](https://developer.hashicorp.com/nomad/tutorials/advanced-scheduling/memory-oversubscription). If memory oversubscription is not enabled, remove `memory_max` from tempo.hcl -Have a look at the job file and Tempo configuration file and change it to suite your environment. +Have a look at the job file and Tempo configuration file and change it to suit your environment. (e.g. in `config.yml` change s3 endpoint to your s3 compatible storage, prometheus endpoint, etc...) 
+ +Variables +-------------- + +| Name | Value | Description | +|---|---|---| +| version | Default = "2.3.1" | Tempo version | +| s3_access_key_id | Default = "any" | S3 Access Key ID | +| s3_secret_access_key | Default = "any" | S3 Secret Access Key | ### Run job diff --git a/example/nomad/tempo-distributed/tempo.hcl b/example/nomad/tempo-distributed/tempo.hcl index 2e461ceb81b..13ca420758b 100644 --- a/example/nomad/tempo-distributed/tempo.hcl +++ b/example/nomad/tempo-distributed/tempo.hcl @@ -12,7 +12,7 @@ variable "s3_access_key_id" { variable "s3_secret_access_key" { type = string - description = "S3 Secret Access Key ID" + description = "S3 Secret Access Key" default = "any" } @@ -30,10 +30,7 @@ job "tempo" { service { name = "tempo-metrics-generator" port = "http" - tags = [ - "traefik.enable=true", - "traefik.http.routers.tempo-metrics-generator.rule=Host(`tempo-metrics-generator.service.consul`)" - ] + tags = [] check { name = "metrics-generator" port = "http" @@ -47,12 +44,7 @@ job "tempo" { service { name = "tempo-metrics-generator-grpc" port = "grpc" - tags = [ - "grpc", - "traefik.enable=true", - "traefik.http.routers.tempo-metrics-generator-grpc.rule=Host(`tempo-metrics-generator-grpc.service.consul`)", - "traefik.http.services.tempo-metrics-generator-grpc.loadbalancer.server.scheme=h2c" - ] + tags = [] check { port = "grpc" type = "grpc" @@ -116,21 +108,13 @@ job "tempo" { service { name = "tempo-query-frontend" port = "http" - tags = [ - "traefik.enable=true", - "traefik.http.routers.tempo-query-frontend.rule=Host(`tempo-query-frontend.service.consul`)" - ] + tags = [] } service { name = "tempo-query-frontend-grpc" port = "grpc" - tags = [ - "grpc", - "traefik.enable=true", - "traefik.http.routers.tempo-query-frontend-grpc.rule=Host(`tempo-query-frontend-grpc.service.consul`)", - "traefik.http.services.tempo-query-frontend-grpc.loadbalancer.server.scheme=h2c" - ] + tags = [] check { port = "grpc" type = "grpc" @@ -194,10 +178,7 @@ job "tempo" { 
service { name = "tempo-ingester" port = "http" - tags = [ - "traefik.enable=true", - "traefik.http.routers.tempo-ingester.rule=Host(`tempo-ingester.service.consul`)" - ] + tags = [] check { name = "Tempo ingester" port = "http" @@ -211,12 +192,7 @@ job "tempo" { service { name = "tempo-ingester-grpc" port = "grpc" - tags = [ - "grpc", - "traefik.enable=true", - "traefik.http.routers.tempo-ingester-grpc.rule=Host(`tempo-ingester-grpc.service.consul`)", - "traefik.http.services.tempo-ingester-grpc.loadbalancer.server.scheme=h2c" - ] + tags = [] check { port = "grpc" type = "grpc" @@ -286,10 +262,7 @@ job "tempo" { service { name = "tempo-compactor" port = "http" - tags = [ - "traefik.enable=true", - "traefik.http.routers.tempo-compactor.rule=Host(`tempo-compactor.service.consul`)" - ] + tags = [] check { name = "Tempo compactor" port = "http" @@ -303,12 +276,7 @@ job "tempo" { service { name = "tempo-compactor-grpc" port = "grpc" - tags = [ - "grpc", - "traefik.enable=true", - "traefik.http.routers.tempo-compactor-grpc.rule=Host(`tempo-compactor-grpc.service.consul`)", - "traefik.http.services.tempo-compactor-grpc.loadbalancer.server.scheme=h2c" - ] + tags = [] check { port = "grpc" type = "grpc" @@ -372,10 +340,7 @@ job "tempo" { service { name = "tempo-distributor" port = "http" - tags = [ - "traefik.enable=true", - "traefik.http.routers.tempo-distributor.rule=Host(`tempo-distributor.service.consul`)" - ] + tags = [] check { name = "Tempo distributor" port = "http" @@ -389,23 +354,13 @@ job "tempo" { service { name = "tempo-distributor-otpl" port = "otpl" - tags = [ - "grpc", - "traefik.enable=true", - "traefik.http.routers.tempo-distributor-otpl.rule=Host(`tempo-distributor-otpl.service.consul`)", - "traefik.http.services.tempo-distributor-otpl.loadbalancer.server.scheme=h2c" - ] + tags = [] } service { name = "tempo-distributor-grpc" port = "grpc" - tags = [ - "grpc", - "traefik.enable=true", - 
"traefik.http.routers.tempo-distributor-grpc.rule=Host(`tempo-distributor-grpc.service.consul`)", - "traefik.http.services.tempo-distributor-grpc.loadbalancer.server.scheme=h2c" - ] + tags = [] check { port = "grpc" type = "grpc" @@ -469,10 +424,7 @@ job "tempo" { service { name = "tempo-querier" port = "http" - tags = [ - "traefik.enable=true", - "traefik.http.routers.tempo-querier.rule=Host(`tempo-querier.service.consul`)" - ] + tags = [] check { name = "Tempo querier" port = "http" @@ -486,12 +438,7 @@ job "tempo" { service { name = "tempo-querier-grpc" port = "grpc" - tags = [ - "grpc", - "traefik.enable=true", - "traefik.http.routers.tempo-querier-grpc.rule=Host(`tempo-querier-grpc.service.consul`)", - "traefik.http.services.tempo-querier-grpc.loadbalancer.server.scheme=h2c" - ] + tags = [] check { port = "grpc" type = "grpc"