otel.yml
# Define the protocols to receive data for.
# See https://opentelemetry.io/docs/collector/configuration/#receivers
receivers:
  # Configure receiving OTLP data via gRPC on port 4317.
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
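      # A minimal, commented-out sketch (not part of the configuration above): the OTLP receiver can
      # also accept data over HTTP, conventionally on port 4318. Uncomment to enable it.
      #http:
      #  endpoint: 0.0.0.0:4318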
  # Defines a Prometheus configuration set.
  prometheus:
    # Define a set of configurations for scraping by the OpenTelemetry Collector.
    config:
      # The `scrape_configs` section pertains to the Prometheus `scrape_configs`
      # configuration block.
      # See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
      scrape_configs:
        # Scrape Mimir metrics.
        - job_name: 'mimir'
          static_configs:
            - targets: ['mimir:9009']
              labels:
                service: 'mimir'
                group: 'infrastructure'
        # Scrape Loki metrics.
        - job_name: 'loki'
          static_configs:
            - targets: ['loki:3100']
              labels:
                service: 'loki'
                group: 'infrastructure'
        # Scrape Tempo metrics.
        - job_name: 'tempo'
          static_configs:
            - targets: ['tempo:3200']
              labels:
                service: 'tempo'
                group: 'infrastructure'
        # Scrape Grafana metrics.
        - job_name: 'grafana'
          static_configs:
            - targets: ['grafana:3000']
              labels:
                service: 'grafana'
                group: 'infrastructure'
        # Scrape from the Mythical Server and Requester services.
        - job_name: 'mythical'
          scrape_interval: 2s
          static_configs:
            - targets: ['mythical-server:4000']
              labels:
                service: 'mythical-server'
                group: 'mythical'
            - targets: ['mythical-requester:4001']
              labels:
                service: 'mythical-requester'
                group: 'mythical'
        # Scrape from the Beyla Mythical services.
        - job_name: 'beyla-infra'
          scrape_interval: 15s
          static_configs:
            - targets: ['beyla-requester:9090']
              labels:
                service: 'beyla-requester'
                group: 'beyla'
            - targets: ['beyla-server:9090']
              labels:
                service: 'beyla-server'
                group: 'beyla'
            - targets: ['beyla-recorder:9090']
              labels:
                service: 'beyla-recorder'
                group: 'beyla'
# Define processors to process received data.
# See https://opentelemetry.io/docs/collector/configuration/#processors
processors:
  # Use the in-built `batch` processor to batch up data before writing it for export.
  # Use the default values for it.
  batch:
  # The tail sampling processor will only keep traces where spans match the defined policies.
  tail_sampling:
    decision_wait: 30s # The time to wait before making a sampling decision for a trace.
    # The following policies follow a logical OR pattern, meaning that if any of the policies match,
    # the trace will be kept. For logical AND, you can use the `and` policy (a commented sketch follows
    # the `policies` block below). Every span of a trace is examined by each policy in turn. A match
    # will cause a short-circuit.
    policies: [
      # This policy defines that traces that include spans containing errors should be kept.
      {
        name: sample-erroring-traces, # Name of the policy.
        type: status_code, # The type must match the type of policy to be used.
        status_code: { status_codes: [ERROR] } # Only sample traces which have a span containing an error.
      },
      # This policy defines that traces longer than 200ms should be sampled.
      {
        name: sample-long-traces, # Name of the policy.
        type: latency, # The type must match the type of policy to be used.
        latency: { threshold_ms: 200 }, # Only sample traces which are longer than 200ms in duration.
      },
    ]
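    # A minimal, commented-out sketch (not part of the configuration above) showing how the `and` policy
    # type could combine the two policies so that only traces that are both slow *and* erroring are kept.
    # The policy names used here are illustrative only.
    #policies: [
    #  {
    #    name: sample-slow-erroring-traces,
    #    type: and,
    #    and: {
    #      and_sub_policy: [
    #        { name: latency-policy, type: latency, latency: { threshold_ms: 200 } },
    #        { name: error-policy, type: status_code, status_code: { status_codes: [ERROR] } },
    #      ]
    #    }
    #  },
    #]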
# Define connectors, which can act as an exporter in one pipeline and a receiver in another,
# generating new telemetry from the data they receive.
# See https://opentelemetry.io/docs/collector/configuration/#connectors
connectors:
  # The spanmetrics connector is used to output span metrics based on received trace spans.
  spanmetrics:
    namespace: traces.spanmetrics # Prefix all metrics with `traces.spanmetrics` (this becomes `traces_spanmetrics`).
    # Determine the type of histogram to use for span metrics.
    histogram:
      explicit: # Explicit histograms have pre-defined bucket sizes (the defaults are used here; a commented sketch of custom buckets follows this connector).
    # Defines additional label dimensions for the metrics, taken from trace span attributes where present.
    dimensions:
      - name: http.method
      - name: http.target
      - name: http.status_code
      - name: service.version
    # Ensure exemplars are enabled and sent to the metrics store.
    exemplars:
      enabled: true
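    # A minimal, commented-out sketch (not part of the configuration above) of overriding the explicit
    # histogram bucket boundaries instead of relying on the defaults. With the namespace above, the
    # generated series typically surface in Mimir with names such as `traces_spanmetrics_calls_total`.
    #histogram:
    #  explicit:
    #    buckets: [2ms, 8ms, 50ms, 100ms, 200ms, 500ms, 1s, 5s]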
  # The servicegraph connector is used to output service node metrics based on received trace spans.
  servicegraph:
    # Defines which exporter the connector will write metrics to.
    metrics_exporter: prometheusremotewrite
    # Configuration for the in-memory store used to track edges between services.
    store:
      ttl: 2s # Time to wait for an edge to be completed.
      max_items: 200 # Number of edges that will be stored in the storeMap.
    cache_loop: 2m # The timeout used to clean the cache periodically.
    store_expiration_loop: 10s # The timeout used to expire old entries from the store periodically.
    # Virtual node peer attributes allow server nodes to be generated where instrumentation isn't present (e.g. where
    # a service client calls remotely to a service that does not include instrumentation).
    # Service nodes/edges will be generated for any attribute defined; a commented sketch of further
    # candidate attributes follows below.
    virtual_node_peer_attributes:
      - db.name
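    # A minimal, commented-out sketch (not part of the configuration above) of additional peer attributes
    # that could also be used to generate virtual nodes, such as the conventional `peer.service` and
    # `db.system` span attributes.
    #virtual_node_peer_attributes:
    #  - db.name
    #  - db.system
    #  - peer.service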
# Define exporters to data stores.
# See https://opentelemetry.io/docs/collector/configuration/#exporters
# Also see https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor#recommended-processors
exporters:
  # Exporter for sending trace data to Tempo.
  otlp/grafana:
    # Send to the locally running Tempo service.
    endpoint: tempo:4317
    # TLS is not enabled for the instance.
    tls:
      insecure: true
  # Exporter for sending Prometheus data to Mimir.
  prometheusremotewrite:
    # Send to the locally running Mimir service.
    endpoint: http://mimir:9009/api/v1/push
    # TLS is not enabled for the instance.
    tls:
      insecure: true
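  # A minimal, commented-out sketch (not part of the configuration above) of the `debug` exporter, which
  # prints received telemetry to the Collector's console output and can be handy when troubleshooting a
  # pipeline. If enabled, it would also need adding to a pipeline's `exporters` list below.
  #debug:
  #  verbosity: detailed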
# Define the service section, which configures the pipelines run by the OpenTelemetry Collector.
service:
  # A pipeline can exist for each of the signals received.
  pipelines:
    # Define the trace pipeline.
    traces:
      # Receive from the `otlp` receiver.
      receivers: [otlp]
      # Use the `batch` processor to process received trace spans.
      processors: [batch]
      # Comment out other `processors` definitions and uncomment the line below to use tail sampling.
      #processors: [tail_sampling, batch]
      # Comment out other `processors` definitions and uncomment the line below to generate service graph metrics
      # from within the OpenTelemetry Collector.
      #processors: [servicegraph, batch]
      # Export to the `otlp/grafana` exporter.
      exporters: [otlp/grafana]
      # Comment out other `exporters` definitions and uncomment the line below to generate span metrics
      # from within the OpenTelemetry Collector as well as exporting traces to Tempo.
      #exporters: [otlp/grafana, spanmetrics]
    # Define the metrics pipeline.
    metrics:
      # Receive metrics from the `prometheus` receiver.
      receivers: [prometheus]
      # Comment out other `receivers` definitions and uncomment the line below to import span metrics as well
      # as Prometheus metrics.
      #receivers: [prometheus, spanmetrics]
      # Use the `batch` processor to process received metrics.
      processors: [batch]
      # Export to the `prometheusremotewrite` exporter.
      exporters: [prometheusremotewrite]
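# A minimal, commented-out sketch (assembled from the commented alternatives above, not enabled by
# default) of how the pipelines might look with tail sampling, span metrics generation and Prometheus
# scraping all switched on at once:
#
#  pipelines:
#    traces:
#      receivers: [otlp]
#      processors: [tail_sampling, batch]
#      exporters: [otlp/grafana, spanmetrics]
#    metrics:
#      receivers: [prometheus, spanmetrics]
#      processors: [batch]
#      exporters: [prometheusremotewrite]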