# example.yaml

---
# == COLLECTION ==

# The addresses on which to listen for statsd metrics. These are
# formatted as URLs, with schemes corresponding to valid "network"
# arguments on https://golang.org/pkg/net/#Listen. Currently, only udp,
# tcp (including the IPv4-only and IPv6-only variants) and unixgram
# (datagram only) schemes are supported. This option supersedes the
# "udp_address" and "tcp_address" options.
statsd_listen_addresses:
  - udp://localhost:8126
  - tcp://localhost:8126
  - unixgram:///tmp/veneur-statsd.sock

# The addresses on which to listen for SSF data. As with
# statsd_listen_addresses, these are formatted as URLs, with schemes
# corresponding to valid "network" arguments on
# https://golang.org/pkg/net/#Listen. Currently, only UDP and Unix
# domain sockets are supported.
# Note: SSF sockets are required to ingest trace data.
# This option supersedes the "ssf_address" option.
ssf_listen_addresses:
  - udp://localhost:8128
  - unix:///tmp/veneur-ssf.sock

# TLS
# These are only useful in conjunction with TCP listening sockets.

# TLS server private key and certificate for encryption (specify both).
# These are the key/certificate contents, not a file path.
tls_key: ""
tls_certificate: ""

# Authority certificate: requires clients to be authenticated.
tls_authority_certificate: ""

# == BEHAVIOR ==

# Use a static host for forwarding. Example for HTTP forwarding:
#forward_address: "http://veneur.example.com"
# Do not add a scheme prefix when setting the forward address for gRPC:
#forward_address: "veneur.example.com"
forward_address: ""

# Whether or not to forward to an upstream Veneur over gRPC.  If this is false
# or unset, HTTP will be used.
forward_use_grpc: false

# How often to flush. When flushing to Datadog, changing this
# value when you've already emitted metrics will break your time
# series data.
interval: "10s"

# How many flushes veneur may miss before it considers itself buggy
# and terminates. Leaving this at the default of 0 disables the
# watchdog.
flush_watchdog_missed_flushes: 0

# Veneur can "synchronize" its flushes with the system clock, flushing at even
# intervals i.e. 0, 10, 20… to align with the `interval`. This is disabled by
# default for now, as it can cause thundering herds in large installations.
synchronize_with_interval: false

# Veneur emits its own metrics; this configures where we send them. It's ok
# to point veneur at itself for metrics consumption!
# This can be a host:port combination or a Unix domain socket
# (e.g. unix:///tmp/veneur-statsd.sock).
stats_address: "localhost:8126"

# The address on which to listen for HTTP imports and/or healthchecks.
# http_address: "einhorn@0"
http_address: "0.0.0.0:8127"

# The address on which to listen for imports over gRPC.
grpc_address: "0.0.0.0:8128"

# The name of timer metrics that "indicator" spans should be tracked
# under. If this is unset, veneur doesn't report an additional timer
# metric for indicator spans.
indicator_span_timer_name: "indicator_span.duration_ns"

# The name of timer metrics that objectives, derived from indicator
# spans, should be tracked under. If this is unset, veneur doesn't
# report an additional timer metric for objectives.
objective_span_timer_name: "objective_span.duration_ns"

# If enabled, issuing an unauthenticated HTTP POST request to /quitquitquit
# will gracefully shut down the server.
# This is intended to be used in environments where network access is already
# restricted, such as inside containerized deployments.
http_quit: false

# == METRICS CONFIGURATION ==

# Defaults to the os.Hostname()!
hostname: ""

# If true and hostname is "" or absent, don't add the host tag.
omit_empty_hostname: false

# Tags supplied here will be added to all metrics and spans ingested by this
# instance. Example:
# tags:
#   - "foo:bar"
#   - "baz:quz"
tags:
  - ""

# Tags listed here will be excluded from sinks. A pipe ("|") delimiter
# can be used to specify the name of a sink, in which case the tag will
# only be excluded from that one sink.
# Sinks must support this behavior by providing a SetExcludedTags method,
# or the exclusion rule will not be applied.
tags_exclude:
  - "nonce"
  - "host_env|signalfx"

# Set to floating point values that you'd like to output percentiles for from
# histograms.
percentiles:
  - 0.5
  - 0.75
  - 0.99

# Aggregations you'd like to output for histograms. Possible values can be any
# or all of:
# - `min`: the minimum value in the histogram during the flush period
# - `max`: the maximum value in the histogram during the flush period
# - `median`: the median value in the histogram during the flush period
# - `avg`: the average value in the histogram during the flush period
# - `count`: the number of values added to the histogram during the flush period
# - `sum`: the sum of all values added to the histogram during the flush period
# - `hmean`: the harmonic mean of all the values added to the histogram during
#   the flush period
aggregates:
  - "min"
  - "max"
  - "count"

# Metrics that Veneur reports about its own operation. Each of the
# entries here can have the value "global", "local", "default" and ""
# ("default" and "" mean the same thing). Setting
# this to any value other than the default will make all metrics
# of that type have the following behavior:
#
#  - "default"/"": scope remains unchanged
#  - "global": scope for "default"-scoped metrics of that type will be
#    changed to global, so they get forwarded to a global veneur node.
#  - "local": scope for "default"-scoped metrics of that type will be
#    changed to local, so they get reported from the local veneur node
#    only.
#
# When this is unset in configuration, the default values for all
# metric types are "", indicating that veneur will use the default
# scope for each of the metrics it reports.
veneur_metrics_scopes:
  counter: local

  # changing the setting for "gauge" to "global" is not recommended,
  # as the global aggregation method for gauges is "last write wins".
  gauge: local

  histogram: global

  set: global

  status: local

# Tags supplied here will be attached to all metrics that veneur
# reports about its own operation.
veneur_metrics_additional_tags:
  - "veneur_internal_metric:true"

# NOTE(review): this key is undocumented in this file; presumably it toggles
# counting of unique timeseries — confirm against the consumer.
count_unique_timeseries: false

# == DEPRECATED ==
# Each key in this section has a replacement, named in the comment above it.

# This has been replaced by datadog_flush_max_per_body.
flush_max_per_body: 0
# This has been replaced by datadog_span_buffer_size.
ssf_buffer_size: 0
# This has been replaced by lightstep_access_token.
trace_lightstep_access_token: ""
# This has been replaced by lightstep_collector_host.
trace_lightstep_collector_host: ""
# This has been replaced by lightstep_reconnect_period.
trace_lightstep_reconnect_period: ""
# This has been replaced by lightstep_maximum_spans.
trace_lightstep_maximum_spans: 0
# This has been replaced by lightstep_num_clients.
trace_lightstep_num_clients: 0

# == PERFORMANCE ==

# Adjusts the number of metrics workers across which Veneur will
# distribute aggregation. More workers decrease contention, but with
# diminishing returns. The default value is 1, no parallel ingestion
# of metrics.
num_workers: 96

# Adjusts the number of listening goroutines on any UDP listener
# (statsd and SSF). Numbers larger than 1 will enable the use of
# SO_REUSEPORT, so make sure this is supported on your platform!
num_readers: 1

# Adjusts the number of span workers across which Veneur will
# distribute span ingestion. The default value is 1, no parallel
# ingestion of spans.
num_span_workers: 10

# Adjusts the number of spans that can be accommodated before the span
# ingestion buffer blocks. This is good to tweak when you're seeing
# spiky span ingestion patterns and a lot of spans get dropped. This
# corresponds directly to a Go channel's capacity, for which the
# default is zero (unbuffered).
span_channel_capacity: 100

# == LIMITS ==

# How big of a buffer to allocate for incoming metrics. Metrics longer than this
# will be truncated!
metric_max_length: 4096

# How big of a buffer to allocate for incoming traces.
trace_max_length_bytes: 16384

# The size of the buffer we'll use to buffer socket reads. Tune this if
# you think Veneur needs more room to keep up with all packets.
read_buffer_size_bytes: 2097152

# == DIAGNOSTICS ==

# Sets the log level to DEBUG.
debug: false

# Log (at level DEBUG) information about every ingested span. Be
# careful with this setting in a real deployment - it is extremely
# verbose.
debug_ingested_spans: false

# Log (at level DEBUG) information about every batch of flushed
# metrics. Be careful with this setting in a real deployment - it is
# extremely verbose.
debug_flushed_metrics: false

# runtime.SetMutexProfileFraction.
# The fraction of mutex contention events that are reported in the mutex profile.
# On average, 1/n events are reported (n being the value set here), so higher
# numbers will sample fewer events.
# Default (0) disables mutex profiling altogether.
mutex_profile_fraction: 0

# runtime.SetBlockProfileRate.
# The fraction of goroutine blocking events that are reported in the blocking profile.
# On average, one blocking event will be sampled for every N nanoseconds spent
# blocked (N being the value set here).
# Default (0) disables block profiling altogether.
block_profile_rate: 0

# Providing a Sentry DSN here will send internal exceptions to Sentry.
sentry_dsn: ""

# Enables Go profiling.
enable_profiling: false


# == SINKS ==

# == Datadog ==
# Datadog can be a sink for metrics, events, service checks and trace spans.

# Hostname to send Datadog data to.
datadog_api_hostname: https://app.datadoghq.com

# API key for accessing Datadog.
datadog_api_key: "farts"

# How many metrics to include in the body of each POST to Datadog. Veneur
# will post multiple times in parallel if the limit is exceeded.
datadog_flush_max_per_body: 25000

# Hostname to send Datadog trace data to.
datadog_trace_api_address: ""

# A list of metric *prefixes* to drop. Note that this is not the whole string
# and not a regexp. Just a prefix. Any metrics that have this prefix as a name
# will be dropped before sending to Datadog.
datadog_metric_name_prefix_drops:
  - "an_ignorable_metric."

# Tag *prefixes* to exclude, scoped to specific metric *prefixes*. Each entry
# pairs a `metric_prefix` with a list of tag prefixes under `tags`.
# Any metrics that have tags *prefixes* will be dropped before sending to Datadog.
# NOTE(review): the key name suggests it is the matching tags, not the whole
# metric, that get dropped — confirm against the Datadog sink.
datadog_exclude_tags_prefix_by_prefix_metric:
  - metric_prefix: "metric_prefix"
    tags:
      - "an_ignorable_tag_prefix"

# The size of the ring buffer used for retaining spans during a flush interval.
datadog_span_buffer_size: 16384

# == SignalFx ==
# SignalFx can be a sink for metrics and events.

# The API token to use, either always, or if no
# signalfx_per_tag_api_keys match.
signalfx_api_key: "abc123"

# Where to send metrics.
signalfx_endpoint_base: "https://ingest.signalfx.com"

# Where to send API calls.
# Substitute the correct SignalFx realm.
signalfx_endpoint_api: "https://api.REALM.signalfx.com"

# The tag we'll add to each metric that contains the hostname we came from.
signalfx_hostname_tag: "host"

# The tag that we'll (optionally) use to look up values in
# signalfx_per_tag_api_keys. If this is empty, the SignalFx sink uses
# only signalfx_api_key.
signalfx_vary_key_by: host_operator

# If signalfx_vary_key_by is set, and matches one of the keys in here,
# use the value as the signalfx api token. If this is empty, only
# signalfx_api_key is used to submit metrics, unless
# signalfx_dynamic_per_tag_api_keys_enable is true.
signalfx_per_tag_api_keys:
  # metrics tagged with host_operator:cory will be submitted with api
  # key "farts_in_a_general_direction"
  - name: "cory"
    api_key: "farts_in_a_general_direction"
  # metrics tagged with host_operator:asf will be submitted with api
  # key "definitely_no_farts"
  - name: "asf"
    api_key: "definitely_no_farts"

# A list of metric *prefixes* to drop. Note that this is not the whole string
# and not a regexp. Just a prefix. Any metrics that have this prefix as a name
# (not a tag!) will be dropped before sending to SignalFx.
signalfx_metric_name_prefix_drops:
  - ""

# A list of tag `key:value` *prefixes* pairs to drop. Note that this is not the
# whole string and not a regexp. Just a prefix. You can use it in a few ways:
# * `foo:` will match the tag key `foo` with any value, so you can ignore
#   the value
# * `foo:bar` will match the key and value (but note that, being a prefix, it
#   also matches `foo:barsnort`)
# * `foo` will match things like `foobar:gorch` and `foofart:fighter`
signalfx_metric_tag_prefix_drops:
  - ""

# The maximum number of datapoints in a single HTTP request to
# signalfx. On flush time, if veneur would flush more than the number
# configured here, it breaks the flushes apart into batches of this
# configured max size and submits them in parallel HTTP requests. If
# set to zero (the default), veneur makes a single HTTP request per
# signalfx flush endpoint.
signalfx_flush_max_per_body: 0

# Enables dynamic fetching of per-tag API keys from SignalFx. Any
# API keys fetched from SignalFx will be added to the list specified
# in signalfx_per_tag_api_keys, and will overwrite any keys in that
# list on name collision. The name of the key specified in SignalFx will
# be used as the name. signalfx_api_key is the fallback key used to submit
# metrics if signalfx_vary_key_by doesn't match any of the dynamic API
# keys' names. signalfx_api_key is used to fetch the API keys from SignalFx,
# so it must be set for this to work properly.
signalfx_dynamic_per_tag_api_keys_enable: false

# The period of the polling performed on SignalFx to fetch dynamic
# tokens. This only matters if signalfx_dynamic_per_tag_api_keys_enable
# is true. Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".
# If missing (or set to zero), it will default to "10m".
signalfx_dynamic_per_tag_api_keys_refresh_period: "10m"

# == AWS X-Ray ==
# X-Ray can be a sink for trace spans.

# If present, X-Ray will be enabled as a tracing sink.
xray_address: "localhost:2000"

# Sample rate in percent (as an integer).
# This should ideally be a floating point number, but at the time this was
# written, gojson interpreted whole-number floats in yaml as integers.
# The sink will hash the trace id of the span such that all Veneur instances
# will sample the same segments by using the trace id as input for a checksum.
xray_sample_percentage: 100

# All tags are sent as (unindexed) Metadata to X-Ray. Up to 50 tags per trace
# (not per span) can be indexed as Annotations for search. Tag keys specified here
# will be provided as Annotations.
xray_annotation_tags:
  - ""

# == LightStep ==
# LightStep can be a sink for trace spans.

# Access token for accessing LightStep. If present, LightStep will be
# enabled as a tracing sink and this access token will be used.
lightstep_access_token: ""

# Host to send trace data to.
lightstep_collector_host: ""

# How often LightStep should reconnect to collectors. If your workload is
# imbalanced — some veneur instances see more spans than others — then you may
# want to reconnect more often.
lightstep_reconnect_period: "5m"

# The LightStep client has internal throttling to prevent you overwhelming
# things. Anything that exceeds this many spans in the reporting period
# — which is a minimum of 500ms and maximum 2.5s at the time of this writing
# — will be dropped. In other words, you can only submit this many spans per
# flush! If left at zero, veneur will set the maximum to the size of
# `ssf_buffer_size`.
# NOTE(review): `ssf_buffer_size` appears under DEPRECATED in this file as
# replaced by datadog_span_buffer_size — confirm which setting applies here.
lightstep_maximum_spans: 0

# Multiple clients can be used to load-balance spans across multiple collectors,
# improving span indexing success rates.
# If missing (or set to zero), it will default
# to a minimum of one client.
lightstep_num_clients: 1

# == Kafka ==

# Comma-delimited list of brokers suitable for Sarama's [NewAsyncProducer](https://godoc.org/github.com/Shopify/sarama#NewAsyncProducer)
# in the form hostname:port, such as localhost:9092
kafka_broker: ""

# Name of the topic we'll be publishing checks to
kafka_check_topic: "veneur_checks"

# Name of the topic we'll be publishing events to
kafka_event_topic: "veneur_events"

# Name of the topic we'll be publishing metrics to
kafka_metric_topic: ""

# Name of the topic we'll be publishing spans to
kafka_span_topic: "veneur_spans"

# Name of a tag to hash on for sampling; if empty, spans are sampled based off
# of traceID
kafka_span_sample_tag: ""

# Sample rate (as a percent)
kafka_span_sample_rate_percent: 100.0

# NOTE(review): the three kafka_metric_buffer_* keys below are undocumented in
# this file; presumably they tune producer-side buffering of metrics — confirm
# against the Kafka sink.
kafka_metric_buffer_bytes: 0

kafka_metric_buffer_messages: 0

kafka_metric_buffer_frequency: ""

# NOTE(review): undocumented in this file; presumably selects how spans are
# encoded before publishing — confirm the accepted values.
kafka_span_serialization_format: "protobuf"

# The type of partitioner to use.
kafka_partitioner: "hash"

# What type of acks to require for metrics? One of none, local or all.
kafka_metric_require_acks: "all"

# What type of acks to require for spans? One of none, local or all.
kafka_span_require_acks: "all"

# NOTE(review): the three kafka_span_buffer_* keys below are undocumented in
# this file; presumably the span counterparts of the kafka_metric_buffer_*
# keys — confirm against the Kafka sink.
kafka_span_buffer_bytes: 0

# NOTE(review): "mesages" is spelled differently from
# kafka_metric_buffer_messages above. Renaming this key would change what the
# consumer sees, so confirm the spelling the Kafka sink expects before
# touching it.
kafka_span_buffer_mesages: 0

kafka_span_buffer_frequency: ""

# The number of retries before giving up.
kafka_retry_max: 0

# == Falconer ==
#
# Falconer (https://github.com/stripe/falconer) is an ephemeral (in-memory)
# trace data sink. Veneur relays data to Falconer via gRPC.

# NOTE(review): presumably the address of the Falconer gRPC endpoint that
# trace data is relayed to — confirm the expected host[:port] format.
falconer_address: "falconer.service.consul"

# == Splunk ==
#
# Veneur can feed spans to Splunk through the HTTP Event Consumer
# (HEC) interface.
# See also http://dev.splunk.com/view/event-collector/SP-CAAAE6M

# The URL to use for a connection to the Splunk HEC.
splunk_hec_address: "https://localhost:8088"

# The authentication token veneur will use to authenticate to the HEC.
splunk_hec_token: "00000000-0000-0000-0000-000000000000"

# (optional) The number of spans to submit in a single request to the
# Splunk HEC endpoint. If unset, defaults to 100 (the recommended
# maximum event count per batch according to Splunk).
splunk_hec_batch_size: 100

# (optional) The maximum number of parallel submissions to do to the
# Splunk HEC endpoint. Must be greater than 0. If this setting is
# omitted, defaults to 1.
splunk_hec_submission_workers: 3

# (optional) Server name set on the TLS configuration. This is useful
# if the host you're reaching identifies with a different name than on
# the URL.
splunk_hec_tls_validate_hostname: "some-other-hostname"

# (optional) The maximum amount of time to wait before timing out
# sending a batch of spans to the Splunk HEC. If omitted / set to 0,
# sending batches happens without a timeout.
splunk_hec_send_timeout: "10ms"

# (optional) The maximum amount of time to wait before timing out
# ingesting a single span to the Splunk HEC sink. If omitted / set to
# 0, ingestion will wait indefinitely until the span can be ingested.
splunk_hec_ingest_timeout: "10ms"


# (optional) The fraction of traces that are chosen to be reported to
# Splunk.  On average, 1/N traces will be chosen to be reported to
# Splunk. Setting this value to 1 or 0 disables sampling, reporting
# all spans from all traces to Splunk.  Sampling is performed on the
# trace ID, so either all spans from a given trace will be reported,
# or none will.  Spans get excluded from sampling if they have
# indicator=true set, or if they have a trace ID of 0.
splunk_span_sample_rate: 10

# (optional) The maximum duration to keep an HEC submission HTTP
# request. After this duration, veneur will close & re-open the HTTP
# connection even if fewer than `splunk_hec_batch_size` spans have been
# ingested. This defaults to the flush `interval` setting (10s).
splunk_hec_max_connection_lifetime: "10s"

# (optional) The maximum (random) amount of jitter to add to
# splunk_hec_max_connection_lifetime. This can help reduce the number
# of submission workers that close & re-open their HTTP connections at
# the same time. If set to 0, there will be no jitter.
splunk_hec_connection_lifetime_jitter: "10s"

# == PLUGINS ==

# == S3 Output ==
# Include these if you want to archive data to S3.
# NOTE(review): these are credentials; the empty strings here are
# placeholders, so avoid committing real values to version control.
aws_access_key_id: ""
aws_secret_access_key: ""
aws_region: ""
aws_s3_bucket: ""

# == LocalFile Output ==
# Include this if you want to archive data to a local file (which should
# then be rotated/cleaned).
flush_file: ""