spandex-project · reachfh · Aug 9, 2022 · Aug 9, 2022 · Aug 9, 2022 · Aug 9, 2022
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -1,7 +1,7 @@
-# Elixir CircleCI 2.0 configuration file
-#
+---
+# CircleCI 2.0 configuration file
 # Check https://circleci.com/docs/2.0/language-elixir/ for more details
-version: 2
+version: 2.1
 jobs:
   build:
     docker:
@@ -13,7 +13,7 @@ jobs:
       - checkout
       - run: mix local.hex --force
       - run: mix local.rebar --force
-      - run: mix format --check-formatted
       - run: mix deps.get
+      - run: mix format --check-formatted
       - run: mix compile --warnings-as-errors
       - run: mix test
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,61 @@
+---
+name: CI
+on: push
+
+jobs:
+  elixir:
+    name: Elixir Tests
+    runs-on: ubuntu-20.04
+    env:
+      MIX_ENV: test
+    strategy:
+      matrix:
+        elixir: ['1.11.4', '1.13.4']
+        otp: ['24.3']
+    steps:
+      - name: Cancel previous runs
+        uses: styfle/[email protected]
+        with:
+          access_token: ${{ github.token }}
+
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Setup Elixir
+        uses: erlef/setup-beam@v1
+        with:
+          otp-version: ${{ matrix.otp }}
+          elixir-version: ${{ matrix.elixir }}
+
+      - name: Get deps cache
+        uses: actions/cache@v2
+        with:
+          path: deps/
+          key: deps-${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-${{ hashFiles('**/mix.lock') }}
-          key: deps-${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-${{ hashFiles('**/mix.lock') }}
+          key: deps-${{ hashFiles('mix.lock') }}
-          key: deps-${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-${{ hashFiles('**/mix.lock') }}
+          key: deps-${{ hashFiles('mix.lock') }}
+
+      - name: Get build cache
+        uses: actions/cache@v2
+        with:
+          path: _build/test/
+          key: build-${{ runner.os }}-${{ matrix.otp }}-${{ matrix.elixir }}-${{ hashFiles('**/mix.lock') }}
+
+      - name: Install deps
+        run: |
+          mix local.rebar --force
+          mix local.hex --force
+          mix deps.get
+
+      - name: Compile code
+        run: |
+          mix compile --warnings-as-errors
+
+      - name: Run tests
+        run: |
+          mix test
+          mix format --check-formatted
+
+      # - name: Publish unit test results to GitHub
+      #   uses: EnricoMi/publish-unit-test-result-action@v2
+      #   if: always() # always run even if tests fail
+      #   with:
+      #     junit_files: _build/test/lib/*/test-junit-report.xml
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,20 @@ See [Conventional Commits](Https://conventionalcommits.org) for commit guideline
 
 <!-- changelog -->
 
+### Breaking Changes:
+* Change default `priority` to `nil` (not set) instead of 1
+* Do not send metric for `priority` unless set
+* Do not set `x-datadog-sampling-priority` header unless priority is set
+* Use `sample_rate` tag to set `_dd1.sr.eausr` value
+* Use `rule_sample_rate` tag to set `_dd.rule_psr` without default (previously hard coded to 1.0)
+* Use `rate_limiter_rate` tag to set `_dd.limit_psr`, without default (previously hard coded to 1.0)
+* Update library deps
+
+### Features:
+* Handle `x-datadog-origin` header
+* Use `agent_sample_rate` tag to set `_dd.agent_psr`, without default (was 1.0)
+* Add sampler which samples a proportion of traces, setting `priority`
+
 ## [1.2.0](https://github.com/spandex-project/spandex_datadog/compare/1.1.0...1.2.0) (2021-10-23)
 
 ### Features:

diff --git a/README.md b/README.md
@@ -6,12 +6,11 @@
 [![License](https://img.shields.io/hexpm/l/spandex_datadog.svg)](https://github.com/spandex-project/spandex_datadog/blob/master/LICENSE)
 [![Last Updated](https://img.shields.io/github/last-commit/spandex-project/spandex_datadog.svg)](https://github.com/spandex-project/spandex_datadog/commits/master)
 
-A datadog adapter for the `:spandex` library.
+This is the Datadog adapter for the [Spandex](https://github.com/spandex-project/spandex) tracing library.
 
 ## Installation
 
-The package can be installed by adding `:spandex_datadog` to your list of
-dependencies in `mix.exs`:
+Add `:spandex_datadog` to the list of dependencies in `mix.exs`:
 
 ```elixir
 def deps do
@@ -21,14 +20,13 @@ def deps do
 end
 ```
 
-To start the datadog adapter, add a worker to your application's supervisor
+To start the Datadog adapter, add a worker to your application's supervisor.
 
 ```elixir
 # Example configuration
 
-# Note: You should put ApiServer before any other children in the list that
-# might try to send traces before the ApiServer has started up, for example
-# Ecto.Repo and Phoenix.Endpoint
+# Note: Put ApiServer before other children that might try to send traces
+# before ApiServer has started up, e.g. Ecto.Repo and Phoenix.Endpoint
 
 spandex_opts =
   [
@@ -53,15 +51,73 @@ Supvervisor.start_link(children, opts)
 
 ## Distributed Tracing
 
-Distributed tracing is supported via headers `x-datadog-trace-id`,
-`x-datadog-parent-id`, and `x-datadog-sampling-priority`. If they are set, the
-`Spandex.Plug.StartTrace` plug will act accordingly, continuing that trace and
-span instead of starting a new one.  *Both* `x-datadog-trace-id` and
-`x-datadog-parent-id` must be set for distributed tracing to work. You can
-learn more about the behavior of `x-datadog-sampling-priority` in the [Datadog
-priority sampling documentation].
+In distributed tracing, multiple processes contribute to the same trace.
 
-[Datadog priority sampling documentation]: https://docs.datadoghq.com/tracing/getting_further/trace_sampling_and_storage/#priority-sampling-for-distributed-tracing
+That is handled in this adapter by `SpandexDatadog.Adapter.distributed_context/2` and
+`SpandexDatadog.Adapter.inject_context/3`, which read and generate Datadog-specific
+HTTP headers based on the state of the trace. If these headers are set, the
+trace will be continued instead of starting a new one.
+These functions are called in [spandex_phoenix](https://github.com/spandex-project/spandex_phoenix)
+when receiving a request and by your own HTTP requests to downstream services.
+
+## Sampling and Rate Limiting
+
+When the load or cost from tracing increases, it is useful to use sampling or
+rate limiting to reduce tracing. When many traces are the same, it's enough to
+trace only e.g. 10% of them, reducing the bill by 90% while still preserving
+the ability to troubleshoot the system.
+
+With sampling, the tracing still happens, but Datadog may drop it or not
+retain detailed information. This keeps metrics such as the number of requests
+correct, even without detailed trace data.
+
+This adapter supports distributed trace sampling according to the new Datadog
+[ingestion controls](https://docs.datadoghq.com/tracing/trace_pipeline/ingestion_controls/).
+
+Spandex stores the `priority` as an integer in the top level `Spandex.Trace`.
+
+In Datadog, there are four values:
+* `MANUAL_KEEP`(2) indicates that the application wants to ensure that a trace is
+  sampled, e.g. if there is an error
+* `AUTO_KEEP` (1) indicates that a trace has been selected for sampling
+* `AUTO_REJECT` (0) indicates that the trace has not been selected for sampling
+* `MANUAL_REJECT` (-1) indicates that the application wants a trace to be dropped
+
+Datadog uses the `x-datadog-sampling-priority` header to determine whether a
+trace should be sampled. See the [Datadog documentation]:
+https://docs.datadoghq.com/tracing/getting_further/trace_sampling_and_storage/#priority-sampling-for-distributed-tracing
+
+When sampling, the process that starts the trace can make a decision about
+whether it should be sampled. It then passes that information to downstream
+processes via the HTTP headers.
+
+In distributed tracing, the priority may be set as follows:
+
+* Set the priority based on the `x-datadog-sampling-priority` header on the
+  inbound request, propagating it to downstream http requests.
+
+* If no priority header is present (i.e. this is the first app in the trace),
+  make a sampling decision and set the `priority` to 0 or 1 on the current trace.
+  `SpandexDatadog.RateSampler.sampled?/2` supports sampling a proportion of
+  traces based on the `trace_id` (which should be assigned randomly).  This would
+  typically be called from in a Phoenix `plug`, which then sets the priority to 1
+  on the current trace using `Spandex.Tracer.update_priority/2`.
+
+* Force tracking manually by setting priority to 2 on the trace in application
+  code. This is usually done for requests with errors, as they are the ones
+  that need troubleshooting. You can also enable tracing dynamically with a
+  feature flag to debug a feature in production. This overrides whatever sampling
+  decision was made upstream, and Datadog will keep the complete trace.
+
+Older versions of Datadog used rate sampling to tune the priority of spans, but
+those are considered obsolete. You can still set them by adding tags to spans:
+
+* Use `sample_rate` tag to set `_dd1.sr.eausr`
+* Use `rule_sample_rate` tag to set `_dd.rule_psr` without default (previously
+  hard coded to 1.0)
+* Use `rate_limiter_rate` tag to set `_dd.limit_psr`, without default
+  (previously hard coded to 1.0)
+* Use `agent_sample_rate` tag to set `_dd.agent_psr` without default (was 1.0)
 
 ## Telemetry
 
@@ -145,22 +201,23 @@ per trace, then you probably want to keep that number low. If you send only a
 few spans, then you could set it significantly higher.
 
 Sync threshold is how many _simultaneous_ HTTP pushes will be going to Datadog
-before it blocks/throttles your application by making the tracing call synchronous instead of async.
+before it blocks/throttles your application by making the tracing call
+synchronous instead of async.
 
-Ideally, the sync threshold would be set to a point that you wouldn't reasonably reach often, but
-that is low enough to not cause systemic performance issues if you don't apply
+Ideally, the sync threshold would be set to a point that you wouldn't
+reasonably reach often, but that is low enough to not cause systemic
+performance issues if you don't apply
 backpressure.
 
-A simple way to think about it is that if you are seeing 1000
-request per second and `batch_size` is 10, then you'll be making 100
-requests per second to Datadog (probably a bad config).
-With `sync_threshold` set to 10, only 10 of those requests can be
-processed concurrently before trace calls become synchronous.
-
-This concept of backpressure is very important, and strategies
-for switching to synchronous operation are often surprisingly far more
-performant than purely asynchronous strategies (and much more predictable).
+A simple way to think about it is that if you are seeing 1000 request per
+second and `batch_size` is 10, then you'll be making 100 requests per second to
+Datadog (probably a bad config).  With `sync_threshold` set to 10, only 10 of
+those requests can be processed concurrently before trace calls become
+synchronous.
 
+This concept of backpressure is very important, and strategies for switching to
+synchronous operation are often surprisingly far more performant than purely
+asynchronous strategies (and much more predictable).
 
 ## Copyright and License
 

diff --git a/config/config.exs b/config/config.exs
@@ -1,5 +1,12 @@
 # This file is responsible for configuring your application
-# and its dependencies with the aid of the Mix.Config module.
-use Mix.Config
+# and its dependencies with the aid of the Config module.
+#
+# This configuration file is loaded before any dependency and
+# is restricted to this project.
 
-import_config "./#{Mix.env()}.exs"
+# General application configuration
+import Config
+
+# Import environment specific config. This must remain at the bottom
+# of this file so it overrides the configuration defined above.
+import_config "#{config_env()}.exs"
diff --git a/config/dev.exs b/config/dev.exs
@@ -1,4 +1,4 @@
-use Mix.Config
+import Config
 
 config :git_ops,
   mix_project: SpandexDatadog.MixProject,

diff --git a/config/test.exs b/config/test.exs
@@ -1,10 +1,10 @@
-use Mix.Config
+import Config
 
 config :logger, :console,
   level: :debug,
   colors: [enabled: false],
   format: "$time $metadata[$level] $message\n",
-  metadata: [:trace_id, :span_id]
+  metadata: [:trace_id, :span_id, :file, :line]
 
 config :spandex_datadog, SpandexDatadog.Test.Support.Tracer,
   service: :spandex_test,