Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate trace IDs as ULIDs by default #783

Merged
merged 20 commits into from
Jan 9, 2025
42 changes: 42 additions & 0 deletions logfire/_internal/ulid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from __future__ import annotations

import time
from random import Random

# Module-level PRNG used when the caller does not supply one.
# NOTE(review): not seeded, and `random.Random` is not cryptographically
# secure — fine for trace IDs, which only need uniqueness, not secrecy.
_random = Random()


def ulid(random: Random = _random) -> int:
    """Generate an integer ULID compatible with UUID v4.

    ULIDs as defined by the [spec](https://github.com/ulid/spec) look like this:

        01AN4Z07BY 79KA1307SR9X4MV3
       |----------||----------------|
        Timestamp     Randomness
          48bits        80bits

    In the future it would be nice to make this compatible with a UUID,
    e.g. v4 UUIDs by setting the version and variant bits correctly.
    We can't currently do this because setting these bits would leave us with only 7 bytes of randomness,
    which isn't enough for the Python SDK's sampler that currently expects 8 bytes of randomness.
    In the future OTEL will probably adopt https://www.w3.org/TR/trace-context-2/#random-trace-id-flag
    which relies only on the lower 7 bytes of the trace ID, then all SDKs and tooling should be updated
    and leaving only 7 bytes of randomness should be fine.

    Right now we only care about:
    - Our SDK / Python SDK's in general.
    - The OTEL collector.

    And both behave properly with 8 bytes of randomness because trace IDs were originally 64 bits
    so to be compatible with old trace IDs nothing in OTEL can assume >8 bytes of randomness in trace IDs
    unless they generated the trace ID themselves (e.g. the Go SDK _does_ expect >8 bytes of randomness internally).

    Args:
        random: source of the 80 random bits; defaults to a shared module-level
            `Random` instance. Pass a seeded instance for reproducible IDs.

    Returns:
        A 128-bit non-negative integer: 48 bits of millisecond timestamp
        followed by 80 bits of randomness.
    """
    # Timestamp: first 6 bytes of the ULID (48 bits).
    # It's not important that this timestamp is super precise or unique.
    # It just needs to be monotonically increasing so that the ULID is sortable,
    # at least for our purposes. 48 bits of milliseconds overflows in ~10889 AD.
    timestamp = int(time.time() * 1000).to_bytes(6, byteorder='big')
    # Randomness: next 10 bytes of the ULID (80 bits)
    randomness = random.getrandbits(80).to_bytes(10, byteorder='big')
    # Concatenate and interpret as a big-endian 128-bit integer.
    return int.from_bytes(timestamp + randomness, byteorder='big')
5 changes: 3 additions & 2 deletions logfire/_internal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from requests import RequestException, Response

from logfire._internal.stack_info import is_user_code
from logfire._internal.ulid import ulid

if TYPE_CHECKING:
from packaging.version import Version
Expand Down Expand Up @@ -384,7 +385,7 @@ def generate_span_id(self) -> int:
return span_id

def generate_trace_id(self) -> int:
    """Generate a 128-bit trace ID laid out as a ULID (48-bit timestamp + 80-bit randomness).

    Using `ulid(...)` (instead of plain `getrandbits(128)`) makes trace IDs
    sortable by creation time while keeping 80 bits of randomness.
    Retries in the (practically unreachable) case that the generated value
    equals `trace_api.INVALID_TRACE_ID`.
    """
    # NOTE(review): this reconstructs the post-merge version of the method —
    # the source text interleaved the old `getrandbits(128)` and new `ulid(...)`
    # diff lines, which is not valid Python.
    trace_id = ulid(self.random)
    while trace_id == trace_api.INVALID_TRACE_ID:  # pragma: no cover
        trace_id = ulid(self.random)
    return trace_id
alexmojaki marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from opentelemetry import trace
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.id_generator import IdGenerator

import logfire
from logfire import configure
Expand Down Expand Up @@ -55,7 +56,7 @@ def metrics_reader() -> InMemoryMetricReader:
@pytest.fixture
def config_kwargs(
exporter: TestExporter,
id_generator: IncrementalIdGenerator,
id_generator: IdGenerator,
time_generator: TimeGenerator,
) -> dict[str, Any]:
"""
Expand Down
24 changes: 22 additions & 2 deletions tests/test_logfire.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import re
import sys
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from contextlib import asynccontextmanager, contextmanager
from contextlib import ExitStack, asynccontextmanager, contextmanager
from dataclasses import dataclass
from functools import partial
from logging import getLogger
Expand Down Expand Up @@ -38,7 +38,7 @@
from logfire._internal.formatter import FormattingFailedWarning, InspectArgumentsFailedWarning
from logfire._internal.main import NoopSpan
from logfire._internal.tracer import record_exception
from logfire._internal.utils import is_instrumentation_suppressed
from logfire._internal.utils import SeededRandomIdGenerator, is_instrumentation_suppressed
from logfire.integrations.logging import LogfireLoggingHandler
from logfire.testing import TestExporter
from tests.test_metrics import get_collected_metrics
Expand Down Expand Up @@ -3213,3 +3213,23 @@ def test_exit_ended_span(exporter: TestExporter):
}
]
)


@pytest.mark.parametrize(
    'id_generator',
    [SeededRandomIdGenerator()],
)
def test_default_id_generator(exporter: TestExporter) -> None:
    """Test that SeededRandomIdGenerator generates trace and span ids without errors."""
    # Open many nested spans; ExitStack closes them all (innermost first) on exit.
    with ExitStack() as open_spans:
        for i in range(1024):
            for j in range(8):
                open_spans.enter_context(logfire.span(f'span {i}:{j}'))

    spans = exporter.exported_spans_as_dict()

    # Trace ids are ULIDs (timestamp-prefixed), so sorting spans by trace id
    # must yield the same order as sorting them by start timestamp.
    by_trace_id = [span['name'] for span in sorted(spans, key=lambda span: span['context']['trace_id'])]
    by_start_time = [span['name'] for span in sorted(spans, key=lambda span: span['start_time'])]
    assert by_trace_id == by_start_time
2 changes: 1 addition & 1 deletion tests/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def test_sample_rate_config(exporter: TestExporter, config_kwargs: dict[str, Any

# 1000 iterations of 2 spans -> 2000 spans
# 30% sampling -> 600 spans (approximately)
assert len(exporter.exported_spans_as_dict()) == 634
assert len(exporter.exported_spans_as_dict()) == 588, len(exporter.exported_spans_as_dict())
alexmojaki marked this conversation as resolved.
Show resolved Hide resolved


@pytest.mark.skipif(
Expand Down
12 changes: 6 additions & 6 deletions tests/test_tail_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def test_background_rate(config_kwargs: dict[str, Any], exporter: TestExporter):
# None of them meet the tail sampling criteria.
for _ in range(1000):
logfire.info('info')
assert len(exporter.exported_spans) == 100 + 321
assert len(exporter.exported_spans) == 100 + 299, len(exporter.exported_spans)
alexmojaki marked this conversation as resolved.
Show resolved Hide resolved


class TestSampler(Sampler):
Expand Down Expand Up @@ -406,7 +406,7 @@ def test_raw_head_sampler_with_tail_sampling(config_kwargs: dict[str, Any], expo
# None of them meet the tail sampling criteria.
for _ in range(1000):
logfire.info('info')
assert len(exporter.exported_spans) == 100 + 315
assert len(exporter.exported_spans) == 100 + 293, len(exporter.exported_spans)


def test_custom_head_and_tail(config_kwargs: dict[str, Any], exporter: TestExporter):
Expand All @@ -432,20 +432,20 @@ def get_tail_sample_rate(span_info: TailSamplingSpanInfo) -> float:

for _ in range(1000):
logfire.warn('warn')
assert span_counts == snapshot({'start': 720, 'end': 617})
assert len(exporter.exported_spans) == snapshot(103)
assert span_counts == snapshot({'start': 719, 'end': 611})
assert len(exporter.exported_spans) == snapshot(108)
assert span_counts['end'] + len(exporter.exported_spans) == span_counts['start']

exporter.clear()
for _ in range(1000):
with logfire.span('span'):
pass
assert len(exporter.exported_spans_as_dict()) == snapshot(505)
assert len(exporter.exported_spans_as_dict()) == snapshot(511)

exporter.clear()
for _ in range(1000):
logfire.error('error')
assert len(exporter.exported_spans) == snapshot(282)
assert len(exporter.exported_spans) == snapshot(298)


def test_span_levels():
Expand Down
Loading