diff --git a/Makefile b/Makefile index d3d058c7..27580837 100644 --- a/Makefile +++ b/Makefile @@ -26,15 +26,18 @@ removecert: test: certgen runtest removecert testfast: certgen runtestfast removecert -examples: - @cd examples && go test -race -timeout 1m -short ./... && echo "All examples passed." - vet: @go vet -composites=false ./... && echo "Go vet analysis passed." clean: @go clean -testcache -sure: clean test examples vet +lint: + golangci-lint run ./... + +license-check: + ./license_check.sh + +sure: clean test vet lint license-check -.PHONY: certgen test removecert examples vet clean +.PHONY: certgen test removecert vet clean lint license-check diff --git a/instrumentation/net/http/swohttp/handler_test.go b/instrumentation/net/http/swohttp/handler_test.go index 2c7c70a2..144064d2 100644 --- a/instrumentation/net/http/swohttp/handler_test.go +++ b/instrumentation/net/http/swohttp/handler_test.go @@ -15,7 +15,6 @@ package swohttp import ( - "github.com/solarwinds/apm-go/internal/reporter" "github.com/solarwinds/apm-go/swo" "github.com/stretchr/testify/require" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" @@ -32,12 +31,10 @@ const ( XTraceOptionsResponse = "X-Trace-Options-Response" ) -var xtraceRegexp = regexp.MustCompile(`\A00-[[:xdigit:]]{32}-[[:xdigit:]]{16}-01\z`) +// TODO future: we should figure out a way to mock oboe so we can test for sampled == 01 +var xtraceRegexp = regexp.MustCompile(`\A00-[[:xdigit:]]{32}-[[:xdigit:]]{16}-00\z`) func TestHandlerNoXOptsResponse(t *testing.T) { - r := reporter.SetTestReporter(reporter.TestReporterSettingType(reporter.DefaultST)) - defer r.Close(0) - cb, err := swo.Start() require.NoError(t, err) defer cb() @@ -49,9 +46,6 @@ func TestHandlerNoXOptsResponse(t *testing.T) { } func TestHandlerWithXOptsResponse(t *testing.T) { - r := reporter.SetTestReporter(reporter.TestReporterSettingType(reporter.DefaultST)) - defer r.Close(0) - cb, err := swo.Start() require.NoError(t, err) defer cb() @@ -61,7 +55,8 @@ func TestHandlerWithXOptsResponse(t *testing.T) { require.Equal(t, http.StatusOK, resp.StatusCode) require.Equal(t, XTrace+","+XTraceOptionsResponse, resp.Header.Get(ACEHdr)) require.Regexp(t, xtraceRegexp, resp.Header.Get(XTrace)) - require.Regexp(t, "trigger-trace=ok", resp.Header.Get(XTraceOptionsResponse)) + // TODO: it'd be nice to have this actually receive settings from oboe and test for `trigger-trace=ok` + require.Regexp(t, "trigger-trace=settings-not-available", resp.Header.Get(XTraceOptionsResponse)) } func doRequest(t *testing.T, xtOpts string) *http.Response { diff --git a/internal/constants/constants.go b/internal/constants/constants.go index 96249ca7..27c22c04 100644 --- a/internal/constants/constants.go +++ b/internal/constants/constants.go @@ -32,3 +32,17 @@ const ( InfoLabel = "info" UnknownLabel = "UNKNOWN" ) + +const ( + KvSignatureKey = "SignatureKey" + KvBucketCapacity = "BucketCapacity" + KvBucketRate = "BucketRate" + KvTriggerTraceRelaxedBucketCapacity = "TriggerRelaxedBucketCapacity" + KvTriggerTraceRelaxedBucketRate = "TriggerRelaxedBucketRate" + KvTriggerTraceStrictBucketCapacity = "TriggerStrictBucketCapacity" + KvTriggerTraceStrictBucketRate = "TriggerStrictBucketRate" + KvMetricsFlushInterval = "MetricsFlushInterval" + KvEventsFlushInterval = "EventsFlushInterval" + KvMaxTransactions = "MaxTransactions" + KvMaxCustomMetrics = "MaxCustomMetrics" +) diff --git a/internal/exporter/exporter.go b/internal/exporter/exporter.go index 5956a99f..f950ff8d 100644 --- a/internal/exporter/exporter.go +++ b/internal/exporter/exporter.go @@ -30,9 +30,10 @@ import ( ) type exporter struct { + r reporter.Reporter } -func exportSpan(_ context.Context, s sdktrace.ReadOnlySpan) { +func (e *exporter) exportSpan(_ context.Context, s sdktrace.ReadOnlySpan) { evt := reporter.CreateEntryEvent(s.SpanContext(), s.StartTime(), s.Parent()) layer := fmt.Sprintf("%s:%s", strings.ToUpper(s.SpanKind().String()), s.Name()) evt.SetLayer(layer) @@ -68,7 +69,7 @@ func exportSpan(_ context.Context, s sdktrace.ReadOnlySpan) { evt.AddKVs(s.Attributes()) - if err := reporter.ReportEvent(evt); err != nil { + if err := e.r.ReportEvent(evt); err != nil { log.Warning("cannot send entry event", err) return } @@ -88,7 +89,7 @@ func exportSpan(_ context.Context, s sdktrace.ReadOnlySpan) { } } evt.AddKVs(otEvt.Attributes) - if err := reporter.ReportEvent(evt); err != nil { + if err := e.r.ReportEvent(evt); err != nil { log.Warningf("could not send %s event: %s", s.Name(), err) continue } @@ -96,7 +97,7 @@ func exportSpan(_ context.Context, s sdktrace.ReadOnlySpan) { evt = reporter.CreateExitEvent(s.SpanContext(), s.EndTime()) evt.AddKV(attribute.String(constants.Layer, layer)) - if err := reporter.ReportEvent(evt); err != nil { + if err := e.r.ReportEvent(evt); err != nil { log.Warning("cannot send exit event", err) return } @@ -104,17 +105,19 @@ func exportSpan(_ context.Context, s sdktrace.ReadOnlySpan) { } func (e *exporter) ExportSpans(ctx context.Context, spans []sdktrace.ReadOnlySpan) error { - reporter.WaitForReady(ctx) + e.r.WaitForReady(ctx) for _, s := range spans { - exportSpan(ctx, s) + e.exportSpan(ctx, s) } return nil } func (e *exporter) Shutdown(ctx context.Context) error { - return reporter.Shutdown(ctx) + return e.r.Shutdown(ctx) } -func NewExporter() sdktrace.SpanExporter { - return &exporter{} +func NewExporter(r reporter.Reporter) sdktrace.SpanExporter { + return &exporter{ + r: r, + } } diff --git a/internal/exporter/exporter_test.go b/internal/exporter/exporter_test.go index e4093da1..84c24e87 100644 --- a/internal/exporter/exporter_test.go +++ b/internal/exporter/exporter_test.go @@ -34,8 +34,7 @@ import ( func TestExportSpan(t *testing.T) { r := &capturingReporter{} - defer reporter.SetGlobalReporter(r)() - tr, cb := testutils.TracerWithExporter(NewExporter()) + tr, cb := testutils.TracerWithExporter(NewExporter(r)) defer cb() ctx := context.Background() @@ -156,8 +155,7 @@ func TestExportSpan(t *testing.T) { func TestExportSpanBacktrace(t *testing.T) { r := &capturingReporter{} - defer reporter.SetGlobalReporter(r)() - tr, cb := testutils.TracerWithExporter(NewExporter()) + tr, cb := testutils.TracerWithExporter(NewExporter(r)) defer cb() ctx := context.Background() @@ -199,8 +197,7 @@ func getBsonFromEvent(t *testing.T, event reporter.Event) map[string]interface{} func TestExportSpanStatusCodes(t *testing.T) { r := &capturingReporter{} - defer reporter.SetGlobalReporter(r)() - tr, cb := testutils.TracerWithExporter(NewExporter()) + tr, cb := testutils.TracerWithExporter(NewExporter(r)) defer cb() permutations := []struct { diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index d7aa06cb..b46b4d4a 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -16,15 +16,11 @@ package metrics import ( "github.com/solarwinds/apm-go/internal/bson" + "github.com/solarwinds/apm-go/internal/config" "github.com/solarwinds/apm-go/internal/hdrhist" "github.com/solarwinds/apm-go/internal/host" "github.com/solarwinds/apm-go/internal/log" - "github.com/solarwinds/apm-go/internal/swotel/semconv" "github.com/solarwinds/apm-go/internal/utils" - "go.opentelemetry.io/otel/codes" - sdktrace "go.opentelemetry.io/otel/sdk/trace" - "go.opentelemetry.io/otel/trace" - "os" "runtime" "sort" "strconv" @@ -52,7 +48,6 @@ const ( // Special transaction names const ( - CustomTransactionNamePrefix = "custom" OtherTransactionName = "other" MetricIDSeparator = "&" TagsKVSeparator = ":" @@ -92,18 +87,9 @@ var ( ErrMetricsWithNonPositiveCount = errors.New("metrics with non-positive count") ) -// Package-level state - -var ApmMetrics = NewMeasurements(false, metricsTransactionsMaxDefault) -var CustomMetrics = NewMeasurements(true, metricsCustomMetricsMaxDefault) -var apmHistograms = &histograms{ - histograms: make(map[string]*histogram), - precision: metricsHistPrecisionDefault, -} - // SpanMessage defines a span message type SpanMessage interface { - Process(m *Measurements) + Process(m *measurements) } // BaseSpanMessage is the base span message with properties found in all types of span messages @@ -122,8 +108,8 @@ type HTTPSpanMessage struct { Method string // HTTP method (e.g. GET, POST, ...) } -// Measurement is a single measurement for reporting -type Measurement struct { +// measurement is a single measurement for reporting +type measurement struct { Name string // the name of the measurement (e.g. TransactionResponseTime) Tags map[string]string // map of KVs. It may be nil Count int // count of this measurement @@ -131,21 +117,30 @@ type Measurement struct { ReportSum bool // include the sum in the report? } -// Measurements are a collection of mutex-protected measurements -type Measurements struct { - m map[string]*Measurement - transMap *TransMap - IsCustom bool - FlushInterval int32 +// measurements are a collection of mutex-protected measurements +type measurements struct { + m map[string]*measurement + txnMap *txnMap + isCustom bool + flushInterval int32 sync.Mutex // protect access to this collection } -func NewMeasurements(isCustom bool, maxCount int32) *Measurements { - return &Measurements{ - m: make(map[string]*Measurement), - transMap: NewTransMap(maxCount), - IsCustom: isCustom, - FlushInterval: ReportingIntervalDefault, +func newMeasurements(isCustom bool, maxCount int32) *measurements { + return &measurements{ + m: make(map[string]*measurement), + txnMap: newTxnMap(maxCount), + isCustom: isCustom, + flushInterval: ReportingIntervalDefault, + } +} + +func getPrecision() int { + if precision := config.GetPrecision(); precision >= 0 && precision <= 5 { + return precision + } else { + log.Errorf("value of config.Precision or SW_APM_HISTOGRAM_PRECISION must be between 0 and 5: %v", precision) + return metricsHistPrecisionDefault } } @@ -242,121 +237,6 @@ func (c *RateCounts) Through() int64 { return atomic.LoadInt64(&c.through) } -// TransMap records the received transaction names in a metrics report cycle. It will refuse -// new transaction names if reaching the capacity. -type TransMap struct { - // The map to store transaction names - transactionNames map[string]struct{} - // The maximum capacity of the transaction map. The value is got from server settings which - // is updated periodically. - // The default value metricsTransactionsMaxDefault is used when a new TransMap - // is initialized. - currCap int32 - // The maximum capacity which is set by the server settings. This update usually happens in - // between two metrics reporting cycles. To avoid affecting the map capacity of the current reporting - // cycle, the new capacity got from the server is stored in nextCap and will only be flushed to currCap - // when the Reset() is called. - nextCap int32 - // Whether there is an overflow. Overflow means the user tried to store more transaction names - // than the capacity defined by settings. - // This flag is cleared in every metrics cycle. - overflow bool - // The mutex to protect this whole struct. If the performance is a concern we should use separate - // mutexes for each of the fields. But for now it seems not necessary. - mutex sync.Mutex -} - -// NewTransMap initializes a new TransMap struct -func NewTransMap(cap int32) *TransMap { - return &TransMap{ - transactionNames: make(map[string]struct{}), - currCap: cap, - nextCap: cap, - overflow: false, - } -} - -// SetCap sets the capacity of the transaction map -func (t *TransMap) SetCap(cap int32) { - t.mutex.Lock() - defer t.mutex.Unlock() - t.nextCap = cap -} - -// Cap returns the current capacity -func (t *TransMap) Cap() int32 { - t.mutex.Lock() - defer t.mutex.Unlock() - return t.currCap -} - -// Reset resets the transaction map to a initialized state. The new capacity got from the -// server will be used in next metrics reporting cycle after reset. -func (t *TransMap) Reset() { - t.mutex.Lock() - defer t.mutex.Unlock() - t.transactionNames = make(map[string]struct{}) - t.currCap = t.nextCap - t.overflow = false -} - -// Clone returns a shallow copy -func (t *TransMap) Clone() *TransMap { - return &TransMap{ - transactionNames: t.transactionNames, - currCap: t.currCap, - nextCap: t.nextCap, - overflow: t.overflow, - } -} - -// IsWithinLimit checks if the transaction name is stored in the TransMap. It will store this new -// transaction name and return true if not stored before and the map isn't full, or return false -// otherwise. -func (t *TransMap) IsWithinLimit(name string) bool { - t.mutex.Lock() - defer t.mutex.Unlock() - - if _, ok := t.transactionNames[name]; !ok { - // only record if we haven't reached the limits yet - if int32(len(t.transactionNames)) < t.currCap { - t.transactionNames[name] = struct{}{} - return true - } - t.overflow = true - return false - } - - return true -} - -// Overflow returns true is the transaction map is overflow (reached its limit) -// or false if otherwise. -func (t *TransMap) Overflow() bool { - t.mutex.Lock() - defer t.mutex.Unlock() - return t.overflow -} - -// TODO: use config package, and add validator (0-5) -// initialize values according to env variables -func init() { - pEnv := "SW_APM_HISTOGRAM_PRECISION" - precision := os.Getenv(pEnv) - if precision != "" { - log.Infof("Non-default SW_APM_HISTOGRAM_PRECISION: %s", precision) - if p, err := strconv.Atoi(precision); err == nil { - if p >= 0 && p <= 5 { - apmHistograms.precision = p - } else { - log.Errorf("value of %v must be between 0 and 5: %v", pEnv, precision) - } - } else { - log.Errorf("value of %v is not an int: %v", pEnv, precision) - } - } -} - // addRequestCounters add various request-related counters to the metrics message buffer. func addRequestCounters(bbuf *bson.Buffer, index *int, rcs map[string]*RateCounts) { var requested, traced, limited, ttTraced int64 @@ -386,77 +266,40 @@ func addRequestCounters(bbuf *bson.Buffer, index *int, rcs map[string]*RateCount addMetricsValue(bbuf, index, TriggeredTraceCount, ttTraced) } -// BuildMessage creates and encodes the custom metrics message. -func BuildMessage(m *Measurements, serverless bool) []byte { - if m == nil { - return nil - } - - bbuf := bson.NewBuffer() - if m.IsCustom { - bbuf.AppendBool("IsCustom", m.IsCustom) - } - - if !serverless { - appendHostId(bbuf) - bbuf.AppendInt32("MetricsFlushInterval", m.FlushInterval) - } - - bbuf.AppendInt64("Timestamp_u", time.Now().UnixNano()/1000) - - start := bbuf.AppendStartArray("measurements") - index := 0 - - for _, measurement := range m.m { - addMeasurementToBSON(bbuf, &index, measurement) - } - - bbuf.AppendFinishObject(start) - - bbuf.Finish() - return bbuf.GetBuf() -} - // SetCap sets the maximum number of distinct metrics allowed. -func (m *Measurements) SetCap(cap int32) { - m.transMap.SetCap(cap) +func (m *measurements) SetCap(cap int32) { + m.txnMap.SetCap(cap) } // Cap returns the maximum number of distinct metrics allowed. -func (m *Measurements) Cap() int32 { - return m.transMap.Cap() +func (m *measurements) Cap() int32 { + return m.txnMap.cap() } // CopyAndReset resets the custom metrics and return a copy of the old one. -func (m *Measurements) CopyAndReset(flushInterval int32) *Measurements { +func (m *measurements) CopyAndReset(flushInterval int32) *measurements { m.Lock() defer m.Unlock() - if len(m.m) == 0 { - m.FlushInterval = flushInterval - m.transMap.Reset() - return nil - } - clone := m.Clone() - m.m = make(map[string]*Measurement) - m.transMap.Reset() - m.FlushInterval = flushInterval + m.m = make(map[string]*measurement) + m.txnMap.reset() + m.flushInterval = flushInterval return clone } // Clone returns a shallow copy -func (m *Measurements) Clone() *Measurements { - return &Measurements{ +func (m *measurements) Clone() *measurements { + return &measurements{ m: m.m, - transMap: m.transMap.Clone(), - IsCustom: m.IsCustom, - FlushInterval: m.FlushInterval, + txnMap: m.txnMap.clone(), + isCustom: m.isCustom, + flushInterval: m.flushInterval, } } // Summary submits the summary measurement to the reporter. -func (m *Measurements) Summary(name string, value float64, opts MetricOptions) error { +func (m *measurements) Summary(name string, value float64, opts MetricOptions) error { if err := opts.validate(); err != nil { return err } @@ -464,7 +307,7 @@ func (m *Measurements) Summary(name string, value float64, opts MetricOptions) e } // Increment submits the incremental measurement to the reporter. -func (m *Measurements) Increment(name string, opts MetricOptions) error { +func (m *measurements) Increment(name string, opts MetricOptions) error { if err := opts.validate(); err != nil { return err } @@ -522,78 +365,6 @@ func addRuntimeMetrics(bbuf *bson.Buffer, index *int) { addMetricsValue(bbuf, index, "trace.go.memory.StackSys", int64(mem.StackSys)) } -// BuildBuiltinMetricsMessage generates a metrics message in BSON format with all the currently available values -// metricsFlushInterval current metrics flush interval -// -// return metrics message in BSON format -func BuildBuiltinMetricsMessage(m *Measurements, qs *EventQueueStats, - rcs map[string]*RateCounts, runtimeMetrics bool) []byte { - if m == nil { - return nil - } - - bbuf := bson.NewBuffer() - - appendHostId(bbuf) - bbuf.AppendInt32("MetricsFlushInterval", m.FlushInterval) - - bbuf.AppendInt64("Timestamp_u", int64(time.Now().UnixNano()/1000)) - - // measurements - // ========================================== - start := bbuf.AppendStartArray("measurements") - index := 0 - - // request counters - addRequestCounters(bbuf, &index, rcs) - - // Queue states - if qs != nil { - addMetricsValue(bbuf, &index, "NumSent", qs.numSent) - addMetricsValue(bbuf, &index, "NumOverflowed", qs.numOverflowed) - addMetricsValue(bbuf, &index, "NumFailed", qs.numFailed) - addMetricsValue(bbuf, &index, "TotalEvents", qs.totalEvents) - addMetricsValue(bbuf, &index, "QueueLargest", qs.queueLargest) - } - - addHostMetrics(bbuf, &index) - - if runtimeMetrics { - // runtime stats - addRuntimeMetrics(bbuf, &index) - } - - for _, measurement := range m.m { - addMeasurementToBSON(bbuf, &index, measurement) - } - - bbuf.AppendFinishObject(start) - // ========================================== - - // histograms - // ========================================== - start = bbuf.AppendStartArray("histograms") - index = 0 - - apmHistograms.lock.Lock() - - for _, h := range apmHistograms.histograms { - addHistogramToBSON(bbuf, &index, h) - } - apmHistograms.histograms = make(map[string]*histogram) // clear histograms - - apmHistograms.lock.Unlock() - bbuf.AppendFinishObject(start) - // ========================================== - - if m.transMap.Overflow() { - bbuf.AppendBool("TransactionNameOverflow", true) - } - - bbuf.Finish() - return bbuf.GetBuf() -} - // append host ID to a BSON buffer // bbuf the BSON buffer to append the KVs to func appendHostId(bbuf *bson.Buffer) { @@ -718,7 +489,7 @@ func (s *HTTPSpanMessage) appOpticsTagsList() []map[string]string { // processes HTTP measurements, record one for primary key, and one for each secondary key // transactionName the transaction name to be used for these measurements func (s *HTTPSpanMessage) processMeasurements(metricName string, tagsList []map[string]string, - m *Measurements) error { + m *measurements) error { if tagsList == nil { return errors.New("tagsList must not be nil") } @@ -726,7 +497,7 @@ func (s *HTTPSpanMessage) processMeasurements(metricName string, tagsList []map[ return m.record(metricName, tagsList, duration, 1, true) } -func (m *Measurements) recordWithSoloTags(name string, tags map[string]string, +func (m *measurements) recordWithSoloTags(name string, tags map[string]string, value float64, count int, reportValue bool) error { return m.record(name, []map[string]string{tags}, value, count, reportValue) } @@ -737,7 +508,7 @@ func (m *Measurements) recordWithSoloTags(name string, tags map[string]string, // value measurement value // count measurement count // reportValue should the sum of all values be reported? -func (m *Measurements) record(name string, tagsList []map[string]string, +func (m *measurements) record(name string, tagsList []map[string]string, value float64, count int, reportValue bool) error { if len(tagsList) == 0 { return nil @@ -765,19 +536,19 @@ func (m *Measurements) record(name string, tagsList []map[string]string, idTagsMap[id] = tags } - var me *Measurement + var me *measurement var ok bool // create a new measurement if it doesn't exist - // the lock protects both Measurements and Measurement + // the lock protects both measurements and measurement m.Lock() defer m.Unlock() for id, tags := range idTagsMap { if me, ok = m.m[id]; !ok { // N.B. This overflow logic is a bit cumbersome and is ripe for a rewrite if strings.Contains(id, otherTagExistsVal) || - m.transMap.IsWithinLimit(id) { - me = &Measurement{ + m.txnMap.isWithinLimit(id) { + me = &measurement{ Name: name, Tags: tags, ReportSum: reportValue, @@ -840,7 +611,7 @@ func (hi *histograms) recordHistogram(name string, duration time.Duration) { // bbuf the BSON buffer to append the metric to // index a running integer (0,1,2,...) which is needed for BSON arrays // m measurement to be added -func addMeasurementToBSON(bbuf *bson.Buffer, index *int, m *Measurement) { +func addMeasurementToBSON(bbuf *bson.Buffer, index *int, m *measurement) { start := bbuf.AppendStartObject(strconv.Itoa(*index)) bbuf.AppendString("name", m.Name) @@ -929,141 +700,3 @@ func (s *EventQueueStats) CopyAndReset() *EventQueueStats { return c } - -func BuildServerlessMessage(span HTTPSpanMessage, rcs map[string]*RateCounts, rate int, source int) []byte { - bbuf := bson.NewBuffer() - - bbuf.AppendInt64("Duration", int64(span.Duration/time.Microsecond)) - bbuf.AppendBool("HasError", span.HasError) - bbuf.AppendInt("SampleRate", rate) - bbuf.AppendInt("SampleSource", source) - bbuf.AppendInt64("Timestamp_u", time.Now().UnixNano()/1000) - bbuf.AppendString("TransactionName", span.Transaction) - - if span.Method != "" { - bbuf.AppendString("Method", span.Method) - } - if span.Status != 0 { - bbuf.AppendInt("Status", span.Status) - } - - // add request counters - start := bbuf.AppendStartArray("TraceDecision") - - var sampled, limited, traced, through, ttTraced int64 - - for _, rc := range rcs { - sampled += rc.sampled - limited += rc.limited - traced += rc.traced - through += rc.through - } - - if relaxed, ok := rcs[RCRelaxedTriggerTrace]; ok { - ttTraced += relaxed.Traced() - } - if strict, ok := rcs[RCStrictTriggerTrace]; ok { - ttTraced += strict.Traced() - } - - var i = 0 - if sampled != 0 { - bbuf.AppendString(strconv.Itoa(i), "Sample") - i++ - } - if traced != 0 { - bbuf.AppendString(strconv.Itoa(i), "Trace") - i++ - } - if limited != 0 { - bbuf.AppendString(strconv.Itoa(i), "TokenBucketExhaustion") - i++ - } - if through != 0 { - bbuf.AppendString(strconv.Itoa(i), "ThroughTrace") - i++ - } - if ttTraced != 0 { - bbuf.AppendString(strconv.Itoa(i), "Triggered") - } - - bbuf.AppendFinishObject(start) - - bbuf.Finish() - return bbuf.GetBuf() -} - -// -- otel -- - -func RecordSpan(span sdktrace.ReadOnlySpan, isAppoptics bool) { - method := "" - status := int64(0) - isError := span.Status().Code == codes.Error - attrs := span.Attributes() - swoTags := make(map[string]string) - httpRoute := "" - for _, attr := range attrs { - if attr.Key == semconv.HTTPMethodKey { - method = attr.Value.AsString() - } else if attr.Key == semconv.HTTPStatusCodeKey { - status = attr.Value.AsInt64() - } else if attr.Key == semconv.HTTPRouteKey { - httpRoute = attr.Value.AsString() - } - } - isHttp := span.SpanKind() == trace.SpanKindServer && method != "" - - if isHttp { - if status > 0 { - swoTags["http.status_code"] = strconv.FormatInt(status, 10) - if !isError && status/100 == 5 { - isError = true - } - } - swoTags["http.method"] = method - } - - swoTags["sw.is_error"] = strconv.FormatBool(isError) - txnName := utils.GetTransactionName(span) - swoTags["sw.transaction"] = txnName - - duration := span.EndTime().Sub(span.StartTime()) - s := &HTTPSpanMessage{ - BaseSpanMessage: BaseSpanMessage{Duration: duration, HasError: isError}, - Transaction: txnName, - Path: httpRoute, - Status: int(status), - Host: "", // intentionally not set - Method: method, - } - - var tagsList []map[string]string - var metricName string - if !isAppoptics { - tagsList = []map[string]string{swoTags} - metricName = responseTime - } else { - tagsList = s.appOpticsTagsList() - metricName = transactionResponseTime - } - - apmHistograms.recordHistogram("", duration) - if err := s.processMeasurements(metricName, tagsList, ApmMetrics); err == ErrExceedsMetricsCountLimit { - if isAppoptics { - s.Transaction = OtherTransactionName - tagsList = s.appOpticsTagsList() - } else { - tagsList[0]["sw.transaction"] = OtherTransactionName - } - err := s.processMeasurements(metricName, tagsList, ApmMetrics) - // This should never happen since the only failure case _should_ be ErrExceedsMetricsCountLimit - // which is handled above, and the reason we retry here. - if err != nil { - log.Errorf("Failed to process messages", err) - } - } else { - // We didn't hit ErrExceedsMetricsCountLimit - apmHistograms.recordHistogram(txnName, duration) - } - -} diff --git a/internal/metrics/metrics_test.go b/internal/metrics/metrics_test.go index 8e5492b1..5ea6b4fc 100644 --- a/internal/metrics/metrics_test.go +++ b/internal/metrics/metrics_test.go @@ -130,10 +130,10 @@ func TestAppendMACAddresses(t *testing.T) { func TestAddMetricsValue(t *testing.T) { index := 0 bbuf := bson.NewBuffer() - addMetricsValue(bbuf, &index, "name1", int(111)) + addMetricsValue(bbuf, &index, "name1", 111) addMetricsValue(bbuf, &index, "name2", int64(222)) addMetricsValue(bbuf, &index, "name3", float32(333.33)) - addMetricsValue(bbuf, &index, "name4", float64(444.44)) + addMetricsValue(bbuf, &index, "name4", 444.44) addMetricsValue(bbuf, &index, "name5", "hello") bbuf.Finish() m, err := bsonToMap(bbuf) @@ -142,7 +142,7 @@ func TestAddMetricsValue(t *testing.T) { assert.NotZero(t, m["0"]) m2 := m["0"].(map[string]interface{}) assert.Equal(t, "name1", m2["name"]) - assert.Equal(t, int(111), m2["value"]) + assert.Equal(t, 111, m2["value"]) assert.NotZero(t, m["1"]) m2 = m["1"].(map[string]interface{}) @@ -153,12 +153,12 @@ func TestAddMetricsValue(t *testing.T) { m2 = m["2"].(map[string]interface{}) assert.Equal(t, "name3", m2["name"]) f64 := m2["value"].(float64) - assert.Equal(t, float64(333.33), round(f64, .5, 2)) + assert.Equal(t, 333.33, round(f64, .5, 2)) assert.NotZero(t, m["3"]) m2 = m["3"].(map[string]interface{}) assert.Equal(t, "name4", m2["name"]) - assert.Equal(t, float64(444.44), m2["value"]) + assert.Equal(t, 444.44, m2["value"]) assert.NotZero(t, m["4"]) m2 = m["4"].(map[string]interface{}) @@ -207,24 +207,8 @@ func TestGetTransactionFromURL(t *testing.T) { } } -func TestTransMap(t *testing.T) { - m := NewTransMap(3) - assert.EqualValues(t, 3, m.Cap()) - assert.True(t, m.IsWithinLimit("t1")) - assert.True(t, m.IsWithinLimit("t2")) - assert.True(t, m.IsWithinLimit("t3")) - assert.False(t, m.IsWithinLimit("t4")) - assert.True(t, m.IsWithinLimit("t2")) - assert.True(t, m.Overflow()) - - m.SetCap(4) - m.Reset() - assert.EqualValues(t, 4, m.Cap()) - assert.False(t, m.Overflow()) -} - func TestRecordMeasurement(t *testing.T) { - var me = NewMeasurements(false, 100) + var me = newMeasurements(false, 100) t1 := make(map[string]string) t1["t1"] = "tag1" @@ -294,14 +278,14 @@ func TestAddMeasurementToBSON(t *testing.T) { tags2 := make(map[string]string) tags2[veryLongTagName] = veryLongTagValue - measurement1 := &Measurement{ + measurement1 := &measurement{ Name: "name1", Tags: tags1, Count: 45, Sum: 592.42, ReportSum: false, } - measurement2 := &Measurement{ + measurement2 := &measurement{ Name: "name2", Tags: tags2, Count: 777, @@ -397,8 +381,9 @@ func TestAddHistogramToBSON(t *testing.T) { } func TestGenerateMetricsMessage(t *testing.T) { - testMetrics := NewMeasurements(false, metricsTransactionsMaxDefault) - bbuf := bson.WithBuf(BuildBuiltinMetricsMessage(testMetrics, &EventQueueStats{}, + reg := NewLegacyRegistry().(*registry) + flushInterval := int32(60) + bbuf := bson.WithBuf(reg.BuildBuiltinMetricsMessage(flushInterval, &EventQueueStats{}, map[string]*RateCounts{ // requested, sampled, limited, traced, through RCRegular: {10, 2, 5, 5, 1}, RCRelaxedTriggerTrace: {3, 0, 1, 2, 0}, @@ -442,12 +427,12 @@ func TestGenerateMetricsMessage(t *testing.T) { {"Load1", float64(1)}, {"TotalRAM", int64(1)}, {"FreeRAM", int64(1)}, - {"ProcessRAM", int(1)}, + {"ProcessRAM", 1}, }...) } testCases = append(testCases, []testCase{ // runtime - {"trace.go.runtime.NumGoroutine", int(1)}, + {"trace.go.runtime.NumGoroutine", 1}, {"trace.go.runtime.NumCgoCall", int64(1)}, // gc {"trace.go.gc.LastGC", int64(1)}, @@ -482,14 +467,14 @@ func TestGenerateMetricsMessage(t *testing.T) { assert.Nil(t, m["TransactionNameOverflow"]) - testMetrics = NewMeasurements(false, metricsTransactionsMaxDefault) + reg = NewLegacyRegistry().(*registry) for i := 0; i <= metricsTransactionsMaxDefault; i++ { - if !testMetrics.transMap.IsWithinLimit("Transaction-" + strconv.Itoa(i)) { + if !reg.apmMetrics.txnMap.isWithinLimit("Transaction-" + strconv.Itoa(i)) { break } } - m, err = bsonToMap(bson.WithBuf(BuildBuiltinMetricsMessage(testMetrics, &EventQueueStats{}, + m, err = bsonToMap(bson.WithBuf(reg.BuildBuiltinMetricsMessage(flushInterval, &EventQueueStats{}, map[string]*RateCounts{RCRegular: {}, RCRelaxedTriggerTrace: {}, RCStrictTriggerTrace: {}}, true))) require.NoError(t, err) @@ -545,13 +530,6 @@ func TestRateCounts(t *testing.T) { assert.Equal(t, &RateCounts{}, rc) } -func resetHistograms() { - apmHistograms = &histograms{ - histograms: make(map[string]*histogram), - precision: metricsHistPrecisionDefault, - } -} - func TestRecordSpan(t *testing.T) { tr, teardown := testutils.TracerSetup() defer teardown() @@ -568,11 +546,11 @@ func TestRecordSpan(t *testing.T) { ), ) span.End(trace.WithTimestamp(now.Add(1 * time.Second))) + reg := NewLegacyRegistry().(*registry) - // This affects global state (ApmMetrics below) - RecordSpan(span.(sdktrace.ReadOnlySpan), false) + reg.RecordSpan(span.(sdktrace.ReadOnlySpan), false) - m := ApmMetrics.CopyAndReset(60) + m := reg.apmMetrics.CopyAndReset(60) assert.NotEmpty(t, m.m) v := m.m["ResponseTime&true&http.method:GET&http.status_code:200&sw.is_error:false&sw.transaction:my cool route&"] assert.NotNil(t, v, fmt.Sprintf("Map: %v", m.m)) @@ -588,8 +566,8 @@ func TestRecordSpan(t *testing.T) { v.Tags) assert.Equal(t, responseTime, v.Name) - h := apmHistograms.histograms - resetHistograms() + h := reg.apmHistograms.histograms + reg = NewLegacyRegistry().(*registry) assert.NotEmpty(t, h) globalHisto := h[""] granularHisto := h["my cool route"] @@ -602,9 +580,9 @@ func TestRecordSpan(t *testing.T) { assert.Equal(t, int64(1), granularHisto.hist.TotalCount()) // Now test for AO - RecordSpan(span.(sdktrace.ReadOnlySpan), true) + reg.RecordSpan(span.(sdktrace.ReadOnlySpan), true) - m = ApmMetrics.CopyAndReset(60) + m = reg.apmMetrics.CopyAndReset(60) assert.NotEmpty(t, m.m) k1 := "TransactionResponseTime&true&HttpMethod:GET&TransactionName:my cool route&" k2 := "TransactionResponseTime&true&HttpStatus:200&TransactionName:my cool route&" @@ -629,8 +607,7 @@ func TestRecordSpan(t *testing.T) { m.m[k3].Tags, ) - h = apmHistograms.histograms - resetHistograms() + h = reg.apmHistograms.histograms assert.NotEmpty(t, h) globalHisto = h[""] granularHisto = h["my cool route"] @@ -660,10 +637,10 @@ func TestRecordSpanErrorStatus(t *testing.T) { ) span.End(trace.WithTimestamp(now.Add(1 * time.Second))) - // This affects global state (ApmMetrics below) - RecordSpan(span.(sdktrace.ReadOnlySpan), false) + reg := NewLegacyRegistry().(*registry) + reg.RecordSpan(span.(sdktrace.ReadOnlySpan), false) - m := ApmMetrics.CopyAndReset(60) + m := reg.apmMetrics.CopyAndReset(60) assert.NotEmpty(t, m.m) v := m.m["ResponseTime&true&http.method:GET&http.status_code:500&sw.is_error:true&sw.transaction:my cool route&"] assert.NotNil(t, v, fmt.Sprintf("Map: %v", m.m)) @@ -679,8 +656,8 @@ func TestRecordSpanErrorStatus(t *testing.T) { v.Tags) assert.Equal(t, responseTime, v.Name) - h := apmHistograms.histograms - resetHistograms() + h := reg.apmHistograms.histograms + reg = NewLegacyRegistry().(*registry) assert.NotEmpty(t, h) globalHisto := h[""] granularHisto := h["my cool route"] @@ -693,9 +670,9 @@ func TestRecordSpanErrorStatus(t *testing.T) { assert.Equal(t, int64(1), granularHisto.hist.TotalCount()) // Now test for AO - RecordSpan(span.(sdktrace.ReadOnlySpan), true) + reg.RecordSpan(span.(sdktrace.ReadOnlySpan), true) - m = ApmMetrics.CopyAndReset(60) + m = reg.apmMetrics.CopyAndReset(60) assert.NotEmpty(t, m.m) k1 := "TransactionResponseTime&true&HttpMethod:GET&TransactionName:my cool route&" k2 := "TransactionResponseTime&true&HttpStatus:500&TransactionName:my cool route&" @@ -719,8 +696,7 @@ func TestRecordSpanErrorStatus(t *testing.T) { map[string]string{"TransactionName": "my cool route"}, m.m[k3].Tags, ) - h = apmHistograms.histograms - resetHistograms() + h = reg.apmHistograms.histograms assert.NotEmpty(t, h) globalHisto = h[""] granularHisto = h["my cool route"] @@ -764,16 +740,16 @@ func TestRecordSpanOverflow(t *testing.T) { ) span2.End(trace.WithTimestamp(now.Add(1 * time.Second))) + reg := NewLegacyRegistry().(*registry) // The cap only takes affect after the following reset - ApmMetrics.SetCap(1) - ApmMetrics.CopyAndReset(60) - assert.Equal(t, int32(1), ApmMetrics.Cap()) + reg.SetApmMetricsCap(1) + reg.apmMetrics.CopyAndReset(60) + assert.Equal(t, int32(1), reg.ApmMetricsCap()) - // This affects global state (ApmMetrics below) - RecordSpan(span.(sdktrace.ReadOnlySpan), false) - RecordSpan(span2.(sdktrace.ReadOnlySpan), false) + reg.RecordSpan(span.(sdktrace.ReadOnlySpan), false) + reg.RecordSpan(span2.(sdktrace.ReadOnlySpan), false) - m := ApmMetrics.CopyAndReset(60) + m := reg.apmMetrics.CopyAndReset(60) // We expect to have a record for `my cool route` and one for `other` assert.Equal(t, 2, len(m.m)) v := m.m["ResponseTime&true&http.method:GET&http.status_code:200&sw.is_error:false&sw.transaction:my cool route&"] @@ -804,8 +780,7 @@ func TestRecordSpanOverflow(t *testing.T) { v.Tags) assert.Equal(t, responseTime, v.Name) - h := apmHistograms.histograms - resetHistograms() + h := reg.apmHistograms.histograms assert.NotEmpty(t, h) globalHisto := h[""] granularHisto := h["my cool route"] @@ -851,15 +826,15 @@ func TestRecordSpanOverflowAppoptics(t *testing.T) { // The cap only takes affect after the following reset // Appoptics-style will generate 3 metrics, so we'll set the cap to that here - ApmMetrics.SetCap(3) - ApmMetrics.CopyAndReset(60) - assert.Equal(t, int32(3), ApmMetrics.Cap()) + reg := NewLegacyRegistry().(*registry) + reg.SetApmMetricsCap(3) + reg.apmMetrics.CopyAndReset(60) + assert.Equal(t, int32(3), reg.ApmMetricsCap()) - // This affects global state (ApmMetrics below) - RecordSpan(span.(sdktrace.ReadOnlySpan), true) - RecordSpan(span2.(sdktrace.ReadOnlySpan), true) + reg.RecordSpan(span.(sdktrace.ReadOnlySpan), true) + reg.RecordSpan(span2.(sdktrace.ReadOnlySpan), true) - m := ApmMetrics.CopyAndReset(60) + m := reg.apmMetrics.CopyAndReset(60) // We expect to have 3 records for `my cool route` and 3 for `other` assert.Equal(t, 6, len(m.m)) @@ -878,8 +853,7 @@ func TestRecordSpanOverflowAppoptics(t *testing.T) { assert.Equal(t, 1, v.Count) } - h := apmHistograms.histograms - resetHistograms() + h := reg.apmHistograms.histograms assert.NotEmpty(t, h) globalHisto := h[""] granularHisto := h["my cool route"] diff --git a/internal/metrics/registry.go b/internal/metrics/registry.go new file mode 100644 index 00000000..4d25db5d --- /dev/null +++ b/internal/metrics/registry.go @@ -0,0 +1,253 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics + +import ( + "errors" + "github.com/solarwinds/apm-go/internal/bson" + "github.com/solarwinds/apm-go/internal/log" + "github.com/solarwinds/apm-go/internal/swotel/semconv" + "github.com/solarwinds/apm-go/internal/utils" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/sdk/trace" + trace2 "go.opentelemetry.io/otel/trace" + "strconv" + "time" +) + +type registry struct { + apmHistograms *histograms + apmMetrics *measurements + customMetrics *measurements +} + +var _ LegacyRegistry = ®istry{} + +func NewLegacyRegistry() LegacyRegistry { + return ®istry{ + apmHistograms: &histograms{ + histograms: make(map[string]*histogram), + precision: getPrecision(), + }, + apmMetrics: newMeasurements(false, metricsTransactionsMaxDefault), + customMetrics: newMeasurements(true, metricsCustomMetricsMaxDefault), + } +} + +type MetricRegistry interface { + RecordSpan(span trace.ReadOnlySpan, isAppoptics bool) +} + +type LegacyRegistry interface { + MetricRegistry + BuildBuiltinMetricsMessage(flushInterval int32, qs *EventQueueStats, + rcs map[string]*RateCounts, runtimeMetrics bool) []byte + BuildCustomMetricsMessage(flushInterval int32) []byte + ApmMetricsCap() int32 + SetApmMetricsCap(int32) + CustomMetricsCap() int32 + SetCustomMetricsCap(int32) +} + +// BuildCustomMetricsMessage creates and encodes the custom metrics message. +func (r *registry) BuildCustomMetricsMessage(flushInterval int32) []byte { + m := r.customMetrics.CopyAndReset(flushInterval) + if m == nil { + return nil + } + bbuf := bson.NewBuffer() + if m.isCustom { + bbuf.AppendBool("isCustom", m.isCustom) + } + + appendHostId(bbuf) + bbuf.AppendInt32("MetricsFlushInterval", m.flushInterval) + + bbuf.AppendInt64("Timestamp_u", time.Now().UnixNano()/1000) + + start := bbuf.AppendStartArray("measurements") + index := 0 + + for _, measurement := range m.m { + addMeasurementToBSON(bbuf, &index, measurement) + } + + bbuf.AppendFinishObject(start) + + bbuf.Finish() + return bbuf.GetBuf() +} + +// BuildBuiltinMetricsMessage generates a metrics message in BSON format with all the currently available values +// metricsFlushInterval current metrics flush interval +// +// return metrics message in BSON format +func (r *registry) BuildBuiltinMetricsMessage(flushInterval int32, qs *EventQueueStats, + rcs map[string]*RateCounts, runtimeMetrics bool) []byte { + var m = r.apmMetrics.CopyAndReset(flushInterval) + if m == nil { + return nil + } + + bbuf := bson.NewBuffer() + + appendHostId(bbuf) + bbuf.AppendInt32("MetricsFlushInterval", flushInterval) + + bbuf.AppendInt64("Timestamp_u", time.Now().UnixNano()/1000) + + // measurements + // ========================================== + start := bbuf.AppendStartArray("measurements") + index := 0 + + // request counters + addRequestCounters(bbuf, &index, rcs) + + // Queue states + if qs != nil { + addMetricsValue(bbuf, &index, "NumSent", qs.numSent) + addMetricsValue(bbuf, &index, "NumOverflowed", qs.numOverflowed) + addMetricsValue(bbuf, &index, "NumFailed", qs.numFailed) + addMetricsValue(bbuf, &index, "TotalEvents", qs.totalEvents) + addMetricsValue(bbuf, &index, "QueueLargest", qs.queueLargest) + } + + addHostMetrics(bbuf, &index) + + if runtimeMetrics { + // runtime stats + addRuntimeMetrics(bbuf, &index) + } + + for _, measurement := range m.m { + addMeasurementToBSON(bbuf, &index, measurement) + } + + bbuf.AppendFinishObject(start) + // ========================================== + + // histograms + // ========================================== + start = bbuf.AppendStartArray("histograms") + index = 0 + + r.apmHistograms.lock.Lock() + + for _, h := range r.apmHistograms.histograms { + addHistogramToBSON(bbuf, &index, h) + } + r.apmHistograms.histograms = make(map[string]*histogram) // clear histograms + + r.apmHistograms.lock.Unlock() + bbuf.AppendFinishObject(start) + // ========================================== + + if m.txnMap.isOverflowed() { + bbuf.AppendBool("TransactionNameOverflow", true) + } + + bbuf.Finish() + return bbuf.GetBuf() +} + +func (r *registry) RecordSpan(span trace.ReadOnlySpan, isAppoptics bool) { + method := "" + status := int64(0) + isError := span.Status().Code == codes.Error + attrs := span.Attributes() + swoTags := make(map[string]string) + httpRoute := "" + for _, attr := range attrs { + if attr.Key == semconv.HTTPMethodKey { + method = attr.Value.AsString() + } else if attr.Key == semconv.HTTPStatusCodeKey { + status = attr.Value.AsInt64() + } else if attr.Key == semconv.HTTPRouteKey { + httpRoute = attr.Value.AsString() + } + } + isHttp := span.SpanKind() == trace2.SpanKindServer && method != "" + + if isHttp { + if status > 0 { + swoTags["http.status_code"] = strconv.FormatInt(status, 10) + if !isError && status/100 == 5 { + isError = true + } + } + swoTags["http.method"] = method + } + + swoTags["sw.is_error"] = strconv.FormatBool(isError) + txnName := utils.GetTransactionName(span) + swoTags["sw.transaction"] = txnName + + duration := span.EndTime().Sub(span.StartTime()) + s := &HTTPSpanMessage{ + BaseSpanMessage: BaseSpanMessage{Duration: duration, HasError: isError}, + Transaction: txnName, + Path: httpRoute, + Status: int(status), + Host: "", // intentionally not set + Method: method, + } + + var tagsList []map[string]string + var metricName string + if !isAppoptics { + tagsList = []map[string]string{swoTags} + metricName = responseTime + } else { + tagsList = s.appOpticsTagsList() + metricName = transactionResponseTime + } + + r.apmHistograms.recordHistogram("", duration) + if err := s.processMeasurements(metricName, tagsList, r.apmMetrics); errors.Is(err, ErrExceedsMetricsCountLimit) { + if isAppoptics { + s.Transaction = OtherTransactionName + tagsList = s.appOpticsTagsList() + } else { + tagsList[0]["sw.transaction"] = OtherTransactionName + } + err := s.processMeasurements(metricName, tagsList, r.apmMetrics) + // This should never happen since the only failure case _should_ be ErrExceedsMetricsCountLimit + // which is handled above, and the reason we retry here. + if err != nil { + log.Errorf("Failed to process messages", err) + } + } else { + // We didn't hit ErrExceedsMetricsCountLimit + r.apmHistograms.recordHistogram(txnName, duration) + } + +} + +func (r *registry) ApmMetricsCap() int32 { + return r.apmMetrics.Cap() +} + +func (r *registry) SetApmMetricsCap(cap int32) { + r.apmMetrics.SetCap(cap) +} + +func (r *registry) CustomMetricsCap() int32 { + return r.customMetrics.Cap() +} + +func (r *registry) SetCustomMetricsCap(cap int32) { + r.customMetrics.SetCap(cap) +} diff --git a/internal/metrics/txnmap.go b/internal/metrics/txnmap.go new file mode 100644 index 00000000..df5315fe --- /dev/null +++ b/internal/metrics/txnmap.go @@ -0,0 +1,113 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics + +import "sync" + +// txnMap records the received transaction names in a metrics report cycle. It will refuse +// new transaction names if reaching the capacity. +type txnMap struct { + // The map to store transaction names + transactionNames map[string]struct{} + // The maximum capacity of the transaction map. The value is got from server settings which + // is updated periodically. + // The default value metricsTransactionsMaxDefault is used when a new txnMap + // is initialized. + currCap int32 + // The maximum capacity which is set by the server settings. This update usually happens in + // between two metrics reporting cycles. To avoid affecting the map capacity of the current reporting + // cycle, the new capacity got from the server is stored in nextCap and will only be flushed to currCap + // when the reset() is called. + nextCap int32 + // Whether there is an overflow. isOverflowed means the user tried to store more transaction names + // than the capacity defined by settings. + // This flag is cleared in every metrics cycle. + overflow bool + // The mutex to protect this whole struct. If the performance is a concern we should use separate + // mutexes for each of the fields. But for now it seems not necessary. + mutex sync.Mutex +} + +// newTxnMap initializes a new txnMap struct +func newTxnMap(cap int32) *txnMap { + return &txnMap{ + transactionNames: make(map[string]struct{}), + currCap: cap, + nextCap: cap, + overflow: false, + } +} + +// SetCap sets the capacity of the transaction map +func (t *txnMap) SetCap(cap int32) { + t.mutex.Lock() + defer t.mutex.Unlock() + t.nextCap = cap +} + +// cap returns the current capacity +func (t *txnMap) cap() int32 { + t.mutex.Lock() + defer t.mutex.Unlock() + return t.currCap +} + +// reset resets the transaction map to a initialized state. The new capacity got from the +// server will be used in next metrics reporting cycle after reset. +func (t *txnMap) reset() { + t.mutex.Lock() + defer t.mutex.Unlock() + t.transactionNames = make(map[string]struct{}) + t.currCap = t.nextCap + t.overflow = false +} + +// clone returns a shallow copy +func (t *txnMap) clone() *txnMap { + return &txnMap{ + transactionNames: t.transactionNames, + currCap: t.currCap, + nextCap: t.nextCap, + overflow: t.overflow, + } +} + +// isWithinLimit checks if the transaction name is stored in the txnMap. It will store this new +// transaction name and return true if not stored before and the map isn't full, or return false +// otherwise. +func (t *txnMap) isWithinLimit(name string) bool { + t.mutex.Lock() + defer t.mutex.Unlock() + + if _, ok := t.transactionNames[name]; !ok { + // only record if we haven't reached the limits yet + if int32(len(t.transactionNames)) < t.currCap { + t.transactionNames[name] = struct{}{} + return true + } + t.overflow = true + return false + } + + return true +} + +// isOverflowed returns true is the transaction map is overflow (reached its limit) +// or false if otherwise. +func (t *txnMap) isOverflowed() bool { + t.mutex.Lock() + defer t.mutex.Unlock() + return t.overflow +} diff --git a/internal/metrics/txnmap_test.go b/internal/metrics/txnmap_test.go new file mode 100644 index 00000000..2b848199 --- /dev/null +++ b/internal/metrics/txnmap_test.go @@ -0,0 +1,36 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestTxnMap(t *testing.T) { + m := newTxnMap(3) + assert.EqualValues(t, 3, m.cap()) + assert.True(t, m.isWithinLimit("t1")) + assert.True(t, m.isWithinLimit("t2")) + assert.True(t, m.isWithinLimit("t3")) + assert.False(t, m.isWithinLimit("t4")) + assert.True(t, m.isWithinLimit("t2")) + assert.True(t, m.isOverflowed()) + + m.SetCap(4) + m.reset() + assert.EqualValues(t, 4, m.cap()) + assert.False(t, m.isOverflowed()) +} diff --git a/internal/oboe/oboe.go b/internal/oboe/oboe.go new file mode 100644 index 00000000..038683bc --- /dev/null +++ b/internal/oboe/oboe.go @@ -0,0 +1,340 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package oboe + +import ( + "encoding/binary" + "errors" + "fmt" + "github.com/solarwinds/apm-go/internal/config" + "github.com/solarwinds/apm-go/internal/constants" + "github.com/solarwinds/apm-go/internal/log" + "github.com/solarwinds/apm-go/internal/metrics" + "github.com/solarwinds/apm-go/internal/rand" + "github.com/solarwinds/apm-go/internal/w3cfmt" + "math" + "strings" + "sync" + "time" +) + +const ( + maxSamplingRate = config.MaxSampleRate +) + +// SampleSource enums used by sampling and tracing settings +type SampleSource int + +// source of the sample value +const ( + SampleSourceUnset SampleSource = iota - 1 + SampleSourceNone + SampleSourceFile + SampleSourceDefault + SampleSourceLayer +) + +type Oboe interface { + UpdateSetting(sType int32, layer string, flags []byte, value int64, ttl int64, args map[string][]byte) + CheckSettingsTimeout() + GetSetting() (*settings, bool) + RemoveSetting() + HasDefaultSetting() bool + SampleRequest(continued bool, url string, triggerTrace TriggerTraceMode, swState w3cfmt.SwTraceState) SampleDecision + FlushRateCounts() map[string]*metrics.RateCounts + GetTriggerTraceToken() ([]byte, error) +} + +func NewOboe() Oboe { + return &oboe{ + settings: make(map[settingKey]*settings), + } +} + +type oboe struct { + sync.RWMutex + settings map[settingKey]*settings +} + +var _ Oboe = &oboe{} + +// FlushRateCounts collects the request counters values by categories. +func (o *oboe) FlushRateCounts() map[string]*metrics.RateCounts { + setting, ok := o.GetSetting() + if !ok { + return nil + } + rcs := make(map[string]*metrics.RateCounts) + rcs[metrics.RCRegular] = setting.bucket.FlushRateCounts() + rcs[metrics.RCRelaxedTriggerTrace] = setting.triggerTraceRelaxedBucket.FlushRateCounts() + rcs[metrics.RCStrictTriggerTrace] = setting.triggerTraceStrictBucket.FlushRateCounts() + + return rcs +} + +// SampleRequest returns a SampleDecision based on inputs and state of various token buckets +func (o *oboe) SampleRequest(continued bool, url string, triggerTrace TriggerTraceMode, swState w3cfmt.SwTraceState) SampleDecision { + setting, ok := o.GetSetting() + if !ok { + return SampleDecision{false, 0, SampleSourceNone, false, TtSettingsNotAvailable, 0, 0, false} + } + + var diceRolled, retval, doRateLimiting bool + sampleRate, flags, source := setting.mergeURLSetting(url) + + // Choose an appropriate bucket + bucket := setting.bucket + if triggerTrace == ModeRelaxedTriggerTrace { + bucket = setting.triggerTraceRelaxedBucket + } else if triggerTrace == ModeStrictTriggerTrace { + bucket = setting.triggerTraceStrictBucket + } + + if triggerTrace.Requested() && !continued { + sampled := (triggerTrace != ModeInvalidTriggerTrace) && (flags.TriggerTraceEnabled()) + rsp := TtOK + + ret := bucket.count(sampled, false, true) + + if flags.TriggerTraceEnabled() && triggerTrace.Enabled() { + if !ret { + rsp = TtRateExceeded + } + } else if triggerTrace == ModeInvalidTriggerTrace { + rsp = "" + } else { + if !flags.Enabled() { + rsp = TtTracingDisabled + } else { + rsp = TtTriggerTracingDisabled + } + } + ttCap, ttRate := setting.getTokenBucketSetting(triggerTrace) + return SampleDecision{ret, -1, SampleSourceUnset, flags.Enabled(), rsp, ttRate, ttCap, diceRolled} + } + + unsetBucketAndSampleKVs := false + if !continued { + // A new request + if flags&FlagSampleStart != 0 { + // roll the dice + diceRolled = true + retval = shouldSample(sampleRate) + if retval { + doRateLimiting = true + } + } + } else if swState.IsValid() { + if swState.Flags().IsSampled() { + if flags&FlagSampleThroughAlways != 0 { + // Conform to liboboe behavior; continue decision would result in a -1 value for the + // BucketCapacity, BucketRate, SampleRate and SampleSource KVs to indicate "unset". + unsetBucketAndSampleKVs = true + retval = true + } else if flags&FlagSampleThrough != 0 { + // roll the dice + diceRolled = true + retval = shouldSample(sampleRate) + } + } else { + retval = false + } + } + + retval = bucket.count(retval, continued, doRateLimiting) + + rsp := TtNotRequested + if triggerTrace.Requested() { + rsp = TtIgnored + } + + var bucketCap, bucketRate float64 + if unsetBucketAndSampleKVs { + bucketCap, bucketRate, sampleRate, source = -1, -1, -1, SampleSourceUnset + } else { + bucketCap, bucketRate = setting.getTokenBucketSetting(ModeTriggerTraceNotPresent) + } + + return SampleDecision{ + retval, + sampleRate, + source, + flags.Enabled(), + rsp, + bucketCap, + bucketRate, + diceRolled, + } +} + +func bytesToFloat64(b []byte) (float64, error) { + if len(b) != 8 { + return -1, fmt.Errorf("invalid length: %d", len(b)) + } + return math.Float64frombits(binary.LittleEndian.Uint64(b)), nil +} + +func parseFloat64(args map[string][]byte, key string, fb float64) float64 { + ret := fb + if c, ok := args[key]; ok { + v, err := bytesToFloat64(c) + if err == nil && v >= 0 { + ret = v + log.Debugf("parsed %s=%f", key, v) + } else { + log.Warningf("parse error: %s=%f err=%v fallback=%f", key, v, err, fb) + } + } + return ret +} + +func adjustSampleRate(rate int64) int { + if rate < 0 { + log.Debugf("Invalid sample rate: %d", rate) + return 0 + } + + if rate > maxSamplingRate { + log.Debugf("Invalid sample rate: %d", rate) + return maxSamplingRate + } + return int(rate) +} + +func (o *oboe) UpdateSetting(sType int32, layer string, flags []byte, value int64, ttl int64, args map[string][]byte) { + ns := newOboeSettings() + + ns.timestamp = time.Now() + ns.source = settingType(sType).toSampleSource() + ns.flags = flagStringToBin(string(flags)) + ns.originalFlags = ns.flags + ns.value = adjustSampleRate(value) + ns.ttl = ttl + ns.layer = layer + + ns.TriggerToken = args[constants.KvSignatureKey] + + rate := parseFloat64(args, constants.KvBucketRate, 0) + capacity := parseFloat64(args, constants.KvBucketCapacity, 0) + ns.bucket.setRateCap(rate, capacity) + + tRelaxedRate := parseFloat64(args, constants.KvTriggerTraceRelaxedBucketRate, 0) + tRelaxedCapacity := parseFloat64(args, constants.KvTriggerTraceRelaxedBucketCapacity, 0) + ns.triggerTraceRelaxedBucket.setRateCap(tRelaxedRate, tRelaxedCapacity) + + tStrictRate := parseFloat64(args, constants.KvTriggerTraceStrictBucketRate, 0) + tStrictCapacity := parseFloat64(args, constants.KvTriggerTraceStrictBucketCapacity, 0) + ns.triggerTraceStrictBucket.setRateCap(tStrictRate, tStrictCapacity) + + merged := mergeLocalSetting(ns) + + key := settingKey{ + sType: settingType(sType), + layer: layer, + } + + o.Lock() + o.settings[key] = merged + o.Unlock() +} + +// CheckSettingsTimeout checks and deletes expired settings +func (o *oboe) CheckSettingsTimeout() { + o.checkSettingsTimeout() +} + +func (o *oboe) checkSettingsTimeout() { + o.Lock() + defer o.Unlock() + + ss := o.settings + for k, s := range ss { + e := s.timestamp.Add(time.Duration(s.ttl) * time.Second) + if e.Before(time.Now()) { + delete(ss, k) + } + } +} + +func (o *oboe) GetSetting() (*settings, bool) { + o.RLock() + defer o.RUnlock() + + // for now only look up the default settings + key := settingKey{ + sType: TypeDefault, + layer: "", + } + if setting, ok := o.settings[key]; ok { + return setting, true + } + + return nil, false +} + +func (o *oboe) RemoveSetting() { + o.Lock() + defer o.Unlock() + + key := settingKey{ + sType: TypeDefault, + layer: "", + } + + delete(o.settings, key) +} + +func (o *oboe) HasDefaultSetting() bool { + if _, ok := o.GetSetting(); ok { + return true + } + return false +} + +func (o *oboe) GetTriggerTraceToken() ([]byte, error) { + setting, ok := o.GetSetting() + if !ok { + return nil, errors.New("failed to get settings") + } + if len(setting.TriggerToken) == 0 { + return nil, errors.New("no valid signature key found") + } + return setting.TriggerToken, nil +} + +func shouldSample(sampleRate int) bool { + return sampleRate == maxSamplingRate || rand.RandIntn(maxSamplingRate) <= sampleRate +} + +func flagStringToBin(flagString string) settingFlag { + flags := settingFlag(0) + if flagString != "" { + for _, s := range strings.Split(flagString, ",") { + switch s { + case "OVERRIDE": + flags |= FlagOverride + case "SAMPLE_START": + flags |= FlagSampleStart + case "SAMPLE_THROUGH": + flags |= FlagSampleThrough + case "SAMPLE_THROUGH_ALWAYS": + flags |= FlagSampleThroughAlways + case "TRIGGER_TRACE": + flags |= FlagTriggerTrace + } + } + } + return flags +} diff --git a/internal/reporter/oboe_test.go b/internal/oboe/oboe_test.go similarity index 62% rename from internal/reporter/oboe_test.go rename to internal/oboe/oboe_test.go index 368ba3f2..7b7de998 100644 --- a/internal/reporter/oboe_test.go +++ b/internal/oboe/oboe_test.go @@ -12,18 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -package reporter +package oboe import ( - "context" - "github.com/solarwinds/apm-go/internal/utils" + "github.com/solarwinds/apm-go/internal/oboetestutils" "github.com/solarwinds/apm-go/internal/w3cfmt" "github.com/stretchr/testify/require" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/sdk/resource" - "go.opentelemetry.io/otel/trace" "testing" - "time" ) var ( @@ -31,40 +26,10 @@ var ( unsampledSwState = w3cfmt.ParseSwTraceState("0123456789abcdef-00") ) -func TestCreateInitMessage(t *testing.T) { - tid := trace.TraceID{0x01, 0x02, 0x03, 0x04} - r, err := resource.New(context.Background(), resource.WithAttributes( - attribute.String("foo", "bar"), - // service.name should be omitted - attribute.String("service.name", "my cool service"), - )) - require.NoError(t, err) - a := time.Now() - evt := createInitMessage(tid, r) - b := time.Now() - require.NoError(t, err) - require.NotNil(t, evt) - e, ok := evt.(*event) - require.True(t, ok) - require.Equal(t, tid, e.taskID) - require.NotEqual(t, [8]byte{}, e.opID) - require.True(t, e.t.After(a)) - require.True(t, e.t.Before(b)) - require.Equal(t, []attribute.KeyValue{ - attribute.String("foo", "bar"), - attribute.Bool("__Init", true), - attribute.String("APM.Version", utils.Version()), - }, e.kvs) - require.Equal(t, LabelUnset, e.label) - require.Equal(t, "", e.layer) - require.False(t, e.parent.IsValid()) -} - func TestOboeSampleRequestSettingsUnavailable(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(NoSettingST)) - defer r.Close(0) ttMode := ModeTriggerTraceNotPresent - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) expected := SampleDecision{ xTraceOptsRsp: "settings-not-available", } @@ -72,14 +37,14 @@ func TestOboeSampleRequestSettingsUnavailable(t *testing.T) { } func TestOboeSampleRequestSettingsDisabled(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(DisabledST)) - defer r.Close(0) ttMode := ModeRelaxedTriggerTrace - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddDisabled(o) + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) expected := SampleDecision{ trace: false, rate: -1, - source: SAMPLE_SOURCE_UNSET, + source: SampleSourceUnset, xTraceOptsRsp: "tracing-disabled", bucketCap: 1, bucketRate: 1, @@ -89,14 +54,14 @@ func TestOboeSampleRequestSettingsDisabled(t *testing.T) { } func TestOboeSampleRequest(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(DefaultST)) - defer r.Close(0) ttMode := ModeTriggerTraceNotPresent - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddDefaultSetting(o) + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) expected := SampleDecision{ trace: true, rate: 1000000, - source: SAMPLE_SOURCE_DEFAULT, + source: SampleSourceDefault, enabled: true, xTraceOptsRsp: "not-requested", bucketCap: 1000000, @@ -107,14 +72,14 @@ func TestOboeSampleRequest(t *testing.T) { } func TestOboeSampleRequestContinuedUnsampledSwState(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(DefaultST)) - defer r.Close(0) ttMode := ModeTriggerTraceNotPresent - dec := oboeSampleRequest(true, "url", ttMode, unsampledSwState) + o := NewOboe() + oboetestutils.AddDefaultSetting(o) + dec := o.SampleRequest(true, "url", ttMode, unsampledSwState) expected := SampleDecision{ trace: false, rate: 1000000, - source: SAMPLE_SOURCE_DEFAULT, + source: SampleSourceDefault, enabled: true, xTraceOptsRsp: "not-requested", bucketCap: 1000000, @@ -125,14 +90,14 @@ func TestOboeSampleRequestContinuedUnsampledSwState(t *testing.T) { } func TestOboeSampleRequestNoTTGivenButReporterIsTTOnly(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(TriggerTraceOnlyST)) - defer r.Close(0) ttMode := ModeTriggerTraceNotPresent - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddTriggerTraceOnly(o) + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) expected := SampleDecision{ trace: false, rate: 0, - source: SAMPLE_SOURCE_DEFAULT, + source: SampleSourceDefault, enabled: false, xTraceOptsRsp: "not-requested", bucketCap: 0, @@ -143,14 +108,14 @@ func TestOboeSampleRequestNoTTGivenButReporterIsTTOnly(t *testing.T) { } func TestOboeSampleRequestUnsampledSwState(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(DefaultST)) - defer r.Close(0) ttMode := ModeTriggerTraceNotPresent - dec := oboeSampleRequest(false, "url", ttMode, unsampledSwState) + o := NewOboe() + oboetestutils.AddDefaultSetting(o) + dec := o.SampleRequest(false, "url", ttMode, unsampledSwState) expected := SampleDecision{ trace: true, rate: 1000000, - source: SAMPLE_SOURCE_DEFAULT, + source: SampleSourceDefault, enabled: true, xTraceOptsRsp: "not-requested", bucketCap: 1000000, @@ -161,14 +126,14 @@ func TestOboeSampleRequestUnsampledSwState(t *testing.T) { } func TestOboeSampleRequestThrough(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(SampleThroughST)) - defer r.Close(0) ttMode := ModeTriggerTraceNotPresent - dec := oboeSampleRequest(true, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddSampleThrough(o) + dec := o.SampleRequest(true, "url", ttMode, sampledSwState) expected := SampleDecision{ trace: true, rate: 1000000, - source: SAMPLE_SOURCE_DEFAULT, + source: SampleSourceDefault, enabled: true, xTraceOptsRsp: "not-requested", bucketCap: 1000000, @@ -179,14 +144,14 @@ func TestOboeSampleRequestThrough(t *testing.T) { } func TestOboeSampleRequestThroughUnsampled(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(SampleThroughST)) - defer r.Close(0) ttMode := ModeTriggerTraceNotPresent - dec := oboeSampleRequest(true, "url", ttMode, unsampledSwState) + o := NewOboe() + oboetestutils.AddSampleThrough(o) + dec := o.SampleRequest(true, "url", ttMode, unsampledSwState) expected := SampleDecision{ trace: false, rate: 1000000, - source: SAMPLE_SOURCE_DEFAULT, + source: SampleSourceDefault, enabled: true, xTraceOptsRsp: "not-requested", bucketCap: 1000000, @@ -199,14 +164,14 @@ func TestOboeSampleRequestThroughUnsampled(t *testing.T) { // TRIGGER TRACE func TestOboeSampleRequestRelaxedTT(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(DefaultST)) - defer r.Close(0) ttMode := ModeRelaxedTriggerTrace - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddDefaultSetting(o) + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) expected := SampleDecision{ trace: true, rate: -1, - source: SAMPLE_SOURCE_UNSET, + source: SampleSourceUnset, enabled: true, xTraceOptsRsp: "ok", bucketCap: 1000000, @@ -217,14 +182,14 @@ func TestOboeSampleRequestRelaxedTT(t *testing.T) { } func TestOboeSampleRequestStrictTT(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(DefaultST)) - defer r.Close(0) ttMode := ModeStrictTriggerTrace - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddDefaultSetting(o) + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) expected := SampleDecision{ trace: true, rate: -1, - source: SAMPLE_SOURCE_UNSET, + source: SampleSourceUnset, enabled: true, xTraceOptsRsp: "ok", bucketCap: 1000000, @@ -235,14 +200,14 @@ func TestOboeSampleRequestStrictTT(t *testing.T) { } func TestOboeSampleRequestRelaxedTTDisabled(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(NoTriggerTraceST)) - defer r.Close(0) ttMode := ModeRelaxedTriggerTrace - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddNoTriggerTrace(o) + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) expected := SampleDecision{ trace: false, rate: -1, - source: SAMPLE_SOURCE_UNSET, + source: SampleSourceUnset, enabled: true, xTraceOptsRsp: "trigger-tracing-disabled", bucketCap: 0, @@ -253,14 +218,14 @@ func TestOboeSampleRequestRelaxedTTDisabled(t *testing.T) { } func TestOboeSampleRequestStrictTTDisabled(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(NoTriggerTraceST)) - defer r.Close(0) ttMode := ModeStrictTriggerTrace - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddNoTriggerTrace(o) + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) expected := SampleDecision{ trace: false, rate: -1, - source: SAMPLE_SOURCE_UNSET, + source: SampleSourceUnset, enabled: true, xTraceOptsRsp: "trigger-tracing-disabled", bucketCap: 0, @@ -271,14 +236,27 @@ func TestOboeSampleRequestStrictTTDisabled(t *testing.T) { } func TestOboeSampleRequestRelaxedTTLimited(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(LimitedTriggerTraceST)) - defer r.Close(0) ttMode := ModeRelaxedTriggerTrace - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddLimitedTriggerTrace(o) + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) + // We expect the first TT to go through expected := SampleDecision{ + trace: true, + rate: -1, + source: SampleSourceUnset, + enabled: true, + xTraceOptsRsp: "ok", + bucketCap: 1, + bucketRate: 1, + diceRolled: false, + } + require.Equal(t, expected, dec) + dec = o.SampleRequest(false, "url", ttMode, sampledSwState) + expected = SampleDecision{ trace: false, rate: -1, - source: SAMPLE_SOURCE_UNSET, + source: SampleSourceUnset, enabled: true, xTraceOptsRsp: "rate-exceeded", bucketCap: 1, @@ -289,14 +267,14 @@ func TestOboeSampleRequestRelaxedTTLimited(t *testing.T) { } func TestOboeSampleRequestInvalidTT(t *testing.T) { - r := SetTestReporter(TestReporterSettingType(DefaultST)) - defer r.Close(0) ttMode := ModeInvalidTriggerTrace - dec := oboeSampleRequest(false, "url", ttMode, sampledSwState) + o := NewOboe() + oboetestutils.AddDefaultSetting(o) + dec := o.SampleRequest(false, "url", ttMode, sampledSwState) expected := SampleDecision{ trace: false, rate: -1, - source: SAMPLE_SOURCE_UNSET, + source: SampleSourceUnset, enabled: true, xTraceOptsRsp: "", bucketCap: 1000000, @@ -310,7 +288,7 @@ func TestGetTokenBucketSetting(t *testing.T) { main := &tokenBucket{ratePerSec: 1, capacity: 2} relaxed := &tokenBucket{ratePerSec: 3, capacity: 4} strict := &tokenBucket{ratePerSec: 5, capacity: 6} - setting := &oboeSettings{ + setting := &settings{ bucket: main, triggerTraceRelaxedBucket: relaxed, triggerTraceStrictBucket: strict, @@ -327,7 +305,7 @@ func TestGetTokenBucketSetting(t *testing.T) { {99, nil}, } for _, scen := range scenarios { - capacity, rate := getTokenBucketSetting(setting, scen.mode) + capacity, rate := setting.getTokenBucketSetting(scen.mode) if scen.bucket == nil { require.Equal(t, float64(0), capacity) require.Equal(t, float64(0), rate) diff --git a/internal/oboe/sample_decision.go b/internal/oboe/sample_decision.go new file mode 100644 index 00000000..d71bb944 --- /dev/null +++ b/internal/oboe/sample_decision.go @@ -0,0 +1,65 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package oboe + +type SampleDecision struct { + trace bool + rate int + source SampleSource + // if the request is disabled from tracing in a per-transaction level or for + // the entire service. + enabled bool + xTraceOptsRsp string + bucketCap float64 + bucketRate float64 + + diceRolled bool +} + +func (s SampleDecision) Trace() bool { + return s.trace +} + +func (s SampleDecision) XTraceOptsRsp() string { + return s.xTraceOptsRsp +} + +func (s SampleDecision) Enabled() bool { + return s.enabled +} + +func (s SampleDecision) BucketCapacity() float64 { + return s.bucketCap +} + +func (s SampleDecision) BucketCapacityStr() string { + return floatToStr(s.BucketCapacity()) +} + +func (s SampleDecision) BucketRate() float64 { + return s.bucketRate +} + +func (s SampleDecision) BucketRateStr() string { + return floatToStr(s.BucketRate()) +} + +func (s SampleDecision) SampleRate() int { + return s.rate +} + +func (s SampleDecision) SampleSource() SampleSource { + return s.source +} diff --git a/internal/oboe/settings.go b/internal/oboe/settings.go new file mode 100644 index 00000000..9f54b603 --- /dev/null +++ b/internal/oboe/settings.go @@ -0,0 +1,183 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package oboe + +import ( + "github.com/solarwinds/apm-go/internal/config" + "github.com/solarwinds/apm-go/internal/log" + "time" +) + +type settings struct { + timestamp time.Time + // the flags which may be modified through merging local settings. + flags settingFlag + // the original flags retrieved from the remote collector. + originalFlags settingFlag + // The sample rate. It could be the original value got from remote server + // or a new value after negotiating with local config + value int + // The sample source after negotiating with local config + source SampleSource + ttl int64 + layer string + TriggerToken []byte + bucket *tokenBucket + triggerTraceRelaxedBucket *tokenBucket + triggerTraceStrictBucket *tokenBucket +} + +func (s *settings) hasOverrideFlag() bool { + return s.originalFlags&FlagOverride != 0 +} +func newOboeSettings() *settings { + return &settings{ + // The global token bucket. Trace decisions of all the requests are controlled + // by this single bucket. + // + // The rate and capacity will be initialized by the values fetched from the remote + // server, therefore it's initialized with only the default values. + bucket: &tokenBucket{}, + // The token bucket exclusively for trigger trace from authenticated clients + triggerTraceRelaxedBucket: &tokenBucket{}, + // The token bucket exclusively for trigger trace from unauthenticated clients + triggerTraceStrictBucket: &tokenBucket{}, + } +} + +// mergeLocalSetting follow the predefined precedence to decide which one to +// pick from: either the local configs or the remote ones, or the combination. +// +// Note: This function modifies the argument in place. +func mergeLocalSetting(remote *settings) *settings { + if remote.hasOverrideFlag() && config.SamplingConfigured() { + // Choose the lower sample rate and merge the flags + if remote.value > config.GetSampleRate() { + remote.value = config.GetSampleRate() + remote.source = SampleSourceFile + } + remote.flags &= NewTracingMode(config.GetTracingMode()).toFlags() + } else if config.SamplingConfigured() { + // Use local sample rate and tracing mode config + remote.value = config.GetSampleRate() + remote.flags = NewTracingMode(config.GetTracingMode()).toFlags() + remote.source = SampleSourceFile + } + + if !config.GetTriggerTrace() { + remote.flags = remote.flags &^ (1 << FlagTriggerTraceOffset) + } + return remote +} + +// mergeURLSetting merges the service level setting (merged from remote and local +// settings) and the per-URL sampling flags, if any. +func (s *settings) mergeURLSetting(url string) (int, settingFlag, SampleSource) { + if url == "" { + return s.value, s.flags, s.source + } + + urlTracingMode := urls.GetTracingMode(url) + if urlTracingMode.isUnknown() { + return s.value, s.flags, s.source + } + + flags := urlTracingMode.toFlags() + source := SampleSourceFile + + if s.hasOverrideFlag() { + flags &= s.originalFlags + } + + return s.value, flags, source +} + +func (s *settings) getTokenBucketSetting(ttMode TriggerTraceMode) (capacity float64, rate float64) { + var bucket *tokenBucket + + switch ttMode { + case ModeRelaxedTriggerTrace: + bucket = s.triggerTraceRelaxedBucket + case ModeStrictTriggerTrace: + bucket = s.triggerTraceStrictBucket + case ModeTriggerTraceNotPresent, ModeInvalidTriggerTrace: + bucket = s.bucket + default: + log.Warningf("Could not determine token bucket setting for invalid TriggerTraceMode: %#v", ttMode) + return 0, 0 + } + + return bucket.capacity, bucket.ratePerSec +} + +// The identifying keys for a setting +type settingKey struct { + sType settingType + layer string +} +type settingType int +type settingFlag uint16 + +// setting types +const ( + TypeDefault settingType = iota // default setting which serves as a fallback if no other settings found + TypeLayer // layer specific settings +) + +// setting flags offset +const ( + FlagInvalidOffset = iota + FlagOverrideOffset + FlagSampleStartOffset + FlagSampleThroughOffset + FlagSampleThroughAlwaysOffset + FlagTriggerTraceOffset +) + +// setting flags +// +//goland:noinspection GoUnusedConst +const ( + FlagOk settingFlag = 0x0 + FlagInvalid settingFlag = 1 << FlagInvalidOffset + FlagOverride settingFlag = 1 << FlagOverrideOffset + FlagSampleStart settingFlag = 1 << FlagSampleStartOffset + FlagSampleThrough settingFlag = 1 << FlagSampleThroughOffset + FlagSampleThroughAlways settingFlag = 1 << FlagSampleThroughAlwaysOffset + FlagTriggerTrace settingFlag = 1 << FlagTriggerTraceOffset +) + +// Enabled returns if the trace is enabled or not. +func (f settingFlag) Enabled() bool { + return f&(FlagSampleStart|FlagSampleThroughAlways) != 0 +} + +// TriggerTraceEnabled returns if the trigger trace is enabled +func (f settingFlag) TriggerTraceEnabled() bool { + return f&FlagTriggerTrace != 0 +} + +func (st settingType) toSampleSource() SampleSource { + var source SampleSource + switch st { + case TypeDefault: + source = SampleSourceDefault + case TypeLayer: + source = SampleSourceLayer + default: + source = SampleSourceNone + } + return source +} diff --git a/internal/oboe/token_bucket.go b/internal/oboe/token_bucket.go new file mode 100644 index 00000000..46a514c8 --- /dev/null +++ b/internal/oboe/token_bucket.go @@ -0,0 +1,95 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package oboe + +import ( + "github.com/solarwinds/apm-go/internal/metrics" + "math" + "strconv" + "sync" + "time" +) + +type tokenBucket struct { + ratePerSec float64 + capacity float64 + available float64 + last time.Time + lock sync.Mutex + metrics.RateCounts +} + +func (b *tokenBucket) setRateCap(rate, cap float64) { + b.lock.Lock() + defer b.lock.Unlock() + b.ratePerSec = rate + b.capacity = cap + + if b.available > b.capacity { + b.available = b.capacity + } +} + +func (b *tokenBucket) count(sampled, hasMetadata, rateLimit bool) bool { + b.RequestedInc() + + if !hasMetadata { + b.SampledInc() + } + + if !sampled { + return sampled + } + + if rateLimit { + if ok := b.consume(1); !ok { + b.LimitedInc() + return false + } + } + + if hasMetadata { + b.ThroughInc() + } + b.TracedInc() + return sampled +} + +func (b *tokenBucket) consume(size float64) bool { + b.lock.Lock() + defer b.lock.Unlock() + b.update(time.Now()) + if b.available >= size { + b.available -= size + return true + } + return false +} + +func (b *tokenBucket) update(now time.Time) { + if b.available < b.capacity { // room for more tokens? + delta := now.Sub(b.last) // calculate duration since last check + b.last = now // update time of last check + if delta <= 0 { // return if no delta or time went "backwards" + return + } + newTokens := b.ratePerSec * delta.Seconds() // # tokens generated since last check + b.available = math.Min(b.capacity, b.available+newTokens) // add new tokens to bucket, but don't overfill + } +} + +func floatToStr(f float64) string { + return strconv.FormatFloat(f, 'f', -1, 64) +} diff --git a/internal/oboe/tracing_mode.go b/internal/oboe/tracing_mode.go new file mode 100644 index 00000000..62224296 --- /dev/null +++ b/internal/oboe/tracing_mode.go @@ -0,0 +1,62 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package oboe + +import "github.com/solarwinds/apm-go/internal/config" + +type TracingMode int + +const ( + TraceDisabled TracingMode = iota // disable tracing, will neither start nor continue traces + TraceEnabled // perform sampling every inbound request for tracing + TraceUnknown // for cache purpose only +) + +// NewTracingMode creates a tracing mode object from a string +func NewTracingMode(mode config.TracingMode) TracingMode { + switch mode { + case config.DisabledTracingMode: + return TraceDisabled + case config.EnabledTracingMode: + return TraceEnabled + default: + } + return TraceUnknown +} + +func (tm TracingMode) isUnknown() bool { + return tm == TraceUnknown +} + +func (tm TracingMode) toFlags() settingFlag { + switch tm { + case TraceEnabled: + return FlagSampleStart | FlagSampleThroughAlways | FlagTriggerTrace + case TraceDisabled: + default: + } + return FlagOk +} + +func (tm TracingMode) ToString() string { + switch tm { + case TraceEnabled: + return string(config.EnabledTracingMode) + case TraceDisabled: + return string(config.DisabledTracingMode) + default: + return string(config.UnknownTracingMode) + } +} diff --git a/internal/oboe/trigger_trace.go b/internal/oboe/trigger_trace.go new file mode 100644 index 00000000..f77ef271 --- /dev/null +++ b/internal/oboe/trigger_trace.go @@ -0,0 +1,74 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package oboe + +import "fmt" + +// Trigger trace response messages +const ( + TtOK = "ok" + TtRateExceeded = "rate-exceeded" + TtTracingDisabled = "tracing-disabled" + TtTriggerTracingDisabled = "trigger-tracing-disabled" + TtNotRequested = "not-requested" + TtIgnored = "ignored" + TtSettingsNotAvailable = "settings-not-available" +) + +type TriggerTraceMode int + +const ( + // ModeTriggerTraceNotPresent means there is no X-Trace-Options header detected, + // or the X-Trace-Options header is present but trigger_trace flag is not. This + // indicates that it's a trace for regular sampling. + ModeTriggerTraceNotPresent TriggerTraceMode = iota + + // ModeInvalidTriggerTrace means X-Trace-Options is detected but no valid trigger-trace + // flag found, or X-Trace-Options-Signature is present but the authentication is failed. + ModeInvalidTriggerTrace + + // ModeRelaxedTriggerTrace means X-Trace-Options-Signature is present and valid. + // The trace will be sampled/limited by the relaxed token bucket. + ModeRelaxedTriggerTrace + + // ModeStrictTriggerTrace means no X-Trace-Options-Signature is present. The trace + // will be limited by the strict token bucket. + ModeStrictTriggerTrace +) + +// Enabled indicates whether it's a trigger-trace request +func (tm TriggerTraceMode) Enabled() bool { + switch tm { + case ModeTriggerTraceNotPresent, ModeInvalidTriggerTrace: + return false + case ModeRelaxedTriggerTrace, ModeStrictTriggerTrace: + return true + default: + panic(fmt.Sprintf("Unhandled trigger trace mode: %x", tm)) + } +} + +// Requested indicates whether the user tries to issue a trigger-trace request +// (but may be rejected if the header is illegal) +func (tm TriggerTraceMode) Requested() bool { + switch tm { + case ModeTriggerTraceNotPresent: + return false + case ModeRelaxedTriggerTrace, ModeStrictTriggerTrace, ModeInvalidTriggerTrace: + return true + default: + panic(fmt.Sprintf("Unhandled trigger trace mode: %x", tm)) + } +} diff --git a/internal/reporter/url.go b/internal/oboe/url.go similarity index 82% rename from internal/reporter/url.go rename to internal/oboe/url.go index b5b7e0fb..6923754d 100644 --- a/internal/reporter/url.go +++ b/internal/oboe/url.go @@ -11,7 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -package reporter + +package oboe import ( "github.com/solarwinds/apm-go/internal/config" @@ -47,34 +48,34 @@ const ( ) // setURLTrace sets a url and its trace decision into the cache -func (c *urlCache) setURLTrace(url string, trace tracingMode) { +func (c *urlCache) setURLTrace(url string, trace TracingMode) { _ = c.Set([]byte(url), []byte(trace.ToString()), cacheExpireSeconds) } // getURLTrace gets the trace decision of a URL -func (c *urlCache) getURLTrace(url string) (tracingMode, error) { +func (c *urlCache) getURLTrace(url string) (TracingMode, error) { traceStr, err := c.Get([]byte(url)) if err != nil { return TraceUnknown, err } - return newTracingMode(config.TracingMode(string(traceStr))), nil + return NewTracingMode(config.TracingMode(traceStr)), nil } // urlFilter defines a URL filter type urlFilter interface { match(url string) bool - tracingMode() tracingMode + tracingMode() TracingMode } // regexFilter is a regular expression based URL filter type regexFilter struct { regex *regexp.Regexp - trace tracingMode + trace TracingMode } // newRegexFilter creates a new regexFilter instance -func newRegexFilter(regex string, mode tracingMode) (*regexFilter, error) { +func newRegexFilter(regex string, mode TracingMode) (*regexFilter, error) { re, err := regexp.Compile(regex) if err != nil { return nil, errors.Wrap(err, "failed to parse regexp") @@ -88,18 +89,18 @@ func (f *regexFilter) match(url string) bool { } // tracingMode returns the tracing mode of this url pattern -func (f *regexFilter) tracingMode() tracingMode { +func (f *regexFilter) tracingMode() TracingMode { return f.trace } // extensionFilter is a extension-based filter type extensionFilter struct { Exts map[string]struct{} - trace tracingMode + trace TracingMode } // newExtensionFilter create a new instance of extensionFilter -func newExtensionFilter(extensions []string, mode tracingMode) *extensionFilter { +func newExtensionFilter(extensions []string, mode TracingMode) *extensionFilter { exts := make(map[string]struct{}) for _, ext := range extensions { exts[ext] = struct{}{} @@ -115,7 +116,7 @@ func (f *extensionFilter) match(url string) bool { } // tracingMode returns the tracing mode of this extension pattern -func (f *extensionFilter) tracingMode() tracingMode { +func (f *extensionFilter) tracingMode() TracingMode { return f.trace } @@ -140,21 +141,21 @@ func (f *urlFilters) loadConfig(filters []config.TransactionFilter) { for _, filter := range filters { if filter.RegEx != "" { - re, err := newRegexFilter(filter.RegEx, newTracingMode(filter.Tracing)) + re, err := newRegexFilter(filter.RegEx, NewTracingMode(filter.Tracing)) if err != nil { log.Warningf("Ignore bad regex: %s, error=", filter.RegEx, err.Error()) } f.filters = append(f.filters, re) } else { f.filters = append(f.filters, - newExtensionFilter(filter.Extensions, newTracingMode(filter.Tracing))) + newExtensionFilter(filter.Extensions, NewTracingMode(filter.Tracing))) } } } -// getTracingMode checks if the URL should be traced or not. It returns TraceUnknown +// GetTracingMode checks if the URL should be traced or not. It returns TraceUnknown // if the url is not found. -func (f *urlFilters) getTracingMode(url string) tracingMode { +func (f *urlFilters) GetTracingMode(url string) TracingMode { if len(f.filters) == 0 || url == "" { return TraceUnknown } @@ -170,7 +171,7 @@ func (f *urlFilters) getTracingMode(url string) tracingMode { return trace } -func (f *urlFilters) lookupTracingMode(url string) tracingMode { +func (f *urlFilters) lookupTracingMode(url string) TracingMode { for _, filter := range f.filters { if filter.match(url) { return filter.tracingMode() diff --git a/internal/reporter/url_test.go b/internal/oboe/url_test.go similarity index 87% rename from internal/reporter/url_test.go rename to internal/oboe/url_test.go index d983c97b..1a5b512a 100644 --- a/internal/reporter/url_test.go +++ b/internal/oboe/url_test.go @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -package reporter +package oboe import ( "github.com/solarwinds/apm-go/internal/config" @@ -52,22 +52,22 @@ func TestUrlFilter(t *testing.T) { {Type: "url", Extensions: []string{"png", "jpg"}, Tracing: config.DisabledTracingMode}, }) - assert.Equal(t, TraceDisabled, filter.getTracingMode("user123")) + assert.Equal(t, TraceDisabled, filter.GetTracingMode("user123")) assert.Equal(t, int64(1), filter.cache.EntryCount()) assert.Equal(t, int64(0), filter.cache.HitCount()) - assert.Equal(t, TraceUnknown, filter.getTracingMode("test123")) + assert.Equal(t, TraceUnknown, filter.GetTracingMode("test123")) assert.Equal(t, int64(2), filter.cache.EntryCount()) assert.Equal(t, int64(2), filter.cache.MissCount()) - assert.Equal(t, TraceDisabled, filter.getTracingMode("user200")) + assert.Equal(t, TraceDisabled, filter.GetTracingMode("user200")) assert.Equal(t, int64(3), filter.cache.EntryCount()) assert.Equal(t, int64(0), filter.cache.HitCount()) - assert.Equal(t, TraceDisabled, filter.getTracingMode("user123")) + assert.Equal(t, TraceDisabled, filter.GetTracingMode("user123")) assert.Equal(t, int64(3), filter.cache.EntryCount()) assert.Equal(t, int64(1), filter.cache.HitCount()) - assert.Equal(t, TraceDisabled, filter.getTracingMode("http://user.com/eric/avatar.png")) + assert.Equal(t, TraceDisabled, filter.GetTracingMode("http://user.com/eric/avatar.png")) assert.Equal(t, int64(4), filter.cache.EntryCount()) } diff --git a/internal/oboetestutils/oboe.go b/internal/oboetestutils/oboe.go new file mode 100644 index 00000000..3bd4a5cd --- /dev/null +++ b/internal/oboetestutils/oboe.go @@ -0,0 +1,134 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package oboetestutils + +import ( + "encoding/binary" + "github.com/solarwinds/apm-go/internal/constants" + "math" +) + +const TestToken = "TOKEN" +const TypeDefault = 0 + +func argsToMap(capacity, ratePerSec, tRCap, tRRate, tSCap, tSRate float64, + metricsFlushInterval, maxTransactions int, token []byte) map[string][]byte { + args := make(map[string][]byte) + + if capacity > -1 { + bits := math.Float64bits(capacity) + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, bits) + args[constants.KvBucketCapacity] = bytes + } + if ratePerSec > -1 { + bits := math.Float64bits(ratePerSec) + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, bits) + args[constants.KvBucketRate] = bytes + } + if tRCap > -1 { + bits := math.Float64bits(tRCap) + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, bits) + args[constants.KvTriggerTraceRelaxedBucketCapacity] = bytes + } + if tRRate > -1 { + bits := math.Float64bits(tRRate) + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, bits) + args[constants.KvTriggerTraceRelaxedBucketRate] = bytes + } + if tSCap > -1 { + bits := math.Float64bits(tSCap) + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, bits) + args[constants.KvTriggerTraceStrictBucketCapacity] = bytes + } + if tSRate > -1 { + bits := math.Float64bits(tSRate) + bytes := make([]byte, 8) + binary.LittleEndian.PutUint64(bytes, bits) + args[constants.KvTriggerTraceStrictBucketRate] = bytes + } + if metricsFlushInterval > -1 { + bytes := make([]byte, 4) + binary.LittleEndian.PutUint32(bytes, uint32(metricsFlushInterval)) + args[constants.KvMetricsFlushInterval] = bytes + } + if maxTransactions > -1 { + bytes := make([]byte, 4) + binary.LittleEndian.PutUint32(bytes, uint32(maxTransactions)) + args[constants.KvMaxTransactions] = bytes + } + + args[constants.KvSignatureKey] = token + + return args +} + +type SettingUpdater interface { + UpdateSetting(sType int32, layer string, flags []byte, value int64, ttl int64, args map[string][]byte) +} + +func AddDefaultSetting(o SettingUpdater) { + // add default setting with 100% sampling + o.UpdateSetting(int32(TypeDefault), "", + []byte("SAMPLE_START,SAMPLE_THROUGH_ALWAYS,TRIGGER_TRACE"), + 1000000, 120, argsToMap(1000000, 1000000, 1000000, 1000000, 1000000, 1000000, -1, -1, []byte(TestToken))) +} + +func AddSampleThrough(o SettingUpdater) { + // add default setting with 100% sampling + o.UpdateSetting(int32(TypeDefault), "", + []byte("SAMPLE_START,SAMPLE_THROUGH,TRIGGER_TRACE"), + 1000000, 120, argsToMap(1000000, 1000000, 1000000, 1000000, 1000000, 1000000, -1, -1, []byte(TestToken))) +} + +func AddNoTriggerTrace(o SettingUpdater) { + o.UpdateSetting(int32(TypeDefault), "", + []byte("SAMPLE_START,SAMPLE_THROUGH_ALWAYS"), + 1000000, 120, argsToMap(1000000, 1000000, 0, 0, 0, 0, -1, -1, []byte(TestToken))) +} + +func AddTriggerTraceOnly(o SettingUpdater) { + o.UpdateSetting(int32(TypeDefault), "", + []byte("TRIGGER_TRACE"), + 0, 120, argsToMap(0, 0, 1000000, 1000000, 1000000, 1000000, -1, -1, []byte(TestToken))) +} + +func AddRelaxedTriggerTraceOnly(o SettingUpdater) { + o.UpdateSetting(int32(TypeDefault), "", + []byte("TRIGGER_TRACE"), + 0, 120, argsToMap(0, 0, 1000000, 1000000, 0, 0, -1, -1, []byte(TestToken))) +} + +func AddStrictTriggerTraceOnly(o SettingUpdater) { + o.UpdateSetting(int32(TypeDefault), "", + []byte("TRIGGER_TRACE"), + 0, 120, argsToMap(0, 0, 0, 0, 1000000, 1000000, -1, -1, []byte(TestToken))) +} + +func AddLimitedTriggerTrace(o SettingUpdater) { + o.UpdateSetting(int32(TypeDefault), "", + []byte("SAMPLE_START,SAMPLE_THROUGH_ALWAYS,TRIGGER_TRACE"), + 1000000, 120, argsToMap(1000000, 1000000, 1, 1, 1, 1, -1, -1, []byte(TestToken))) +} + +func AddDisabled(o SettingUpdater) { + o.UpdateSetting(int32(TypeDefault), "", + []byte(""), + 0, 120, argsToMap(0, 0, 1, 1, 1, 1, -1, -1, []byte(TestToken))) +} diff --git a/internal/processor/processor.go b/internal/processor/processor.go index 7a633d7b..f581d389 100644 --- a/internal/processor/processor.go +++ b/internal/processor/processor.go @@ -22,17 +22,17 @@ import ( sdktrace "go.opentelemetry.io/otel/sdk/trace" ) -func NewInboundMetricsSpanProcessor(isAppoptics bool) sdktrace.SpanProcessor { +func NewInboundMetricsSpanProcessor(registry metrics.MetricRegistry, isAppoptics bool) sdktrace.SpanProcessor { return &inboundMetricsSpanProcessor{ + registry: registry, isAppoptics: isAppoptics, } } var _ sdktrace.SpanProcessor = &inboundMetricsSpanProcessor{} -var recordFunc = metrics.RecordSpan - type inboundMetricsSpanProcessor struct { + registry metrics.MetricRegistry isAppoptics bool } @@ -62,7 +62,7 @@ func maybeClearEntrySpan(span sdktrace.ReadOnlySpan) { func (s *inboundMetricsSpanProcessor) OnEnd(span sdktrace.ReadOnlySpan) { if entryspans.IsEntrySpan(span) { - recordFunc(span, s.isAppoptics) + s.registry.RecordSpan(span, s.isAppoptics) maybeClearEntrySpan(span) } } diff --git a/internal/processor/processor_test.go b/internal/processor/processor_test.go index 609ab55e..c6edba46 100644 --- a/internal/processor/processor_test.go +++ b/internal/processor/processor_test.go @@ -31,17 +31,41 @@ type recordMock struct { called bool } +func (r *recordMock) RecordSpan(span sdktrace.ReadOnlySpan, isAppoptics bool) { + r.span = span + r.isAppoptics = isAppoptics + r.called = true +} + +func (r *recordMock) BuildBuiltinMetricsMessage(int32, *metrics.EventQueueStats, map[string]*metrics.RateCounts, bool) []byte { + panic("should not be called in this test") +} + +func (r *recordMock) BuildCustomMetricsMessage(int32) []byte { + panic("should not be called in this test") +} + +func (r *recordMock) ApmMetricsCap() int32 { + panic("should not be called in this test") +} + +func (r *recordMock) SetApmMetricsCap(int32) { + panic("should not be called in this test") +} + +func (r *recordMock) CustomMetricsCap() int32 { + panic("should not be called in this test") +} + +func (r *recordMock) SetCustomMetricsCap(int32) { + panic("should not be called in this test") +} + +var _ metrics.LegacyRegistry = &recordMock{} + func TestInboundMetricsSpanProcessorOnEnd(t *testing.T) { mock := &recordMock{} - recordFunc = func(span sdktrace.ReadOnlySpan, isAppoptics bool) { - mock.span = span - mock.isAppoptics = isAppoptics - mock.called = true - } - defer func() { - recordFunc = metrics.RecordSpan - }() - sp := &inboundMetricsSpanProcessor{} + sp := NewInboundMetricsSpanProcessor(mock, false) tp := sdktrace.NewTracerProvider( sdktrace.WithSpanProcessor(sp), sdktrace.WithSampler(sdktrace.AlwaysSample()), @@ -80,15 +104,7 @@ func (ro recordOnlySampler) Description() string { func TestInboundMetricsSpanProcessorOnEndRecordOnly(t *testing.T) { mock := &recordMock{} - recordFunc = func(span sdktrace.ReadOnlySpan, isAppoptics bool) { - mock.span = span - mock.isAppoptics = isAppoptics - mock.called = true - } - defer func() { - recordFunc = metrics.RecordSpan - }() - sp := &inboundMetricsSpanProcessor{} + sp := NewInboundMetricsSpanProcessor(mock, false) tp := sdktrace.NewTracerProvider( sdktrace.WithSpanProcessor(sp), sdktrace.WithSampler(recordOnlySampler{}), @@ -114,15 +130,7 @@ func TestInboundMetricsSpanProcessorOnEndRecordOnly(t *testing.T) { func TestInboundMetricsSpanProcessorOnEndWithLocalParent(t *testing.T) { mock := &recordMock{} - recordFunc = func(span sdktrace.ReadOnlySpan, isAppoptics bool) { - mock.span = span - mock.isAppoptics = isAppoptics - mock.called = true - } - defer func() { - recordFunc = metrics.RecordSpan - }() - sp := &inboundMetricsSpanProcessor{} + sp := NewInboundMetricsSpanProcessor(mock, false) tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sp)) tracer := tp.Tracer("foo") ctx, s1 := tracer.Start(context.Background(), "span name") @@ -145,15 +153,7 @@ func TestInboundMetricsSpanProcessorOnEndWithLocalParent(t *testing.T) { func TestInboundMetricsSpanProcessorOnEndWithRemoteParent(t *testing.T) { mock := &recordMock{} - recordFunc = func(span sdktrace.ReadOnlySpan, isAppoptics bool) { - mock.span = span - mock.isAppoptics = isAppoptics - mock.called = true - } - defer func() { - recordFunc = metrics.RecordSpan - }() - sp := &inboundMetricsSpanProcessor{} + sp := NewInboundMetricsSpanProcessor(mock, false) tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sp)) tracer := tp.Tracer("foo") ctx := context.Background() diff --git a/internal/reporter/context.go b/internal/reporter/context.go deleted file mode 100644 index 982f0acb..00000000 --- a/internal/reporter/context.go +++ /dev/null @@ -1,116 +0,0 @@ -// © 2023 SolarWinds Worldwide, LLC. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package reporter - -import ( - "crypto/hmac" - "crypto/sha1" - "encoding/hex" - "fmt" - "github.com/solarwinds/apm-go/internal/log" - "strconv" - "time" - - "github.com/pkg/errors" -) - -type AuthStatus int - -const ( - AuthOK = iota - AuthBadTimestamp - AuthNoSignatureKey - AuthBadSignature -) - -func (a AuthStatus) IsError() bool { - return a != AuthOK -} - -func (a AuthStatus) Msg() string { - switch a { - case AuthOK: - return "ok" - case AuthBadTimestamp: - return "bad-timestamp" - case AuthNoSignatureKey: - return "no-signature-key" - case AuthBadSignature: - return "bad-signature" - } - log.Debugf("could not read msg for unknown AuthStatus: %s", a) - return "" -} - -// TODO: This could live in the `xtrace` package, except it requires -// TODO: the ability to extract the TT Token from oboe settings. -// TODO: Determine a clean/elegant way to clean this up. -func ValidateXTraceOptionsSignature(signature, ts, data string) AuthStatus { - var err error - _, err = tsInScope(ts) - if err != nil { - return AuthBadTimestamp - } - - token, err := getTriggerTraceToken() - if err != nil { - return AuthNoSignatureKey - } - - if HmacHash(token, []byte(data)) != signature { - return AuthBadSignature - } - return AuthOK -} - -func HmacHashTT(data []byte) (string, error) { - token, err := getTriggerTraceToken() - if err != nil { - return "", err - } - return HmacHash(token, data), nil -} - -func HmacHash(token, data []byte) string { - h := hmac.New(sha1.New, token) - h.Write(data) - sha := hex.EncodeToString(h.Sum(nil)) - return sha -} - -func getTriggerTraceToken() ([]byte, error) { - setting, ok := getSetting() - if !ok { - return nil, errors.New("failed to get settings") - } - if len(setting.triggerToken) == 0 { - return nil, errors.New("no valid signature key found") - } - return setting.triggerToken, nil -} - -func tsInScope(tsStr string) (string, error) { - ts, err := strconv.ParseInt(tsStr, 10, 64) - if err != nil { - return "", errors.Wrap(err, "tsInScope") - } - - t := time.Unix(ts, 0) - if t.Before(time.Now().Add(time.Minute*-5)) || - t.After(time.Now().Add(time.Minute*5)) { - return "", fmt.Errorf("timestamp out of scope: %s", tsStr) - } - return strconv.FormatInt(ts, 10), nil -} diff --git a/internal/reporter/methods.go b/internal/reporter/methods.go index 8d92284f..a8d7e75a 100644 --- a/internal/reporter/methods.go +++ b/internal/reporter/methods.go @@ -16,10 +16,10 @@ package reporter import ( "context" + "errors" "fmt" "time" - "github.com/pkg/errors" collector "github.com/solarwinds/apm-proto/go/collectorpb" ) @@ -145,7 +145,7 @@ func (pe *PostEventsMethod) CallSummary() string { // RetryOnErr denotes if retry is needed for this RPC method func (pe *PostEventsMethod) RetryOnErr(err error) bool { - return errRequestTooBig != errors.Cause(err) + return !errors.Is(err, errRequestTooBig) } func (pe *PostEventsMethod) ServiceKey() string { @@ -230,7 +230,7 @@ func (pm *PostMetricsMethod) CallSummary() string { // RetryOnErr denotes if retry is needed for this RPC method func (pm *PostMetricsMethod) RetryOnErr(err error) bool { - return errRequestTooBig != errors.Cause(err) + return !errors.Is(err, errRequestTooBig) } func (pm *PostMetricsMethod) ServiceKey() string { @@ -307,7 +307,7 @@ func (ps *PostStatusMethod) Call(ctx context.Context, // RetryOnErr denotes if retry is needed for this RPC method func (ps *PostStatusMethod) RetryOnErr(err error) bool { - return errRequestTooBig != errors.Cause(err) + return !errors.Is(err, errRequestTooBig) } func (ps *PostStatusMethod) ServiceKey() string { diff --git a/internal/reporter/oboe.go b/internal/reporter/oboe.go deleted file mode 100644 index 0f0eddd0..00000000 --- a/internal/reporter/oboe.go +++ /dev/null @@ -1,799 +0,0 @@ -// © 2023 SolarWinds Worldwide, LLC. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package reporter - -import ( - "encoding/binary" - "fmt" - "github.com/solarwinds/apm-go/internal/config" - "github.com/solarwinds/apm-go/internal/log" - "github.com/solarwinds/apm-go/internal/metrics" - "github.com/solarwinds/apm-go/internal/rand" - "github.com/solarwinds/apm-go/internal/swotel/semconv" - "github.com/solarwinds/apm-go/internal/utils" - "github.com/solarwinds/apm-go/internal/w3cfmt" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/sdk/resource" - "go.opentelemetry.io/otel/trace" - "math" - "strconv" - "strings" - "sync" - "time" - - "github.com/pkg/errors" -) - -const ( - maxSamplingRate = config.MaxSampleRate -) - -// enums used by sampling and tracing settings -type SampleSource int - -// source of the sample value -const ( - SAMPLE_SOURCE_UNSET SampleSource = iota - 1 - SAMPLE_SOURCE_NONE - SAMPLE_SOURCE_FILE - SAMPLE_SOURCE_DEFAULT - SAMPLE_SOURCE_LAYER -) - -// Current settings configuration -type oboeSettingsCfg struct { - settings map[oboeSettingKey]*oboeSettings - lock sync.RWMutex -} - -// FlushRateCounts collects the request counters values by categories. -func FlushRateCounts() map[string]*metrics.RateCounts { - setting, ok := getSetting() - if !ok { - return nil - } - rcs := make(map[string]*metrics.RateCounts) - rcs[metrics.RCRegular] = setting.bucket.FlushRateCounts() - rcs[metrics.RCRelaxedTriggerTrace] = setting.triggerTraceRelaxedBucket.FlushRateCounts() - rcs[metrics.RCStrictTriggerTrace] = setting.triggerTraceStrictBucket.FlushRateCounts() - - return rcs -} - -type oboeSettings struct { - timestamp time.Time - // the flags which may be modified through merging local settings. - flags settingFlag - // the original flags retrieved from the remote collector. - originalFlags settingFlag - // The sample rate. It could be the original value got from remote server - // or a new value after negotiating with local config - value int - // The sample source after negotiating with local config - source SampleSource - ttl int64 - layer string - triggerToken []byte - bucket *tokenBucket - triggerTraceRelaxedBucket *tokenBucket - triggerTraceStrictBucket *tokenBucket -} - -func (s *oboeSettings) hasOverrideFlag() bool { - return s.originalFlags&FLAG_OVERRIDE != 0 -} - -func newOboeSettings() *oboeSettings { - return &oboeSettings{ - bucket: globalTokenBucket, - triggerTraceRelaxedBucket: triggerTraceRelaxedBucket, - triggerTraceStrictBucket: triggerTraceStrictBucket, - } -} - -// token bucket -type tokenBucket struct { - ratePerSec float64 - capacity float64 - available float64 - last time.Time - lock sync.Mutex - metrics.RateCounts -} - -func (b *tokenBucket) reset() { - b.lock.Lock() - defer b.lock.Unlock() - - b.ratePerSec = 0 - b.capacity = 0 - b.available = 0 - b.last = time.Time{} -} - -func (b *tokenBucket) setRateCap(rate, cap float64) { - b.lock.Lock() - defer b.lock.Unlock() - b.ratePerSec = rate - b.capacity = cap - - if b.available > b.capacity { - b.available = b.capacity - } -} - -// The identifying keys for a setting -type oboeSettingKey struct { - sType settingType - layer string -} - -// Global configuration settings -var globalSettingsCfg = &oboeSettingsCfg{ - settings: make(map[oboeSettingKey]*oboeSettings), -} - -// The global token bucket. Trace decisions of all the requests are controlled -// by this single bucket. -// -// The rate and capacity will be initialized by the values fetched from the remote -// server, therefore it's initialized with only the default values. -var globalTokenBucket = &tokenBucket{} - -// The token bucket exclusively for trigger trace from authenticated clients -var triggerTraceRelaxedBucket = &tokenBucket{} - -// The token bucket exclusively for trigger trace from unauthenticated clients -var triggerTraceStrictBucket = &tokenBucket{} - -func createInitMessage(tid trace.TraceID, r *resource.Resource) Event { - evt := NewEventWithRandomOpID(tid, time.Now()) - evt.SetLabel(LabelUnset) - for _, kv := range r.Attributes() { - if kv.Key != semconv.ServiceNameKey { - evt.AddKV(kv) - } - } - - evt.AddKVs([]attribute.KeyValue{ - attribute.Bool("__Init", true), - attribute.String("APM.Version", utils.Version()), - }) - return evt -} - -func sendInitMessage(r *resource.Resource) { - if Closed() { - log.Info(errors.Wrap(ErrReporterIsClosed, "send init message")) - return - } - tid := trace.TraceID{0} - rand.Random(tid[:]) - evt := createInitMessage(tid, r) - if err := ReportStatus(evt); err != nil { - log.Error("could not send init message", err) - } -} - -func (b *tokenBucket) count(sampled, hasMetadata, rateLimit bool) bool { - b.RequestedInc() - - if !hasMetadata { - b.SampledInc() - } - - if !sampled { - return sampled - } - - if rateLimit { - if ok := b.consume(1); !ok { - b.LimitedInc() - return false - } - } - - if hasMetadata { - b.ThroughInc() - } - b.TracedInc() - return sampled -} - -func (b *tokenBucket) consume(size float64) bool { - b.lock.Lock() - defer b.lock.Unlock() - b.update(time.Now()) - if b.available >= size { - b.available -= size - return true - } - return false -} - -func (b *tokenBucket) update(now time.Time) { - if b.available < b.capacity { // room for more tokens? - delta := now.Sub(b.last) // calculate duration since last check - b.last = now // update time of last check - if delta <= 0 { // return if no delta or time went "backwards" - return - } - newTokens := b.ratePerSec * delta.Seconds() // # tokens generated since last check - b.available = math.Min(b.capacity, b.available+newTokens) // add new tokens to bucket, but don't overfill - } -} - -type SampleDecision struct { - trace bool - rate int - source SampleSource - // if the request is disabled from tracing in a per-transaction level or for - // the entire service. - enabled bool - xTraceOptsRsp string - bucketCap float64 - bucketRate float64 - - diceRolled bool -} - -func (s SampleDecision) Trace() bool { - return s.trace -} - -func (s SampleDecision) XTraceOptsRsp() string { - return s.xTraceOptsRsp -} - -func (s SampleDecision) Enabled() bool { - return s.enabled -} - -func (s SampleDecision) BucketCapacity() float64 { - return s.bucketCap -} - -func (s SampleDecision) BucketCapacityStr() string { - return floatToStr(s.BucketCapacity()) -} - -func (s SampleDecision) BucketRate() float64 { - return s.bucketRate -} - -func (s SampleDecision) BucketRateStr() string { - return floatToStr(s.BucketRate()) -} - -func (s SampleDecision) SampleRate() int { - return s.rate -} - -func (s SampleDecision) SampleSource() SampleSource { - return s.source -} - -func floatToStr(f float64) string { - return strconv.FormatFloat(f, 'f', -1, 64) -} - -type TriggerTraceMode int - -const ( - // ModeTriggerTraceNotPresent means there is no X-Trace-Options header detected, - // or the X-Trace-Options header is present but trigger_trace flag is not. This - // indicates that it's a trace for regular sampling. - ModeTriggerTraceNotPresent TriggerTraceMode = iota - - // ModeInvalidTriggerTrace means X-Trace-Options is detected but no valid trigger-trace - // flag found, or X-Trace-Options-Signature is present but the authentication is failed. - ModeInvalidTriggerTrace - - // ModeRelaxedTriggerTrace means X-Trace-Options-Signature is present and valid. - // The trace will be sampled/limited by the relaxed token bucket. - ModeRelaxedTriggerTrace - - // ModeStrictTriggerTrace means no X-Trace-Options-Signature is present. The trace - // will be limited by the strict token bucket. - ModeStrictTriggerTrace -) - -// Trigger trace response messages -const ( - ttOK = "ok" - ttRateExceeded = "rate-exceeded" - ttTracingDisabled = "tracing-disabled" - ttTriggerTracingDisabled = "trigger-tracing-disabled" - ttNotRequested = "not-requested" - ttIgnored = "ignored" - ttSettingsNotAvailable = "settings-not-available" - ttEmpty = "" -) - -// Enabled indicates whether it's a trigger-trace request -func (tm TriggerTraceMode) Enabled() bool { - switch tm { - case ModeTriggerTraceNotPresent, ModeInvalidTriggerTrace: - return false - case ModeRelaxedTriggerTrace, ModeStrictTriggerTrace: - return true - default: - panic(fmt.Sprintf("Unhandled trigger trace mode: %x", tm)) - } -} - -// Requested indicates whether the user tries to issue a trigger-trace request -// (but may be rejected if the header is illegal) -func (tm TriggerTraceMode) Requested() bool { - switch tm { - case ModeTriggerTraceNotPresent: - return false - case ModeRelaxedTriggerTrace, ModeStrictTriggerTrace, ModeInvalidTriggerTrace: - return true - default: - panic(fmt.Sprintf("Unhandled trigger trace mode: %x", tm)) - } -} - -func oboeSampleRequest(continued bool, url string, triggerTrace TriggerTraceMode, swState w3cfmt.SwTraceState) SampleDecision { - if usingTestReporter { - if r, ok := globalReporter.(*TestReporter); ok { - if !r.UseSettings { - return SampleDecision{r.ShouldTrace, 0, SAMPLE_SOURCE_NONE, true, ttEmpty, 0, 0, false} // trace tests - } - } - } - - var setting *oboeSettings - var ok bool - diceRolled := false - if setting, ok = getSetting(); !ok { - return SampleDecision{false, 0, SAMPLE_SOURCE_NONE, false, ttSettingsNotAvailable, 0, 0, diceRolled} - } - - retval := false - doRateLimiting := false - - sampleRate, flags, source := mergeURLSetting(setting, url) - - // Choose an appropriate bucket - bucket := setting.bucket - if triggerTrace == ModeRelaxedTriggerTrace { - bucket = setting.triggerTraceRelaxedBucket - } else if triggerTrace == ModeStrictTriggerTrace { - bucket = setting.triggerTraceStrictBucket - } - - if triggerTrace.Requested() && !continued { - sampled := (triggerTrace != ModeInvalidTriggerTrace) && (flags.TriggerTraceEnabled()) - rsp := ttOK - - ret := bucket.count(sampled, false, true) - - if flags.TriggerTraceEnabled() && triggerTrace.Enabled() { - if !ret { - rsp = ttRateExceeded - } - } else if triggerTrace == ModeInvalidTriggerTrace { - rsp = "" - } else { - if !flags.Enabled() { - rsp = ttTracingDisabled - } else { - rsp = ttTriggerTracingDisabled - } - } - ttCap, ttRate := getTokenBucketSetting(setting, triggerTrace) - return SampleDecision{ret, -1, SAMPLE_SOURCE_UNSET, flags.Enabled(), rsp, ttRate, ttCap, diceRolled} - } - - unsetBucketAndSampleKVs := false - if !continued { - // A new request - if flags&FLAG_SAMPLE_START != 0 { - // roll the dice - diceRolled = true - retval = shouldSample(sampleRate) - if retval { - doRateLimiting = true - } - } - } else if swState.IsValid() { - if swState.Flags().IsSampled() { - if flags&FLAG_SAMPLE_THROUGH_ALWAYS != 0 { - // Conform to liboboe behavior; continue decision would result in a -1 value for the - // BucketCapacity, BucketRate, SampleRate and SampleSource KVs to indicate "unset". - unsetBucketAndSampleKVs = true - retval = true - } else if flags&FLAG_SAMPLE_THROUGH != 0 { - // roll the dice - diceRolled = true - retval = shouldSample(sampleRate) - } - } else { - retval = false - } - } - - retval = bucket.count(retval, continued, doRateLimiting) - - rsp := ttNotRequested - if triggerTrace.Requested() { - rsp = ttIgnored - } - - var bucketCap, bucketRate float64 - if unsetBucketAndSampleKVs { - bucketCap, bucketRate, sampleRate, source = -1, -1, -1, SAMPLE_SOURCE_UNSET - } else { - bucketCap, bucketRate = getTokenBucketSetting(setting, ModeTriggerTraceNotPresent) - } - - return SampleDecision{ - retval, - sampleRate, - source, - flags.Enabled(), - rsp, - bucketCap, - bucketRate, - diceRolled, - } -} - -func getTokenBucketSetting(setting *oboeSettings, ttMode TriggerTraceMode) (capacity float64, rate float64) { - var bucket *tokenBucket - - switch ttMode { - case ModeRelaxedTriggerTrace: - bucket = setting.triggerTraceRelaxedBucket - case ModeStrictTriggerTrace: - bucket = setting.triggerTraceStrictBucket - case ModeTriggerTraceNotPresent, ModeInvalidTriggerTrace: - bucket = setting.bucket - default: - log.Warningf("Could not determine token bucket setting for invalid TriggerTraceMode: %#v", ttMode) - return 0, 0 - } - - return bucket.capacity, bucket.ratePerSec -} - -func bytesToFloat64(b []byte) (float64, error) { - if len(b) != 8 { - return -1, fmt.Errorf("invalid length: %d", len(b)) - } - return math.Float64frombits(binary.LittleEndian.Uint64(b)), nil -} - -func bytesToInt32(b []byte) (int32, error) { - if len(b) != 4 { - return -1, fmt.Errorf("invalid length: %d", len(b)) - } - return int32(binary.LittleEndian.Uint32(b)), nil -} - -func parseFloat64(args map[string][]byte, key string, fb float64) float64 { - ret := fb - if c, ok := args[key]; ok { - v, err := bytesToFloat64(c) - if err == nil && v >= 0 { - ret = v - log.Debugf("parsed %s=%f", key, v) - } else { - log.Warningf("parse error: %s=%f err=%v fallback=%f", key, v, err, fb) - } - } - return ret -} - -func parseInt32(args map[string][]byte, key string, fb int32) int32 { - ret := fb - if c, ok := args[key]; ok { - v, err := bytesToInt32(c) - if err == nil && v >= 0 { - ret = v - log.Debugf("parsed %s=%d", key, v) - } else { - log.Warningf("parse error: %s=%d err=%v fallback=%d", key, v, err, fb) - } - } - return ret -} - -// mergeLocalSetting follow the predefined precedence to decide which one to -// pick from: either the local configs or the remote ones, or the combination. -// -// Note: This function modifies the argument in place. -func mergeLocalSetting(remote *oboeSettings) *oboeSettings { - if remote.hasOverrideFlag() && config.SamplingConfigured() { - // Choose the lower sample rate and merge the flags - if remote.value > config.GetSampleRate() { - remote.value = config.GetSampleRate() - remote.source = SAMPLE_SOURCE_FILE - } - remote.flags &= newTracingMode(config.GetTracingMode()).toFlags() - } else if config.SamplingConfigured() { - // Use local sample rate and tracing mode config - remote.value = config.GetSampleRate() - remote.flags = newTracingMode(config.GetTracingMode()).toFlags() - remote.source = SAMPLE_SOURCE_FILE - } - - if !config.GetTriggerTrace() { - remote.flags = remote.flags &^ (1 << FlagTriggerTraceOffset) - } - return remote -} - -// mergeURLSetting merges the service level setting (merged from remote and local -// settings) and the per-URL sampling flags, if any. -func mergeURLSetting(setting *oboeSettings, url string) (int, settingFlag, SampleSource) { - if url == "" { - return setting.value, setting.flags, setting.source - } - - urlTracingMode := urls.getTracingMode(url) - if urlTracingMode.isUnknown() { - return setting.value, setting.flags, setting.source - } - - flags := urlTracingMode.toFlags() - source := SAMPLE_SOURCE_FILE - - if setting.hasOverrideFlag() { - flags &= setting.originalFlags - } - - return setting.value, flags, source -} - -func adjustSampleRate(rate int64) int { - if rate < 0 { - log.Debugf("Invalid sample rate: %d", rate) - return 0 - } - - if rate > maxSamplingRate { - log.Debugf("Invalid sample rate: %d", rate) - return maxSamplingRate - } - return int(rate) -} - -func updateSetting(sType int32, layer string, flags []byte, value int64, ttl int64, args map[string][]byte) { - ns := newOboeSettings() - - ns.timestamp = time.Now() - ns.source = settingType(sType).toSampleSource() - ns.flags = flagStringToBin(string(flags)) - ns.originalFlags = ns.flags - ns.value = adjustSampleRate(value) - ns.ttl = ttl - ns.layer = layer - - ns.triggerToken = args[kvSignatureKey] - - rate := parseFloat64(args, kvBucketRate, 0) - capacity := parseFloat64(args, kvBucketCapacity, 0) - ns.bucket.setRateCap(rate, capacity) - - tRelaxedRate := parseFloat64(args, kvTriggerTraceRelaxedBucketRate, 0) - tRelaxedCapacity := parseFloat64(args, kvTriggerTraceRelaxedBucketCapacity, 0) - ns.triggerTraceRelaxedBucket.setRateCap(tRelaxedRate, tRelaxedCapacity) - - tStrictRate := parseFloat64(args, kvTriggerTraceStrictBucketRate, 0) - tStrictCapacity := parseFloat64(args, kvTriggerTraceStrictBucketCapacity, 0) - ns.triggerTraceStrictBucket.setRateCap(tStrictRate, tStrictCapacity) - - merged := mergeLocalSetting(ns) - - key := oboeSettingKey{ - sType: settingType(sType), - layer: layer, - } - - globalSettingsCfg.lock.Lock() - globalSettingsCfg.settings[key] = merged - globalSettingsCfg.lock.Unlock() -} - -// Used for tests only -func resetSettings() { - FlushRateCounts() - - globalSettingsCfg.lock.Lock() - defer globalSettingsCfg.lock.Unlock() - globalSettingsCfg.settings = make(map[oboeSettingKey]*oboeSettings) - globalTokenBucket.reset() -} - -// OboeCheckSettingsTimeout checks and deletes expired settings -func OboeCheckSettingsTimeout() { - globalSettingsCfg.checkSettingsTimeout() -} - -func (sc *oboeSettingsCfg) checkSettingsTimeout() { - sc.lock.Lock() - defer sc.lock.Unlock() - - ss := sc.settings - for k, s := range ss { - e := s.timestamp.Add(time.Duration(s.ttl) * time.Second) - if e.Before(time.Now()) { - delete(ss, k) - } - } -} - -func getSetting() (*oboeSettings, bool) { - globalSettingsCfg.lock.RLock() - defer globalSettingsCfg.lock.RUnlock() - - // for now only look up the default settings - key := oboeSettingKey{ - sType: TYPE_DEFAULT, - layer: "", - } - if setting, ok := globalSettingsCfg.settings[key]; ok { - return setting, true - } - - return nil, false -} - -func removeSetting() { - globalSettingsCfg.lock.Lock() - defer globalSettingsCfg.lock.Unlock() - - key := oboeSettingKey{ - sType: TYPE_DEFAULT, - layer: "", - } - - delete(globalSettingsCfg.settings, key) -} - -func hasDefaultSetting() bool { - if _, ok := getSetting(); ok { - return true - } - return false -} - -func shouldSample(sampleRate int) bool { - return sampleRate == maxSamplingRate || rand.RandIntn(maxSamplingRate) <= sampleRate -} - -func flagStringToBin(flagString string) settingFlag { - flags := settingFlag(0) - if flagString != "" { - for _, s := range strings.Split(flagString, ",") { - switch s { - case "OVERRIDE": - flags |= FLAG_OVERRIDE - case "SAMPLE_START": - flags |= FLAG_SAMPLE_START - case "SAMPLE_THROUGH": - flags |= FLAG_SAMPLE_THROUGH - case "SAMPLE_THROUGH_ALWAYS": - flags |= FLAG_SAMPLE_THROUGH_ALWAYS - case "TRIGGER_TRACE": - flags |= FLAG_TRIGGER_TRACE - } - } - } - return flags -} - -// tracing mode -type tracingMode int - -// tracing modes -const ( - TraceDisabled tracingMode = iota // disable tracing, will neither start nor continue traces - TraceEnabled // perform sampling every inbound request for tracing - TraceUnknown // for cache purpose only -) - -// newTracingMode creates a tracing mode object from a string -func newTracingMode(mode config.TracingMode) tracingMode { - switch mode { - case config.DisabledTracingMode: - return TraceDisabled - case config.EnabledTracingMode: - return TraceEnabled - default: - } - return TraceUnknown -} - -func (tm tracingMode) isUnknown() bool { - return tm == TraceUnknown -} - -func (tm tracingMode) toFlags() settingFlag { - switch tm { - case TraceEnabled: - return FLAG_SAMPLE_START | FLAG_SAMPLE_THROUGH_ALWAYS | FLAG_TRIGGER_TRACE - case TraceDisabled: - default: - } - return FLAG_OK -} - -func (tm tracingMode) ToString() string { - switch tm { - case TraceEnabled: - return string(config.EnabledTracingMode) - case TraceDisabled: - return string(config.DisabledTracingMode) - default: - return string(config.UnknownTracingMode) - } -} - -type settingType int -type settingFlag uint16 - -// setting types -const ( - TYPE_DEFAULT settingType = iota // default setting which serves as a fallback if no other settings found - TYPE_LAYER // layer specific settings -) - -// setting flags offset -const ( - FlagInvalidOffset = iota - FlagOverrideOffset - FlagSampleStartOffset - FlagSampleThroughOffset - FlagSampleThroughAlwaysOffset - FlagTriggerTraceOffset -) - -// setting flags -const ( - FLAG_OK settingFlag = 0x0 - FLAG_INVALID settingFlag = 1 << FlagInvalidOffset - FLAG_OVERRIDE settingFlag = 1 << FlagOverrideOffset - FLAG_SAMPLE_START settingFlag = 1 << FlagSampleStartOffset - FLAG_SAMPLE_THROUGH settingFlag = 1 << FlagSampleThroughOffset - FLAG_SAMPLE_THROUGH_ALWAYS settingFlag = 1 << FlagSampleThroughAlwaysOffset - FLAG_TRIGGER_TRACE settingFlag = 1 << FlagTriggerTraceOffset -) - -// Enabled returns if the trace is enabled or not. -func (f settingFlag) Enabled() bool { - return f&(FLAG_SAMPLE_START|FLAG_SAMPLE_THROUGH_ALWAYS) != 0 -} - -// TriggerTraceEnabled returns if the trigger trace is enabled -func (f settingFlag) TriggerTraceEnabled() bool { - return f&FLAG_TRIGGER_TRACE != 0 -} - -func (st settingType) toSampleSource() SampleSource { - var source SampleSource - switch st { - case TYPE_DEFAULT: - source = SAMPLE_SOURCE_DEFAULT - case TYPE_LAYER: - source = SAMPLE_SOURCE_LAYER - default: - source = SAMPLE_SOURCE_NONE - } - return source -} diff --git a/internal/reporter/reporter.go b/internal/reporter/reporter.go index c01c72a8..e7c598b6 100644 --- a/internal/reporter/reporter.go +++ b/internal/reporter/reporter.go @@ -16,14 +16,21 @@ package reporter import ( "context" - "encoding/binary" + "fmt" + "github.com/pkg/errors" "github.com/solarwinds/apm-go/internal/config" "github.com/solarwinds/apm-go/internal/log" + "github.com/solarwinds/apm-go/internal/metrics" + "github.com/solarwinds/apm-go/internal/oboe" + "github.com/solarwinds/apm-go/internal/rand" + "github.com/solarwinds/apm-go/internal/state" "github.com/solarwinds/apm-go/internal/swotel/semconv" - "github.com/solarwinds/apm-go/internal/w3cfmt" + "github.com/solarwinds/apm-go/internal/utils" + "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/sdk/resource" - "math" + "go.opentelemetry.io/otel/trace" "strings" + "time" ) // defines what methods a Reporter should offer (internal to Reporter package) @@ -47,24 +54,6 @@ type Reporter interface { GetServiceName() string } -// KVs from getSettingsResult arguments -const ( - kvSignatureKey = "SignatureKey" - kvBucketCapacity = "BucketCapacity" - kvBucketRate = "BucketRate" - kvTriggerTraceRelaxedBucketCapacity = "TriggerRelaxedBucketCapacity" - kvTriggerTraceRelaxedBucketRate = "TriggerRelaxedBucketRate" - kvTriggerTraceStrictBucketCapacity = "TriggerStrictBucketCapacity" - kvTriggerTraceStrictBucketRate = "TriggerStrictBucketRate" - kvMetricsFlushInterval = "MetricsFlushInterval" - kvEventsFlushInterval = "EventsFlushInterval" - kvMaxTransactions = "MaxTransactions" - kvMaxCustomMetrics = "MaxCustomMetrics" -) - -// currently used reporter -var globalReporter Reporter = &nullReporter{} - var ( periodicTasksDisabled = false // disable periodic tasks, for testing ) @@ -82,13 +71,18 @@ func (r *nullReporter) WaitForReady(context.Context) bool { return true } func (r *nullReporter) SetServiceKey(string) error { return nil } func (r *nullReporter) GetServiceName() string { return "" } -func Start(r *resource.Resource) { +func Start(rsrc *resource.Resource, registry interface{}, o oboe.Oboe) (Reporter, error) { log.SetLevelFromStr(config.DebugLevel()) - initReporter(r) - sendInitMessage(r) + if reg, ok := registry.(metrics.LegacyRegistry); !ok { + return nil, fmt.Errorf("metrics registry must implement metrics.LegacyRegistry") + } else { + rptr := initReporter(rsrc, reg, o) + sendInitMessage(rptr, rsrc) + return rptr, nil + } } -func initReporter(r *resource.Resource) { +func initReporter(r *resource.Resource, registry metrics.LegacyRegistry, o oboe.Oboe) Reporter { var rt string if !config.GetEnabled() { log.Warning("SolarWinds Observability APM agent is disabled.") @@ -99,118 +93,39 @@ func initReporter(r *resource.Resource) { otelServiceName := "" if sn, ok := r.Set().Value(semconv.ServiceNameKey); ok { otelServiceName = strings.TrimSpace(sn.AsString()) + state.SetServiceName(otelServiceName) } - setGlobalReporter(rt, otelServiceName) -} - -func setGlobalReporter(reporterType string, otelServiceName string) { - // Close the previous reporter - if globalReporter != nil { - globalReporter.ShutdownNow() - } - - switch strings.ToLower(reporterType) { - case "none": - globalReporter = newNullReporter() - default: - globalReporter = newGRPCReporter(otelServiceName) + if rt == "none" { + return newNullReporter() } + return newGRPCReporter(otelServiceName, registry, o) } -// WaitForReady waits until the reporter becomes ready or the context is canceled. -func WaitForReady(ctx context.Context) bool { - // globalReporter is not protected by a mutex as currently it's only modified - // from the init() function. - return globalReporter.WaitForReady(ctx) -} - -// Shutdown flushes the metrics and stops the reporter. It blocked until the reporter -// is shutdown or the context is canceled. -func Shutdown(ctx context.Context) error { - return globalReporter.Shutdown(ctx) -} - -// Closed indicates if the reporter has been shutdown -func Closed() bool { - return globalReporter.Closed() -} - -func ShouldTraceRequestWithURL(traced bool, url string, ttMode TriggerTraceMode, swState w3cfmt.SwTraceState) SampleDecision { - return shouldTraceRequestWithURL(traced, url, ttMode, swState) -} +func CreateInitMessage(tid trace.TraceID, r *resource.Resource) Event { + evt := NewEventWithRandomOpID(tid, time.Now()) + evt.SetLabel(LabelUnset) + for _, kv := range r.Attributes() { + if kv.Key != semconv.ServiceNameKey { + evt.AddKV(kv) + } + } -func shouldTraceRequestWithURL(traced bool, url string, triggerTrace TriggerTraceMode, swState w3cfmt.SwTraceState) SampleDecision { - return oboeSampleRequest(traced, url, triggerTrace, swState) + evt.AddKVs([]attribute.KeyValue{ + attribute.Bool("__Init", true), + attribute.String("APM.Version", utils.Version()), + }) + return evt } -func argsToMap(capacity, ratePerSec, tRCap, tRRate, tSCap, tSRate float64, - metricsFlushInterval, maxTransactions int, token []byte) map[string][]byte { - args := make(map[string][]byte) - - if capacity > -1 { - bits := math.Float64bits(capacity) - bytes := make([]byte, 8) - binary.LittleEndian.PutUint64(bytes, bits) - args[kvBucketCapacity] = bytes - } - if ratePerSec > -1 { - bits := math.Float64bits(ratePerSec) - bytes := make([]byte, 8) - binary.LittleEndian.PutUint64(bytes, bits) - args[kvBucketRate] = bytes - } - if tRCap > -1 { - bits := math.Float64bits(tRCap) - bytes := make([]byte, 8) - binary.LittleEndian.PutUint64(bytes, bits) - args[kvTriggerTraceRelaxedBucketCapacity] = bytes - } - if tRRate > -1 { - bits := math.Float64bits(tRRate) - bytes := make([]byte, 8) - binary.LittleEndian.PutUint64(bytes, bits) - args[kvTriggerTraceRelaxedBucketRate] = bytes - } - if tSCap > -1 { - bits := math.Float64bits(tSCap) - bytes := make([]byte, 8) - binary.LittleEndian.PutUint64(bytes, bits) - args[kvTriggerTraceStrictBucketCapacity] = bytes +func sendInitMessage(r Reporter, rsrc *resource.Resource) { + if r.Closed() { + log.Info(errors.Wrap(ErrReporterIsClosed, "send init message")) + return } - if tSRate > -1 { - bits := math.Float64bits(tSRate) - bytes := make([]byte, 8) - binary.LittleEndian.PutUint64(bytes, bits) - args[kvTriggerTraceStrictBucketRate] = bytes + tid := trace.TraceID{0} + rand.Random(tid[:]) + evt := CreateInitMessage(tid, rsrc) + if err := r.ReportStatus(evt); err != nil { + log.Error("could not send init message", err) } - if metricsFlushInterval > -1 { - bytes := make([]byte, 4) - binary.LittleEndian.PutUint32(bytes, uint32(metricsFlushInterval)) - args[kvMetricsFlushInterval] = bytes - } - if maxTransactions > -1 { - bytes := make([]byte, 4) - binary.LittleEndian.PutUint32(bytes, uint32(maxTransactions)) - args[kvMaxTransactions] = bytes - } - - args[kvSignatureKey] = token - - return args -} - -func SetServiceKey(key string) error { - return globalReporter.SetServiceKey(key) -} - -func ReportStatus(e Event) error { - return globalReporter.ReportStatus(e) -} - -func ReportEvent(e Event) error { - return globalReporter.ReportEvent(e) -} - -func GetServiceName() string { - return globalReporter.GetServiceName() } diff --git a/internal/reporter/reporter_grpc.go b/internal/reporter/reporter_grpc.go index 041bd882..ff36c398 100644 --- a/internal/reporter/reporter_grpc.go +++ b/internal/reporter/reporter_grpc.go @@ -19,15 +19,19 @@ import ( "crypto/tls" "crypto/x509" "encoding/base64" + "encoding/binary" + "errors" "fmt" "github.com/google/uuid" "github.com/solarwinds/apm-go/internal/config" + "github.com/solarwinds/apm-go/internal/constants" "github.com/solarwinds/apm-go/internal/host" "github.com/solarwinds/apm-go/internal/host/aws" "github.com/solarwinds/apm-go/internal/host/azure" "github.com/solarwinds/apm-go/internal/host/k8s" "github.com/solarwinds/apm-go/internal/log" "github.com/solarwinds/apm-go/internal/metrics" + "github.com/solarwinds/apm-go/internal/oboe" "github.com/solarwinds/apm-go/internal/uams" "github.com/solarwinds/apm-go/internal/utils" "io" @@ -42,7 +46,6 @@ import ( "sync/atomic" "time" - "github.com/pkg/errors" "google.golang.org/grpc/credentials" "google.golang.org/grpc/encoding/gzip" @@ -195,7 +198,7 @@ func newGrpcConnection(name string, target string, opts ...GrpcConnOpt) (*grpcCo err := gc.connect() if err != nil { - return nil, errors.Wrap(err, name) + return nil, errors.Join(fmt.Errorf("failed to connect to %s", name), err) } return gc, nil } @@ -239,6 +242,12 @@ type grpcReporter struct { // The flag to indicate gracefully stopping the reporter. It should be accessed atomically. // A (default) zero value means shutdown abruptly. gracefully int32 + + // metrics + registry metrics.LegacyRegistry + + // oboe + oboe oboe.Oboe } // gRPC reporter errors @@ -259,7 +268,7 @@ func getProxyCertPath() string { // initializes a new GRPC reporter from scratch (called once on program startup) // // returns GRPC Reporter object -func newGRPCReporter(otelServiceName string) Reporter { +func newGRPCReporter(otelServiceName string, registry metrics.LegacyRegistry, o oboe.Oboe) Reporter { // collector address override addr := config.GetCollector() @@ -304,6 +313,9 @@ func newGRPCReporter(otelServiceName string) Reporter { cond: sync.NewCond(&sync.Mutex{}), done: make(chan struct{}), + + registry: registry, + oboe: o, } r.start() @@ -511,7 +523,7 @@ func (c *grpcConnection) connect() error { ProxyCertPath: c.proxyTLSCertPath, }) if err != nil { - return errors.Wrap(err, "failed to connect to target") + return errors.Join(fmt.Errorf("failed to connect to %s", c.address), err) } // close the old connection @@ -623,7 +635,7 @@ func (r *grpcReporter) periodicTasks() { // set up ticker for next round r.conn.resetPing() go func() { - if r.conn.ping(r.done, r.serviceKey.Load()) == errInvalidServiceKey { + if errors.Is(r.conn.ping(r.done, r.serviceKey.Load()), errInvalidServiceKey) { r.ShutdownNow() } }() @@ -771,14 +783,11 @@ func (r *grpcReporter) eventBatchSender(batches <-chan [][]byte) { if len(messages) != 0 { method := newPostEventsMethod(r.serviceKey.Load(), messages) - err := r.conn.InvokeRPC(r.done, method) - - switch err { - case errInvalidServiceKey: - r.ShutdownNow() - case nil: + if err := r.conn.InvokeRPC(r.done, method); err == nil { log.Info(method.CallSummary()) - default: + } else if errors.Is(err, errInvalidServiceKey) { + r.ShutdownNow() + } else { log.Warningf("eventBatchSender: %s", err) } } @@ -810,13 +819,12 @@ func (r *grpcReporter) collectMetrics(collectReady chan bool) { var messages [][]byte // generate a new metrics message - builtin := metrics.BuildBuiltinMetricsMessage(metrics.ApmMetrics.CopyAndReset(i), - r.conn.queueStats.CopyAndReset(), FlushRateCounts(), config.GetRuntimeMetrics()) + builtin := r.registry.BuildBuiltinMetricsMessage(i, r.conn.queueStats.CopyAndReset(), r.oboe.FlushRateCounts(), config.GetRuntimeMetrics()) if builtin != nil { messages = append(messages, builtin) } - custom := metrics.BuildMessage(metrics.CustomMetrics.CopyAndReset(i), false) + custom := r.registry.BuildCustomMetricsMessage(i) if custom != nil { messages = append(messages, custom) } @@ -834,13 +842,11 @@ func (r *grpcReporter) sendMetrics(msgs [][]byte) { method := newPostMetricsMethod(r.serviceKey.Load(), msgs) - err := r.conn.InvokeRPC(r.done, method) - switch err { - case errInvalidServiceKey: - r.ShutdownNow() - case nil: + if err := r.conn.InvokeRPC(r.done, method); err == nil { log.Info(method.CallSummary()) - default: + } else if errors.Is(err, errInvalidServiceKey) { + r.ShutdownNow() + } else { log.Warningf("sendMetrics: %s", err) } } @@ -854,19 +860,16 @@ func (r *grpcReporter) getSettings(ready chan bool) { defer func() { ready <- true }() method := newGetSettingsMethod(r.serviceKey.Load()) - err := r.conn.InvokeRPC(r.done, method) - - switch err { - case errInvalidServiceKey: - r.ShutdownNow() - case nil: + if err := r.conn.InvokeRPC(r.done, method); err == nil { logger := log.Info if method.Resp.Warning != "" { logger = log.Warning } logger(method.CallSummary()) r.updateSettings(method.Resp) - default: + } else if errors.Is(err, errInvalidServiceKey) { + r.ShutdownNow() + } else { log.Infof("getSettings: %s", err) } } @@ -875,26 +878,26 @@ func (r *grpcReporter) getSettings(ready chan bool) { // settings new settings func (r *grpcReporter) updateSettings(settings *collector.SettingsResult) { for _, s := range settings.GetSettings() { - updateSetting(int32(s.Type), string(s.Layer), s.Flags, s.Value, s.Ttl, s.Arguments) + r.oboe.UpdateSetting(int32(s.Type), string(s.Layer), s.Flags, s.Value, s.Ttl, s.Arguments) // update MetricsFlushInterval - mi := parseInt32(s.Arguments, kvMetricsFlushInterval, r.collectMetricInterval) + mi := ParseInt32(s.Arguments, constants.KvMetricsFlushInterval, r.collectMetricInterval) atomic.StoreInt32(&r.collectMetricInterval, mi) // update events flush interval o := config.ReporterOpts() - ei := parseInt32(s.Arguments, kvEventsFlushInterval, int32(o.GetEventFlushInterval())) + ei := ParseInt32(s.Arguments, constants.KvEventsFlushInterval, int32(o.GetEventFlushInterval())) o.SetEventFlushInterval(int64(ei)) // update MaxTransactions - mt := parseInt32(s.Arguments, kvMaxTransactions, metrics.ApmMetrics.Cap()) - metrics.ApmMetrics.SetCap(mt) + mt := ParseInt32(s.Arguments, constants.KvMaxTransactions, r.registry.ApmMetricsCap()) + r.registry.SetApmMetricsCap(mt) - maxCustomMetrics := parseInt32(s.Arguments, kvMaxCustomMetrics, metrics.CustomMetrics.Cap()) - metrics.CustomMetrics.SetCap(maxCustomMetrics) + maxCustomMetrics := ParseInt32(s.Arguments, constants.KvMaxCustomMetrics, r.registry.CustomMetricsCap()) + r.registry.SetCustomMetricsCap(maxCustomMetrics) } - if !r.isReady() && hasDefaultSetting() { + if !r.isReady() && r.oboe.HasDefaultSetting() { r.cond.L.Lock() r.setReady(true) log.Warningf("Got dynamic settings. The SolarWinds Observability APM agent (%v) is ready.", r.done) @@ -909,8 +912,8 @@ func (r *grpcReporter) checkSettingsTimeout(ready chan bool) { // notify caller that this routine has terminated (defered to end of routine) defer func() { ready <- true }() - OboeCheckSettingsTimeout() - if r.isReady() && !hasDefaultSetting() { + r.oboe.CheckSettingsTimeout() + if r.isReady() && !r.oboe.HasDefaultSetting() { log.Warningf("Sampling setting expired. SolarWinds Observability APM library (%v) is not working.", r.done) r.setReady(false) } @@ -918,25 +921,6 @@ func (r *grpcReporter) checkSettingsTimeout(ready chan bool) { // ========================= Status Message Handling ============================= -// TODO use something similar for init message -//func (r *grpcReporter) reportStatus(ctx *oboeContext, e *event) error { -// if r.Closed() { -// return ErrReporterIsClosed -// } -// if err := prepareEvent(ctx, e); err != nil { -// // don't continue if preparation failed -// return err -// } -// -// select { -// case r.statusMessages <- (*e).bbuf.GetBuf(): -// return nil -// default: -// return errors.New("status message queue is full") -// } -// -//} - // long-running goroutine that listens on the status message channel, collects all messages // on that channel and attempts to send them to the collector using the GRPC method PostStatus() func (r *grpcReporter) statusSender() { @@ -963,14 +947,11 @@ func (r *grpcReporter) statusSender() { } } method := newPostStatusMethod(r.serviceKey.Load(), messages) - err := r.conn.InvokeRPC(r.done, method) - - switch err { - case errInvalidServiceKey: - r.ShutdownNow() - case nil: + if err := r.conn.InvokeRPC(r.done, method); err == nil { log.Info(method.CallSummary()) - default: + } else if errors.Is(err, errInvalidServiceKey) { + r.ShutdownNow() + } else { log.Infof("statusSender: %s", err) } } @@ -1011,9 +992,6 @@ var ( // dropped. errTooManyRedirections = errors.New("too many redirections") - // The destination returned by the collector is not valid. - errInvalidRedirectTarget = errors.New("redirection target is empty.") - // the operation or loop cannot continue as the reporter is exiting. errReporterExiting = errors.New("reporter is exiting") @@ -1065,8 +1043,7 @@ func (c *grpcConnection) InvokeRPC(exit chan struct{}, m Method) error { if c.isActive() { ctx, cancel := context.WithTimeout(context.Background(), grpcCtxTimeout) if m.RequestSize() > c.maxReqBytes { - v := fmt.Sprintf("%d|%d", m.RequestSize(), c.maxReqBytes) - err = errors.Wrap(errRequestTooBig, v) + err = fmt.Errorf("rpc request exceeds byte limit; request size: %d, max size: %d", m.RequestSize(), c.maxReqBytes) } else { if m.ServiceKey() != "" { err = m.Call(ctx, c.client) @@ -1131,7 +1108,7 @@ func (c *grpcConnection) InvokeRPC(exit chan struct{}, m Method) error { // a proper redirect shouldn't cause delays retriesNum = 0 } else { - log.Warning(errors.Wrap(errInvalidRedirectTarget, c.name)) + log.Warning(fmt.Errorf("redirection target is empty for %s", c.name)) } default: log.Info(m.CallSummary()) @@ -1146,7 +1123,7 @@ func (c *grpcConnection) InvokeRPC(exit chan struct{}, m Method) error { if !m.RetryOnErr(err) { if err != nil { - return errors.Wrap(errNoRetryOnErr, err.Error()) + return errors.Join(errNoRetryOnErr, err) } else { return errNoRetryOnErr } @@ -1265,7 +1242,7 @@ func (d *DefaultDialer) Dial(p DialParams) (*grpc.ClientConn, error) { } else { certPool, err = x509.SystemCertPool() if err != nil { - return nil, errors.Wrap(err, "unable to obtain system cert pool") + return nil, errors.Join(errors.New("unable to obtain system cert pool"), err) } } @@ -1306,13 +1283,13 @@ func newGRPCProxyDialer(p DialParams) func(context.Context, string) (net.Conn, e proxy, err := url.Parse(p.Proxy) if err != nil { - return nil, errors.Wrap(err, "error parsing the proxy url") + return nil, errors.Join(errors.New("error parsing the proxy url"), err) } if proxy.Scheme == "https" { cert, err := os.ReadFile(p.ProxyCertPath) if err != nil { - return nil, errors.Wrap(err, "failed to load proxy cert") + return nil, errors.Join(errors.New("failed to load proxy cert"), err) } caCertPool := x509.NewCertPool() caCertPool.AppendCertsFromPEM(cert) @@ -1321,12 +1298,12 @@ func newGRPCProxyDialer(p DialParams) func(context.Context, string) (net.Conn, e tlsConfig := tls.Config{RootCAs: caCertPool} conn, err = tls.Dial("tcp", proxy.Host, &tlsConfig) if err != nil { - return nil, errors.Wrap(err, "failed to dial the https proxy") + return nil, errors.Join(errors.New("failed to dial the https proxy"), err) } } else if proxy.Scheme == "http" { conn, err = (&net.Dialer{}).DialContext(ctx, "tcp", proxy.Host) if err != nil { - return nil, errors.Wrap(err, "failed to dial the http proxy") + return nil, errors.Join(errors.New("failed to dial the http proxy"), err) } } else { return nil, fmt.Errorf("proxy scheme not supported: %s", proxy.Scheme) @@ -1401,3 +1378,24 @@ func printRPCMsg(m Method) { } log.Debugf("%s", str) } + +func bytesToInt32(b []byte) (int32, error) { + if len(b) != 4 { + return -1, fmt.Errorf("invalid length: %d", len(b)) + } + return int32(binary.LittleEndian.Uint32(b)), nil +} + +func ParseInt32(args map[string][]byte, key string, fb int32) int32 { + ret := fb + if c, ok := args[key]; ok { + v, err := bytesToInt32(c) + if err == nil && v >= 0 { + ret = v + log.Debugf("parsed %s=%d", key, v) + } else { + log.Warningf("parse error: %s=%d err=%v fallback=%d", key, v, err, fb) + } + } + return ret +} diff --git a/internal/reporter/reporter_test.go b/internal/reporter/reporter_test.go index 5bbbca53..b5aab9ea 100644 --- a/internal/reporter/reporter_test.go +++ b/internal/reporter/reporter_test.go @@ -21,10 +21,13 @@ import ( "github.com/solarwinds/apm-go/internal/host" "github.com/solarwinds/apm-go/internal/log" "github.com/solarwinds/apm-go/internal/metrics" + "github.com/solarwinds/apm-go/internal/oboe" "github.com/solarwinds/apm-go/internal/reporter/mocks" "github.com/solarwinds/apm-go/internal/swotel/semconv" "github.com/solarwinds/apm-go/internal/utils" + "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/trace" "go.uber.org/atomic" "io" stdlog "log" @@ -85,12 +88,9 @@ func TestGRPCReporter(t *testing.T) { setEnv("SW_APM_COLLECTOR", addr) setEnv("SW_APM_TRUSTEDPATH", testCertFile) config.Load() - oldReporter := globalReporter - setGlobalReporter("ssl", "") - - require.IsType(t, &grpcReporter{}, globalReporter) - - r := globalReporter.(*grpcReporter) + registry := metrics.NewLegacyRegistry() + o := oboe.NewOboe() + r := newGRPCReporter("myservice", registry, o).(*grpcReporter) // Test WaitForReady // The reporter is not ready when there is no default setting. @@ -130,7 +130,7 @@ func TestGRPCReporter(t *testing.T) { time.Sleep(time.Second) // The reporter becomes not ready after the default setting has been deleted - removeSetting() + o.RemoveSetting() r.checkSettingsTimeout(make(chan bool, 1)) require.False(t, r.isReady()) @@ -140,7 +140,6 @@ func TestGRPCReporter(t *testing.T) { // stop test reporter server.Stop() - globalReporter = oldReporter // assert data received require.Len(t, server.events, 1) @@ -176,12 +175,9 @@ func TestShutdownGRPCReporter(t *testing.T) { setEnv("SW_APM_COLLECTOR", addr) setEnv("SW_APM_TRUSTEDPATH", testCertFile) config.Load() - oldReporter := globalReporter - setGlobalReporter("ssl", "") - - require.IsType(t, &grpcReporter{}, globalReporter) - - r := globalReporter.(*grpcReporter) + registry := metrics.NewLegacyRegistry() + o := oboe.NewOboe() + r := newGRPCReporter("myservice", registry, o).(*grpcReporter) r.ShutdownNow() require.Equal(t, true, r.Closed()) @@ -190,7 +186,6 @@ func TestShutdownGRPCReporter(t *testing.T) { // stop test reporter server.Stop() - globalReporter = oldReporter } func TestSetServiceKey(t *testing.T) { @@ -238,13 +233,12 @@ func TestInvalidKey(t *testing.T) { // set gRPC reporter config.Load() - oldReporter := globalReporter log.SetLevel(log.INFO) - setGlobalReporter("ssl", "") - require.IsType(t, &grpcReporter{}, globalReporter) + registry := metrics.NewLegacyRegistry() - r := globalReporter.(*grpcReporter) + o := oboe.NewOboe() + r := newGRPCReporter("myservice", registry, o).(*grpcReporter) ev1 := CreateInfoEvent(validSpanContext, time.Now()) ev1.SetLayer("hello-from-invalid-key") require.NoError(t, r.ReportEvent(ev1)) @@ -258,7 +252,6 @@ func TestInvalidKey(t *testing.T) { // Tear down everything. server.Stop() - globalReporter = oldReporter setEnv("SW_APM_SERVICE_KEY", oldKey) patterns := []string{ @@ -453,8 +446,10 @@ func TestInitReporter(t *testing.T) { // Test disable agent setEnv("SW_APM_ENABLED", "false") config.Load() - initReporter(resource.Empty()) - require.IsType(t, &nullReporter{}, globalReporter) + registry := metrics.NewLegacyRegistry() + o := oboe.NewOboe() + r := initReporter(resource.Empty(), registry, o) + require.IsType(t, &nullReporter{}, r) // Test enable agent require.NoError(t, os.Unsetenv("SW_APM_ENABLED")) @@ -462,9 +457,9 @@ func TestInitReporter(t *testing.T) { config.Load() require.True(t, config.GetEnabled()) - initReporter(resource.NewWithAttributes("", semconv.ServiceName("my service name"))) - require.IsType(t, &grpcReporter{}, globalReporter) - require.Equal(t, "my service name", globalReporter.GetServiceName()) + r = initReporter(resource.NewWithAttributes("", semconv.ServiceName("my service name")), registry, o) + require.IsType(t, &grpcReporter{}, r) + require.Equal(t, "my service name", r.GetServiceName()) } func TestCollectMetricsNextInterval(t *testing.T) { @@ -497,14 +492,10 @@ func testProxy(t *testing.T, proxyUrl string) { server := StartTestGRPCServer(t, addr) time.Sleep(100 * time.Millisecond) - oldReporter := globalReporter - defer func() { globalReporter = oldReporter }() - - setGlobalReporter("ssl", "") + registry := metrics.NewLegacyRegistry() - require.IsType(t, &grpcReporter{}, globalReporter) - - r := globalReporter.(*grpcReporter) + o := oboe.NewOboe() + r := newGRPCReporter("myservice", registry, o).(*grpcReporter) // Test WaitForReady // The reporter is not ready when there is no default setting. @@ -547,7 +538,7 @@ func testProxy(t *testing.T, proxyUrl string) { time.Sleep(time.Second) // The reporter becomes not ready after the default setting has been deleted - removeSetting() + o.RemoveSetting() r.checkSettingsTimeout(make(chan bool, 1)) require.False(t, r.isReady()) @@ -588,3 +579,32 @@ func TestHttpProxy(t *testing.T) { func TestHttpsProxy(t *testing.T) { testProxy(t, "https://usr:pwd@localhost:12345") } + +func TestCreateInitMessage(t *testing.T) { + tid := trace.TraceID{0x01, 0x02, 0x03, 0x04} + r, err := resource.New(context.Background(), resource.WithAttributes( + attribute.String("foo", "bar"), + // service.name should be omitted + attribute.String("service.name", "my cool service"), + )) + require.NoError(t, err) + a := time.Now() + evt := CreateInitMessage(tid, r) + b := time.Now() + require.NoError(t, err) + require.NotNil(t, evt) + e, ok := evt.(*event) + require.True(t, ok) + require.Equal(t, tid, e.taskID) + require.NotEqual(t, [8]byte{}, e.opID) + require.True(t, e.t.After(a)) + require.True(t, e.t.Before(b)) + require.Equal(t, []attribute.KeyValue{ + attribute.String("foo", "bar"), + attribute.Bool("__Init", true), + attribute.String("APM.Version", utils.Version()), + }, e.kvs) + require.Equal(t, LabelUnset, e.label) + require.Equal(t, "", e.layer) + require.False(t, e.parent.IsValid()) +} diff --git a/internal/reporter/test_reporter.go b/internal/reporter/test_reporter.go deleted file mode 100644 index dc0ebed1..00000000 --- a/internal/reporter/test_reporter.go +++ /dev/null @@ -1,256 +0,0 @@ -// © 2023 SolarWinds Worldwide, LLC. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package reporter - -import ( - "context" - "errors" - "log" - "sync" - "time" -) - -// TestReporter appends reported events to Bufs if ShouldTrace is true. -type TestReporter struct { - EventBufs [][]byte - ShouldTrace bool - ShouldError bool - UseSettings bool - SettingType int - CaptureMetrics bool - ErrorEvents map[int]bool // whether to drop an event - done chan int - wg sync.WaitGroup - eventChan chan []byte - Timeout time.Duration -} - -const ( - defaultTestReporterTimeout = 2 * time.Second - TestToken = "TOKEN" -) - -var usingTestReporter = false -var oldReporter Reporter = &nullReporter{} - -// TestReporterOption values may be passed to SetTestReporter. -type TestReporterOption func(*TestReporter) - -func TestReporterSettingType(tp int) TestReporterOption { - return func(r *TestReporter) { r.SettingType = tp } -} - -func SetGlobalReporter(r Reporter) func() { - old := globalReporter - globalReporter = r - return func() { - globalReporter = old - } -} - -// SetTestReporter sets and returns a test Reporter that captures raw event bytes -// for making assertions about using the graphtest package. -func SetTestReporter(options ...TestReporterOption) *TestReporter { - r := &TestReporter{ - ShouldTrace: true, - UseSettings: true, - Timeout: defaultTestReporterTimeout, - done: make(chan int), - eventChan: make(chan []byte), - } - for _, option := range options { - option(r) - } - r.wg.Add(1) - go r.resultWriter() - - if _, ok := oldReporter.(*nullReporter); ok { - oldReporter = globalReporter - } - globalReporter = r - usingTestReporter = true - - // start with clean slate - resetSettings() - - r.updateSetting() - - return r -} - -func (r *TestReporter) SetServiceKey(string) error { - return nil -} - -func (r *TestReporter) GetServiceName() string { - return "test-reporter-service" -} - -func (r *TestReporter) resultWriter() { - var numBufs int - for { - select { - case numBufs = <-r.done: - if len(r.EventBufs) >= numBufs { - r.wg.Done() - return - } - r.done = nil - case <-time.After(r.Timeout): - r.wg.Done() - return - case buf := <-r.eventChan: - r.EventBufs = append(r.EventBufs, buf) - if r.done == nil && len(r.EventBufs) >= numBufs { - r.wg.Done() - return - } - } - } -} - -// Close stops the test reporter from listening for events; r.EventBufs will no longer be updated and any -// calls to WritePacket() will panic. -func (r *TestReporter) Close(numBufs int) { - r.done <- numBufs - // wait for reader goroutine to receive numBufs events, or timeout. - r.wg.Wait() - close(r.eventChan) - received := len(r.EventBufs) - if received < numBufs { - log.Printf("# FIX: TestReporter.Close() waited for %d events, got %d", numBufs, received) - } - usingTestReporter = false - if _, ok := oldReporter.(*nullReporter); !ok { - globalReporter = oldReporter - oldReporter = &nullReporter{} - } -} - -// Shutdown closes the Test reporter TODO: not supported -func (r *TestReporter) Shutdown(context.Context) error { - // return r.conn.Close() - return errors.New("shutdown is not supported by TestReporter") -} - -// ShutdownNow closes the Test reporter immediately -func (r *TestReporter) ShutdownNow() {} - -// Closed returns if the reporter is closed or not TODO: not supported -func (r *TestReporter) Closed() bool { - return false -} - -// WaitForReady checks the state of the reporter and may wait for up to the specified -// duration until it becomes ready. -func (r *TestReporter) WaitForReady(context.Context) bool { - return true -} - -func (r *TestReporter) ReportEvent(Event) error { - return errors.New("TestReporter.ReportEvent not implemented") -} - -func (r *TestReporter) ReportStatus(Event) error { - return errors.New("TestReporter.ReportStatus not implemented") -} - -func (r *TestReporter) addDefaultSetting() { - // add default setting with 100% sampling - updateSetting(int32(TYPE_DEFAULT), "", - []byte("SAMPLE_START,SAMPLE_THROUGH_ALWAYS,TRIGGER_TRACE"), - 1000000, 120, argsToMap(1000000, 1000000, 1000000, 1000000, 1000000, 1000000, -1, -1, []byte(TestToken))) -} - -func (r *TestReporter) addSampleThrough() { - // add default setting with 100% sampling - updateSetting(int32(TYPE_DEFAULT), "", - []byte("SAMPLE_START,SAMPLE_THROUGH,TRIGGER_TRACE"), - 1000000, 120, argsToMap(1000000, 1000000, 1000000, 1000000, 1000000, 1000000, -1, -1, []byte(TestToken))) -} - -func (r *TestReporter) addNoTriggerTrace() { - updateSetting(int32(TYPE_DEFAULT), "", - []byte("SAMPLE_START,SAMPLE_THROUGH_ALWAYS"), - 1000000, 120, argsToMap(1000000, 1000000, 0, 0, 0, 0, -1, -1, []byte(TestToken))) -} - -func (r *TestReporter) addTriggerTraceOnly() { - updateSetting(int32(TYPE_DEFAULT), "", - []byte("TRIGGER_TRACE"), - 0, 120, argsToMap(0, 0, 1000000, 1000000, 1000000, 1000000, -1, -1, []byte(TestToken))) -} - -func (r *TestReporter) addRelaxedTriggerTraceOnly() { - updateSetting(int32(TYPE_DEFAULT), "", - []byte("TRIGGER_TRACE"), - 0, 120, argsToMap(0, 0, 1000000, 1000000, 0, 0, -1, -1, []byte(TestToken))) -} - -func (r *TestReporter) addStrictTriggerTraceOnly() { - updateSetting(int32(TYPE_DEFAULT), "", - []byte("TRIGGER_TRACE"), - 0, 120, argsToMap(0, 0, 0, 0, 1000000, 1000000, -1, -1, []byte(TestToken))) -} - -func (r *TestReporter) addLimitedTriggerTrace() { - updateSetting(int32(TYPE_DEFAULT), "", - []byte("SAMPLE_START,SAMPLE_THROUGH_ALWAYS,TRIGGER_TRACE"), - 1000000, 120, argsToMap(1000000, 1000000, 1, 1, 1, 1, -1, -1, []byte(TestToken))) -} - -func (r *TestReporter) addDisabled() { - updateSetting(int32(TYPE_DEFAULT), "", - []byte(""), - 0, 120, argsToMap(0, 0, 1, 1, 1, 1, -1, -1, []byte(TestToken))) -} - -// Setting types -const ( - DefaultST = iota - NoTriggerTraceST - TriggerTraceOnlyST - RelaxedTriggerTraceOnlyST - StrictTriggerTraceOnlyST - LimitedTriggerTraceST - SampleThroughST - DisabledST - NoSettingST -) - -func (r *TestReporter) updateSetting() { - switch r.SettingType { - case DefaultST: - r.addDefaultSetting() - case NoTriggerTraceST: - r.addNoTriggerTrace() - case TriggerTraceOnlyST: - r.addTriggerTraceOnly() - case RelaxedTriggerTraceOnlyST: - r.addRelaxedTriggerTraceOnly() - case StrictTriggerTraceOnlyST: - r.addStrictTriggerTraceOnly() - case LimitedTriggerTraceST: - r.addLimitedTriggerTrace() - case SampleThroughST: - r.addSampleThrough() - case DisabledST: - r.addDisabled() - case NoSettingST: - // Nothing to do - default: - panic("No such setting type.") - } -} diff --git a/internal/sampler/sampler.go b/internal/sampler/sampler.go index 415d07d4..b318988d 100644 --- a/internal/sampler/sampler.go +++ b/internal/sampler/sampler.go @@ -17,7 +17,7 @@ package sampler import ( "fmt" "github.com/solarwinds/apm-go/internal/log" - "github.com/solarwinds/apm-go/internal/reporter" + "github.com/solarwinds/apm-go/internal/oboe" "github.com/solarwinds/apm-go/internal/swotel" "github.com/solarwinds/apm-go/internal/w3cfmt" "github.com/solarwinds/apm-go/internal/xtrace" @@ -27,10 +27,16 @@ import ( ) type sampler struct { + oboe oboe.Oboe } -func NewSampler() sdktrace.Sampler { - return sampler{} +func NewSampler(o oboe.Oboe) (sdktrace.Sampler, error) { + if o == nil { + return nil, fmt.Errorf("oboe must not be nil") + } + return sampler{ + oboe: o, + }, nil } var _ sdktrace.Sampler = sampler{} @@ -89,11 +95,11 @@ func (s sampler) ShouldSample(params sdktrace.SamplingParameters) sdktrace.Sampl } else { // TODO url url := "" - xto := xtrace.GetXTraceOptions(params.ParentContext) + xto := xtrace.GetXTraceOptions(params.ParentContext, s.oboe) ttMode := getTtMode(xto) // If parent context is not valid, swState will also not be valid swState := w3cfmt.GetSwTraceState(psc) - traceDecision := reporter.ShouldTraceRequestWithURL(swState.IsValid(), url, ttMode, swState) + traceDecision := s.oboe.SampleRequest(swState.IsValid(), url, ttMode, swState) var decision sdktrace.SamplingDecision if !traceDecision.Enabled() { decision = sdktrace.Drop @@ -137,17 +143,17 @@ func (s sampler) ShouldSample(params sdktrace.SamplingParameters) sdktrace.Sampl } -func getTtMode(xto xtrace.Options) reporter.TriggerTraceMode { +func getTtMode(xto xtrace.Options) oboe.TriggerTraceMode { if xto.TriggerTrace() { switch xto.SignatureState() { case xtrace.ValidSignature: - return reporter.ModeRelaxedTriggerTrace + return oboe.ModeRelaxedTriggerTrace case xtrace.InvalidSignature: - return reporter.ModeInvalidTriggerTrace + return oboe.ModeInvalidTriggerTrace default: - return reporter.ModeStrictTriggerTrace + return oboe.ModeStrictTriggerTrace } } else { - return reporter.ModeTriggerTraceNotPresent + return oboe.ModeTriggerTraceNotPresent } } diff --git a/internal/sampler/sampler_test.go b/internal/sampler/sampler_test.go index e6f8bc89..dbec0649 100644 --- a/internal/sampler/sampler_test.go +++ b/internal/sampler/sampler_test.go @@ -16,7 +16,8 @@ package sampler import ( "context" "fmt" - "github.com/solarwinds/apm-go/internal/reporter" + "github.com/solarwinds/apm-go/internal/oboe" + "github.com/solarwinds/apm-go/internal/oboetestutils" "github.com/solarwinds/apm-go/internal/swotel" "github.com/solarwinds/apm-go/internal/testutils" "github.com/solarwinds/apm-go/internal/xtrace" @@ -37,7 +38,9 @@ var ( ) func TestDescription(t *testing.T) { - s := NewSampler() + o := oboe.NewOboe() + s, err := NewSampler(o) + require.NoError(t, err) assert.Equal(t, "SolarWinds APM Sampler", s.Description()) } @@ -126,7 +129,7 @@ func TestScenario6(t *testing.T) { xtraceSignature: false, oboeDecision: true, - ttMode: reporter.ModeStrictTriggerTrace, + ttMode: oboe.ModeStrictTriggerTrace, decision: sdktrace.RecordAndSample, } scen.test(t) @@ -144,7 +147,7 @@ func TestScenario7(t *testing.T) { xtraceSignature: false, oboeDecision: true, - ttMode: reporter.ModeStrictTriggerTrace, + ttMode: oboe.ModeStrictTriggerTrace, decision: sdktrace.RecordAndSample, } scen.test(t) @@ -164,7 +167,7 @@ func TestScenario8(t *testing.T) { xtraceSignature: false, oboeDecision: true, - ttMode: reporter.ModeStrictTriggerTrace, + ttMode: oboe.ModeStrictTriggerTrace, decision: sdktrace.RecordAndSample, } scen.test(t) @@ -181,7 +184,7 @@ func TestScenarioSwKeys(t *testing.T) { xtraceSwKeys: true, oboeDecision: true, - ttMode: reporter.ModeTriggerTraceNotPresent, + ttMode: oboe.ModeTriggerTraceNotPresent, decision: sdktrace.RecordAndSample, } scen.test(t) @@ -195,7 +198,7 @@ func TestScenarioSwKeysUnsampled(t *testing.T) { oboeDecision: false, xtraceSwKeys: true, - ttMode: reporter.ModeTriggerTraceNotPresent, + ttMode: oboe.ModeTriggerTraceNotPresent, decision: sdktrace.RecordOnly, } scen.test(t) @@ -210,7 +213,7 @@ func TestScenarioCustomKeys(t *testing.T) { xtraceCustomKeys: true, oboeDecision: true, - ttMode: reporter.ModeTriggerTraceNotPresent, + ttMode: oboe.ModeTriggerTraceNotPresent, decision: sdktrace.RecordAndSample, } scen.test(t) @@ -224,7 +227,7 @@ func TestScenarioCustomKeysUnsampled(t *testing.T) { oboeDecision: false, xtraceCustomKeys: false, - ttMode: reporter.ModeTriggerTraceNotPresent, + ttMode: oboe.ModeTriggerTraceNotPresent, decision: sdktrace.RecordOnly, } scen.test(t) @@ -266,7 +269,7 @@ type SamplingScenario struct { oboeDecision bool // expectations - ttMode reporter.TriggerTraceMode + ttMode oboe.TriggerTraceMode decision sdktrace.SamplingDecision } @@ -277,11 +280,12 @@ func requireAttrEqual(t *testing.T, attrs attribute.Set, key string, expected at } func (s SamplingScenario) test(t *testing.T) { - r := reporter.SetTestReporter(reporter.TestReporterSettingType(reporter.DefaultST)) - defer r.Close(0) + o := oboe.NewOboe() + oboetestutils.AddDefaultSetting(o) var err error - smplr := NewSampler() + smplr, err := NewSampler(o) + require.NoError(t, err) traceState := trace.TraceState{} if s.traceStateContainsSw { flags := "00" @@ -344,10 +348,10 @@ func (s SamplingScenario) test(t *testing.T) { bucketCap := "1000000" bucketRate := bucketCap sampleRate := 1000000 - sampleSource := reporter.SAMPLE_SOURCE_DEFAULT + sampleSource := oboe.SampleSourceDefault if s.triggerTrace && !s.traceStateSwSampled { sampleRate = -1 - sampleSource = reporter.SAMPLE_SOURCE_UNSET + sampleSource = oboe.SampleSourceUnset } if s.traceStateSwSampled { bucketCap, bucketRate, sampleRate, sampleSource = "-1", "-1", -1, -1 @@ -411,7 +415,8 @@ func TestHydrateTraceState(t *testing.T) { SpanID: spanId, }) ctx := context.WithValue(context.Background(), xtrace.OptionsKey, "trigger-trace") - xto := xtrace.GetXTraceOptions(ctx) + o := oboe.NewOboe() + xto := xtrace.GetXTraceOptions(ctx, o) ts := hydrateTraceState(sc, xto, "ok") fullResp, err := swotel.GetInternalState(ts, swotel.XTraceOptResp) require.NoError(t, err) @@ -425,7 +430,8 @@ func TestHydrateTraceStateBadTimestamp(t *testing.T) { }) ctx := context.WithValue(context.Background(), xtrace.OptionsKey, "trigger-trace") ctx = context.WithValue(ctx, xtrace.SignatureKey, "not a valid signature") - xto := xtrace.GetXTraceOptions(ctx) + o := oboe.NewOboe() + xto := xtrace.GetXTraceOptions(ctx, o) ts := hydrateTraceState(sc, xto, "") fullResp, err := swotel.GetInternalState(ts, swotel.XTraceOptResp) require.NoError(t, err) @@ -441,7 +447,9 @@ func TestHydrateTraceStateBadSignature(t *testing.T) { ctx := context.WithValue(context.Background(), xtrace.OptionsKey, opts) sig := "invalid signature" ctx = context.WithValue(ctx, xtrace.SignatureKey, sig) - xto := xtrace.GetXTraceOptions(ctx) + o := oboe.NewOboe() + oboetestutils.AddDefaultSetting(o) + xto := xtrace.GetXTraceOptions(ctx, o) ts := hydrateTraceState(sc, xto, "") fullResp, err := swotel.GetInternalState(ts, swotel.XTraceOptResp) require.NoError(t, err) @@ -449,8 +457,6 @@ func TestHydrateTraceStateBadSignature(t *testing.T) { } func TestHydrateTraceStateNoSignatureKey(t *testing.T) { - r := reporter.SetTestReporter(reporter.TestReporterSettingType(reporter.NoSettingST)) - defer r.Close(0) sc := trace.NewSpanContext(trace.SpanContextConfig{ TraceID: traceId, SpanID: spanId, @@ -459,7 +465,8 @@ func TestHydrateTraceStateNoSignatureKey(t *testing.T) { ctx := context.WithValue(context.Background(), xtrace.OptionsKey, opts) sig := "0000" ctx = context.WithValue(ctx, xtrace.SignatureKey, sig) - xto := xtrace.GetXTraceOptions(ctx) + o := oboe.NewOboe() + xto := xtrace.GetXTraceOptions(ctx, o) ts := hydrateTraceState(sc, xto, "ok") fullResp, err := swotel.GetInternalState(ts, swotel.XTraceOptResp) require.NoError(t, err) @@ -468,18 +475,18 @@ func TestHydrateTraceStateNoSignatureKey(t *testing.T) { func TestHydrateTraceStateValidSignature(t *testing.T) { // set test reporter so we can use the hmac token for signing the xto - r := reporter.SetTestReporter(reporter.TestReporterSettingType(reporter.DefaultST)) - defer r.Close(0) + o := oboe.NewOboe() + oboetestutils.AddDefaultSetting(o) sc := trace.NewSpanContext(trace.SpanContextConfig{ TraceID: traceId, SpanID: spanId, }) opts := fmt.Sprintf("trigger-trace;ts=%d", time.Now().Unix()) ctx := context.WithValue(context.Background(), xtrace.OptionsKey, opts) - sig, err := reporter.HmacHashTT([]byte(opts)) + sig, err := xtrace.HmacHashTT(o, []byte(opts)) require.NoError(t, err) ctx = context.WithValue(ctx, xtrace.SignatureKey, sig) - xto := xtrace.GetXTraceOptions(ctx) + xto := xtrace.GetXTraceOptions(ctx, o) ts := hydrateTraceState(sc, xto, "ok") fullResp, err := swotel.GetInternalState(ts, swotel.XTraceOptResp) require.NoError(t, err) diff --git a/internal/state/state.go b/internal/state/state.go new file mode 100644 index 00000000..5ba01e74 --- /dev/null +++ b/internal/state/state.go @@ -0,0 +1,25 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package state + +var serviceName string + +func SetServiceName(svc string) { + serviceName = svc +} + +func GetServiceName() string { + return serviceName +} diff --git a/internal/utils/version.go b/internal/utils/version.go index 8152e211..712866cf 100644 --- a/internal/utils/version.go +++ b/internal/utils/version.go @@ -21,7 +21,7 @@ import ( var ( // The SolarWinds Observability Go APM library version - version = "1.0.0" + version = "1.1.0-pre" // The Go version goVersion = strings.TrimPrefix(runtime.Version(), "go") diff --git a/internal/xtrace/auth.go b/internal/xtrace/auth.go new file mode 100644 index 00000000..26878a7e --- /dev/null +++ b/internal/xtrace/auth.go @@ -0,0 +1,45 @@ +// © 2023 SolarWinds Worldwide, LLC. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package xtrace + +import "github.com/solarwinds/apm-go/internal/log" + +type AuthStatus int + +const ( + AuthOK = iota + AuthBadTimestamp + AuthNoSignatureKey + AuthBadSignature +) + +func (a AuthStatus) IsError() bool { + return a != AuthOK +} + +func (a AuthStatus) Msg() string { + switch a { + case AuthOK: + return "ok" + case AuthBadTimestamp: + return "bad-timestamp" + case AuthNoSignatureKey: + return "no-signature-key" + case AuthBadSignature: + return "bad-signature" + } + log.Debugf("could not read msg for unknown AuthStatus: %s", a) + return "" +} diff --git a/internal/xtrace/xtrace.go b/internal/xtrace/xtrace.go index b788101e..f5615cb8 100644 --- a/internal/xtrace/xtrace.go +++ b/internal/xtrace/xtrace.go @@ -16,11 +16,17 @@ package xtrace import ( "context" + "crypto/hmac" + "crypto/sha1" + "encoding/hex" + "fmt" + "github.com/pkg/errors" "github.com/solarwinds/apm-go/internal/log" - "github.com/solarwinds/apm-go/internal/reporter" + "github.com/solarwinds/apm-go/internal/oboe" "regexp" "strconv" "strings" + "time" ) const ( @@ -46,7 +52,7 @@ const ( var optRegex = regexp.MustCompile(";+") var customKeyRegex = regexp.MustCompile(`^custom-[^\s]*$`) -func GetXTraceOptions(ctx context.Context) Options { +func GetXTraceOptions(ctx context.Context, o oboe.Oboe) Options { xtoStr, ok := ctx.Value(OptionsKey).(string) if !ok { xtoStr = "" @@ -56,10 +62,10 @@ func GetXTraceOptions(ctx context.Context) Options { xtoSig = "" } - return parseXTraceOptions(xtoStr, xtoSig) + return parseXTraceOptions(o, xtoStr, xtoSig) } -func parseXTraceOptions(opts string, sig string) Options { +func parseXTraceOptions(o oboe.Oboe, opts string, sig string) Options { x := Options{ opts: opts, sig: sig, @@ -107,7 +113,7 @@ func parseXTraceOptions(opts string, sig string) Options { if sig == "" { x.sigState = NoSignature } else { - x.authStatus = reporter.ValidateXTraceOptionsSignature(sig, strconv.FormatInt(x.timestamp, 10), opts) + x.authStatus = validateXTraceOptionsSignature(o, sig, strconv.FormatInt(x.timestamp, 10), opts) if x.authStatus.IsError() { log.Warning("Invalid xtrace options signature", x.authStatus.Msg()) x.sigState = InvalidSignature @@ -127,7 +133,7 @@ type Options struct { tt bool ignoredKeys []string sigState SignatureState - authStatus reporter.AuthStatus + authStatus AuthStatus } func (x Options) SwKeys() string { @@ -169,3 +175,50 @@ func (x Options) IncludeResponse() bool { func (x Options) SigAuthMsg() string { return x.authStatus.Msg() } + +func validateXTraceOptionsSignature(o oboe.Oboe, signature, ts, data string) AuthStatus { + var err error + _, err = tsInScope(ts) + if err != nil { + return AuthBadTimestamp + } + + token, err := o.GetTriggerTraceToken() + if err != nil { + return AuthNoSignatureKey + } + + if hmacHash(token, []byte(data)) != signature { + return AuthBadSignature + } + return AuthOK +} + +func HmacHashTT(o oboe.Oboe, data []byte) (string, error) { + token, err := o.GetTriggerTraceToken() + if err != nil { + return "", err + } + return hmacHash(token, data), nil +} + +func hmacHash(token, data []byte) string { + h := hmac.New(sha1.New, token) + h.Write(data) + sha := hex.EncodeToString(h.Sum(nil)) + return sha +} + +func tsInScope(tsStr string) (string, error) { + ts, err := strconv.ParseInt(tsStr, 10, 64) + if err != nil { + return "", errors.Wrap(err, "tsInScope") + } + + t := time.Unix(ts, 0) + if t.Before(time.Now().Add(time.Minute*-5)) || + t.After(time.Now().Add(time.Minute*5)) { + return "", fmt.Errorf("timestamp out of scope: %s", tsStr) + } + return strconv.FormatInt(ts, 10), nil +} diff --git a/internal/xtrace/xtrace_test.go b/internal/xtrace/xtrace_test.go index b7a2c8c0..95435563 100644 --- a/internal/xtrace/xtrace_test.go +++ b/internal/xtrace/xtrace_test.go @@ -21,28 +21,29 @@ package xtrace import ( "context" "fmt" - "github.com/solarwinds/apm-go/internal/reporter" + "github.com/solarwinds/apm-go/internal/oboe" + "github.com/solarwinds/apm-go/internal/oboetestutils" "testing" "time" "github.com/stretchr/testify/assert" ) +var o = oboe.NewOboe() + func TestGetXTraceOptions(t *testing.T) { - // We set the test reporter which will set the TT Token used for HMAC verification - r := reporter.SetTestReporter(reporter.TestReporterSettingType(reporter.DefaultST)) - defer r.Close(0) + oboetestutils.AddDefaultSetting(o) ctx := context.Background() // Timestamp required in signature validation opts := fmt.Sprintf("sw-keys=check-id:check-1013,website-id;booking-demo;ts=%d", time.Now().Unix()) ctx = context.WithValue(ctx, OptionsKey, opts) - sig, err := reporter.HmacHashTT([]byte(opts)) + sig, err := HmacHashTT(o, []byte(opts)) if err != nil { t.Fatal(err) } ctx = context.WithValue(ctx, SignatureKey, sig) - xto := GetXTraceOptions(ctx) + xto := GetXTraceOptions(ctx, o) assert.Equal(t, "check-id:check-1013,website-id", xto.SwKeys()) assert.Equal(t, []string{"booking-demo"}, xto.IgnoredKeys()) assert.Equal(t, sig, xto.Signature()) @@ -54,7 +55,7 @@ func TestGetXTraceOptionsInvalidType(t *testing.T) { ctx = context.WithValue(ctx, OptionsKey, 123) ctx = context.WithValue(ctx, SignatureKey, 321) - xto := GetXTraceOptions(ctx) + xto := GetXTraceOptions(ctx, o) assert.Equal(t, "", xto.SwKeys()) assert.Equal(t, []string{}, xto.IgnoredKeys()) assert.Equal(t, "", xto.Signature()) @@ -62,7 +63,7 @@ func TestGetXTraceOptionsInvalidType(t *testing.T) { } func TestNoKeyNoValue(t *testing.T) { - xto := parseXTraceOptions("=", "") + xto := parseXTraceOptions(o, "=", "") assert.Empty(t, xto.CustomKVs()) assert.Empty(t, xto.SwKeys()) assert.Empty(t, xto.IgnoredKeys()) @@ -70,7 +71,7 @@ func TestNoKeyNoValue(t *testing.T) { } func TestOrphanValue(t *testing.T) { - xto := parseXTraceOptions("=oops", "") + xto := parseXTraceOptions(o, "=oops", "") assert.Empty(t, xto.CustomKVs()) assert.Empty(t, xto.SwKeys()) assert.Empty(t, xto.IgnoredKeys()) @@ -78,7 +79,7 @@ func TestOrphanValue(t *testing.T) { } func TestValidTT(t *testing.T) { - xto := parseXTraceOptions("trigger-trace", "") + xto := parseXTraceOptions(o, "trigger-trace", "") assert.True(t, xto.TriggerTrace()) assert.Empty(t, xto.CustomKVs()) assert.Empty(t, xto.SwKeys()) @@ -87,7 +88,7 @@ func TestValidTT(t *testing.T) { } func TestTTKeyIgnored(t *testing.T) { - xto := parseXTraceOptions("trigger-trace=1", "") + xto := parseXTraceOptions(o, "trigger-trace=1", "") assert.False(t, xto.TriggerTrace()) assert.Empty(t, xto.CustomKVs()) assert.Empty(t, xto.SwKeys()) @@ -96,83 +97,83 @@ func TestTTKeyIgnored(t *testing.T) { } func TestSwKeysKVStrip(t *testing.T) { - xto := parseXTraceOptions("sw-keys= foo:key ", "") + xto := parseXTraceOptions(o, "sw-keys= foo:key ", "") assert.Equal(t, "foo:key", xto.SwKeys()) assert.Empty(t, xto.IgnoredKeys()) assert.Equal(t, NoSignature, xto.SignatureState()) } func TestSwKeysContainingSemicolonIgnoreAfter(t *testing.T) { - xto := parseXTraceOptions("sw-keys=check-id:check-1013,website-id;booking-demo", "") + xto := parseXTraceOptions(o, "sw-keys=check-id:check-1013,website-id;booking-demo", "") assert.Equal(t, "check-id:check-1013,website-id", xto.SwKeys()) assert.Equal(t, []string{"booking-demo"}, xto.IgnoredKeys()) assert.Equal(t, NoSignature, xto.SignatureState()) } func TestCustomKeysMatchStoredInOptionsHeaderAndCustomKVs(t *testing.T) { - xto := parseXTraceOptions("custom-awesome-key= foo ", "") + xto := parseXTraceOptions(o, "custom-awesome-key= foo ", "") assert.Equal(t, map[string]string{"custom-awesome-key": "foo"}, xto.CustomKVs()) assert.Empty(t, xto.IgnoredKeys()) assert.Equal(t, NoSignature, xto.SignatureState()) } func TestCustomKeysMatchButNoValueIgnored(t *testing.T) { - xto := parseXTraceOptions("custom-no-value", "") + xto := parseXTraceOptions(o, "custom-no-value", "") assert.Equal(t, map[string]string{}, xto.CustomKVs()) assert.Equal(t, []string{"custom-no-value"}, xto.IgnoredKeys()) assert.Equal(t, NoSignature, xto.SignatureState()) } func TestCustomKeysMatchEqualInValue(t *testing.T) { - xto := parseXTraceOptions("custom-and=a-value=12345containing_equals=signs", "") + xto := parseXTraceOptions(o, "custom-and=a-value=12345containing_equals=signs", "") assert.Equal(t, map[string]string{"custom-and": "a-value=12345containing_equals=signs"}, xto.CustomKVs()) assert.Empty(t, xto.IgnoredKeys()) assert.Equal(t, NoSignature, xto.SignatureState()) } func TestCustomKeysSpacesInKeyDisallowed(t *testing.T) { - xto := parseXTraceOptions("custom- key=this_is_bad;custom-key 7=this_is_bad_too", "") + xto := parseXTraceOptions(o, "custom- key=this_is_bad;custom-key 7=this_is_bad_too", "") assert.Equal(t, map[string]string{}, xto.CustomKVs()) assert.Equal(t, []string{"custom- key", "custom-key 7"}, xto.IgnoredKeys()) assert.Equal(t, NoSignature, xto.SignatureState()) } func TestValidTs(t *testing.T) { - xto := parseXTraceOptions("ts=12345", "") + xto := parseXTraceOptions(o, "ts=12345", "") assert.Equal(t, int64(12345), xto.Timestamp()) assert.Empty(t, xto.IgnoredKeys()) assert.Equal(t, NoSignature, xto.SignatureState()) } func TestInvalidTs(t *testing.T) { - xto := parseXTraceOptions("ts=invalid", "") + xto := parseXTraceOptions(o, "ts=invalid", "") assert.Equal(t, int64(0), xto.Timestamp()) assert.Equal(t, []string{"ts"}, xto.IgnoredKeys()) assert.Equal(t, NoSignature, xto.SignatureState()) } func TestOtherKeyIgnored(t *testing.T) { - xto := parseXTraceOptions("customer-key=foo", "") + xto := parseXTraceOptions(o, "customer-key=foo", "") assert.Equal(t, []string{"customer-key"}, xto.IgnoredKeys()) assert.Equal(t, NoSignature, xto.SignatureState()) } func TestSig(t *testing.T) { - xto := parseXTraceOptions("foo bar baz", "signature123") + xto := parseXTraceOptions(o, "foo bar baz", "signature123") assert.Equal(t, "signature123", xto.Signature()) assert.Equal(t, []string{"foo bar baz"}, xto.IgnoredKeys()) assert.Equal(t, InvalidSignature, xto.SignatureState()) } func TestSigWithoutOptions(t *testing.T) { - xto := parseXTraceOptions("", "signature123") + xto := parseXTraceOptions(o, "", "signature123") assert.Equal(t, "signature123", xto.Signature()) assert.Empty(t, xto.IgnoredKeys()) assert.Equal(t, InvalidSignature, xto.SignatureState()) } func TestDocumentedExample1(t *testing.T) { - xto := parseXTraceOptions("trigger-trace;sw-keys=check-id:check-1013,website-id:booking-demo", "") + xto := parseXTraceOptions(o, "trigger-trace;sw-keys=check-id:check-1013,website-id:booking-demo", "") assert.True(t, xto.TriggerTrace()) assert.Empty(t, xto.CustomKVs()) assert.Equal(t, "check-id:check-1013,website-id:booking-demo", xto.SwKeys()) @@ -181,7 +182,7 @@ func TestDocumentedExample1(t *testing.T) { } func TestDocumentedExample2(t *testing.T) { - xto := parseXTraceOptions("trigger-trace;custom-key1=value1", "") + xto := parseXTraceOptions(o, "trigger-trace;custom-key1=value1", "") assert.True(t, xto.TriggerTrace()) assert.Equal(t, map[string]string{"custom-key1": "value1"}, xto.CustomKVs()) assert.Empty(t, xto.SwKeys()) @@ -191,6 +192,7 @@ func TestDocumentedExample2(t *testing.T) { func TestDocumentedExample3(t *testing.T) { xto := parseXTraceOptions( + o, "trigger-trace;sw-keys=check-id:check-1013,website-id:booking-demo;ts=1564432370", "5c7c733c727e5038d2cd537630206d072bbfc07c", ) @@ -204,6 +206,7 @@ func TestDocumentedExample3(t *testing.T) { func TestStripAllOptions(t *testing.T) { xto := parseXTraceOptions( + o, " trigger-trace ; custom-something=value; custom-OtherThing = other val ; sw-keys = 029734wr70:9wqj21,0d9j1 ; ts = 12345 ; foo = bar ", "", ) @@ -221,6 +224,7 @@ func TestStripAllOptions(t *testing.T) { func TestAllOptionsHandleSequentialSemicolons(t *testing.T) { xto := parseXTraceOptions( + o, ";foo=bar;;;custom-something=value_thing;;sw-keys=02973r70:1b2a3;;;;custom-key=val;ts=12345;;;;;;;trigger-trace;;;", "", ) @@ -238,6 +242,7 @@ func TestAllOptionsHandleSequentialSemicolons(t *testing.T) { func TestAllOptionsHandleSingleQuotes(t *testing.T) { xto := parseXTraceOptions( + o, "trigger-trace;custom-foo='bar;bar';custom-bar=foo", "", ) @@ -255,6 +260,7 @@ func TestAllOptionsHandleSingleQuotes(t *testing.T) { func TestAllOptionsHandleMissingValuesAndSemicolons(t *testing.T) { xto := parseXTraceOptions( + o, ";trigger-trace;custom-something=value_thing;sw-keys=02973r70:9wqj21,0d9j1;1;2;3;4;5;=custom-key=val?;=", "", ) diff --git a/swo/agent.go b/swo/agent.go index ac81655f..5500e7a6 100644 --- a/swo/agent.go +++ b/swo/agent.go @@ -20,6 +20,8 @@ import ( "github.com/solarwinds/apm-go/internal/entryspans" "github.com/solarwinds/apm-go/internal/exporter" "github.com/solarwinds/apm-go/internal/log" + "github.com/solarwinds/apm-go/internal/metrics" + "github.com/solarwinds/apm-go/internal/oboe" "github.com/solarwinds/apm-go/internal/processor" "github.com/solarwinds/apm-go/internal/propagator" "github.com/solarwinds/apm-go/internal/reporter" @@ -41,35 +43,6 @@ var ( errInvalidLogLevel = errors.New("invalid log level") ) -// WaitForReady checks if the agent is ready. It returns true is the agent is ready, -// or false if it is not. -// -// A call to this method will block until the agent is ready or the context is -// canceled, or the agent is already closed. -// The agent is considered ready if there is a valid default setting for sampling. -func WaitForReady(ctx context.Context) bool { - if Closed() { - return false - } - return reporter.WaitForReady(ctx) -} - -// Shutdown flush the metrics and stops the agent. The call will block until the agent -// flushes and is successfully shutdown or the context is canceled. It returns nil -// for successful shutdown and or error when the context is canceled or the agent -// has already been closed before. -// -// This function should be called only once. -func Shutdown(ctx context.Context) error { - return reporter.Shutdown(ctx) -} - -// Closed denotes if the agent is closed (by either calling Shutdown explicitly -// or being triggered from some internal error). -func Closed() bool { - return reporter.Closed() -} - // SetLogLevel changes the logging level of the library // Valid logging levels: DEBUG, INFO, WARN, ERROR func SetLogLevel(level string) error { @@ -91,11 +64,6 @@ func SetLogOutput(w io.Writer) { log.SetOutput(w) } -// SetServiceKey sets the service key of the agent -func SetServiceKey(key string) error { - return reporter.SetServiceKey(key) -} - func createResource(resourceAttrs ...attribute.KeyValue) (*resource.Resource, error) { return resource.New(context.Background(), resource.WithContainer(), @@ -120,13 +88,21 @@ func Start(resourceAttrs ...attribute.KeyValue) (func(), error) { // return a no-op func so that we don't cause a nil-deref for the end-user }, err } - reporter.Start(resrc) + registry := metrics.NewLegacyRegistry() + o := oboe.NewOboe() + _reporter, err := reporter.Start(resrc, registry, o) + if err != nil { + return func() {}, err + } - exprtr := exporter.NewExporter() - smplr := sampler.NewSampler() + exprtr := exporter.NewExporter(_reporter) + smplr, err := sampler.NewSampler(o) + if err != nil { + return func() {}, err + } config.Load() isAppoptics := strings.Contains(strings.ToLower(config.GetCollector()), "appoptics.com") - proc := processor.NewInboundMetricsSpanProcessor(isAppoptics) + proc := processor.NewInboundMetricsSpanProcessor(registry, isAppoptics) prop := propagation.NewCompositeTextMapPropagator( &propagation.TraceContext{}, &propagation.Baggage{}, diff --git a/swo/agent_test.go b/swo/agent_test.go index 370254e9..7b99fcc2 100644 --- a/swo/agent_test.go +++ b/swo/agent_test.go @@ -20,15 +20,13 @@ import ( "github.com/solarwinds/apm-go/internal/log" "github.com/solarwinds/apm-go/internal/testutils" "github.com/solarwinds/apm-go/internal/utils" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/sdk/trace" "os" "strings" "testing" - "time" - - "github.com/stretchr/testify/assert" ) func TestSetGetLogLevel(t *testing.T) { @@ -47,14 +45,6 @@ func TestSetGetLogLevel(t *testing.T) { require.NoError(t, SetLogLevel(oldLevel)) } -func TestShutdown(t *testing.T) { - require.NoError(t, Shutdown(context.Background())) - assert.True(t, Closed()) - ctx, cancel := context.WithTimeout(context.Background(), time.Hour*24) - defer cancel() - assert.False(t, WaitForReady(ctx)) -} - func TestSetLogOutput(t *testing.T) { oldLevel := GetLogLevel() _ = SetLogLevel("DEBUG") diff --git a/swo/log.go b/swo/log.go index f3720508..c3357bc2 100644 --- a/swo/log.go +++ b/swo/log.go @@ -17,7 +17,7 @@ package swo import ( "context" "fmt" - "github.com/solarwinds/apm-go/internal/reporter" + "github.com/solarwinds/apm-go/internal/state" "go.opentelemetry.io/otel/trace" ) @@ -56,6 +56,6 @@ func LoggableTraceFromSpanContext(ctx trace.SpanContext) LoggableTraceContext { TraceID: ctx.TraceID(), SpanID: ctx.SpanID(), TraceFlags: ctx.TraceFlags(), - ServiceName: reporter.GetServiceName(), + ServiceName: state.GetServiceName(), } } diff --git a/swo/log_test.go b/swo/log_test.go index a6501c55..2cef13da 100644 --- a/swo/log_test.go +++ b/swo/log_test.go @@ -16,23 +16,23 @@ package swo import ( "context" - "github.com/solarwinds/apm-go/internal/reporter" + "github.com/solarwinds/apm-go/internal/state" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/trace" "testing" ) func TestLoggableTraceIDFromContext(t *testing.T) { - r := reporter.SetTestReporter(reporter.TestReporterSettingType(reporter.DefaultST)) - defer r.Close(0) - + prev := state.GetServiceName() + state.SetServiceName("test-service") + defer state.SetServiceName(prev) ctx := context.Background() lt := LoggableTrace(ctx) require.Equal(t, LoggableTraceContext{ TraceID: trace.TraceID{}, SpanID: trace.SpanID{}, TraceFlags: 0, - ServiceName: "test-reporter-service", + ServiceName: "test-service", }, lt) sc := trace.NewSpanContext(trace.SpanContextConfig{ TraceID: trace.TraceID{0x22}, @@ -41,7 +41,7 @@ func TestLoggableTraceIDFromContext(t *testing.T) { }) require.False(t, lt.IsValid()) require.Equal(t, - "trace_id=00000000000000000000000000000000 span_id=0000000000000000 trace_flags=00 resource.service.name=test-reporter-service", + "trace_id=00000000000000000000000000000000 span_id=0000000000000000 trace_flags=00 resource.service.name=test-service", lt.String()) ctx = trace.ContextWithSpanContext(ctx, sc) @@ -50,11 +50,11 @@ func TestLoggableTraceIDFromContext(t *testing.T) { TraceID: sc.TraceID(), SpanID: sc.SpanID(), TraceFlags: sc.TraceFlags(), - ServiceName: "test-reporter-service", + ServiceName: "test-service", }, lt) require.True(t, lt.IsValid()) require.Equal(t, - "trace_id=22000000000000000000000000000000 span_id=1100000000000000 trace_flags=01 resource.service.name=test-reporter-service", + "trace_id=22000000000000000000000000000000 span_id=1100000000000000 trace_flags=01 resource.service.name=test-service", lt.String()) sc = trace.NewSpanContext(trace.SpanContextConfig{ @@ -68,10 +68,10 @@ func TestLoggableTraceIDFromContext(t *testing.T) { TraceID: sc.TraceID(), SpanID: sc.SpanID(), TraceFlags: sc.TraceFlags(), - ServiceName: "test-reporter-service", + ServiceName: "test-service", }, lt) require.True(t, lt.IsValid()) require.Equal(t, - "trace_id=33000000000000000000000000000000 span_id=aa00000000000000 trace_flags=01 resource.service.name=test-reporter-service", + "trace_id=33000000000000000000000000000000 span_id=aa00000000000000 trace_flags=01 resource.service.name=test-service", lt.String()) } diff --git a/swo/metrics.go b/swo/metrics.go deleted file mode 100644 index cdffca10..00000000 --- a/swo/metrics.go +++ /dev/null @@ -1,49 +0,0 @@ -// © 2023 SolarWinds Worldwide, LLC. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package swo - -import ( - "github.com/solarwinds/apm-go/internal/metrics" -) - -// MetricOptions is a struct for the optional parameters of a measurement. -type MetricOptions = metrics.MetricOptions - -const ( - // MaxTagsCount is the maximum number of tags allowed. - MaxTagsCount = metrics.MaxTagsCount -) - -// The measurements submission errors -var ( - // ErrExceedsTagsCountLimit indicates the count of tags exceeds the limit - ErrExceedsTagsCountLimit = metrics.ErrExceedsTagsCountLimit - // ErrExceedsMetricsCountLimit indicates there are too many distinct measurements in a flush cycle. - ErrExceedsMetricsCountLimit = metrics.ErrExceedsMetricsCountLimit - // ErrMetricsWithNonPositiveCount indicates the count is negative or zero - ErrMetricsWithNonPositiveCount = metrics.ErrMetricsWithNonPositiveCount -) - -// SummaryMetric submits a summary type measurement to the reporter. The measurements -// will be collected in the background and reported periodically. -func SummaryMetric(name string, value float64, opts MetricOptions) error { - return metrics.CustomMetrics.Summary(name, value, opts) -} - -// IncrementMetric submits a incremental measurement to the reporter. The measurements -// will be collected in the background and reported periodically. -func IncrementMetric(name string, opts MetricOptions) error { - return metrics.CustomMetrics.Increment(name, opts) -}