From 7dadc141780c3f73fb22f782fa81c1d5335885e6 Mon Sep 17 00:00:00 2001 From: Evan <10603766+hoenn@users.noreply.github.com> Date: Thu, 8 Aug 2019 16:28:48 -0400 Subject: [PATCH] Instrument writers, secondary registry for local metrics (#204) --- cmd/do-agent/config.go | 6 +++--- cmd/do-agent/main.go | 8 ++++++-- cmd/do-agent/run.go | 16 ++++++++++++++-- pkg/writer/file.go | 7 ++++++- pkg/writer/sonar.go | 11 ++++++++++- 5 files changed, 39 insertions(+), 9 deletions(-) diff --git a/cmd/do-agent/config.go b/cmd/do-agent/config.go index f9be1edc..48c60107 100644 --- a/cmd/do-agent/config.go +++ b/cmd/do-agent/config.go @@ -147,13 +147,13 @@ func checkConfig() error { return nil } -func initWriter() (metricWriter, limiter) { +func initWriter(wc *prometheus.CounterVec) (metricWriter, limiter) { if config.stdoutOnly { - return writer.NewFile(os.Stdout), &constThrottler{wait: 10 * time.Second} + return writer.NewFile(os.Stdout, wc), &constThrottler{wait: 10 * time.Second} } tsc := newTimeseriesClient() - return writer.NewSonar(tsc), tsc + return writer.NewSonar(tsc, wc), tsc } func initDecorator() decorate.Chain { diff --git a/cmd/do-agent/main.go b/cmd/do-agent/main.go index 28b37aa8..b9d73e51 100644 --- a/cmd/do-agent/main.go +++ b/cmd/do-agent/main.go @@ -31,8 +31,12 @@ func main() { reg.MustRegister(cols...) if config.webListen { + //Create a secondary registry for local only metrics + localReg := prometheus.NewRegistry() + localCols := append(cols, metricWriterDiagnostics) + localReg.MustRegister(localCols...) go func() { - http.Handle("/", promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) + http.Handle("/", promhttp.HandlerFor(localReg, promhttp.HandlerOpts{})) err := http.ListenAndServe(config.webListenAddress, nil) if err != nil { log.Error("failed to init HTTP listener: %+v", err.Error()) @@ -40,7 +44,7 @@ func main() { }() } - w, th := initWriter() + w, th := initWriter(metricWriterDiagnostics) d := initDecorator() aggregateSpecs := initAggregatorSpecs() diff --git a/cmd/do-agent/run.go b/cmd/do-agent/run.go index dbfdfba9..876f2b75 100644 --- a/cmd/do-agent/run.go +++ b/cmd/do-agent/run.go @@ -14,16 +14,28 @@ import ( ) const ( - diagnosticMetricName = "sonar_diagnostic" + diagnosticMetricName = "sonar_diagnostic" + metricWriterDiagnosticsName = "metric_writes" ) var ( + //ErrAggregationFailed is the error msg for failed aggregation ErrAggregationFailed = fmt.Errorf("metric aggregation failed") - diagnosticMetric = prometheus.NewCounterVec(prometheus.CounterOpts{ + + diagnosticMetric = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: "", Name: diagnosticMetricName, Help: "do-agent diagnostic information", }, []string{"error"}) + + metricWriterDiagnostics = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "", + Name: metricWriterDiagnosticsName, + Help: "Total successes and failures of metric writers", + }, + []string{"writer", "result", "reason"}, + ) ) type metricWriter interface { diff --git a/pkg/writer/file.go b/pkg/writer/file.go index 40918ff3..377858c5 100644 --- a/pkg/writer/file.go +++ b/pkg/writer/file.go @@ -6,19 +6,23 @@ import ( "sync" "github.com/digitalocean/do-agent/pkg/aggregate" + "github.com/prometheus/client_golang/prometheus" ) // File writes metrics to an io.Writer type File struct { w io.Writer m *sync.Mutex + c *prometheus.CounterVec } // NewFile creates a new File writer with the provided writer -func NewFile(w io.Writer) *File { +func NewFile(w io.Writer, c *prometheus.CounterVec) *File { + c = c.MustCurryWith(prometheus.Labels{"writer": "file"}) return &File{ w: w, m: new(sync.Mutex), + c: c, } } @@ -29,6 +33,7 @@ func (w *File) Write(mets []aggregate.MetricWithValue) error { for _, met := range mets { fmt.Fprintf(w.w, "[%s]: %v: %v\n", met.LFM["__name__"], met.LFM, met.Value) } + w.c.WithLabelValues("success", "").Inc() return nil } diff --git a/pkg/writer/sonar.go b/pkg/writer/sonar.go index eb4fff5c..280de28e 100644 --- a/pkg/writer/sonar.go +++ b/pkg/writer/sonar.go @@ -6,6 +6,7 @@ import ( "github.com/digitalocean/do-agent/internal/log" "github.com/digitalocean/do-agent/pkg/aggregate" "github.com/digitalocean/do-agent/pkg/clients/tsclient" + "github.com/prometheus/client_golang/prometheus" "github.com/pkg/errors" ) @@ -26,13 +27,16 @@ var ( type Sonar struct { client tsclient.Client firstWriteSent bool + c *prometheus.CounterVec } // NewSonar creates a new Sonar writer -func NewSonar(client tsclient.Client) *Sonar { +func NewSonar(client tsclient.Client, c *prometheus.CounterVec) *Sonar { + c = c.MustCurryWith(prometheus.Labels{"writer": "sonar"}) return &Sonar{ client: client, firstWriteSent: false, + c: c, } } @@ -40,16 +44,19 @@ func NewSonar(client tsclient.Client) *Sonar { // before the next write func (s *Sonar) Write(mets []aggregate.MetricWithValue) error { if len(mets) > s.client.MaxBatchSize() { + s.c.WithLabelValues("failure", "too many metrics").Inc() return errors.Wrap(ErrTooManyMetrics, "cannot write metrics") } for _, m := range mets { lfmEncoded := tsclient.ConvertLFMMapToPrometheusEncodedName(m.LFM) if len(lfmEncoded) > s.client.MaxMetricLength() { + s.c.WithLabelValues("failure", "metric exceeds max length").Inc() return errors.Wrapf(ErrMetricTooLong, "cannot send metric: %q", lfmEncoded) } err := s.client.AddMetric(tsclient.NewDefinitionFromMap(m.LFM), m.Value) if err != nil { + s.c.WithLabelValues("failure", "could not add metric to batch").Inc() return err } } @@ -62,9 +69,11 @@ func (s *Sonar) Write(mets []aggregate.MetricWithValue) error { s.firstWriteSent = true if err == nil { + s.c.WithLabelValues("success", "").Inc() return nil } + s.c.WithLabelValues("failure", "failed to flush").Inc() log.Error("failed to flush: %+v", err) return ErrFlushFailure }