Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: separate metrics code for reuse #13514

Merged
merged 1 commit into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions util/telemetry/attributes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package telemetry

// Attribute key names used when recording telemetry.
//
// Several constants deliberately or incidentally share the same string value
// ("name", "namespace", "phase"); they remain separate identifiers here.
const (
	// Keys describing the build.
	AttribBuildVersion      string = "version"
	AttribBuildPlatform     string = "platform"
	AttribBuildGoVersion    string = "go_version"
	AttribBuildDate         string = "build_date"
	AttribBuildCompiler     string = "compiler"
	AttribBuildGitCommit    string = "git_commit"
	AttribBuildGitTreeState string = "git_treestate"
	AttribBuildGitTag       string = "git_tag"

	// Key for a cron workflow's name.
	AttribCronWFName string = "name"

	// Key for the cause of an error.
	AttribErrorCause string = "cause"

	// Key for a log level.
	AttribLogLevel string = "level"

	// Key for a node phase.
	AttribNodePhase string = "node_phase"

	// Keys describing a pod.
	AttribPodPhase         string = "phase"
	AttribPodNamespace     string = "namespace"
	AttribPodPendingReason string = "reason"

	// Key for a queue name.
	AttribQueueName string = "queue_name"

	// Key for the "recently started" flag.
	AttribRecentlyStarted string = "recently_started"

	// Keys describing a request (untyped string constants).
	AttribRequestKind = "kind"
	AttribRequestVerb = "verb"
	AttribRequestCode = "status_code"

	// Keys describing a template.
	AttribTemplateName      string = "name"
	AttribTemplateNamespace string = "namespace"
	AttribTemplateCluster   string = "cluster_scope"

	// Key for a worker type.
	AttribWorkerType string = "worker_type"

	// Keys describing a workflow (status and type are untyped string constants).
	AttribWorkflowNamespace string = "namespace"
	AttribWorkflowPhase     string = "phase"
	AttribWorkflowStatus           = "status"
	AttribWorkflowType             = "type"
)
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package metrics
package telemetry

import (
"context"
Expand All @@ -20,8 +20,8 @@ import (
)

const (
// NOTE(review): the two lower-case names below look like the pre-rename
// (unexported) spellings shown by the diff; they carry the same values as
// the exported constants that follow — confirm which set the file keeps.
defaultPrometheusServerPort = 9090
defaultPrometheusServerPath = "/metrics"
// DefaultPrometheusServerPort is the port returned by (*Config).port when
// Config.Port is 0.
DefaultPrometheusServerPort = 9090
// DefaultPrometheusServerPath is the path returned by (*Config).path when
// Config.Path is empty.
DefaultPrometheusServerPath = "/metrics"
)

func (config *Config) prometheusMetricsExporter(namespace string) (*prometheus.Exporter, error) {
Expand All @@ -39,14 +39,14 @@ func (config *Config) prometheusMetricsExporter(namespace string) (*prometheus.E

// path returns config.Path, falling back to DefaultPrometheusServerPath when
// no path has been configured (empty string).
func (config *Config) path() string {
	if config.Path == "" {
		return DefaultPrometheusServerPath
	}
	return config.Path
}

// port returns config.Port, falling back to DefaultPrometheusServerPort when
// no port has been configured (zero value).
func (config *Config) port() int {
	if config.Port == 0 {
		return DefaultPrometheusServerPort
	}
	return config.Port
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//go:build !windows

package metrics
package telemetry

import (
"context"
Expand All @@ -14,19 +14,22 @@ import (
"github.com/stretchr/testify/require"
)

// testScopeName is the name given to metrics created by the tests in this
// file; the tests pass it to NewMetrics for both of its name arguments.
const testScopeName string = "argo-workflows-test"

func TestDisablePrometheusServer(t *testing.T) {
config := Config{
Enabled: false,
Path: defaultPrometheusServerPath,
Port: defaultPrometheusServerPort,
Path: DefaultPrometheusServerPath,
Port: DefaultPrometheusServerPort,
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
m, err := New(ctx, TestScopeName, &config, Callbacks{})
m, err := NewMetrics(ctx, testScopeName, testScopeName, &config)
require.NoError(t, err)
go m.RunPrometheusServer(ctx, false)
time.Sleep(1 * time.Second) // to confirm that the server doesn't start, even if we wait
resp, err := http.Get(fmt.Sprintf("http://localhost:%d%s", defaultPrometheusServerPort, defaultPrometheusServerPath))
resp, err := http.Get(fmt.Sprintf("http://localhost:%d%s", DefaultPrometheusServerPort, DefaultPrometheusServerPath))
if resp != nil {
defer resp.Body.Close()
}
Expand All @@ -37,16 +40,16 @@ func TestDisablePrometheusServer(t *testing.T) {
func TestPrometheusServer(t *testing.T) {
config := Config{
Enabled: true,
Path: defaultPrometheusServerPath,
Port: defaultPrometheusServerPort,
Path: DefaultPrometheusServerPath,
Port: DefaultPrometheusServerPort,
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
m, err := New(ctx, TestScopeName, &config, Callbacks{})
m, err := NewMetrics(ctx, testScopeName, testScopeName, &config)
require.NoError(t, err)
go m.RunPrometheusServer(ctx, false)
time.Sleep(1 * time.Second)
resp, err := http.Get(fmt.Sprintf("http://localhost:%d%s", defaultPrometheusServerPort, defaultPrometheusServerPath))
resp, err := http.Get(fmt.Sprintf("http://localhost:%d%s", DefaultPrometheusServerPort, DefaultPrometheusServerPath))
require.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)

Expand All @@ -57,22 +60,25 @@ func TestPrometheusServer(t *testing.T) {

bodyString := string(bodyBytes)
assert.NotEmpty(t, bodyString)

cancel() // Explicit cancel as sometimes in github CI port 9090 is still busy
time.Sleep(1 * time.Second) // Wait for prometheus server
}

func TestDummyPrometheusServer(t *testing.T) {
config := Config{
Enabled: true,
Path: defaultPrometheusServerPath,
Port: defaultPrometheusServerPort,
Path: DefaultPrometheusServerPath,
Port: DefaultPrometheusServerPort,
Secure: false,
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
m, err := New(ctx, TestScopeName, &config, Callbacks{})
m, err := NewMetrics(ctx, testScopeName, testScopeName, &config)
require.NoError(t, err)
go m.RunPrometheusServer(ctx, true)
time.Sleep(1 * time.Second)
resp, err := http.Get(fmt.Sprintf("http://localhost:%d%s", defaultPrometheusServerPort, defaultPrometheusServerPath))
resp, err := http.Get(fmt.Sprintf("http://localhost:%d%s", DefaultPrometheusServerPort, DefaultPrometheusServerPath))
require.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)

Expand All @@ -84,4 +90,7 @@ func TestDummyPrometheusServer(t *testing.T) {
bodyString := string(bodyBytes)

assert.Empty(t, bodyString) // expect the dummy metrics server to provide no metrics responses

cancel() // Explicit cancel as sometimes in github CI port 9090 is still busy
time.Sleep(1 * time.Second) // Wait for prometheus server
}
65 changes: 65 additions & 0 deletions util/telemetry/helpers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package telemetry

import (
"context"

"go.opentelemetry.io/otel/sdk/metric"
)

// createDefaultTestMetrics builds test metrics from a Config whose only
// explicitly set field is Enabled (true), delegating to createTestMetrics.
func createDefaultTestMetrics() (*Metrics, *TestMetricsExporter, error) {
	return createTestMetrics(&Config{Enabled: true})
}

// createTestMetrics creates a Metrics for the given config, wired to a new
// TestMetricsExporter through metric.WithReader, and then populates it with
// AddVersion plus the testing counter and histogram defined in this file.
//
// If NewMetrics fails, it returns nil, nil and that error. Otherwise it
// returns the metrics and exporter together with whatever error Populate
// reported (which may be nil).
func createTestMetrics(config *Config) (*Metrics, *TestMetricsExporter, error) {
	background := context.Background()
	exporter := NewTestMetricsExporter()

	metrics, err := NewMetrics(background, TestScopeName, TestScopeName, config, metric.WithReader(exporter))
	if err != nil {
		return nil, nil, err
	}

	// The Populate error is handed back alongside the already-built values.
	populateErr := metrics.Populate(background, AddVersion, addTestingCounter, addTestingHistogram)
	return metrics, exporter, populateErr
}

// Names and error causes used by the testing-only instruments in this file.
const (
	// Instrument names.
	nameTestingHistogram = "testing_histogram"
	nameTestingCounter   = "testing_counter"

	// Values recorded under the error-cause attribute.
	errorCauseTestingA = "TestingA"
	errorCauseTestingB = "TestingB"
)

// addTestingHistogram registers the testing histogram instrument on m and
// returns the error from CreateInstrument. The context argument is unused.
func addTestingHistogram(_ context.Context, m *Metrics) error {
	// These buckets are only the defaults; they can be overridden with
	// configmap defaults.
	defaultBuckets := []float64{0.0, 1.0, 5.0, 10.0}

	return m.CreateInstrument(
		Float64Histogram,
		nameTestingHistogram,
		"Testing Metric",
		"s",
		WithDefaultBuckets(defaultBuckets),
		WithAsBuiltIn(),
	)
}

// TestingHistogramRecord records value against the testing histogram with an
// empty attribute set.
func (m *Metrics) TestingHistogramRecord(ctx context.Context, value float64) {
	noAttribs := InstAttribs{}
	m.Record(ctx, nameTestingHistogram, value, noAttribs)
}

// addTestingCounter registers the testing error counter instrument on m and
// returns the error from CreateInstrument. ctx is accepted but not used.
func addTestingCounter(ctx context.Context, m *Metrics) error {
	const (
		description = "Testing Error Counting Metric"
		unit        = "{errors}"
	)
	return m.CreateInstrument(Int64Counter, nameTestingCounter, description, unit, WithAsBuiltIn())
}

// TestingErrorA adds 1 to the testing counter, tagged with the error cause
// errorCauseTestingA.
//
// It previously recorded errorCauseTestingB, which made it indistinguishable
// from TestingErrorB and left errorCauseTestingA unused.
func (m *Metrics) TestingErrorA(ctx context.Context) {
	m.AddInt(ctx, nameTestingCounter, 1, InstAttribs{{Name: AttribErrorCause, Value: errorCauseTestingA}})
}

// TestingErrorB adds 1 to the testing counter, tagged with the error cause
// errorCauseTestingB.
func (m *Metrics) TestingErrorB(ctx context.Context) {
	cause := InstAttribs{{Name: AttribErrorCause, Value: errorCauseTestingB}}
	m.AddInt(ctx, nameTestingCounter, 1, cause)
}
Loading
Loading