diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d5a80b8..b2d3cf6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,5 +1,6 @@ --- name: build + on: push: branches: @@ -22,21 +23,13 @@ on: - "grafana/**" - "systemd/**" - "renovate.json" + +concurrency: + group: build-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: - pre_job: - continue-on-error: true - runs-on: ubuntu-20.04 - outputs: - should_skip: ${{ steps.skip_check.outputs.should_skip }} - steps: - - id: skip_check - uses: fkirc/skip-duplicate-actions@v3.4.1 - with: - cancel_others: true - concurrent_skipping: same_content build: - needs: pre_job - if: ${{ needs.pre_job.outputs.should_skip != 'true' }} runs-on: ubuntu-20.04 steps: - name: Checkout @@ -45,19 +38,14 @@ jobs: uses: actions/setup-go@v3.0.0 with: # renovate: go - go-version: 1.17.5 + go-version: 1.18 - name: Ensure go.mod is already tidied run: go mod tidy && git diff --no-patch --exit-code - - name: Install gofumpt - run: go install mvdan.cc/gofumpt@v0.2.1 - - name: Ensure code is properly formatted using gofumpt - run: test -z "$(gofumpt -d .)" - name: Run linters uses: golangci/golangci-lint-action@v3.1.0 with: # renovate: golangci-lint version: v1.45.2 - skip-go-installation: true args: --timeout=3m0s - name: Install go-acc # renovate: go-acc diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 19f7b69..a567498 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -3,7 +3,7 @@ name: release on: push: tags: - - 'v*.*.*' + - "v*.*.*" jobs: release: runs-on: ubuntu-20.04 @@ -18,7 +18,7 @@ jobs: uses: actions/setup-go@v3.0.0 with: # renovate: go - go-version: 1.17.5 + go-version: 1.18 - name: Login to DockerHub uses: docker/login-action@v1.14.1 with: diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..b6c2017 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,75 @@ +# 
https://golangci-lint.run/usage/linters/ +linters: + enable: + - asciicheck + - bidichk + - containedctx + - contextcheck + - cyclop + - decorder + - dogsled + - dupl + - durationcheck + - errname + - errorlint + - exhaustive + - exportloopref + - forbidigo + - forcetypeassert + - funlen + - gci +# - gochecknoglobals + - gochecknoinits + - gocognit + - goconst + - gocritic + - gocyclo + - godot + - godox + - goerr113 + - gofumpt + - goheader +# - goimports # inconsistent with gci + - gomnd + - gomoddirectives +# - gomodguard # no need + - goprintffuncname + - gosec + - grouper + - ifshort + - importas + - ireturn + - lll + - maintidx + - makezero + - misspell + - nakedret + - nestif + - nilerr + - nilnil + - nlreturn + - noctx + - nolintlint + - paralleltest + - prealloc + - predeclared + - revive + - rowserrcheck +# - sqlclosecheck + - stylecheck + - tagliatelle + - tenv +# - testpackage # TODO: visit + - thelper + - tparallel + - unconvert + - unparam + - varnamelen + - wastedassign + - whitespace + - wrapcheck + - wsl + +linters-settings: + goconst: + ignore-tests: true diff --git a/cmd/nvidia_gpu_exporter/main.go b/cmd/nvidia_gpu_exporter/main.go index 85708a3..cd6976e 100644 --- a/cmd/nvidia_gpu_exporter/main.go +++ b/cmd/nvidia_gpu_exporter/main.go @@ -54,13 +54,15 @@ func main() { kingpin.Parse() logger := promlog.New(promlogConfig) - e, err := exporter.New(exporter.DefaultPrefix, *nvidiaSmiCommand, *qFields, logger) + + exp, err := exporter.New(exporter.DefaultPrefix, *nvidiaSmiCommand, *qFields, logger) if err != nil { _ = level.Error(logger).Log("msg", "Error on creating exporter", "err", err) + os.Exit(1) } - prometheus.MustRegister(e) + prometheus.MustRegister(exp) prometheus.MustRegister(version.NewCollector("nvidia_gpu_exporter")) _ = level.Info(logger).Log("msg", "Listening on address", "address", listenAddress) @@ -72,6 +74,7 @@ func main() { srv := &http.Server{Addr: *listenAddress} if err := web.ListenAndServe(srv, *webConfig, logger); err != nil 
{ _ = level.Error(logger).Log("msg", "Error starting HTTP server", "err", err) + os.Exit(1) } } @@ -89,8 +92,7 @@ func NewRootHandler(logger log.Logger, metricsPath string) *RootHandler { } func (r *RootHandler) ServeHTTP(w http.ResponseWriter, _ *http.Request) { - _, err := w.Write(r.response) - if err != nil { + if _, err := w.Write(r.response); err != nil { _ = level.Error(r.logger).Log("msg", "Error writing redirect", "err", err) } } diff --git a/go.mod b/go.mod index f598f56..bb5e46b 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,7 @@ module github.com/utkuozdemir/nvidia_gpu_exporter // renovate: go -go 1.17 +go 1.18 require ( github.com/go-kit/log v0.2.0 diff --git a/internal/exporter/csv.go b/internal/exporter/csv.go index 89f2e98..734cffb 100644 --- a/internal/exporter/csv.go +++ b/internal/exporter/csv.go @@ -22,6 +22,8 @@ type cell struct { rawValue string } +var ErrFieldCountMismatch = fmt.Errorf("field count mismatch") + func parseCSVIntoTable(queryResult string, qFields []qField) (table, error) { lines := strings.Split(strings.TrimSpace(queryResult), "\n") titlesLine := lines[0] @@ -42,30 +44,31 @@ func parseCSVIntoTable(queryResult string, qFields []qField) (table, error) { qFieldToCell := make(map[qField]cell, numCols) cells := make([]cell, numCols) rawValues := parseCSVLine(valuesLine) + if len(qFields) != len(rFields) { - return table{}, fmt.Errorf("query fields (%d) and returned fields (%d) have different sizes", len(qFields), len(rFields)) + return table{}, fmt.Errorf("%w: query fields: %d, returned fields: %d", + ErrFieldCountMismatch, len(qFields), len(rFields)) } for colIndex, rawValue := range rawValues { - q := qFields[colIndex] - r := rFields[colIndex] - gm := cell{ - qField: q, - rField: r, + currentQField := qFields[colIndex] + currentRField := rFields[colIndex] + tableCell := cell{ + qField: currentQField, + rField: currentRField, rawValue: rawValue, } - qFieldToCell[q] = gm - cells[colIndex] = gm - qFieldToCells[q][rowIndex] = gm + 
qFieldToCell[currentQField] = tableCell + cells[colIndex] = tableCell + qFieldToCells[currentQField][rowIndex] = tableCell } - gmc := row{ + tableRow := row{ qFieldToCells: qFieldToCell, cells: cells, } - rows[rowIndex] = gmc - + rows[rowIndex] = tableRow } return table{ @@ -78,8 +81,10 @@ func parseCSVIntoTable(queryResult string, qFields []qField) (table, error) { func parseCSVLine(line string) []string { values := strings.Split(line, ",") result := make([]string, len(values)) + for i, field := range values { result[i] = strings.TrimSpace(field) } + return result } diff --git a/internal/exporter/csv_test.go b/internal/exporter/csv_test.go index 7838543..071e495 100644 --- a/internal/exporter/csv_test.go +++ b/internal/exporter/csv_test.go @@ -15,7 +15,10 @@ Some Dummy GPU, 12.34 W ) func TestParseCsvIntoTable(t *testing.T) { + t.Parallel() + parsed, err := parseCSVIntoTable(testCsv, []qField{"name", "power.draw"}) + assert.NoError(t, err) assert.Len(t, parsed.rows, 2) assert.Equal(t, []rField{"name", "power.draw [W]"}, parsed.rFields) diff --git a/internal/exporter/exporter.go b/internal/exporter/exporter.go index ab8d76d..043e7eb 100644 --- a/internal/exporter/exporter.go +++ b/internal/exporter/exporter.go @@ -2,6 +2,7 @@ package exporter import ( "bytes" + "errors" "fmt" "os/exec" "regexp" @@ -14,18 +15,23 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -// qField stands for query field - the field name before the query +// qField stands for query field - the field name before the query. type qField string -// rField stands for returned field - the field name as returned by the nvidia-smi +// rField stands for returned field - the field name as returned by the nvidia-smi. 
type rField string const ( DefaultPrefix = "nvidia_smi" DefaultNvidiaSmiCommand = "nvidia-smi" + + floatBitSize = 64 ) var ( + ErrUnexpectedQueryField = errors.New("unexpected query field") + ErrParseNumber = errors.New("could not parse number from value") + numericRegex = regexp.MustCompile("[+-]?([0-9]*[.])?[0-9]+") requiredFields = []requiredField{ @@ -37,12 +43,19 @@ var ( {qField: driverVersionQField, label: "driver_version"}, } - runCmd = func(cmd *exec.Cmd) error { return cmd.Run() } + runCmd = func(cmd *exec.Cmd) error { + err := cmd.Run() + if err != nil { + return fmt.Errorf("error running command: %w", err) + } + + return nil + } ) -// Exporter collects stats and exports them using +// GPUExporter collects stats and exports them using // the prometheus metrics package. -type gpuExporter struct { +type GPUExporter struct { mutex sync.RWMutex prefix string qFields []qField @@ -54,17 +67,16 @@ type gpuExporter struct { logger log.Logger } -func New(prefix string, nvidiaSmiCommand string, qFieldsRaw string, logger log.Logger) (prometheus.Collector, error) { +func New(prefix string, nvidiaSmiCommand string, qFieldsRaw string, logger log.Logger) (*GPUExporter, error) { qFieldsOrdered, qFieldToRFieldMap, err := buildQFieldToRFieldMap(logger, qFieldsRaw, nvidiaSmiCommand) if err != nil { return nil, err } qFieldToMetricInfoMap := buildQFieldToMetricInfoMap(prefix, qFieldToRFieldMap) - // qFields := getKeys(qFieldToRFieldMap) infoLabels := getLabels(requiredFields) - e := gpuExporter{ + exporter := GPUExporter{ prefix: prefix, nvidiaSmiCommand: nvidiaSmiCommand, qFields: qFieldsOrdered, @@ -88,11 +100,12 @@ func New(prefix string, nvidiaSmiCommand string, qFieldsRaw string, logger log.L nil), } - return &e, nil + return &exporter, nil } func buildQFieldToRFieldMap(logger log.Logger, qFieldsRaw string, - nvidiaSmiCommand string) ([]qField, map[qField]rField, error) { + nvidiaSmiCommand string, +) ([]qField, map[qField]rField, error) { qFieldsSeparated := 
strings.Split(qFieldsRaw, ",") qFields := toQFieldSlice(qFieldsSeparated) @@ -103,28 +116,32 @@ func buildQFieldToRFieldMap(logger log.Logger, qFieldsRaw string, qFields = removeDuplicateQFields(qFields) if len(qFieldsSeparated) == 1 && qFieldsSeparated[0] == qFieldsAuto { - parsed, err := ParseAutoQFields(nvidiaSmiCommand) + parsed, err := parseAutoQFields(nvidiaSmiCommand) if err != nil { _ = level.Warn(logger).Log("msg", "Failed to auto-determine query field names, "+ - "falling back to the built-in list") + "falling back to the built-in list", "error", err) + return getKeys(fallbackQFieldToRFieldMap), fallbackQFieldToRFieldMap, nil } qFields = parsed } - _, t, err := scrape(qFields, nvidiaSmiCommand) + _, resultTable, err := scrape(qFields, nvidiaSmiCommand) + var rFields []rField + if err != nil { _ = level.Warn(logger).Log("msg", "Failed to run an initial scrape, using the built-in list for field mapping") + rFields, err = getFallbackValues(qFields) if err != nil { return nil, nil, err } } else { - rFields = t.rFields + rFields = resultTable.rFields } r := make(map[qField]rField, len(qFields)) @@ -137,7 +154,7 @@ func buildQFieldToRFieldMap(logger log.Logger, qFieldsRaw string, // Describe describes all the metrics ever exported by the exporter. It // implements prometheus.Collector. -func (e *gpuExporter) Describe(ch chan<- *prometheus.Desc) { +func (e *GPUExporter) Describe(ch chan<- *prometheus.Desc) { for _, m := range e.qFieldToMetricInfoMap { ch <- m.desc } @@ -146,43 +163,47 @@ func (e *gpuExporter) Describe(ch chan<- *prometheus.Desc) { } // Collect fetches the stats and delivers them as Prometheus metrics. It implements prometheus.Collector. 
-func (e *gpuExporter) Collect(ch chan<- prometheus.Metric) { +func (e *GPUExporter) Collect(metricCh chan<- prometheus.Metric) { e.mutex.Lock() defer e.mutex.Unlock() - exitCode, t, err := scrape(e.qFields, e.nvidiaSmiCommand) + exitCode, currentTable, err := scrape(e.qFields, e.nvidiaSmiCommand) e.exitCode.Set(float64(exitCode)) - ch <- e.exitCode + metricCh <- e.exitCode + if err != nil { _ = level.Error(e.logger).Log("error", err) - ch <- e.failedScrapesTotal + metricCh <- e.failedScrapesTotal e.failedScrapesTotal.Inc() + return } - for _, r := range t.rows { - uuid := strings.TrimPrefix(strings.ToLower(r.qFieldToCells[uuidQField].rawValue), "gpu-") - name := r.qFieldToCells[nameQField].rawValue - driverModelCurrent := r.qFieldToCells[driverModelCurrentQField].rawValue - driverModelPending := r.qFieldToCells[driverModelPendingQField].rawValue - vBiosVersion := r.qFieldToCells[vBiosVersionQField].rawValue - driverVersion := r.qFieldToCells[driverVersionQField].rawValue + for _, currentRow := range currentTable.rows { + uuid := strings.TrimPrefix(strings.ToLower(currentRow.qFieldToCells[uuidQField].rawValue), "gpu-") + name := currentRow.qFieldToCells[nameQField].rawValue + driverModelCurrent := currentRow.qFieldToCells[driverModelCurrentQField].rawValue + driverModelPending := currentRow.qFieldToCells[driverModelPendingQField].rawValue + vBiosVersion := currentRow.qFieldToCells[vBiosVersionQField].rawValue + driverVersion := currentRow.qFieldToCells[driverVersionQField].rawValue infoMetric := prometheus.MustNewConstMetric(e.gpuInfoDesc, prometheus.GaugeValue, 1, uuid, name, driverModelCurrent, driverModelPending, vBiosVersion, driverVersion) - ch <- infoMetric + metricCh <- infoMetric + + for _, currentCell := range currentRow.cells { + metricInfo := e.qFieldToMetricInfoMap[currentCell.qField] - for _, c := range r.cells { - mi := e.qFieldToMetricInfoMap[c.qField] - num, err := transformRawValue(c.rawValue, mi.valueMultiplier) + num, err := 
transformRawValue(currentCell.rawValue, metricInfo.valueMultiplier) if err != nil { _ = level.Debug(e.logger).Log("error", err, "query_field_name", - c.qField, "raw_value", c.rawValue) + currentCell.qField, "raw_value", currentCell.rawValue) + continue } - ch <- prometheus.MustNewConstMetric(mi.desc, mi.mType, num, uuid) + metricCh <- prometheus.MustNewConstMetric(metricInfo.desc, metricInfo.mType, num, uuid) } } } @@ -195,19 +216,24 @@ func scrape(qFields []qField, nvidiaSmiCommand string) (int, *table, error) { cmdAndArgs = append(cmdAndArgs, "--format=csv") var stdout bytes.Buffer + var stderr bytes.Buffer - cmd := exec.Command(cmdAndArgs[0], cmdAndArgs[1:]...) + + cmd := exec.Command(cmdAndArgs[0], cmdAndArgs[1:]...) //nolint:gosec cmd.Stdout = &stdout cmd.Stderr = &stderr err := runCmd(cmd) if err != nil { exitCode := -1 - if exitError, ok := err.(*exec.ExitError); ok { + + var exitError *exec.ExitError + if errors.As(err, &exitError) { exitCode = exitError.ExitCode() } - return exitCode, nil, fmt.Errorf("command failed. stderr: %s err: %w", stderr.String(), err) + return exitCode, nil, fmt.Errorf("%w: command failed. 
code: %d | command: %s | stdout: %s | stderr: %s", + err, exitCode, strings.Join(cmdAndArgs, " "), stdout.String(), stderr.String()) } t, err := parseCSVIntoTable(strings.TrimSpace(stdout.String()), qFields) @@ -224,6 +250,7 @@ type MetricInfo struct { valueMultiplier float64 } +//nolint:gomnd func transformRawValue(rawValue string, valueMultiplier float64) (float64, error) { trimmed := strings.TrimSpace(rawValue) if strings.HasPrefix(trimmed, "0x") { @@ -246,18 +273,22 @@ func transformRawValue(rawValue string, valueMultiplier float64) (float64, error case "exclusive_process": return 3, nil default: - allNums := numericRegex.FindAllString(val, 2) - if len(allNums) != 1 { - return -1, fmt.Errorf("couldn't parse number from: %s", val) - } + return parseSanitizedValueWithBestEffort(val, valueMultiplier) + } +} - parsed, err := strconv.ParseFloat(allNums[0], 64) - if err != nil { - return -1, err - } +func parseSanitizedValueWithBestEffort(sanitizedValue string, valueMultiplier float64) (float64, error) { + allNums := numericRegex.FindAllString(sanitizedValue, 2) //nolint:gomnd + if len(allNums) != 1 { + return -1, fmt.Errorf("%w: %s", ErrParseNumber, sanitizedValue) + } - return parsed * valueMultiplier, err + parsed, err := strconv.ParseFloat(allNums[0], floatBitSize) + if err != nil { + return -1, fmt.Errorf("failed to parse float: %w", err) } + + return parsed * valueMultiplier, nil } func buildQFieldToMetricInfoMap(prefix string, qFieldtoRFieldMap map[qField]rField) map[qField]MetricInfo { @@ -272,6 +303,7 @@ func buildQFieldToMetricInfoMap(prefix string, qFieldtoRFieldMap map[qField]rFie func buildMetricInfo(prefix string, rField rField) MetricInfo { fqName, multiplier := buildFQNameAndMultiplier(prefix, rField) desc := prometheus.NewDesc(fqName, string(rField), []string{"uuid"}, nil) + return MetricInfo{ desc: desc, mType: prometheus.GaugeValue, @@ -284,6 +316,8 @@ func buildFQNameAndMultiplier(prefix string, rField rField) (string, float64) { suffixTransformed 
:= rFieldStr multiplier := 1.0 split := strings.Split(rFieldStr, " ")[0] + + //nolint:gocritic if strings.HasSuffix(rFieldStr, " [W]") { suffixTransformed = split + "_watts" } else if strings.HasSuffix(rFieldStr, " [MHz]") { @@ -304,28 +338,34 @@ func buildFQNameAndMultiplier(prefix string, rField rField) (string, float64) { } func getKeys(m map[qField]rField) []qField { - r := make([]qField, len(m)) + qFields := make([]qField, len(m)) + i := 0 + for key := range m { - r[i] = key + qFields[i] = key i++ } - return r + + return qFields } func getFallbackValues(qFields []qField) ([]rField, error) { - r := make([]rField, len(qFields)) - i := 0 + rFields := make([]rField, len(qFields)) + + counter := 0 + for _, q := range qFields { val, contains := fallbackQFieldToRFieldMap[q] if !contains { - return nil, fmt.Errorf("unexpected query field: %s", q) + return nil, fmt.Errorf("%w: %s", ErrUnexpectedQueryField, q) } - r[i] = val - i++ + rFields[counter] = val + counter++ } - return r, nil + + return rFields, nil } func getLabels(reqFields []requiredField) []string { @@ -333,6 +373,7 @@ func getLabels(reqFields []requiredField) []string { for i, reqField := range reqFields { r[i] = reqField.label } + return r } @@ -342,15 +383,17 @@ type requiredField struct { } func removeDuplicateQFields(qFields []qField) []qField { - m := make(map[qField]struct{}) - var r []qField - for _, f := range qFields { - _, exists := m[f] + qFieldMap := make(map[qField]struct{}) + + var uniqueQFields []qField + + for _, field := range qFields { + _, exists := qFieldMap[field] if !exists { - r = append(r, f) - m[f] = struct{}{} + uniqueQFields = append(uniqueQFields, field) + qFieldMap[field] = struct{}{} } } - return r + return uniqueQFields } diff --git a/internal/exporter/exporter_test.go b/internal/exporter/exporter_test.go index cf62af9..3057e06 100644 --- a/internal/exporter/exporter_test.go +++ b/internal/exporter/exporter_test.go @@ -19,10 +19,14 @@ const ( var queryTest string func 
assertFloat(t *testing.T, expected, actual float64) bool { + t.Helper() + return assert.InDelta(t, expected, actual, delta) } func TestTransformRawValueValidValues(t *testing.T) { + t.Parallel() + expectedConversions := map[string]float64{ "disabled": 0, "enabled": 1, @@ -46,6 +50,8 @@ func TestTransformRawValueValidValues(t *testing.T) { } func TestTransformRawValueInvalidValues(t *testing.T) { + t.Parallel() + rawValues := []string{ "aaaaa", "0X1234", "aa111aa111", "123.456.789", } @@ -57,7 +63,10 @@ func TestTransformRawValueInvalidValues(t *testing.T) { } func TestTransformRawMultiplier(t *testing.T) { + t.Parallel() + val, err := transformRawValue("11", 2) + assert.NoError(t, err) assertFloat(t, 22, val) @@ -71,69 +80,99 @@ func TestTransformRawMultiplier(t *testing.T) { } func TestBuildFQNameAndMultiplierRegular(t *testing.T) { + t.Parallel() + fqName, multiplier := buildFQNameAndMultiplier("prefix", "encoder.stats.sessionCount") + assertFloat(t, 1, multiplier) assert.Equal(t, "prefix_encoder_stats_session_count", fqName) } func TestBuildFQNameAndMultiplierWatts(t *testing.T) { + t.Parallel() + fqName, multiplier := buildFQNameAndMultiplier("prefix", "power.draw [W]") + assertFloat(t, 1, multiplier) assert.Equal(t, "prefix_power_draw_watts", fqName) } func TestBuildFQNameAndMultiplierMiB(t *testing.T) { + t.Parallel() + fqName, multiplier := buildFQNameAndMultiplier("prefix", "memory.total [MiB]") + assertFloat(t, 1048576, multiplier) assert.Equal(t, "prefix_memory_total_bytes", fqName) } func TestBuildFQNameAndMultiplierMHZ(t *testing.T) { + t.Parallel() + fqName, multiplier := buildFQNameAndMultiplier("prefix", "clocks.current.graphics [MHz]") + assertFloat(t, 1000000, multiplier) assert.Equal(t, "prefix_clocks_current_graphics_clock_hz", fqName) } func TestBuildFQNameAndMultiplierRatio(t *testing.T) { + t.Parallel() + fqName, multiplier := buildFQNameAndMultiplier("prefix", "fan.speed [%]") + assertFloat(t, 0.01, multiplier) assert.Equal(t, 
"prefix_fan_speed_ratio", fqName) } func TestBuildFQNameAndMultiplierNoPrefix(t *testing.T) { + t.Parallel() + fqName, multiplier := buildFQNameAndMultiplier("", "encoder.stats.sessionCount") + assertFloat(t, 1, multiplier) assert.Equal(t, "encoder_stats_session_count", fqName) } func TestBuildMetricInfo(t *testing.T) { + t.Parallel() + metricInfo := buildMetricInfo("prefix", "encoder.stats.sessionCount") + assertFloat(t, 1, metricInfo.valueMultiplier) assert.Equal(t, prometheus.GaugeValue, metricInfo.mType) } func TestBuildQFieldToMetricInfoMap(t *testing.T) { - m := buildQFieldToMetricInfoMap("prefix", map[qField]rField{"aaa": "AAA", "bbb": "BBB"}) - assert.Len(t, m, 2) + t.Parallel() - metricInfo1 := m["aaa"] + qFieldToMetricInfoMap := buildQFieldToMetricInfoMap("prefix", map[qField]rField{"aaa": "AAA", "bbb": "BBB"}) + + assert.Len(t, qFieldToMetricInfoMap, 2) + + metricInfo1 := qFieldToMetricInfoMap["aaa"] assertFloat(t, 1, metricInfo1.valueMultiplier) assert.Equal(t, prometheus.GaugeValue, metricInfo1.mType) - metricInfo2 := m["bbb"] + metricInfo2 := qFieldToMetricInfoMap["bbb"] assertFloat(t, 1, metricInfo2.valueMultiplier) assert.Equal(t, prometheus.GaugeValue, metricInfo2.mType) } func TestNewUnknownField(t *testing.T) { + t.Parallel() + logger := log.NewNopLogger() _, err := New("aaa", "bbb", "a", logger) + assert.Error(t, err) } func TestDescribe(t *testing.T) { + t.Parallel() + logger := log.NewNopLogger() exp, err := New("aaa", "bbb", "fan.speed,memory.used", logger) + assert.NoError(t, err) doneCh := make(chan bool) @@ -171,11 +210,14 @@ end: } func TestCollect(t *testing.T) { + t.Parallel() + runCmdOriginal := runCmd defer func() { runCmd = runCmdOriginal }() runCmd = func(cmd *exec.Cmd) error { _, _ = cmd.Stdout.Write([]byte(queryTest)) + return nil } @@ -216,8 +258,11 @@ end: } func TestCollectError(t *testing.T) { + t.Parallel() + logger := log.NewNopLogger() exp, err := New("aaa", "bbb", "fan.speed,memory.used", logger) + assert.NoError(t, err) 
doneCh := make(chan bool) diff --git a/internal/exporter/fields.go b/internal/exporter/fields.go index 48df7a2..dd33f2e 100644 --- a/internal/exporter/fields.go +++ b/internal/exporter/fields.go @@ -3,6 +3,7 @@ package exporter import ( "bytes" "errors" + "fmt" "os/exec" "regexp" "strings" @@ -20,6 +21,8 @@ const ( ) var ( + ErrNoQueryFields = errors.New("could not extract any query fields") + fieldRegex = regexp.MustCompile(`(?m)\n\s*\n^"([^"]+)"`) fallbackQFieldToRFieldMap = map[qField]rField{ @@ -141,33 +144,52 @@ var ( } ) -func ParseAutoQFields(nvidiaSmiCommand string) ([]qField, error) { +func parseAutoQFields(nvidiaSmiCommand string) ([]qField, error) { cmdAndArgs := strings.Fields(nvidiaSmiCommand) cmdAndArgs = append(cmdAndArgs, "--help-query-gpu") - cmd := exec.Command(cmdAndArgs[0], cmdAndArgs[1:]...) + cmd := exec.Command(cmdAndArgs[0], cmdAndArgs[1:]...) //nolint:gosec var stdout bytes.Buffer + + var stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := runCmd(cmd) + + outStr := stdout.String() + errStr := stderr.String() + + exitCode := -1 + + var exitError *exec.ExitError + if errors.As(err, &exitError) { + exitCode = exitError.ExitCode() + } + if err != nil { - return nil, err + return nil, fmt.Errorf("%w: command failed. 
code: %d | command: %s | stdout: %s | stderr: %s", err, + exitCode, strings.Join(cmdAndArgs, " "), outStr, errStr) } - out := stdout.String() - fields := extractQFields(out) + fields := extractQFields(outStr) - if fields == nil { + if len(fields) == 0 { - return nil, errors.New("could not extract any query fields") + return nil, fmt.Errorf("%w: code: %d | command: %s | stdout: %s | stderr: %s", ErrNoQueryFields, + exitCode, strings.Join(cmdAndArgs, " "), outStr, errStr) } + return fields, nil } func extractQFields(text string) []qField { found := fieldRegex.FindAllStringSubmatch(text, -1) - var fields []qField - for _, ss := range found { - fields = append(fields, qField(ss[1])) + fields := make([]qField, len(found)) + for i, ss := range found { + fields[i] = qField(ss[1]) } + return fields } @@ -176,6 +198,7 @@ func toQFieldSlice(ss []string) []qField { for i, s := range ss { r[i] = qField(s) } + return r } @@ -184,6 +207,7 @@ func toRFieldSlice(ss []string) []rField { for i, s := range ss { r[i] = rField(s) } + return r } @@ -192,5 +216,6 @@ func QFieldSliceToStringSlice(qs []qField) []string { for i, q := range qs { r[i] = string(q) } + return r } diff --git a/internal/exporter/fields_test.go b/internal/exporter/fields_test.go index 77d25a0..ac66f50 100644 --- a/internal/exporter/fields_test.go +++ b/internal/exporter/fields_test.go @@ -53,22 +53,29 @@ var ( ) func TestExtractQFields(t *testing.T) { + t.Parallel() + fields := extractQFields(fieldsTest) + assert.Equal(t, expectedQFields, fields) } func TestParseAutoQFields(t *testing.T) { + t.Parallel() + runCmdOriginal := runCmd defer func() { runCmd = runCmdOriginal }() var capturedCmd *exec.Cmd + runCmd = func(cmd *exec.Cmd) error { capturedCmd = cmd _, _ = cmd.Stdout.Write([]byte(fieldsTest)) + return nil } - fields, err := ParseAutoQFields("nvidia-smi") + fields, err := parseAutoQFields("nvidia-smi") assert.Len(t, capturedCmd.Args, 2) assert.Equal(t, capturedCmd.Args[0], "nvidia-smi") diff --git a/internal/exporter/util.go 
b/internal/exporter/util.go index e9314cb..80e192f 100644 --- a/internal/exporter/util.go +++ b/internal/exporter/util.go @@ -6,6 +6,11 @@ import ( "strings" ) +const ( + hexToDecimalBase = 16 + hexToDecimalUIntBitSize = 64 +) + var ( matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)") matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])") @@ -14,13 +19,15 @@ var ( func toSnakeCase(str string) string { snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}") snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}") + return strings.ToLower(snake) } func hexToDecimal(hex string) (float64, error) { s := hex - s = strings.Replace(s, "0x", "", -1) - s = strings.Replace(s, "0X", "", -1) - parsed, err := strconv.ParseUint(s, 16, 64) + s = strings.ReplaceAll(s, "0x", "") + s = strings.ReplaceAll(s, "0X", "") + parsed, err := strconv.ParseUint(s, hexToDecimalBase, hexToDecimalUIntBitSize) + return float64(parsed), err } diff --git a/internal/exporter/util_test.go b/internal/exporter/util_test.go index 15d813e..75464e9 100644 --- a/internal/exporter/util_test.go +++ b/internal/exporter/util_test.go @@ -11,18 +11,27 @@ import ( ) func TestToSnakeCase(t *testing.T) { + t.Parallel() + snakeCase := toSnakeCase("aaaAAA_aaaAaa") + assert.Equal(t, "aaa_aaa_aaa_aaa", snakeCase) } func TestHexToDecimal(t *testing.T) { + t.Parallel() + decimal, err := hexToDecimal("0x40051458") + assert.NoError(t, err) assert.True(t, almostEqual(decimal, 1074074712.0)) } func TestHexToDecimalError(t *testing.T) { + t.Parallel() + _, err := hexToDecimal("SOMETHING") + assert.Error(t, err) } @@ -30,16 +39,21 @@ func almostEqual(a, b float64) bool { return math.Abs(a-b) <= 1e-9 } -// TestParseQueryFields is ran manually +// TestParseQueryFields is ran manually. 
+//nolint:forbidigo func TestParseQueryFields(t *testing.T) { t.SkipNow() + t.Parallel() + nvidiaSmiCommand := "nvidia-smi" - qFields, err := ParseAutoQFields(nvidiaSmiCommand) + qFields, err := parseAutoQFields(nvidiaSmiCommand) if err != nil { fmt.Printf("error: %v\n", err) os.Exit(1) } + fields := QFieldSliceToStringSlice(qFields) + fmt.Printf("Fields:\n\n%s\n", strings.Join(fields, "\n")) } diff --git a/samples/sample-source.sh b/samples/sample-source.sh index 2cf6471..e83c99b 100755 --- a/samples/sample-source.sh +++ b/samples/sample-source.sh @@ -2,3 +2,5 @@ echo "driver_version,uuid,name,driver_model.current,driver_model.pending,vbios_version" echo "460.91.03,GPU-df6e7a7c-7314-46f8-abc4-b88b36dcf3aa,NVIDIA GeForce RTX 2080 SUPER, WDDM, WDDM, 90.04.7A.40.73" + +exit 1