Skip to content

Commit

Permalink
Add an optional health metric for the sqlserver input plugin (influxd…
Browse files Browse the repository at this point in the history
  • Loading branch information
coquagli authored Mar 11, 2021
1 parent 35b75e9 commit 30e189d
Show file tree
Hide file tree
Showing 5 changed files with 372 additions and 4 deletions.
5 changes: 5 additions & 0 deletions etc/telegraf.conf
Original file line number Diff line number Diff line change
Expand Up @@ -5859,6 +5859,11 @@
# ## If you are using AzureDB, setting this to true will gather resource utilization metrics
# # azuredb = false

# ## Toggling this to true will emit an additional metric called "sqlserver_telegraf_health".
# ## This metric tracks the count of attempted queries and successful queries for each SQL instance specified in "servers".
# ## The purpose of this metric is to assist with identifying and diagnosing any connectivity or query issues.
# ## This setting/metric is optional and is disabled by default.
# # health_metric = false

# # Gather timeseries from Google Cloud Platform v3 monitoring API
# [[inputs.stackdriver]]
Expand Down
22 changes: 22 additions & 0 deletions plugins/inputs/sqlserver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ GO
## If you are using AzureDB, setting this to true will gather resource utilization metrics
# azuredb = false

## Toggling this to true will emit an additional metric called "sqlserver_telegraf_health".
## This metric tracks the count of attempted queries and successful queries for each SQL instance specified in "servers".
## The purpose of this metric is to assist with identifying and diagnosing any connectivity or query issues.
## This setting/metric is optional and is disabled by default.
# health_metric = false

## Possible queries across different versions of the collectors
## Queries enabled by default for specific Database Type

Expand Down Expand Up @@ -323,4 +329,20 @@ Version 2 queries have the following tags:
- `sql_instance`: Physical host and instance name (hostname:instance)
- `database_name`: For Azure SQLDB, database_name denotes the name of the Azure SQL Database as server name is a logical construct.

#### Health Metric
All collection versions (version 1, version 2, and database_type) support an optional plugin health metric called `sqlserver_telegraf_health`. This metric tracks if connections to SQL Server are succeeding or failing. Users can leverage this metric to detect if their SQL Server monitoring is not working as intended.

In the configuration file, toggling `health_metric` to `true` will enable collection of this metric. By default, this value is set to `false` and the metric is not collected. The health metric emits one record for each connection specified by `servers` in the configuration file.

The health metric emits the following tags:
- `sql_instance` - Name of the server specified in the connection string. This value is emitted exactly as it appears in the connection string. If the server could not be parsed from the connection string, a constant placeholder value is emitted.
- `database_name` - Name of the database (or initial catalog) specified in the connection string. This value is emitted exactly as it appears in the connection string. If the database could not be parsed from the connection string, a constant placeholder value is emitted.

The health metric emits the following fields:
- `attempted_queries` - Number of queries that were attempted for this connection
- `successful_queries` - Number of queries that completed successfully for this connection
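- `database_type` - Type of database as specified by `database_type`. If `database_type` is not set to one of the supported values (`AzureSQLDB`, `AzureSQLManagedInstance`, `SQLServer`), the `QueryVersion` and `AzureDB` settings are combined instead (for example, `QueryVersion-2-AzureDB`)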

If `attempted_queries` and `successful_queries` are not equal for a given connection, some metrics were not successfully gathered for that connection. If `successful_queries` is 0, no metrics were successfully gathered.

[cardinality]: /docs/FAQ.md#user-content-q-how-can-i-manage-series-cardinality
100 changes: 100 additions & 0 deletions plugins/inputs/sqlserver/connectionstring.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package sqlserver

import (
"net/url"
"strings"
)

// Placeholder identifiers returned by the connection string parsers in this
// file when the corresponding value cannot be determined.
const (
// emptySqlInstance stands in for the SQL instance name when the connection
// string is empty or carries no usable server value.
emptySqlInstance = "<empty-sql-instance>"
// emptyDatabaseName stands in for the database name when the connection
// string is empty or carries no usable database value.
emptyDatabaseName = "<empty-database-name>"
)

// getConnectionIdentifiers extracts the SQL instance and database name from a
// connection string.
//
// An empty connection string yields the placeholder identifiers. Otherwise the
// string is trimmed of surrounding whitespace and dispatched on its prefix:
//   - "sqlserver://" is handled by parseConnectionStringURL;
//   - "odbc:" is stripped and the remainder handled as "key=value;" pairs;
//   - anything else is handled as "key=value;" pairs.
//
// The prefix comparison is case-sensitive.
func getConnectionIdentifiers(connectionString string) (sqlInstance string, databaseName string) {
	if connectionString == "" {
		return emptySqlInstance, emptyDatabaseName
	}

	dsn := strings.TrimSpace(connectionString)
	switch {
	case strings.HasPrefix(dsn, "sqlserver://"):
		return parseConnectionStringURL(dsn)
	case strings.HasPrefix(dsn, "odbc:"):
		return parseConnectionStringKeyValue(dsn[len("odbc:"):])
	default:
		return parseConnectionStringKeyValue(dsn)
	}
}

// parseConnectionStringKeyValue extracts the SQL instance and database name
// from a "key=value;" style connection string.
//
// Pairs are separated by ";" and split on the first "=". Keys are matched
// case-insensitively after trimming whitespace; only "server" and "database"
// are inspected. Values are trimmed of surrounding whitespace. When a key
// appears more than once the last occurrence wins. A missing or empty value
// is reported as the corresponding placeholder.
func parseConnectionStringKeyValue(connectionString string) (sqlInstance string, databaseName string) {
	var server, database string

	for _, pair := range strings.Split(connectionString, ";") {
		name, value := pair, ""
		if eq := strings.Index(pair, "="); eq >= 0 {
			name, value = pair[:eq], strings.TrimSpace(pair[eq+1:])
		}
		name = strings.TrimSpace(name)

		// Empty pairs and empty keys match neither case and are ignored.
		switch {
		case strings.EqualFold(name, "server"):
			server = value
		case strings.EqualFold(name, "database"):
			database = value
		}
	}

	if server == "" {
		server = emptySqlInstance
	}
	if database == "" {
		database = emptyDatabaseName
	}
	return server, database
}

// parseConnectionStringURL extracts the SQL instance and database name from a
// URL-formatted connection string such as
// "sqlserver://user:pass@host:1433/InstanceName?database=name".
//
// The SQL instance is the URL host name (without port). When the URL has a
// path, it is treated as the instance name and appended after a backslash,
// e.g. "host\InstanceName". The database name is the first value of the
// "database" query parameter, matched case-insensitively.
//
// If the URL cannot be parsed, both placeholders are returned. If the
// resulting instance or database name is empty (no host, no "database"
// parameter, or an empty value), the corresponding placeholder is returned so
// that callers never receive an empty identifier, matching
// parseConnectionStringKeyValue.
func parseConnectionStringURL(connectionString string) (sqlInstance string, databaseName string) {
	u, err := url.Parse(connectionString)
	if err != nil {
		return emptySqlInstance, emptyDatabaseName
	}

	sqlInstance = u.Hostname()
	if len(u.Path) > 1 {
		// There was a SQL instance name specified in addition to the host
		// E.g. "the.host.com:1234/InstanceName" or "the.host.com/InstanceName"
		sqlInstance += "\\" + u.Path[1:]
	}

	for key, values := range u.Query() {
		if strings.EqualFold("database", key) && len(values) > 0 {
			databaseName = values[0]
			break
		}
	}

	// Fall back to the placeholders for anything that ended up empty.
	if sqlInstance == "" {
		sqlInstance = emptySqlInstance
	}
	if databaseName == "" {
		databaseName = emptyDatabaseName
	}

	return sqlInstance, databaseName
}
85 changes: 81 additions & 4 deletions plugins/inputs/sqlserver/sqlserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type SQLServer struct {
DatabaseType string `toml:"database_type"`
IncludeQuery []string `toml:"include_query"`
ExcludeQuery []string `toml:"exclude_query"`
HealthMetric bool `toml:"health_metric"`
queries MapQuery
isInitialized bool
}
Expand All @@ -36,8 +37,29 @@ type Query struct {
// MapQuery type
type MapQuery map[string]Query

// HealthMetric holds the query counters tracked for a single connection
// string: how many queries were attempted and how many of them succeeded.
type HealthMetric struct {
// AttemptedQueries is incremented once for every query outcome recorded by gatherHealth.
AttemptedQueries int
// SuccessfulQueries is incremented only when the recorded query returned a nil error.
SuccessfulQueries int
}

// defaultServer is a connection string for the server ".".
// NOTE(review): presumably used as the fallback when no servers are configured — confirm against its use.
const defaultServer = "Server=.;app name=telegraf;log=1;"

// Values of the database_type setting that are recognised when selecting
// queries and when reporting the database type in the health metric.
const (
typeAzureSQLDB = "AzureSQLDB"
typeAzureSQLManagedInstance = "AzureSQLManagedInstance"
typeSQLServer = "SQLServer"
)

// Measurement, tag and field names used when emitting the health metric.
const (
// healthMetricName is the measurement name.
healthMetricName = "sqlserver_telegraf_health"
// Tag keys.
healthMetricInstanceTag = "sql_instance"
healthMetricDatabaseTag = "database_name"
// Field keys.
healthMetricAttemptedQueries = "attempted_queries"
healthMetricSuccessfulQueries = "successful_queries"
healthMetricDatabaseType = "database_type"
)

const sampleConfig = `
## Specify instances to monitor with a list of connection strings.
## All connection parameters are optional.
Expand Down Expand Up @@ -124,7 +146,7 @@ func initQueries(s *SQLServer) error {
// Constant definitions for type "AzureSQLDB" start with sqlAzureDB
// Constant definitions for type "AzureSQLManagedInstance" start with sqlAzureMI
// Constant definitions for type "SQLServer" start with sqlServer
if s.DatabaseType == "AzureSQLDB" {
if s.DatabaseType == typeAzureSQLDB {
queries["AzureSQLDBResourceStats"] = Query{ScriptName: "AzureSQLDBResourceStats", Script: sqlAzureDBResourceStats, ResultByRow: false}
queries["AzureSQLDBResourceGovernance"] = Query{ScriptName: "AzureSQLDBResourceGovernance", Script: sqlAzureDBResourceGovernance, ResultByRow: false}
queries["AzureSQLDBWaitStats"] = Query{ScriptName: "AzureSQLDBWaitStats", Script: sqlAzureDBWaitStats, ResultByRow: false}
Expand All @@ -135,7 +157,7 @@ func initQueries(s *SQLServer) error {
queries["AzureSQLDBPerformanceCounters"] = Query{ScriptName: "AzureSQLDBPerformanceCounters", Script: sqlAzureDBPerformanceCounters, ResultByRow: false}
queries["AzureSQLDBRequests"] = Query{ScriptName: "AzureSQLDBRequests", Script: sqlAzureDBRequests, ResultByRow: false}
queries["AzureSQLDBSchedulers"] = Query{ScriptName: "AzureSQLDBSchedulers", Script: sqlAzureDBSchedulers, ResultByRow: false}
} else if s.DatabaseType == "AzureSQLManagedInstance" {
} else if s.DatabaseType == typeAzureSQLManagedInstance {
queries["AzureSQLMIResourceStats"] = Query{ScriptName: "AzureSQLMIResourceStats", Script: sqlAzureMIResourceStats, ResultByRow: false}
queries["AzureSQLMIResourceGovernance"] = Query{ScriptName: "AzureSQLMIResourceGovernance", Script: sqlAzureMIResourceGovernance, ResultByRow: false}
queries["AzureSQLMIDatabaseIO"] = Query{ScriptName: "AzureSQLMIDatabaseIO", Script: sqlAzureMIDatabaseIO, ResultByRow: false}
Expand All @@ -145,7 +167,7 @@ func initQueries(s *SQLServer) error {
queries["AzureSQLMIPerformanceCounters"] = Query{ScriptName: "AzureSQLMIPerformanceCounters", Script: sqlAzureMIPerformanceCounters, ResultByRow: false}
queries["AzureSQLMIRequests"] = Query{ScriptName: "AzureSQLMIRequests", Script: sqlAzureMIRequests, ResultByRow: false}
queries["AzureSQLMISchedulers"] = Query{ScriptName: "AzureSQLMISchedulers", Script: sqlAzureMISchedulers, ResultByRow: false}
} else if s.DatabaseType == "SQLServer" { //These are still V2 queries and have not been refactored yet.
} else if s.DatabaseType == typeSQLServer { //These are still V2 queries and have not been refactored yet.
queries["SQLServerPerformanceCounters"] = Query{ScriptName: "SQLServerPerformanceCounters", Script: sqlServerPerformanceCounters, ResultByRow: false}
queries["SQLServerWaitStatsCategorized"] = Query{ScriptName: "SQLServerWaitStatsCategorized", Script: sqlServerWaitStatsCategorized, ResultByRow: false}
queries["SQLServerDatabaseIO"] = Query{ScriptName: "SQLServerDatabaseIO", Script: sqlServerDatabaseIO, ResultByRow: false}
Expand Down Expand Up @@ -222,18 +244,33 @@ func (s *SQLServer) Gather(acc telegraf.Accumulator) error {
}

var wg sync.WaitGroup
var mutex sync.Mutex
var healthMetrics = make(map[string]*HealthMetric)

for _, serv := range s.Servers {
for _, query := range s.queries {
wg.Add(1)
go func(serv string, query Query) {
defer wg.Done()
acc.AddError(s.gatherServer(serv, query, acc))
queryError := s.gatherServer(serv, query, acc)

if s.HealthMetric {
mutex.Lock()
s.gatherHealth(healthMetrics, serv, queryError)
mutex.Unlock()
}

acc.AddError(queryError)
}(serv, query)
}
}

wg.Wait()

if s.HealthMetric {
s.accHealth(healthMetrics, acc)
}

return nil
}

Expand Down Expand Up @@ -323,6 +360,46 @@ func (s *SQLServer) accRow(query Query, acc telegraf.Accumulator, row scanner) e
return nil
}

// gatherHealth records the outcome of one query against serv in healthMetrics.
//
// An entry for serv is created on first use. Every call counts as an attempted
// query; it also counts as a successful query when queryError is nil.
// The function performs no locking itself, so callers sharing healthMetrics
// across goroutines must serialize calls.
func (s *SQLServer) gatherHealth(healthMetrics map[string]*HealthMetric, serv string, queryError error) {
	stats := healthMetrics[serv]
	if stats == nil {
		stats = &HealthMetric{}
		healthMetrics[serv] = stats
	}

	stats.AttemptedQueries++
	if queryError != nil {
		return
	}
	stats.SuccessfulQueries++
}

// accHealth emits one health metric point per entry in healthMetrics.
//
// Each map key is treated as a connection string and resolved to its SQL
// instance and database name, which become the point's tags. The fields carry
// the attempted and successful query counts plus the database type reported by
// getDatabaseTypeToLog. Every point is timestamped with the current time.
func (s *SQLServer) accHealth(healthMetrics map[string]*HealthMetric, acc telegraf.Accumulator) {
	databaseType := s.getDatabaseTypeToLog()

	for dsn, stats := range healthMetrics {
		instance, database := getConnectionIdentifiers(dsn)

		acc.AddFields(
			healthMetricName,
			map[string]interface{}{
				healthMetricAttemptedQueries:  stats.AttemptedQueries,
				healthMetricSuccessfulQueries: stats.SuccessfulQueries,
				healthMetricDatabaseType:      databaseType,
			},
			map[string]string{
				healthMetricInstanceTag: instance,
				healthMetricDatabaseTag: database,
			},
			time.Now(),
		)
	}
}

// getDatabaseTypeToLog returns the database type label reported in the health
// metric.
//
// When DatabaseType is one of the recognised values it is returned unchanged.
// Otherwise the label is derived from the legacy settings: "QueryVersion-<n>",
// with "-AzureDB" appended when AzureDB is enabled.
func (s *SQLServer) getDatabaseTypeToLog() string {
	switch s.DatabaseType {
	case typeAzureSQLDB, typeAzureSQLManagedInstance, typeSQLServer:
		return s.DatabaseType
	}

	suffix := ""
	if s.AzureDB {
		suffix = "-AzureDB"
	}
	return fmt.Sprintf("QueryVersion-%d", s.QueryVersion) + suffix
}

func (s *SQLServer) Init() error {
if len(s.Servers) == 0 {
log.Println("W! Warning: Server list is empty.")
Expand Down
Loading

0 comments on commit 30e189d

Please sign in to comment.