Skip to content

Commit

Permalink
Improve xml parser's handling of missing values (influxdata#8971)
Browse files Browse the repository at this point in the history
  • Loading branch information
srebhan authored Mar 16, 2021
1 parent 1b09a9f commit 808b7c1
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 10 deletions.
8 changes: 8 additions & 0 deletions plugins/parsers/xml/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
p.debugEmptyQuery("metric selection", doc, config.Selection)
return nil, fmt.Errorf("cannot parse with empty selection node")
}
p.Log.Debugf("Number of selected metric nodes: %d", len(selectedNodes))

for _, selected := range selectedNodes {
m, err := p.parseQuery(t, doc, selected, config)
Expand Down Expand Up @@ -165,6 +166,8 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected *xmlquery.Node, c
case float64:
// Assume the value to contain a timestamp in seconds and fractions thereof.
timestamp = time.Unix(0, int64(v.(float64)*1e9))
case nil:
// No timestamp found. Just ignore the time and use "starttime"
default:
return nil, fmt.Errorf("unknown format '%T' for timestamp query '%v'", v, config.Timestamp)
}
Expand All @@ -185,6 +188,8 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected *xmlquery.Node, c
tags[name] = strconv.FormatBool(v.(bool))
case float64:
tags[name] = strconv.FormatFloat(v.(float64), 'G', -1, 64)
case nil:
continue
default:
return nil, fmt.Errorf("unknown format '%T' for tag '%s'", v, name)
}
Expand Down Expand Up @@ -214,6 +219,8 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected *xmlquery.Node, c
}
case float64:
fields[name] = int64(v.(float64))
case nil:
continue
default:
return nil, fmt.Errorf("unknown format '%T' for field (int) '%s'", v, name)
}
Expand Down Expand Up @@ -244,6 +251,7 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected *xmlquery.Node, c
if err != nil {
return nil, err
}
p.Log.Debugf("Number of selected field nodes: %d", len(selectedFieldNodes))
if len(selectedFieldNodes) > 0 && selectedFieldNodes[0] != nil {
for _, selectedfield := range selectedFieldNodes {
n, err := executeQuery(doc, selectedfield, fieldnamequery)
Expand Down
24 changes: 14 additions & 10 deletions plugins/parsers/xml/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func TestParseInvalidXML(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags}
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}}

_, err := parser.ParseLine(tt.input)
require.Error(t, err)
Expand Down Expand Up @@ -162,7 +162,7 @@ func TestInvalidTypeQueriesFail(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags}
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}}

_, err := parser.ParseLine(tt.input)
require.Error(t, err)
Expand Down Expand Up @@ -227,7 +227,7 @@ func TestInvalidTypeQueries(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags}
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}}

actual, err := parser.ParseLine(tt.input)
require.NoError(t, err)
Expand Down Expand Up @@ -356,7 +356,7 @@ func TestParseTimestamps(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags}
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}}

actual, err := parser.ParseLine(tt.input)
require.NoError(t, err)
Expand Down Expand Up @@ -560,7 +560,7 @@ func TestParseSingleValues(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags}
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}}

actual, err := parser.ParseLine(tt.input)
require.NoError(t, err)
Expand Down Expand Up @@ -771,7 +771,7 @@ func TestParseSingleAttributes(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags}
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}}

actual, err := parser.ParseLine(tt.input)
require.NoError(t, err)
Expand Down Expand Up @@ -857,7 +857,7 @@ func TestParseMultiValues(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags}
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}}

actual, err := parser.ParseLine(tt.input)
require.NoError(t, err)
Expand Down Expand Up @@ -969,7 +969,7 @@ func TestParseMultiNodes(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags}
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}}

actual, err := parser.Parse([]byte(tt.input))
require.NoError(t, err)
Expand Down Expand Up @@ -1014,7 +1014,7 @@ func TestParseMetricQuery(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags}
parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}}

actual, err := parser.ParseLine(tt.input)
require.NoError(t, err)
Expand Down Expand Up @@ -1114,6 +1114,10 @@ func TestTestCases(t *testing.T) {
name: "openweathermap forecast",
filename: "testcases/openweathermap.conf",
},
{
name: "earthquakes quakeml",
filename: "testcases/earthquakes.conf",
},
}

parser := influx.NewParser(influx.NewMetricHandler())
Expand Down Expand Up @@ -1141,7 +1145,7 @@ func TestTestCases(t *testing.T) {
expectedErrors, _ := testutil.ParseRawLinesFrom(header, "Expected Error:")

// Setup the parser and run it.
parser := Parser{Configs: []Config{*cfg}}
parser := &Parser{Configs: []Config{*cfg}, Log: testutil.Logger{Name: "parsers.xml"}}
outputs, err := parser.Parse(content)
if len(expectedErrors) == 0 {
require.NoError(t, err)
Expand Down
44 changes: 44 additions & 0 deletions plugins/parsers/xml/testcases/earthquakes.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Example for parsing QuakeML measurement data.
#
# File:
# testcases/earthquakes.quakeml
#
# Expected Output:
# earthquakes,agency=us,type=mww depth=13000,eventid="7000dg8x",lat=-37.6099,lon=179.6102,mag=6.3,station_count=33i 1614989782185000000
# earthquakes,agency=us,type=mww depth=17000,eventid="7000dft1",lat=-28.7146,lon=-176.5582,mag=6.3,station_count=15i 1614911436571000000
# earthquakes,agency=us,type=mww depth=26450,eventid="7000dflf",lat=-29.7347,lon=-177.2817,mag=8.1,station_count=81i 1614886112819000000
# earthquakes,agency=us,type=mb depth=10000,eventid="7000dfku",lat=39.7886,lon=22.1189,mag=5.8,station_count=279i 1614883099415000000
# earthquakes,agency=us,type=mww depth=53090,eventid="7000dfk3",lat=-29.6647,lon=-177.8343,mag=7.4,station_count=40i 1614879684425000000
# earthquakes,agency=us,type=mww depth=20780,eventid="7000dffl",lat=-37.5628,lon=179.4443,mag=7.3,station_count=33i 1614864456464000000
# earthquakes,agency=us,type=mww depth=10000,eventid="7000df40",lat=39.7641,lon=22.1756,mag=6.3,station_count=81i 1614766570197000000
# earthquakes,type=mww depth=42100,eventid="0212o88mof",lat=61.3286,lon=-149.9991,mag=5.3 1614452365398000000
# earthquakes,agency=us,type=mww depth=10000,eventid="6000dkmk",lat=63.9602,lon=-22.2736,mag=5.6,station_count=64i 1614161159873000000
# earthquakes,agency=NC,type=mw depth=6220,eventid="73526151",lat=37.0456667,lon=-121.4781667,mag=3.76,station_count=3i 1613957893840000000
# earthquakes,agency=US,type=mwr depth=7000,eventid="2021dmpg",lat=36.96366667,lon=-98.09383333,mag=4.2,station_count=39i 1613743017950000000
# earthquakes,agency=us,type=mww depth=5590,eventid="6000dhxn",lat=-17.8192,lon=167.5901,mag=6.2,station_count=24i 1613436564078000000
# earthquakes,agency=us,type=mww depth=49940,eventid="6000dher",lat=37.7453,lon=141.7494,mag=7.1,station_count=74i 1613225270397000000
# earthquakes,agency=us,type=mww depth=98950,eventid="6000dh48",lat=38.1314,lon=73.545,mag=5.9,station_count=34i 1613149295308000000
# earthquakes,agency=us,type=mww depth=10000,eventid="6000dg77",lat=-23.0508,lon=171.657,mag=7.7,station_count=54i 1612963195532000000
#

# Select every <event> element in the document; the expected output above
# lists one metric line per selected event.
metric_selection = "//event"
# Constant measurement name shared by all metrics.
metric_name = "string('earthquakes')"

# Convert from milliseconds to nanoseconds as golang unfortunately
# only supports RFC3339 with second OR nanosecond precision.
# The replace() call does the conversion textually by turning the trailing
# 'Z' into '000000Z', so the value lines up with timestamp_format below.
timestamp = "replace(normalize-space(origin/time), 'Z', '000000Z')"
timestamp_format = "2006-01-02T15:04:05.999999999Z"

# Regular fields. In the expected output the number() queries show up as
# numeric values and the attribute query as a quoted string.
[fields]
  eventid = "@catalog:eventid"
  lon = "number(origin/longitude/value)"
  lat = "number(origin/latitude/value)"
  depth = "number(origin/depth/value)"
  mag = "number(magnitude/mag/value)"

# Integer fields (written with an 'i' suffix in the expected output).
# An event for which this query yields no value is emitted without the
# field, see event "0212o88mof" in the expected output.
[fields_int]
  station_count = "magnitude/stationCount"

# Tags. As with the integer fields, a tag whose query yields no value is
# left out of the metric (event "0212o88mof" has no agency tag).
[tags]
  agency = "magnitude/creationInfo/agencyID"
  type = "magnitude/type"
Loading

0 comments on commit 808b7c1

Please sign in to comment.