From 808b7c1ba540fecb51b1825f746954a826ba2c62 Mon Sep 17 00:00:00 2001 From: Sven Rebhan <36194019+srebhan@users.noreply.github.com> Date: Tue, 16 Mar 2021 21:15:18 +0100 Subject: [PATCH] Improve xml parser's handling of missing values (#8971) --- plugins/parsers/xml/parser.go | 8 ++++ plugins/parsers/xml/parser_test.go | 24 +++++----- .../parsers/xml/testcases/earthquakes.conf | 44 +++++++++++++++++++ .../parsers/xml/testcases/earthquakes.quakeml | 20 +++++++++ 4 files changed, 86 insertions(+), 10 deletions(-) create mode 100644 plugins/parsers/xml/testcases/earthquakes.conf create mode 100644 plugins/parsers/xml/testcases/earthquakes.quakeml diff --git a/plugins/parsers/xml/parser.go b/plugins/parsers/xml/parser.go index bbe99286bafdd..8ee002ff3b0e7 100644 --- a/plugins/parsers/xml/parser.go +++ b/plugins/parsers/xml/parser.go @@ -58,6 +58,7 @@ func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) { p.debugEmptyQuery("metric selection", doc, config.Selection) return nil, fmt.Errorf("cannot parse with empty selection node") } + p.Log.Debugf("Number of selected metric nodes: %d", len(selectedNodes)) for _, selected := range selectedNodes { m, err := p.parseQuery(t, doc, selected, config) @@ -165,6 +166,8 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected *xmlquery.Node, c case float64: // Assume the value to contain a timestamp in seconds and fractions thereof. timestamp = time.Unix(0, int64(v.(float64)*1e9)) + case nil: + // No timestamp found. Just ignore the time and use "starttime" default: return nil, fmt.Errorf("unknown format '%T' for timestamp query '%v'", v, config.Timestamp) } @@ -185,6 +188,8 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected *xmlquery.Node, c tags[name] = strconv.FormatBool(v.(bool)) case float64: tags[name] = strconv.FormatFloat(v.(float64), 'G', -1, 64) + case nil: + continue default: return nil, fmt.Errorf("unknown format '%T' for tag '%s'", v, name) } @@ -214,6 +219,8 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected *xmlquery.Node, c } case float64: fields[name] = int64(v.(float64)) + case nil: + continue default: return nil, fmt.Errorf("unknown format '%T' for field (int) '%s'", v, name) } @@ -244,6 +251,7 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected *xmlquery.Node, c if err != nil { return nil, err } + p.Log.Debugf("Number of selected field nodes: %d", len(selectedFieldNodes)) if len(selectedFieldNodes) > 0 && selectedFieldNodes[0] != nil { for _, selectedfield := range selectedFieldNodes { n, err := executeQuery(doc, selectedfield, fieldnamequery) diff --git a/plugins/parsers/xml/parser_test.go b/plugins/parsers/xml/parser_test.go index 91896172d3679..023e9d20a6090 100644 --- a/plugins/parsers/xml/parser_test.go +++ b/plugins/parsers/xml/parser_test.go @@ -126,7 +126,7 @@ func TestParseInvalidXML(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags} + parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}} _, err := parser.ParseLine(tt.input) require.Error(t, err) @@ -162,7 +162,7 @@ func TestInvalidTypeQueriesFail(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags} + parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}} _, err := parser.ParseLine(tt.input) require.Error(t, err) @@ -227,7 +227,7 @@ func TestInvalidTypeQueries(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags} + parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}} actual, err := parser.ParseLine(tt.input) require.NoError(t, err) @@ -356,7 +356,7 @@ func TestParseTimestamps(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags} + parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}} actual, err := parser.ParseLine(tt.input) require.NoError(t, err) @@ -560,7 +560,7 @@ func TestParseSingleValues(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags} + parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}} actual, err := parser.ParseLine(tt.input) require.NoError(t, err) @@ -771,7 +771,7 @@ func TestParseSingleAttributes(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags} + parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}} actual, err := parser.ParseLine(tt.input) require.NoError(t, err) @@ -857,7 +857,7 @@ func TestParseMultiValues(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags} + parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}} actual, err := parser.ParseLine(tt.input) require.NoError(t, err) @@ -969,7 +969,7 @@ func TestParseMultiNodes(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags} + parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}} actual, err := parser.Parse([]byte(tt.input)) require.NoError(t, err) @@ -1014,7 +1014,7 @@ func TestParseMetricQuery(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags} + parser := &Parser{Configs: tt.configs, DefaultTags: tt.defaultTags, Log: testutil.Logger{Name: "parsers.xml"}} actual, err := parser.ParseLine(tt.input) require.NoError(t, err) @@ -1114,6 +1114,10 @@ func TestTestCases(t *testing.T) { name: "openweathermap forecast", filename: "testcases/openweathermap.conf", }, + { + name: "earthquakes quakeml", + filename: "testcases/earthquakes.conf", + }, } parser := influx.NewParser(influx.NewMetricHandler()) @@ -1141,7 +1145,7 @@ func TestTestCases(t *testing.T) { expectedErrors, _ := testutil.ParseRawLinesFrom(header, "Expected Error:") // Setup the parser and run it. - parser := Parser{Configs: []Config{*cfg}} + parser := &Parser{Configs: []Config{*cfg}, Log: testutil.Logger{Name: "parsers.xml"}} outputs, err := parser.Parse(content) if len(expectedErrors) == 0 { require.NoError(t, err) diff --git a/plugins/parsers/xml/testcases/earthquakes.conf b/plugins/parsers/xml/testcases/earthquakes.conf new file mode 100644 index 0000000000000..8f02f4384721f --- /dev/null +++ b/plugins/parsers/xml/testcases/earthquakes.conf @@ -0,0 +1,44 @@ +# Example for parsing QuakeML measurement data. +# +# File: +# testcases/earthquakes.quakeml +# +# Expected Output: +# earthquakes,agency=us,type=mww depth=13000,eventid="7000dg8x",lat=-37.6099,lon=179.6102,mag=6.3,station_count=33i 1614989782185000000 +# earthquakes,agency=us,type=mww depth=17000,eventid="7000dft1",lat=-28.7146,lon=-176.5582,mag=6.3,station_count=15i 1614911436571000000 +# earthquakes,agency=us,type=mww depth=26450,eventid="7000dflf",lat=-29.7347,lon=-177.2817,mag=8.1,station_count=81i 1614886112819000000 +# earthquakes,agency=us,type=mb depth=10000,eventid="7000dfku",lat=39.7886,lon=22.1189,mag=5.8,station_count=279i 1614883099415000000 +# earthquakes,agency=us,type=mww depth=53090,eventid="7000dfk3",lat=-29.6647,lon=-177.8343,mag=7.4,station_count=40i 1614879684425000000 +# earthquakes,agency=us,type=mww depth=20780,eventid="7000dffl",lat=-37.5628,lon=179.4443,mag=7.3,station_count=33i 1614864456464000000 +# earthquakes,agency=us,type=mww depth=10000,eventid="7000df40",lat=39.7641,lon=22.1756,mag=6.3,station_count=81i 1614766570197000000 +# earthquakes,type=mww depth=42100,eventid="0212o88mof",lat=61.3286,lon=-149.9991,mag=5.3 1614452365398000000 +# earthquakes,agency=us,type=mww depth=10000,eventid="6000dkmk",lat=63.9602,lon=-22.2736,mag=5.6,station_count=64i 1614161159873000000 +# earthquakes,agency=NC,type=mw depth=6220,eventid="73526151",lat=37.0456667,lon=-121.4781667,mag=3.76,station_count=3i 1613957893840000000 +# earthquakes,agency=US,type=mwr depth=7000,eventid="2021dmpg",lat=36.96366667,lon=-98.09383333,mag=4.2,station_count=39i 1613743017950000000 +# earthquakes,agency=us,type=mww depth=5590,eventid="6000dhxn",lat=-17.8192,lon=167.5901,mag=6.2,station_count=24i 1613436564078000000 +# earthquakes,agency=us,type=mww depth=49940,eventid="6000dher",lat=37.7453,lon=141.7494,mag=7.1,station_count=74i 1613225270397000000 +# earthquakes,agency=us,type=mww depth=98950,eventid="6000dh48",lat=38.1314,lon=73.545,mag=5.9,station_count=34i 1613149295308000000 +# earthquakes,agency=us,type=mww depth=10000,eventid="6000dg77",lat=-23.0508,lon=171.657,mag=7.7,station_count=54i 1612963195532000000 +# + +metric_selection = "//event" +metric_name = "string('earthquakes')" + +# Convert from milliseconds to nanoseconds as golang unfortunately +# only supports RFC3339 with second OR nanosecond precision. +timestamp = "replace(normalize-space(origin/time), 'Z', '000000Z')" +timestamp_format = "2006-01-02T15:04:05.999999999Z" + +[fields] + eventid = "@catalog:eventid" + lon = "number(origin/longitude/value)" + lat = "number(origin/latitude/value)" + depth = "number(origin/depth/value)" + mag = "number(magnitude/mag/value)" + +[fields_int] + station_count = "magnitude/stationCount" + +[tags] + agency = "magnitude/creationInfo/agencyID" + type = "magnitude/type" diff --git a/plugins/parsers/xml/testcases/earthquakes.quakeml b/plugins/parsers/xml/testcases/earthquakes.quakeml new file mode 100644 index 0000000000000..fa5f5fb7f63bd --- /dev/null +++ b/plugins/parsers/xml/testcases/earthquakes.quakeml @@ -0,0 +1,20 @@ + + + +earthquake name182 km NE of Gisborne, New Zealand179.6102-37.60991300017008100horizontal uncertainty2901.04341.036manualus2021-03-09T03:01:59.040Z6.30.054mww33quakeml:earthquake.usgs.gov/realtime/product/origin/us7000dg8x/us/1615258919040/product.xmlmanualus2021-03-09T03:01:59.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dg8x/us/1615258919040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dg8x/us/1615258919040/product.xml#magnitudeearthquakeus2021-03-09T03:05:51.084Z +earthquake nameKermadec Islands region-176.5582-28.71461700018009800horizontal uncertainty891.25419.815manualus2021-03-05T18:47:44.040Z6.30.08mww15quakeml:earthquake.usgs.gov/realtime/product/origin/us7000dft1/us/1614970064040/product.xmlmanualus2021-03-05T18:47:44.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dft1/us/1614970064040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dft1/us/1614970064040/product.xml#magnitudeearthquakeus2021-03-06T02:34:07.561Z +earthquake nameKermadec Islands, New Zealand-177.2817-29.73472645037007800horizontal uncertainty1300.67210.746manualus2021-03-05T18:08:31.040Z8.10.034mww81quakeml:earthquake.usgs.gov/realtime/product/origin/us7000dflf/us/1614967711040/product.xmlmanualus2021-03-05T18:08:31.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dflf/us/1614967711040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dflf/us/1614967711040/product.xml#magnitudeearthquakeus2021-03-09T18:52:08.298Z +earthquake nameGreece22.118939.78861000018005200horizontal uncertainty1400.9190.424manualus2021-03-05T15:03:03.040Z5.80.036mb279quakeml:earthquake.usgs.gov/realtime/product/origin/us7000dfku/us/1614956583040/product.xmlmanualus2021-03-05T15:03:03.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dfku/us/1614956583040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dfku/us/1614956583040/product.xml#magnitudeearthquakeus2021-03-07T08:43:06.987Z +earthquake nameKermadec Islands, New Zealand-177.8343-29.66475309036007800horizontal uncertainty1321.14300.426manualus2021-03-05T13:49:34.040Z7.40.049mww40quakeml:earthquake.usgs.gov/realtime/product/origin/us7000dfk3/us/1614952174040/product.xmlmanualus2021-03-05T13:49:34.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dfk3/us/1614952174040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dfk3/us/1614952174040/product.xml#magnitudeearthquakeus2021-03-09T18:42:04.756Z +earthquake name174 km NE of Gisborne, New Zealand179.4443-37.56282078032006600horizontal uncertainty1411.35230.904manualus2021-03-04T15:08:47.040Z7.30.054mww33quakeml:earthquake.usgs.gov/realtime/product/origin/us7000dffl/us/1614870527040/product.xmlmanualus2021-03-04T15:08:47.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dffl/us/1614870527040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us7000dffl/us/1614870527040/product.xml#magnitudeearthquakeus2021-03-10T21:54:32.975Z +earthquake name10 km WNW of Týrnavos, Greece22.175639.76411000018005400horizontal uncertainty1291.05170.415manualus2021-03-03T10:31:58.040Z6.30.034mww81quakeml:earthquake.usgs.gov/realtime/product/origin/us7000df40/us/1614767518040/product.xmlmanualus2021-03-03T10:31:58.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us7000df40/us/1614767518040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us7000df40/us/1614767518040/product.xml#magnitudeearthquakeus2021-03-08T04:19:29.249Z +earthquake name3 km SSW of Point MacKenzie, Alaska-149.999161.3286421003000horizontal uncertainty1340.86manual2021-02-27T19:20:59.442Z25.3mwwquakeml:earthquake.usgs.gov/realtime/product/origin/AK0212o88mof/ak/1614453659442/product.xmlmanual2021-02-27T19:20:59.442Zquakeml:earthquake.usgs.gov/realtime/product/origin/AK0212o88mof/ak/1614453659442/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/AK0212o88mof/ak/1614453659442/product.xml#magnitudeearthquakeak2021-03-10T19:09:33.840Z2 +earthquake name5 km ESE of Vogar, Iceland-22.273663.96021000018005600horizontal uncertainty1291.22460.891manualus2021-02-24T15:05:24.040Z5.60.039mww64quakeml:earthquake.usgs.gov/realtime/product/origin/us6000dkmk/us/1614179124040/product.xmlmanualus2021-02-24T15:05:24.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dkmk/us/1614179124040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dkmk/us/1614179124040/product.xml#magnitudeearthquakeus2021-03-07T02:32:18.760Z +earthquake name9km ENE of Gilroy, CA-121.478166737.0456667622024090horizontal uncertainty1781640.15330.02089manualNC2021-02-23T00:54:06.560Z103.76mw3quakeml:earthquake.usgs.gov/realtime/product/origin/nc73526151/nc/1614041646560/product.xmlmanualNC2021-02-23T00:54:06.560Zquakeml:earthquake.usgs.gov/realtime/product/origin/nc73526151/nc/1614041646560/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/nc73526151/nc/1614041646560/product.xml#magnitudeearthquakenc2021-03-04T06:33:36.782Z10 +earthquake name6 km SW of Manchester, Oklahoma-98.0938333336.9636666770003000horizontal uncertainty182980.15960manualOK2021-02-19T14:42:10.861Z4.2mwr39quakeml:earthquake.usgs.gov/realtime/product/origin/ogs2021dmpg/ok/1613745730861/product.xmlmanualUS2021-02-19T14:42:10.861Zquakeml:earthquake.usgs.gov/realtime/product/origin/ogs2021dmpg/ok/1613745730861/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/ogs2021dmpg/ok/1613745730861/product.xml#magnitudeearthquakeok2021-03-05T02:13:24.659Z +earthquake name77 km W of Port-Vila, Vanuatu167.5901-17.8192559033007400horizontal uncertainty3860.86323.666manualus2021-02-19T03:36:41.040Z6.20.063mww24quakeml:earthquake.usgs.gov/realtime/product/origin/us6000dhxn/us/1613705801040/product.xmlmanualus2021-02-19T03:36:41.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dhxn/us/1613705801040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dhxn/us/1613705801040/product.xml#magnitudeearthquakeus2021-03-04T11:07:03.880Z +earthquake name72 km ENE of Namie, Japan141.749437.74534994035007000horizontal uncertainty1441.12333.073manualus2021-02-14T22:04:22.040Z7.10.036mww74quakeml:earthquake.usgs.gov/realtime/product/origin/us6000dher/us/1613340262040/product.xmlmanualus2021-02-14T22:04:22.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dher/us/1613340262040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dher/us/1613340262040/product.xml#magnitudeearthquakeus2021-03-05T13:32:14.760Z +earthquake name37 km W of Murghob, Tajikistan73.54538.13149895012005400horizontal uncertainty2980.91161.915manualus2021-02-18T17:53:33.040Z5.90.053mww34quakeml:earthquake.usgs.gov/realtime/product/origin/us6000dh48/us/1613670813040/product.xmlmanualus2021-02-18T17:53:33.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dh48/us/1613670813040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dh48/us/1613670813040/product.xml#magnitudeearthquakeus2021-03-04T10:24:38.562Z +earthquake namesoutheast of the Loyalty Islands171.657-23.05081000018007800horizontal uncertainty2700.42157.988manualus2021-03-08T07:54:50.040Z7.70.042mww54quakeml:earthquake.usgs.gov/realtime/product/origin/us6000dg77/us/1615190090040/product.xmlmanualus2021-03-08T07:54:50.040Zquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dg77/us/1615190090040/product.xmlquakeml:earthquake.usgs.gov/realtime/product/origin/us6000dg77/us/1615190090040/product.xml#magnitudeearthquakeus2021-03-08T08:07:24.427Z +2021-03-11T11:55:37.000Z + \ No newline at end of file