From 30c89f8982c0c68d4fabf3c4a05702a07e54fa22 Mon Sep 17 00:00:00 2001
From: Pavel Shaydo
Date: Wed, 15 Jan 2020 22:43:45 +0100
Subject: [PATCH] parse input containing NaN and Infinity values

The JSON standard does not allow NaN or Infinity, but some applications
produce output that contains such values. One example of such an
application is TensorFlow Serving[1]. An example of a JSON parser that
accepts NaN and Infinity is the one that comes with Python[2].

This commit introduces a new configuration parameter, AllowNaN, that
enables jsoniter to unmarshal input containing NaN and Infinity.

1. https://www.tensorflow.org/tfx/serving/api_rest#json_conformance
2. https://docs.python.org/3/library/json.html#infinite-and-nan-number-values
---
 config.go                 |  3 +++
 iter.go                   |  2 ++
 iter_float.go             | 50 +++++++++++++++++++++++++++++++++++++
 value_tests/float_test.go | 52 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 107 insertions(+)

diff --git a/config.go b/config.go
index 8c58fcba..44aa0d94 100644
--- a/config.go
+++ b/config.go
@@ -25,6 +25,8 @@ type Config struct {
 	ValidateJsonRawMessage        bool
 	ObjectFieldMustBeSimpleString bool
 	CaseSensitive                 bool
+	// AllowNaN parses input that contains non-standard NaN and Infinity values
+	AllowNaN bool
 }
 
 // API the public interface of this package.
@@ -49,6 +51,7 @@ type API interface {
 // ConfigDefault the default API
 var ConfigDefault = Config{
 	EscapeHTML: true,
+	AllowNaN:   true,
 }.Froze()
 
 // ConfigCompatibleWithStandardLibrary tries to be 100% compatible with standard library behavior
diff --git a/iter.go b/iter.go
index 29b31cf7..3b6688c2 100644
--- a/iter.go
+++ b/iter.go
@@ -59,6 +59,8 @@ func init() {
 	valueTypes['7'] = NumberValue
 	valueTypes['8'] = NumberValue
 	valueTypes['9'] = NumberValue
+	valueTypes['N'] = NumberValue
+	valueTypes['I'] = NumberValue
 	valueTypes['t'] = BoolValue
 	valueTypes['f'] = BoolValue
 	valueTypes['n'] = NilValue
diff --git a/iter_float.go b/iter_float.go
index b9754638..faf6013f 100644
--- a/iter_float.go
+++ b/iter_float.go
@@ -156,6 +156,52 @@ non_decimal_loop:
 	return iter.readFloat32SlowPath()
 }
 
+var nanBytes = []byte("NaN")
+
+func (iter *Iterator) readNaN() (ret []byte) {
+	for _, b := range nanBytes {
+		if iter.readByte() != b {
+			iter.ReportError("readNaN", "expect NaN")
+			return
+		}
+	}
+	if !iter.cfg.configBeforeFrozen.AllowNaN { // literal matched, but the config forbids it
+		iter.ReportError("readNaN", "invalid number, AllowNaN is not set")
+		return
+	}
+	return nanBytes
+}
+
+var infinityBytes = []byte("Infinity")
+
+func (iter *Iterator) readInfinity() (ret []byte) {
+	for _, b := range infinityBytes {
+		if iter.readByte() != b {
+			iter.ReportError("readInfinity", "expect Infinity")
+			return
+		}
+	}
+	if !iter.cfg.configBeforeFrozen.AllowNaN { // literal matched, but the config forbids it
+		iter.ReportError("readInfinity", "invalid number, AllowNaN is not set")
+		return
+	}
+	return infinityBytes
+}
+
+func (iter *Iterator) readNaNOrInf(str []byte) []byte {
+	switch iter.buf[iter.head] {
+	case 'N': // NaN is only read when nothing has been collected before it
+		if len(str) == 0 {
+			str = iter.readNaN()
+		}
+	case 'I': // Infinity is read when it stands alone or follows a single '-'
+		if len(str) == 0 || (len(str) == 1 && str[0] == '-') {
+			str = append(str, iter.readInfinity()...)
+		}
+	}
+	return str
+}
+
 func (iter *Iterator) readNumberAsString() (ret string) {
 	strBuf := [16]byte{}
 	str := strBuf[0:0]
@@ -167,6 +213,10 @@ load_loop:
 			case '+', '-', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 				str = append(str, c)
 				continue
+			case 'N', 'I':
+				iter.head = i
+				str = iter.readNaNOrInf(str)
+				break load_loop
 			default:
 				iter.head = i
 				break load_loop
diff --git a/value_tests/float_test.go b/value_tests/float_test.go
index 3c00b269..f836c492 100644
--- a/value_tests/float_test.go
+++ b/value_tests/float_test.go
@@ -6,10 +6,62 @@ import (
 	"fmt"
 	"github.com/json-iterator/go"
 	"github.com/stretchr/testify/require"
+	"math"
 	"strconv"
 	"testing"
 )
 
+func Test_NaN_Inf(t *testing.T) {
+	cases := []struct {
+		json  string
+		check func(float64) bool
+	}{
+		{
+			json:  "NaN",
+			check: math.IsNaN,
+		},
+		{
+			json:  "-Infinity",
+			check: func(f float64) bool { return math.IsInf(f, -1) },
+		},
+		{
+			json:  "Infinity",
+			check: func(f float64) bool { return math.IsInf(f, 1) },
+		},
+	}
+
+	for _, tc := range cases {
+		iter := jsoniter.ParseString(jsoniter.ConfigDefault, tc.json+",")
+		if res := iter.ReadFloat64(); !tc.check(res) || iter.Error != nil {
+			t.Errorf("couldn't parse %s, got %f (%v)", tc.json, res, iter.Error)
+		}
+		iterStd := jsoniter.ParseString(jsoniter.ConfigCompatibleWithStandardLibrary, tc.json+",")
+		res := iterStd.Read()
+		if iterStd.Error == nil {
+			t.Errorf("standard compatible parser should have returned an error for %s, but got %v",
+				tc.json, res)
+		}
+		cfgNum := jsoniter.Config{
+			EscapeHTML: true,
+			AllowNaN:   true,
+			UseNumber:  true,
+		}.Froze()
+		iterNum := jsoniter.ParseString(cfgNum, tc.json+",")
+		if res := iterNum.ReadNumber(); iterNum.Error != nil || string(res) != tc.json {
+			t.Errorf("expected to get %s as string, but got %v (%v)", tc.json, res, iterNum.Error)
+		}
+	}
+
+	// those strings should result in an error
+	invalid := []string{"NAN", "None", "Infinite", "nan", "infinity"}
+	for _, str := range invalid {
+		iter := jsoniter.ParseString(jsoniter.ConfigDefault, str+",")
+		if res := iter.ReadFloat64(); iter.Error == nil {
+			t.Errorf("expected %s result in error, got %f", str, res)
+		}
+	}
+}
+
 func Test_read_float(t *testing.T) {
 	inputs := []string{
 		`1.1`, `1000`, `9223372036854775807`, `12.3`, `-12.3`, `720368.54775807`, `720368.547758075`,