From 3e82e72268b9a088dfa207f7fdbe26eaea9433b0 Mon Sep 17 00:00:00 2001 From: Mikhail Deshevoy Date: Mon, 1 Apr 2024 05:36:51 +0300 Subject: [PATCH 1/2] Use ReadStringAsSlice instead of ReadString - prevent string([]byte) escapes to heap --- any.go | 5 +-- extra/binary_as_string_codec.go | 19 +++++----- iter_object.go | 19 +++++----- iter_skip_strict.go | 5 +-- iter_str.go | 56 ++++++++---------------------- misc_tests/jsoniter_object_test.go | 7 ++-- reflect_json_number.go | 11 +++--- reflect_map.go | 2 +- reflect_marshaler.go | 4 +-- reflect_native.go | 7 ++-- reflect_struct_decoder.go | 17 ++++----- 11 files changed, 63 insertions(+), 89 deletions(-) diff --git a/any.go b/any.go index f6b8aeab..6c81af88 100644 --- a/any.go +++ b/any.go @@ -3,11 +3,12 @@ package jsoniter import ( "errors" "fmt" - "github.com/modern-go/reflect2" "io" "reflect" "strconv" "unsafe" + + "github.com/modern-go/reflect2" ) // Any generic object representation. @@ -155,7 +156,7 @@ func (iter *Iterator) readAny() Any { switch c { case '"': iter.unreadByte() - return &stringAny{baseAny{}, iter.ReadString()} + return &stringAny{baseAny{}, string(iter.ReadStringAsSlice())} case 'n': iter.skipThreeBytes('u', 'l', 'l') // null return &nilAny{} diff --git a/extra/binary_as_string_codec.go b/extra/binary_as_string_codec.go index 13bc97ea..364a2f71 100644 --- a/extra/binary_as_string_codec.go +++ b/extra/binary_as_string_codec.go @@ -1,10 +1,12 @@ package extra import ( - "github.com/json-iterator/go" - "github.com/modern-go/reflect2" "unicode/utf8" "unsafe" + + "github.com/modern-go/reflect2" + + "github.com/json-iterator/go" ) // safeSet holds the value true if the ASCII character with the given array @@ -142,19 +144,14 @@ func (codec *binaryAsStringCodec) Decode(ptr unsafe.Pointer, iter *jsoniter.Iter b := rawBytes[i] if b == '\\' { b2 := rawBytes[i+1] - if b2 != '\\' { - iter.ReportError("decode binary as string", `\\x is only supported escape`) + if b2 != 'x' { + iter.ReportError("decode 
binary as string", `\x is only supported escape`) return } b3 := rawBytes[i+2] - if b3 != 'x' { - iter.ReportError("decode binary as string", `\\x is only supported escape`) - return - } b4 := rawBytes[i+3] - b5 := rawBytes[i+4] - i += 4 - b = readHex(iter, b4, b5) + i += 3 + b = readHex(iter, b3, b4) } bytes = append(bytes, b) } diff --git a/iter_object.go b/iter_object.go index 58ee89c8..79971d0f 100644 --- a/iter_object.go +++ b/iter_object.go @@ -59,7 +59,7 @@ func (iter *Iterator) readFieldHash() int64 { b := iter.buf[i] if b == '\\' { iter.head = i - for _, b := range iter.readStringSlowPath() { + for _, b := range iter.readStringSlowPathAsSlice() { if 'A' <= b && b <= 'Z' && !iter.cfg.caseSensitive { b += 'a' - 'A' } @@ -110,7 +110,6 @@ func calcHash(str string, caseSensitive bool) int64 { // ReadObjectCB read object with callback, the key is ascii only and field name not copied func (iter *Iterator) ReadObjectCB(callback func(*Iterator, string) bool) bool { c := iter.nextToken() - var field string if c == '{' { if !iter.incrementDepth() { return false @@ -118,23 +117,23 @@ func (iter *Iterator) ReadObjectCB(callback func(*Iterator, string) bool) bool { c = iter.nextToken() if c == '"' { iter.unreadByte() - field = iter.ReadString() + field := string(iter.ReadStringAsSlice()) c = iter.nextToken() if c != ':' { iter.ReportError("ReadObject", "expect : after object field, but found "+string([]byte{c})) } if !callback(iter, field) { iter.decrementDepth() return false } c = iter.nextToken() for c == ',' { - field = iter.ReadString() + field = string(iter.ReadStringAsSlice()) c = iter.nextToken() if c != ':' { iter.ReportError("ReadObject", "expect : after object field, but found "+string([]byte{c})) } if !callback(iter, field) { iter.decrementDepth() return false } @@ -172,25 +171,25 @@ func (iter *Iterator) ReadMapCB(callback func(*Iterator, string) bool) bool { c = iter.nextToken() if c == '"' { 
iter.unreadByte() - field := iter.ReadString() + field := string(iter.ReadStringAsSlice()) if iter.nextToken() != ':' { iter.ReportError("ReadMapCB", "expect : after object field, but found "+string([]byte{c})) iter.decrementDepth() return false } if !callback(iter, field) { iter.decrementDepth() return false } c = iter.nextToken() for c == ',' { - field = iter.ReadString() + field = string(iter.ReadStringAsSlice()) if iter.nextToken() != ':' { iter.ReportError("ReadMapCB", "expect : after object field, but found "+string([]byte{c})) iter.decrementDepth() return false } if !callback(iter, field) { iter.decrementDepth() return false } diff --git a/iter_skip_strict.go b/iter_skip_strict.go index 6cf66d04..af79fc08 100644 --- a/iter_skip_strict.go +++ b/iter_skip_strict.go @@ -1,4 +1,5 @@ -//+build !jsoniter_sloppy +//go:build !jsoniter_sloppy +// +build !jsoniter_sloppy package jsoniter @@ -61,7 +62,7 @@ func (iter *Iterator) trySkipNumber() bool { func (iter *Iterator) skipString() { if !iter.trySkipString() { iter.unreadByte() - iter.ReadString() + iter.ReadStringAsSlice() } } diff --git a/iter_str.go b/iter_str.go index adc487ea..f348023d 100644 --- a/iter_str.go +++ b/iter_str.go @@ -7,38 +7,44 @@ import ( // ReadString read string from iterator func (iter *Iterator) ReadString() (ret string) { + return string(iter.ReadStringAsSlice()) +} + +// ReadStringAsSlice read string from iterator without copying into string form. +// The []byte can not be kept, as it will change after next iterator call. 
+func (iter *Iterator) ReadStringAsSlice() (ret []byte) { c := iter.nextToken() if c == '"' { for i := iter.head; i < iter.tail; i++ { c := iter.buf[i] if c == '"' { - ret = string(iter.buf[iter.head:i]) + head := iter.head iter.head = i + 1 - return ret + return iter.buf[head:i] } else if c == '\\' { break } else if c < ' ' { - iter.ReportError("ReadString", + iter.ReportError("ReadStringAsSlice", fmt.Sprintf(`invalid control character found: %d`, c)) return } } - return iter.readStringSlowPath() + return iter.readStringSlowPathAsSlice() } else if c == 'n' { iter.skipThreeBytes('u', 'l', 'l') - return "" + return } - iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c})) + iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c})) return } -func (iter *Iterator) readStringSlowPath() (ret string) { +func (iter *Iterator) readStringSlowPathAsSlice() (ret []byte) { var str []byte var c byte for iter.Error == nil { c = iter.readByte() if c == '"' { - return string(str) + return str } if c == '\\' { c = iter.readByte() @@ -47,7 +53,7 @@ func (iter *Iterator) readStringSlowPath() (ret string) { str = append(str, c) } } - iter.ReportError("readStringSlowPath", "unexpected end of input") + iter.ReportError("readStringSlowPathAsSlice", "unexpected end of input") return } @@ -111,38 +117,6 @@ func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte { return str } -// ReadStringAsSlice read string from iterator without copying into string form. -// The []byte can not be kept, as it will change after next iterator call. 
-func (iter *Iterator) ReadStringAsSlice() (ret []byte) { - c := iter.nextToken() - if c == '"' { - for i := iter.head; i < iter.tail; i++ { - // require ascii string and no escape - // for: field name, base64, number - if iter.buf[i] == '"' { - // fast path: reuse the underlying buffer - ret = iter.buf[iter.head:i] - iter.head = i + 1 - return ret - } - } - readLen := iter.tail - iter.head - copied := make([]byte, readLen, readLen*2) - copy(copied, iter.buf[iter.head:iter.tail]) - iter.head = iter.tail - for iter.Error == nil { - c := iter.readByte() - if c == '"' { - return copied - } - copied = append(copied, c) - } - return copied - } - iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c})) - return -} - func (iter *Iterator) readU4() (ret rune) { for i := 0; i < 4; i++ { c := iter.readByte() diff --git a/misc_tests/jsoniter_object_test.go b/misc_tests/jsoniter_object_test.go index 00807bae..9c81d410 100644 --- a/misc_tests/jsoniter_object_test.go +++ b/misc_tests/jsoniter_object_test.go @@ -3,12 +3,13 @@ package misc_tests import ( "bytes" "reflect" + "strings" "testing" + "time" - "github.com/json-iterator/go" "github.com/stretchr/testify/require" - "strings" - "time" + + "github.com/json-iterator/go" ) func Test_empty_object(t *testing.T) { diff --git a/reflect_json_number.go b/reflect_json_number.go index 98d45c1e..64a3dc3c 100644 --- a/reflect_json_number.go +++ b/reflect_json_number.go @@ -2,9 +2,10 @@ package jsoniter import ( "encoding/json" - "github.com/modern-go/reflect2" "strconv" "unsafe" + + "github.com/modern-go/reflect2" ) type Number string @@ -61,7 +62,7 @@ type jsonNumberCodec struct { func (codec *jsonNumberCodec) Decode(ptr unsafe.Pointer, iter *Iterator) { switch iter.WhatIsNext() { case StringValue: - *((*json.Number)(ptr)) = json.Number(iter.ReadString()) + *((*json.Number)(ptr)) = json.Number(iter.ReadStringAsSlice()) case NilValue: iter.skipFourBytes('n', 'u', 'l', 'l') *((*json.Number)(ptr)) = "" @@ 
-89,12 +90,14 @@ type jsoniterNumberCodec struct { func (codec *jsoniterNumberCodec) Decode(ptr unsafe.Pointer, iter *Iterator) { switch iter.WhatIsNext() { case StringValue: - *((*Number)(ptr)) = Number(iter.ReadString()) + num := iter.ReadStringAsSlice() + *((*Number)(ptr)) = Number(string(num)) case NilValue: iter.skipFourBytes('n', 'u', 'l', 'l') *((*Number)(ptr)) = "" default: - *((*Number)(ptr)) = Number([]byte(iter.readNumberAsString())) + num := iter.readNumberAsString() + *((*Number)(ptr)) = Number(num) } } diff --git a/reflect_map.go b/reflect_map.go index 4e479c8a..dba88931 100644 --- a/reflect_map.go +++ b/reflect_map.go @@ -305,7 +305,7 @@ func (encoder *sortKeysMapEncoder) Encode(ptr unsafe.Pointer, stream *Stream) { } encodedKey := subStream.Buffer()[subStreamIndex:] subIter.ResetBytes(encodedKey) - decodedKey := subIter.ReadString() + decodedKey := string(subIter.ReadStringAsSlice()) if stream.indention > 0 { subStream.writeTwoBytes(byte(':'), byte(' ')) } else { diff --git a/reflect_marshaler.go b/reflect_marshaler.go index 3e21f375..2aea694f 100644 --- a/reflect_marshaler.go +++ b/reflect_marshaler.go @@ -217,8 +217,8 @@ func (decoder *textUnmarshalerDecoder) Decode(ptr unsafe.Pointer, iter *Iterator obj = valType.UnsafeIndirect(ptr) } unmarshaler := (obj).(encoding.TextUnmarshaler) - str := iter.ReadString() - err := unmarshaler.UnmarshalText([]byte(str)) + str := iter.ReadStringAsSlice() + err := unmarshaler.UnmarshalText(str) if err != nil { iter.ReportError("textUnmarshalerDecoder", err.Error()) } diff --git a/reflect_native.go b/reflect_native.go index f88722d1..68a0a7f8 100644 --- a/reflect_native.go +++ b/reflect_native.go @@ -206,7 +206,8 @@ type stringCodec struct { } func (codec *stringCodec) Decode(ptr unsafe.Pointer, iter *Iterator) { - *((*string)(ptr)) = iter.ReadString() + data := iter.ReadStringAsSlice() + *((*string)(ptr)) = string(data) } func (codec *stringCodec) Encode(ptr unsafe.Pointer, stream *Stream) { @@ -417,8 
+418,8 @@ func (codec *base64Codec) Decode(ptr unsafe.Pointer, iter *Iterator) { } switch iter.WhatIsNext() { case StringValue: - src := iter.ReadString() - dst, err := base64.StdEncoding.DecodeString(src) + src := iter.ReadStringAsSlice() + dst, err := base64.StdEncoding.DecodeString(string(src)) if err != nil { iter.ReportError("decode base64", err.Error()) } else { diff --git a/reflect_struct_decoder.go b/reflect_struct_decoder.go index 92ae912d..b8e72809 100644 --- a/reflect_struct_decoder.go +++ b/reflect_struct_decoder.go @@ -522,16 +522,12 @@ func (decoder *generalStructDecoder) decodeOneField(ptr unsafe.Pointer, iter *It if iter.cfg.objectFieldMustBeSimpleString { fieldBytes := iter.ReadStringAsSlice() field = *(*string)(unsafe.Pointer(&fieldBytes)) - fieldDecoder = decoder.fields[field] - if fieldDecoder == nil && !iter.cfg.caseSensitive { - fieldDecoder = decoder.fields[strings.ToLower(field)] - } } else { - field = iter.ReadString() - fieldDecoder = decoder.fields[field] - if fieldDecoder == nil && !iter.cfg.caseSensitive { - fieldDecoder = decoder.fields[strings.ToLower(field)] - } + field = string(iter.ReadStringAsSlice()) + } + fieldDecoder = decoder.fields[field] + if fieldDecoder == nil && !iter.cfg.caseSensitive { + fieldDecoder = decoder.fields[strings.ToLower(field)] } if fieldDecoder == nil { if decoder.disallowUnknownFields { @@ -1067,7 +1063,8 @@ func (decoder *stringModeStringDecoder) Decode(ptr unsafe.Pointer, iter *Iterato str := *((*string)(ptr)) tempIter := decoder.cfg.BorrowIterator([]byte(str)) defer decoder.cfg.ReturnIterator(tempIter) - *((*string)(ptr)) = tempIter.ReadString() + data := tempIter.ReadStringAsSlice() + *((*string)(ptr)) = string(data) } type stringModeNumberDecoder struct { From c067c83954a8f9a6a437fddae05493d3b4f2e87b Mon Sep 17 00:00:00 2001 From: Mikhail Deshevoy Date: Mon, 1 Apr 2024 05:59:20 +0300 Subject: [PATCH 2/2] safe field name --- reflect_struct_decoder.go | 11 ++--------- 1 file changed, 2 insertions(+), 9 
deletions(-) diff --git a/reflect_struct_decoder.go b/reflect_struct_decoder.go index b8e72809..16b067cc 100644 --- a/reflect_struct_decoder.go +++ b/reflect_struct_decoder.go @@ -517,15 +517,8 @@ func (decoder *generalStructDecoder) Decode(ptr unsafe.Pointer, iter *Iterator) } func (decoder *generalStructDecoder) decodeOneField(ptr unsafe.Pointer, iter *Iterator) { - var field string - var fieldDecoder *structFieldDecoder - if iter.cfg.objectFieldMustBeSimpleString { - fieldBytes := iter.ReadStringAsSlice() - field = *(*string)(unsafe.Pointer(&fieldBytes)) - } else { - field = string(iter.ReadStringAsSlice()) - } - fieldDecoder = decoder.fields[field] + field := string(iter.ReadStringAsSlice()) + fieldDecoder := decoder.fields[field] if fieldDecoder == nil && !iter.cfg.caseSensitive { fieldDecoder = decoder.fields[strings.ToLower(field)] }