From 699550ab4a68a9d525a8d0b09d77125692a4c1d9 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 6 Sep 2023 14:56:33 -0700 Subject: [PATCH] Remove jsontext.WithEscapeFunc (#313) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WARNING: This commit includes breaking changes. This option originally existed as a way for users to implement EscapeForHTML and EscapeForJS on their own. Since we added first-class support for those two forms of escaping, drop the general-purpose escape logic. It carries an unjustified amount of complexity. Generally speaking, users can trivially do this escaping themselves for any character that is not ASCII. Performance: name old time/op new time/op delta Testdata/CanadaGeometry/Marshal/Concrete 1.39ms ± 2% 1.39ms ± 2% ~ (p=0.745 n=55+49) Testdata/CitmCatalog/Marshal/Concrete 1.41ms ± 2% 1.42ms ± 2% +0.95% (p=0.000 n=57+59) Testdata/GolangSource/Marshal/Concrete 5.27ms ± 2% 5.16ms ± 1% -2.18% (p=0.000 n=58+56) Testdata/StringEscaped/Marshal/Concrete 26.9µs ± 2% 25.7µs ± 2% -4.60% (p=0.000 n=60+56) Testdata/StringUnicode/Marshal/Concrete 27.0µs ± 2% 25.6µs ± 2% -5.01% (p=0.000 n=56+60) Testdata/SyntheaFhir/Marshal/Concrete 8.35ms ± 2% 8.08ms ± 2% -3.25% (p=0.000 n=60+50) Testdata/TwitterStatus/Marshal/Concrete 925µs ± 2% 916µs ± 2% -1.00% (p=0.000 n=58+40) --- arshal_default.go | 6 +- arshal_inlined.go | 2 +- arshal_test.go | 67 +++------------ diff_test.go | 3 +- fields.go | 3 +- internal/jsonflags/flags.go | 34 +++----- internal/jsonflags/flags_test.go | 28 ++++--- internal/jsonopts/options.go | 37 +++------ internal/jsonwire/decode.go | 2 +- internal/jsonwire/encode.go | 138 ++++++++++++++----------------- internal/jsonwire/encode_test.go | 86 +++++++++---------- internal/jsonwire/escape.go | 125 ---------------------------- internal/jsonwire/escape_test.go | 27 ------ jsontext/encode.go | 22 ++--- jsontext/encode_test.go | 9 +- jsontext/options.go | 16 ---- jsontext/quote.go | 7 +- jsontext/token.go | 9 +- 
jsontext/value.go | 1 - 19 files changed, 177 insertions(+), 445 deletions(-) delete mode 100644 internal/jsonwire/escape.go delete mode 100644 internal/jsonwire/escape_test.go diff --git a/arshal_default.go b/arshal_default.go index 502e073..9e5f8c7 100644 --- a/arshal_default.go +++ b/arshal_default.go @@ -170,7 +170,7 @@ func makeStringArshaler(t reflect.Type) *arshaler { // Optimize for marshaling without preceding whitespace or string escaping. s := va.String() - if optimizeCommon && !xe.Flags.Get(jsonflags.Expand) && !xe.Tokens.Last.NeedObjectName() && !jsonwire.NeedEscape(s, xe.EscapeRunes) { + if optimizeCommon && !xe.Flags.Get(jsonflags.Expand) && !xe.Tokens.Last.NeedObjectName() && !jsonwire.NeedEscape(s) { b := xe.Buf b = xe.Tokens.MayAppendDelim(b, '"') b = append(b, '"') @@ -984,10 +984,10 @@ func makeStructArshaler(t reflect.Type) *arshaler { // Append the token to the output and to the state machine. n0 := len(b) // offset before calling AppendQuote - if xe.EscapeRunes.IsCanonical() { + if !xe.Flags.Get(jsonflags.EscapeForHTML | jsonflags.EscapeForJS) { b = append(b, f.quotedName...) 
} else { - b, _ = jsonwire.AppendQuote(b, f.name, false, xe.EscapeRunes) + b, _ = jsonwire.AppendQuote(b, f.name, &xe.Flags) } xe.Buf = b if !xe.Flags.Get(jsonflags.AllowDuplicateNames) { diff --git a/arshal_inlined.go b/arshal_inlined.go index a455261..51e05f9 100644 --- a/arshal_inlined.go +++ b/arshal_inlined.go @@ -111,7 +111,7 @@ func marshalInlinedFallbackAll(enc *jsontext.Encoder, va addressableValue, mo *j mv := newAddressableValue(m.Type().Elem()) marshalKey := func(mk addressableValue) error { xe := export.Encoder(enc) - b, err := jsonwire.AppendQuote(enc.UnusedBuffer(), mk.String(), !xe.Flags.Get(jsonflags.AllowInvalidUTF8), nil) + b, err := jsonwire.AppendQuote(enc.UnusedBuffer(), mk.String(), &xe.Flags) if err != nil { return err } diff --git a/arshal_test.go b/arshal_test.go index cd209cb..85396cf 100644 --- a/arshal_test.go +++ b/arshal_test.go @@ -1056,9 +1056,17 @@ func TestMarshal(t *testing.T) { want: `{"":"empty",",":"comma","\"":"quote"}`, }, { name: jsontest.Name("Structs/EscapedNames"), - opts: []Options{jsontext.WithEscapeFunc(func(rune) bool { return true })}, - in: structWeirdNames{Empty: "empty", Comma: "comma", Quote: "quote"}, - want: `{"":"\u0065\u006d\u0070\u0074\u0079","\u002c":"\u0063\u006f\u006d\u006d\u0061","\u0022":"\u0071\u0075\u006f\u0074\u0065"}`, + opts: []Options{jsontext.EscapeForHTML(true), jsontext.EscapeForJS(true)}, + in: struct { + S string "json:\"'abc<>&\u2028\u2029xyz'\"" + M any + I structInlineTextValue + }{ + S: "abc<>&\u2028\u2029xyz", + M: map[string]string{"abc<>&\u2028\u2029xyz": "abc<>&\u2028\u2029xyz"}, + I: structInlineTextValue{X: jsontext.Value(`{"abc<>&` + "\u2028\u2029" + `xyz":"abc<>&` + "\u2028\u2029" + `xyz"}`)}, + }, + want: `{"abc\u003c\u003e\u0026\u2028\u2029xyz":"abc\u003c\u003e\u0026\u2028\u2029xyz","M":{"abc\u003c\u003e\u0026\u2028\u2029xyz":"abc\u003c\u003e\u0026\u2028\u2029xyz"},"I":{"abc\u003c\u003e\u0026\u2028\u2029xyz":"abc\u003c\u003e\u0026\u2028\u2029xyz"}}`, }, { name: 
jsontest.Name("Structs/NoCase"), in: structNoCase{AaA: "AaA", AAa: "AAa", AAA: "AAA"}, @@ -1354,59 +1362,6 @@ func TestMarshal(t *testing.T) { "Interface": null }, "Interface": null -}`, - }, { - name: jsontest.Name("Structs/Stringified/Escaped"), - opts: []Options{ - jsontext.Expand(true), - jsontext.WithEscapeFunc(func(rune) bool { return true }), - }, - in: structStringifiedAll{ - Bool: true, - String: "hello", - Bytes: []byte{1, 2, 3}, - Int: -64, // should be stringified and escaped - Uint: +64, // should be stringified and escaped - Float: 3.14159, // should be stringified and escaped - }, - want: `{ - "\u0042\u006f\u006f\u006c": true, - "\u0053\u0074\u0072\u0069\u006e\u0067": "\u0068\u0065\u006c\u006c\u006f", - "\u0042\u0079\u0074\u0065\u0073": "\u0041\u0051\u0049\u0044", - "\u0049\u006e\u0074": "\u002d\u0036\u0034", - "\u0055\u0069\u006e\u0074": "\u0036\u0034", - "\u0046\u006c\u006f\u0061\u0074": "\u0033\u002e\u0031\u0034\u0031\u0035\u0039", - "\u004d\u0061\u0070": {}, - "\u0053\u0074\u0072\u0075\u0063\u0074\u0053\u0063\u0061\u006c\u0061\u0072\u0073": { - "\u0042\u006f\u006f\u006c": false, - "\u0053\u0074\u0072\u0069\u006e\u0067": "", - "\u0042\u0079\u0074\u0065\u0073": "", - "\u0049\u006e\u0074": "\u0030", - "\u0055\u0069\u006e\u0074": "\u0030", - "\u0046\u006c\u006f\u0061\u0074": "\u0030" - }, - "\u0053\u0074\u0072\u0075\u0063\u0074\u004d\u0061\u0070\u0073": { - "\u004d\u0061\u0070\u0042\u006f\u006f\u006c": {}, - "\u004d\u0061\u0070\u0053\u0074\u0072\u0069\u006e\u0067": {}, - "\u004d\u0061\u0070\u0042\u0079\u0074\u0065\u0073": {}, - "\u004d\u0061\u0070\u0049\u006e\u0074": {}, - "\u004d\u0061\u0070\u0055\u0069\u006e\u0074": {}, - "\u004d\u0061\u0070\u0046\u006c\u006f\u0061\u0074": {} - }, - "\u0053\u0074\u0072\u0075\u0063\u0074\u0053\u006c\u0069\u0063\u0065\u0073": { - "\u0053\u006c\u0069\u0063\u0065\u0042\u006f\u006f\u006c": [], - "\u0053\u006c\u0069\u0063\u0065\u0053\u0074\u0072\u0069\u006e\u0067": [], - 
"\u0053\u006c\u0069\u0063\u0065\u0042\u0079\u0074\u0065\u0073": [], - "\u0053\u006c\u0069\u0063\u0065\u0049\u006e\u0074": [], - "\u0053\u006c\u0069\u0063\u0065\u0055\u0069\u006e\u0074": [], - "\u0053\u006c\u0069\u0063\u0065\u0046\u006c\u006f\u0061\u0074": [] - }, - "\u0053\u006c\u0069\u0063\u0065": [], - "\u0041\u0072\u0072\u0061\u0079": [ - "" - ], - "\u0050\u006f\u0069\u006e\u0074\u0065\u0072": null, - "\u0049\u006e\u0074\u0065\u0072\u0066\u0061\u0063\u0065": null }`, }, { name: jsontest.Name("Structs/OmitZero/Zero"), diff --git a/diff_test.go b/diff_test.go index 90bf6fb..db52f84 100644 --- a/diff_test.go +++ b/diff_test.go @@ -731,8 +731,7 @@ func TestMapDeterminism(t *testing.T) { // In v1, JSON string encoding escapes special characters related to HTML. // In v2, JSON string encoding uses a normalized representation (per RFC 8785). // -// Users of v2 can opt into the v1 behavior by setting WithEscapeFunc. -// See the EscapeHTML example. +// Users of v2 can opt into the v1 behavior by setting EscapeForHTML and EscapeForJS. // // Escaping HTML-specific characters in a JSON library is a layering violation. // It presumes that JSON is always used with HTML and ignores other diff --git a/fields.go b/fields.go index f985cbc..776dd7e 100644 --- a/fields.go +++ b/fields.go @@ -16,6 +16,7 @@ import ( "unicode" "unicode/utf8" + "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonwire" ) @@ -363,7 +364,7 @@ func parseFieldOptions(sf reflect.StructField) (out fieldOptions, ignored bool, out.name = opt tag = tag[n:] } - b, _ := jsonwire.AppendQuote(nil, out.name, false, nil) + b, _ := jsonwire.AppendQuote(nil, out.name, &jsonflags.Flags{}) out.quotedName = string(b) // Handle any additional tag options (if any). 
diff --git a/internal/jsonflags/flags.go b/internal/jsonflags/flags.go index dba1f53..c2473cd 100644 --- a/internal/jsonflags/flags.go +++ b/internal/jsonflags/flags.go @@ -8,12 +8,11 @@ package jsonflags import "github.com/go-json-experiment/json/internal" -// Bools represents zero or more boolean flag, all set to true or false. +// Bools represents zero or more boolean flags, all set to true or false. // The least-significant bit is the boolean value of all flags in the set. -// The remaining bits identify a particular flag. +// The remaining bits identify which particular flags. // // In common usage, this is OR'd with 0 or 1. For example: -// // - (AllowInvalidUTF8 | 0) means "AllowInvalidUTF8 is false" // - (Expand | Indent | 1) means "Expand and Indent are true" type Bools uint64 @@ -37,7 +36,6 @@ const ( // where the value is some other concrete Go type. // The value of the flag is stored within jsonopts.Struct. NonBooleanFlags = 0 | - EscapeFunc | Indent | IndentPrefix | ByteLimit | @@ -81,7 +79,6 @@ const ( CanonicalizeNumbers // encode only; for internal use by jsontext.Value.Canonicalize EscapeForHTML // encode only EscapeForJS // encode only - EscapeFunc // encode only; non-boolean flag Expand // encode only Indent // encode only; non-boolean flag IndentPrefix // encode only; non-boolean flag @@ -128,13 +125,13 @@ const ( maxArshalV1Flag ) -// Flags is a set boolean flags. +// Flags is a set of boolean flags. // If the presence bit is zero, then the value bit must also be zero. // The least-significant bit of both fields is always zero. // // Unlike Bools, which can represent a set of bools that are all true or false, // Flags represents a set of bools, each individually may be true or false. -type Flags struct{ Presence, Value uint64 } +type Flags struct{ Presence, Values uint64 } // Join joins two sets of flags such that the latter takes precedence. 
func (dst *Flags) Join(src Flags) { @@ -144,8 +141,8 @@ func (dst *Flags) Join(src Flags) { // e.g., dst := Flags{Presence: 0b_1100_0011, Value: 0b_1000_0011} // e.g., src := Flags{Presence: 0b_0101_1010, Value: 0b_1001_0010} dst.Presence |= src.Presence // e.g., 0b_1100_0011 | 0b_0101_1010 -> 0b_110_11011 - dst.Value &= ^src.Presence // e.g., 0b_1000_0011 & 0b_1010_0101 -> 0b_100_00001 - dst.Value |= src.Value // e.g., 0b_1000_0001 | 0b_1001_0010 -> 0b_100_10011 + dst.Values &= ^src.Presence // e.g., 0b_1000_0011 & 0b_1010_0101 -> 0b_100_00001 + dst.Values |= src.Values // e.g., 0b_1000_0001 | 0b_1001_0010 -> 0b_100_10011 } // Set sets both the presence and value for the provided bool (or set of bools). @@ -156,24 +153,17 @@ func (fs *Flags) Set(f Bools) { // then copy over all the identifier bits to the value if LSB is 1. // e.g., fs := Flags{Presence: 0b_0101_0010, Value: 0b_0001_0010} // e.g., f := 0b_1001_0001 - id := uint64(f) &^ uint64(1) // e.g., 0b_1001_0001 & 0b_1111_1110 -> 0b_1001_0000 - fs.Presence |= id // e.g., 0b_0101_0010 | 0b_1001_0000 -> 0b_1101_0011 - fs.Value &= ^id // e.g., 0b_0001_0010 & 0b_0110_1111 -> 0b_0000_0010 - fs.Value |= uint64(f&1) * id // e.g., 0b_0000_0010 | 0b_1001_0000 -> 0b_1001_0010 + id := uint64(f) &^ uint64(1) // e.g., 0b_1001_0001 & 0b_1111_1110 -> 0b_1001_0000 + fs.Presence |= id // e.g., 0b_0101_0010 | 0b_1001_0000 -> 0b_1101_0011 + fs.Values &= ^id // e.g., 0b_0001_0010 & 0b_0110_1111 -> 0b_0000_0010 + fs.Values |= uint64(f&1) * id // e.g., 0b_0000_0010 | 0b_1001_0000 -> 0b_1001_0010 } // Get reports whether the bool (or any of the bools) is true. // This is generally only used with a singular bool. // The value bit of f (i.e., the LSB) is ignored. func (fs Flags) Get(f Bools) bool { - return fs.Value&uint64(f) > 0 -} - -// GetOk reports the value of the bool and whether it was set. -// This is generally only used with a singular bool. -// The value bit of f (i.e., the LSB) is ignored. 
-func (fs Flags) GetOk(f Bools) (v, ok bool) { - return fs.Get(f), fs.Has(f) + return fs.Values&uint64(f) > 0 } // Has reports whether the bool (or any of the bools) is set. @@ -190,5 +180,5 @@ func (fs *Flags) Clear(f Bools) { // e.g., f := 0b_0001_1000 mask := uint64(^f) // e.g., 0b_0001_1000 -> 0b_1110_0111 fs.Presence &= mask // e.g., 0b_0101_0010 & 0b_1110_0111 -> 0b_0100_0010 - fs.Value &= mask // e.g., 0b_0001_0010 & 0b_1110_0111 -> 0b_0000_0010 + fs.Values &= mask // e.g., 0b_0001_0010 & 0b_1110_0111 -> 0b_0000_0010 } diff --git a/internal/jsonflags/flags_test.go b/internal/jsonflags/flags_test.go index 00e4c5c..a916bfd 100644 --- a/internal/jsonflags/flags_test.go +++ b/internal/jsonflags/flags_test.go @@ -23,31 +23,31 @@ func TestFlags(t *testing.T) { Get{in: AllowDuplicateNames, want: false, wantOk: true}, Set{in: AllowDuplicateNames | 1}, Get{in: AllowDuplicateNames, want: true, wantOk: true}, - Check{want: Flags{Presence: uint64(AllowDuplicateNames), Value: uint64(AllowDuplicateNames)}}, + Check{want: Flags{Presence: uint64(AllowDuplicateNames), Values: uint64(AllowDuplicateNames)}}, Get{in: AllowInvalidUTF8, want: false, wantOk: false}, Set{in: AllowInvalidUTF8 | 1}, Get{in: AllowInvalidUTF8, want: true, wantOk: true}, Set{in: AllowInvalidUTF8 | 0}, Get{in: AllowInvalidUTF8, want: false, wantOk: true}, Get{in: AllowDuplicateNames, want: true, wantOk: true}, - Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Value: uint64(AllowDuplicateNames)}}, + Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames)}}, Set{in: AllowDuplicateNames | AllowInvalidUTF8 | 0}, - Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Value: uint64(0)}}, + Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(0)}}, Set{in: AllowDuplicateNames | AllowInvalidUTF8 | 0}, - Check{want: Flags{Presence: uint64(AllowDuplicateNames | 
AllowInvalidUTF8), Value: uint64(0)}}, + Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(0)}}, Set{in: AllowDuplicateNames | AllowInvalidUTF8 | 1}, - Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Value: uint64(AllowDuplicateNames | AllowInvalidUTF8)}}, - Join{in: Flags{Presence: 0, Value: 0}}, - Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Value: uint64(AllowDuplicateNames | AllowInvalidUTF8)}}, - Join{in: Flags{Presence: uint64(Expand | AllowInvalidUTF8), Value: uint64(AllowDuplicateNames)}}, - Check{want: Flags{Presence: uint64(Expand | AllowDuplicateNames | AllowInvalidUTF8), Value: uint64(AllowDuplicateNames)}}, + Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames | AllowInvalidUTF8)}}, + Join{in: Flags{Presence: 0, Values: 0}}, + Check{want: Flags{Presence: uint64(AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames | AllowInvalidUTF8)}}, + Join{in: Flags{Presence: uint64(Expand | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames)}}, + Check{want: Flags{Presence: uint64(Expand | AllowDuplicateNames | AllowInvalidUTF8), Values: uint64(AllowDuplicateNames)}}, Clear{in: AllowDuplicateNames | AllowInvalidUTF8}, - Check{want: Flags{Presence: uint64(Expand), Value: uint64(0)}}, + Check{want: Flags{Presence: uint64(Expand), Values: uint64(0)}}, Set{in: AllowInvalidUTF8 | Deterministic | IgnoreStructErrors | 1}, Set{in: Expand | StringifyNumbers | RejectFloatOverflow | 0}, - Check{want: Flags{Presence: uint64(AllowInvalidUTF8 | Deterministic | IgnoreStructErrors | Expand | StringifyNumbers | RejectFloatOverflow), Value: uint64(AllowInvalidUTF8 | Deterministic | IgnoreStructErrors)}}, + Check{want: Flags{Presence: uint64(AllowInvalidUTF8 | Deterministic | IgnoreStructErrors | Expand | StringifyNumbers | RejectFloatOverflow), Values: uint64(AllowInvalidUTF8 | Deterministic | 
IgnoreStructErrors)}}, Clear{in: ^AllCoderFlags}, - Check{want: Flags{Presence: uint64(AllowInvalidUTF8 | Expand), Value: uint64(AllowInvalidUTF8)}}, + Check{want: Flags{Presence: uint64(AllowInvalidUTF8 | Expand), Values: uint64(AllowInvalidUTF8)}}, } var fs Flags for i, call := range calls { @@ -59,7 +59,9 @@ func TestFlags(t *testing.T) { case Clear: fs.Clear(call.in) case Get: - if got, gotOk := fs.GetOk(call.in); got != call.want || gotOk != call.wantOk { + got := fs.Get(call.in) + gotOk := fs.Has(call.in) + if got != call.want || gotOk != call.wantOk { t.Fatalf("%d: GetOk = (%v, %v), want (%v, %v)", i, got, gotOk, call.want, call.wantOk) } case Check: diff --git a/internal/jsonopts/options.go b/internal/jsonopts/options.go index b9901a6..6b63d5c 100644 --- a/internal/jsonopts/options.go +++ b/internal/jsonopts/options.go @@ -25,11 +25,10 @@ type Struct struct { } type CoderValues struct { - EscapeFunc func(rune) bool // jsonflags.EscapeFunc - Indent string // jsonflags.Indent - IndentPrefix string // jsonflags.IndentPrefix - ByteLimit int64 // jsonflags.ByteLimit - DepthLimit int // jsonflags.DepthLimit + Indent string // jsonflags.Indent + IndentPrefix string // jsonflags.IndentPrefix + ByteLimit int64 // jsonflags.ByteLimit + DepthLimit int // jsonflags.DepthLimit } type ArshalValues struct { @@ -48,7 +47,7 @@ type ArshalValues struct { var DefaultOptionsV2 = Struct{ Flags: jsonflags.Flags{ Presence: uint64(jsonflags.AllFlags), - Value: uint64(0), + Values: uint64(0), }, CoderValues: CoderValues{Indent: "\t"}, // Indent is set, but Expand is set to false } @@ -57,7 +56,7 @@ var DefaultOptionsV2 = Struct{ var DefaultOptionsV1 = Struct{ Flags: jsonflags.Flags{ Presence: uint64(jsonflags.AllFlags), - Value: uint64(jsonflags.DefaultV1Flags), + Values: uint64(jsonflags.DefaultV1Flags), }, CoderValues: CoderValues{Indent: "\t"}, // Indent is set, but Expand is set to false } @@ -92,13 +91,9 @@ func GetOption[T any](opts Options, setter func(T) Options) (T, bool) 
{ var zero T switch opt := setter(zero).(type) { case jsonflags.Bools: - v, ok := structOpts.Flags.GetOk(opt) + v := structOpts.Flags.Get(opt) + ok := structOpts.Flags.Has(opt) return any(v).(T), ok - case EscapeFunc: - if !structOpts.Flags.Has(jsonflags.EscapeFunc) { - return zero, false - } - return any(structOpts.EscapeFunc).(T), true case Indent: if !structOpts.Flags.Has(jsonflags.Indent) { return zero, false @@ -136,9 +131,6 @@ func (dst *Struct) Join(srcs ...Options) { continue case jsonflags.Bools: dst.Flags.Set(src) - case EscapeFunc: - dst.Flags.Set(jsonflags.EscapeFunc | 1) - dst.EscapeFunc = src case Indent: dst.Flags.Set(jsonflags.Expand | jsonflags.Indent | 1) dst.Indent = string(src) @@ -154,9 +146,6 @@ func (dst *Struct) Join(srcs ...Options) { case *Struct: dst.Flags.Join(src.Flags) if src.Flags.Has(jsonflags.NonBooleanFlags) { - if src.Flags.Has(jsonflags.EscapeFunc) { - dst.EscapeFunc = src.EscapeFunc - } if src.Flags.Has(jsonflags.Indent) { dst.Indent = src.Indent } @@ -187,16 +176,14 @@ func (dst *Struct) Join(srcs ...Options) { } type ( - EscapeFunc func(rune) bool // jsontext.WithEscapeFunc - Indent string // jsontext.WithIndent - IndentPrefix string // jsontext.WithIndentPrefix - ByteLimit int64 // jsontext.WithByteLimit - DepthLimit int // jsontext.WithDepthLimit + Indent string // jsontext.WithIndent + IndentPrefix string // jsontext.WithIndentPrefix + ByteLimit int64 // jsontext.WithByteLimit + DepthLimit int // jsontext.WithDepthLimit // type for jsonflags.Marshalers declared in "json" package // type for jsonflags.Unmarshalers declared in "json" package ) -func (EscapeFunc) JSONOptions(internal.NotForPublicUse) {} func (Indent) JSONOptions(internal.NotForPublicUse) {} func (IndentPrefix) JSONOptions(internal.NotForPublicUse) {} func (ByteLimit) JSONOptions(internal.NotForPublicUse) {} diff --git a/internal/jsonwire/decode.go b/internal/jsonwire/decode.go index cb18a83..e440af6 100644 --- a/internal/jsonwire/decode.go +++ 
b/internal/jsonwire/decode.go @@ -96,7 +96,7 @@ func ConsumeSimpleString(b []byte) (n int) { // NOTE: The arguments and logic are kept simple to keep this inlinable. if len(b) > 0 && b[0] == '"' { n++ - for len(b) > n && b[n] < utf8.RuneSelf && !escapeHTML.needEscapeASCII(b[n]) { + for len(b) > n && b[n] < utf8.RuneSelf && escapeASCII[b[n]] == 0 { n++ } if uint(len(b)) > uint(n) && b[n] == '"' { diff --git a/internal/jsonwire/encode.go b/internal/jsonwire/encode.go index 6c3e681..c9ea707 100644 --- a/internal/jsonwire/encode.go +++ b/internal/jsonwire/encode.go @@ -10,24 +10,37 @@ import ( "strconv" "unicode/utf16" "unicode/utf8" + + "github.com/go-json-experiment/json/internal/jsonflags" ) +// escapeASCII reports whether the ASCII character needs to be escaped. +// It conservatively assumes EscapeForHTML. +var escapeASCII = [...]uint8{ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // escape control characters + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // escape '"' and '&' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, // escape '<' and '>' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // escape '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +} + // NeedEscape reports whether src needs escaping of any characters. +// It conservatively assumes EscapeForHTML and EscapeForJS. // It reports true for inputs with invalid UTF-8. 
-func NeedEscape[Bytes ~[]byte | ~string](src Bytes, escape *EscapeRunes) bool { - if escape == nil { - escape = &escapeCanonical - } +func NeedEscape[Bytes ~[]byte | ~string](src Bytes) bool { var i int for uint(len(src)) > uint(i) { if c := src[i]; c < utf8.RuneSelf { - if escape.needEscapeASCII(c) { + if escapeASCII[c] > 0 { return true } i++ } else { r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:]))) - if r == utf8.RuneError || escape.needEscapeRune(r) { + if r == utf8.RuneError || r == '\u2028' || r == '\u2029' { return true } i += rn @@ -38,87 +51,55 @@ func NeedEscape[Bytes ~[]byte | ~string](src Bytes, escape *EscapeRunes) bool { // AppendQuote appends src to dst as a JSON string per RFC 7159, section 7. // -// If validateUTF8 is specified, this rejects input that contains invalid UTF-8 -// otherwise invalid bytes are replaced with the Unicode replacement character. -// If escapeRune is provided, it specifies which runes to escape using -// hexadecimal sequences. If nil, the shortest representable form is used, -// which is also the canonical form for strings (RFC 8785, section 3.2.2.2). +// It takes in flags and respects the following: +// - EscapeForHTML escapes '<', '>', and '&'. +// - EscapeForJS escapes '\u2028' and '\u2029'. +// - AllowInvalidUTF8 avoids reporting an error for invalid UTF-8. // -// Note that this API allows full control over the formatting of strings -// except for whether a forward solidus '/' may be formatted as '\/' and -// the casing of hexadecimal Unicode escape sequences. -func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, validateUTF8 bool, escape *EscapeRunes) ([]byte, error) { +// Regardless of whether AllowInvalidUTF8 is specified, +// invalid bytes are replaced with the Unicode replacement character ('\ufffd'). +// If no escape flags are set, then the shortest representable form is used, +// which is also the canonical form for strings (RFC 8785, section 3.2.2.2). 
+func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflags.Flags) ([]byte, error) { var i, n int var hasInvalidUTF8 bool dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`)) dst = append(dst, '"') - if escape == nil || escape.IsCanonical() { - // Optimize for canonical formatting. - for uint(len(src)) > uint(n) { - // Handle single-byte ASCII. - if c := src[n]; c < utf8.RuneSelf { - n++ - if escapeCanonical.needEscapeASCII(c) { - dst = append(dst, src[i:n-1]...) - dst = appendEscapedASCII(dst, c) - i = n + for uint(len(src)) > uint(n) { + // Handle single-byte ASCII. + if c := src[n]; c < utf8.RuneSelf { + n++ + if escapeASCII[c] > 0 { + if (c == '<' || c == '>' || c == '&') && !flags.Get(jsonflags.EscapeForHTML) { + continue } - continue - } - - // Handle multi-byte Unicode. - _, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))) - n += rn - if rn == 1 { // must be utf8.RuneError since we already checked for single-byte ASCII - hasInvalidUTF8 = true - dst = append(dst, src[i:n-rn]...) - dst = append(dst, "\ufffd"...) + dst = append(dst, src[i:n-1]...) + dst = appendEscapedASCII(dst, c) i = n } + continue } - } else { - // Handle arbitrary escaping. - for uint(len(src)) > uint(n) { - // Handle single-byte ASCII. - if c := src[n]; c < utf8.RuneSelf { - n++ - if escape.needEscapeASCII(c) { - dst = append(dst, src[i:n-1]...) - if escape.needEscapeASCIIAsUTF16(c) { - dst = appendEscapedUTF16(dst, uint16(c)) - } else { - dst = appendEscapedASCII(dst, c) - } - i = n - } - continue - } - // Handle multi-byte Unicode. - switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); { - case r == utf8.RuneError && rn == 1: - hasInvalidUTF8 = true - dst = append(dst, src[i:n]...) - if escape.needEscapeRune(r) { - dst = append(dst, `\ufffd`...) - } else { - dst = append(dst, "\ufffd"...) - } - n += rn - i = n - case escape.needEscapeRune(r): - dst = append(dst, src[i:n]...) 
- dst = appendEscapedUnicode(dst, r) - n += rn - i = n - default: - n += rn - } + // Handle multi-byte Unicode. + switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); { + case r == utf8.RuneError && rn == 1: + hasInvalidUTF8 = true + dst = append(dst, src[i:n]...) + dst = append(dst, "\ufffd"...) + n += rn + i = n + case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS): + dst = append(dst, src[i:n]...) + dst = appendEscapedUnicode(dst, r) + n += rn + i = n + default: + n += rn } } dst = append(dst, src[i:n]...) dst = append(dst, '"') - if validateUTF8 && hasInvalidUTF8 { + if hasInvalidUTF8 && !flags.Get(jsonflags.AllowInvalidUTF8) { return dst, ErrInvalidUTF8 } return dst, nil @@ -162,14 +143,15 @@ func appendEscapedUTF16(dst []byte, x uint16) []byte { // ReformatString consumes a JSON string from src and appends it to dst, // reformatting it if necessary for the given escapeRune parameter. // It returns the appended output and the number of consumed input bytes. -func ReformatString(dst, src []byte, validateUTF8, preserveRaw bool, escape *EscapeRunes) ([]byte, int, error) { +func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) { // TODO: Should this update ValueFlags as input? - var flags ValueFlags - n, err := ConsumeString(&flags, src, validateUTF8) + var valFlags ValueFlags + n, err := ConsumeString(&valFlags, src, !flags.Get(jsonflags.AllowInvalidUTF8)) if err != nil { return dst, n, err } - if preserveRaw || (escape.IsCanonical() && flags.IsCanonical()) { + isCanonical := !flags.Get(jsonflags.EscapeForHTML | jsonflags.EscapeForJS) + if flags.Get(jsonflags.PreserveRawStrings) || (isCanonical && valFlags.IsCanonical()) { dst = append(dst, src[:n]...) 
// copy the string verbatim return dst, n, nil } @@ -179,7 +161,7 @@ func ReformatString(dst, src []byte, validateUTF8, preserveRaw bool, escape *Esc // it would be faster to simply append src to dst without going through // an intermediary representation in a separate buffer. b, _ := AppendUnquote(nil, src[:n]) - dst, _ = AppendQuote(dst, string(b), validateUTF8, escape) + dst, _ = AppendQuote(dst, string(b), flags) return dst, n, nil } diff --git a/internal/jsonwire/encode_test.go b/internal/jsonwire/encode_test.go index 09d497b..9123770 100644 --- a/internal/jsonwire/encode_test.go +++ b/internal/jsonwire/encode_test.go @@ -19,69 +19,63 @@ import ( "strings" "testing" "time" - "unicode" + + "github.com/go-json-experiment/json/internal/jsonflags" ) func TestAppendQuote(t *testing.T) { - var ( - escapeNothing = MakeEscapeRunes(false, false, nil) - escapeHTML = MakeEscapeRunes(true, true, nil) - escapeNonASCII = MakeEscapeRunes(false, false, func(r rune) bool { return r > unicode.MaxASCII }) - escapeEverything = MakeEscapeRunes(false, false, func(r rune) bool { return true }) - ) - tests := []struct { in string - escapeRune *EscapeRunes + flags jsonflags.Bools want string wantErr error wantErrUTF8 error }{ - {"", nil, `""`, nil, nil}, - {"hello", nil, `"hello"`, nil, nil}, - {"\x00", nil, `"\u0000"`, nil, nil}, - {"\x1f", nil, `"\u001f"`, nil, nil}, - {"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", nil, `"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, nil, nil}, - {" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", nil, "\" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f\"", nil, nil}, - {"x\x80\ufffd", nil, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xff\ufffd", nil, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, - {"x\x80\ufffd", escapeNonASCII, "\"x\\ufffd\\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xff\ufffd", escapeNonASCII, "\"x\\ufffd\\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xc0", nil, "\"x\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xc0\x80", nil, 
"\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xe0", nil, "\"x\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xe0\x80", nil, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xe0\x80\x80", nil, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xf0", nil, "\"x\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xf0\x80", nil, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xf0\x80\x80", nil, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xf0\x80\x80\x80", nil, "\"x\ufffd\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, - {"x\xed\xba\xad", nil, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, - {"\"\\/\b\f\n\r\t", nil, `"\"\\/\b\f\n\r\t"`, nil, nil}, - {"\"\\/\b\f\n\r\t", escapeEverything, `"\u0022\u005c\u002f\u0008\u000c\u000a\u000d\u0009"`, nil, nil}, - {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", nil, `"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃)."`, nil, nil}, - {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", escapeNonASCII, `"\u0669(-\u032e\u032e\u0303-\u0303)\u06f6 \u0669(\u25cf\u032e\u032e\u0303\u2022\u0303)\u06f6 \u0669(\u0361\u0e4f\u032f\u0361\u0e4f)\u06f6 \u0669(-\u032e\u032e\u0303\u2022\u0303)."`, nil, nil}, - {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", escapeEverything, `"\u0669\u0028\u002d\u032e\u032e\u0303\u002d\u0303\u0029\u06f6\u0020\u0669\u0028\u25cf\u032e\u032e\u0303\u2022\u0303\u0029\u06f6\u0020\u0669\u0028\u0361\u0e4f\u032f\u0361\u0e4f\u0029\u06f6\u0020\u0669\u0028\u002d\u032e\u032e\u0303\u2022\u0303\u0029\u002e"`, nil, nil}, - {"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", nil, "\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", nil, nil}, - {"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", escapeEverything, `"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\ud83d\ude02"`, nil, nil}, - {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", nil, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\u0080\u2028\u2029\ufffd\U0001f602\"", nil, nil}, - 
{"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeNothing, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\u0080\u2028\u2029\ufffd\U0001f602\"", nil, nil}, - {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeHTML, "\"\\u0000\\u001f\u0020\\\"\\u0026\\u003c\\u003e\\\\\u007f\u0080\\u2028\\u2029\ufffd\U0001f602\"", nil, nil}, - {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeNonASCII, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\\u0080\\u2028\\u2029\\ufffd\\ud83d\\ude02\"", nil, nil}, - {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", escapeEverything, "\"\\u0000\\u001f\\u0020\\u0022\\u0026\\u003c\\u003e\\u005c\\u007f\\u0080\\u2028\\u2029\\ufffd\\ud83d\\ude02\"", nil, nil}, + {"", 0, `""`, nil, nil}, + {"hello", 0, `"hello"`, nil, nil}, + {"\x00", 0, `"\u0000"`, nil, nil}, + {"\x1f", 0, `"\u001f"`, nil, nil}, + {"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 0, `"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"`, nil, nil}, + {" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", 0, "\" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f\"", nil, nil}, + {" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", jsonflags.EscapeForHTML, "\" !#$%\\u0026'()*+,-./0123456789:;\\u003c=\\u003e?@[]^_`{|}~\x7f\"", nil, nil}, + {" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f", jsonflags.EscapeForJS, "\" !#$%&'()*+,-./0123456789:;<=>?@[]^_`{|}~\x7f\"", nil, nil}, + {"\u2027\u2028\u2029\u2030", 0, "\"\u2027\u2028\u2029\u2030\"", nil, nil}, + {"\u2027\u2028\u2029\u2030", jsonflags.EscapeForHTML, "\"\u2027\u2028\u2029\u2030\"", nil, nil}, + {"\u2027\u2028\u2029\u2030", jsonflags.EscapeForJS, "\"\u2027\\u2028\\u2029\u2030\"", nil, nil}, + {"x\x80\ufffd", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xff\ufffd", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xc0", 0, 
"\"x\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xc0\x80", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xe0", 0, "\"x\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xe0\x80", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xe0\x80\x80", 0, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xf0", 0, "\"x\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xf0\x80", 0, "\"x\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xf0\x80\x80", 0, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xf0\x80\x80\x80", 0, "\"x\ufffd\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"x\xed\xba\xad", 0, "\"x\ufffd\ufffd\ufffd\"", nil, ErrInvalidUTF8}, + {"\"\\/\b\f\n\r\t", 0, `"\"\\/\b\f\n\r\t"`, nil, nil}, + {"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃).", 0, `"٩(-̮̮̃-̃)۶ ٩(●̮̮̃•̃)۶ ٩(͡๏̯͡๏)۶ ٩(-̮̮̃•̃)."`, nil, nil}, + {"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602", 0, "\"\u0080\u00f6\u20ac\ud799\ue000\ufb33\ufffd\U0001f602\"", nil, nil}, + {"\u0000\u001f\u0020\u0022\u0026\u003c\u003e\u005c\u007f\u0080\u2028\u2029\ufffd\U0001f602", 0, "\"\\u0000\\u001f\u0020\\\"\u0026\u003c\u003e\\\\\u007f\u0080\u2028\u2029\ufffd\U0001f602\"", nil, nil}, } for _, tt := range tests { t.Run("", func(t *testing.T) { - got, gotErr := AppendQuote(nil, tt.in, false, tt.escapeRune) + var flags jsonflags.Flags + flags.Set(tt.flags | 1) + + flags.Set(jsonflags.AllowInvalidUTF8 | 1) + got, gotErr := AppendQuote(nil, tt.in, &flags) if string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr) { - t.Errorf("AppendQuote(nil, %q, false, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) + t.Errorf("AppendQuote(nil, %q, ...) 
= (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) } - switch got, gotErr := AppendQuote(nil, tt.in, true, tt.escapeRune); { + flags.Set(jsonflags.AllowInvalidUTF8 | 0) + switch got, gotErr := AppendQuote(nil, tt.in, &flags); { case tt.wantErrUTF8 == nil && (string(got) != tt.want || !reflect.DeepEqual(gotErr, tt.wantErr)): - t.Errorf("AppendQuote(nil, %q, true, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) + t.Errorf("AppendQuote(nil, %q, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErr) case tt.wantErrUTF8 != nil && (!strings.HasPrefix(tt.want, string(got)) || !reflect.DeepEqual(gotErr, tt.wantErrUTF8)): - t.Errorf("AppendQuote(nil, %q, true, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErrUTF8) + t.Errorf("AppendQuote(nil, %q, ...) = (%s, %v), want (%s, %v)", tt.in, got, gotErr, tt.want, tt.wantErrUTF8) } }) } diff --git a/internal/jsonwire/escape.go b/internal/jsonwire/escape.go deleted file mode 100644 index cc024d7..0000000 --- a/internal/jsonwire/escape.go +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package jsonwire - -import "unicode/utf8" - -// Validity of these checked in TestEscapeRunesTables. 
-var ( - escapeCanonical = EscapeRunes{ - asciiCache: [...]int8{ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 00, 00, -1, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, -1, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - }, - canonical: true, - } - escapeHTMLJS = EscapeRunes{ - asciiCache: [...]int8{ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 00, 00, -1, 00, 00, 00, +1, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, +1, 00, +1, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, -1, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, - }, - escapeHTML: true, - escapeJS: true, - } - escapeHTML = EscapeRunes{asciiCache: escapeHTMLJS.asciiCache, escapeHTML: true} - escapeJS = EscapeRunes{asciiCache: escapeCanonical.asciiCache, escapeJS: true} -) - -// EscapeRunes reports whether a rune must be escaped. -type EscapeRunes struct { - // asciiCache is a cache of whether an ASCII character must be escaped, - // where 0 means not escaped, -1 escapes with the short sequence (e.g., \n), - // and +1 escapes with the \uXXXX sequence. 
- asciiCache [utf8.RuneSelf]int8 - - canonical bool // whether there are no custom escapes - escapeHTML bool // should escape '<', '>', and '&' - escapeJS bool // should escape '\u2028' and '\u2029' - - escapeFunc func(rune) bool // arbitrary runes that need escaping; may be nil -} - -// MakeEscapeRunes constructs an escape table for the escape parameters. -func MakeEscapeRunes(html, js bool, fn func(rune) bool) *EscapeRunes { - if fn == nil { - switch [2]bool{html, js} { - case [2]bool{false, false}: - return &escapeCanonical - case [2]bool{true, true}: - return &escapeHTMLJS - case [2]bool{true, false}: - return &escapeHTML - case [2]bool{false, true}: - return &escapeJS - } - } - return makeEscapeRunesSlow(html, js, fn) -} - -func makeEscapeRunesSlow(html, js bool, fn func(rune) bool) *EscapeRunes { - e := EscapeRunes{escapeHTML: html, escapeJS: js, escapeFunc: fn} - e.canonical = !e.escapeHTML && !e.escapeJS && e.escapeFunc == nil - - // Escape characters that are required by JSON. - for i := 0; i < ' '; i++ { - e.asciiCache[i] = -1 - } - e.asciiCache['\\'] = -1 - e.asciiCache['"'] = -1 - - // Escape characters with significance in HTML. - if e.escapeHTML { - e.asciiCache['<'] = +1 - e.asciiCache['>'] = +1 - e.asciiCache['&'] = +1 - } - - // Escape characters specified by the user-provided function. - if e.escapeFunc != nil { - for r := range e.asciiCache[:] { - if e.escapeFunc(rune(r)) { - e.asciiCache[r] = +1 - } - } - } - - return &e -} - -// IsCanonical reports whether this uses canonical escaping, -// which is the minimal amount of escaping to produce a valid JSON string. -func (e *EscapeRunes) IsCanonical() bool { return e.canonical } - -// HasEscapeFunc reports whether EscapeFunc is in use. -func (e *EscapeRunes) HasEscapeFunc() bool { return e.escapeFunc != nil } - -// needEscapeASCII reports whether c must be escaped. -// It assumes c < utf8.RuneSelf. 
-func (e *EscapeRunes) needEscapeASCII(c byte) bool { - return e.asciiCache[c] != 0 -} - -// needEscapeASCIIAsUTF16 reports whether c must be escaped using a \uXXXX sequence. -func (e *EscapeRunes) needEscapeASCIIAsUTF16(c byte) bool { - return e.asciiCache[c] > 0 -} - -// needEscapeRune reports whether r must be escaped. -// It assumes r >= utf8.RuneSelf. -func (e *EscapeRunes) needEscapeRune(r rune) bool { - return (e.escapeJS && (r == '\u2028' || r == '\u2029')) || (e.escapeFunc != nil && e.escapeFunc(r)) -} diff --git a/internal/jsonwire/escape_test.go b/internal/jsonwire/escape_test.go deleted file mode 100644 index 522b37b..0000000 --- a/internal/jsonwire/escape_test.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package jsonwire - -import ( - "reflect" - "testing" -) - -func TestEscapeRunesTables(t *testing.T) { - tests := []struct { - got *EscapeRunes - want *EscapeRunes - }{ - {&escapeCanonical, makeEscapeRunesSlow(false, false, nil)}, - {&escapeHTMLJS, makeEscapeRunesSlow(true, true, nil)}, - {&escapeHTML, makeEscapeRunesSlow(true, false, nil)}, - {&escapeJS, makeEscapeRunesSlow(false, true, nil)}, - } - for _, tt := range tests { - if !reflect.DeepEqual(tt.got, tt.want) { - t.Errorf("table mismatch:\n\tgot: %+v\n\twant: %+v", tt.got, tt.want) - } - } -} diff --git a/jsontext/encode.go b/jsontext/encode.go index f45c9a6..496e2e7 100644 --- a/jsontext/encode.go +++ b/jsontext/encode.go @@ -54,7 +54,6 @@ type encoderState struct { encodeBuffer jsonopts.Struct - EscapeRunes *jsonwire.EscapeRunes SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers } @@ -117,11 +116,6 @@ func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) { } e.Struct = jsonopts.Struct{} e.Struct.Join(opts...) 
- e.EscapeRunes = jsonwire.MakeEscapeRunes( - e.Flags.Get(jsonflags.EscapeForHTML), - e.Flags.Get(jsonflags.EscapeForJS), - e.EscapeFunc, - ) if e.Flags.Get(jsonflags.Expand) && !e.Flags.Has(jsonflags.Indent) { e.Indent = "\t" } @@ -361,7 +355,7 @@ func (e *encoderState) WriteToken(t Token) error { b = append(b, "true"...) err = e.Tokens.appendLiteral() case '"': - if b, err = t.appendString(b, !e.Flags.Get(jsonflags.AllowInvalidUTF8), e.Flags.Get(jsonflags.PreserveRawStrings), e.EscapeRunes); err != nil { + if b, err = t.appendString(b, &e.Flags); err != nil { break } if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() { @@ -452,13 +446,11 @@ func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ( // Check whether we need to escape the string and if necessary // copy it to a scratch buffer and then escape it back. - isVerbatim := ((safeASCII && !e.EscapeRunes.HasEscapeFunc()) || - !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)], e.EscapeRunes)) + isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)]) if !isVerbatim { var err error - validateUTF8 := !e.Flags.Get(jsonflags.AllowInvalidUTF8) b2 := append(e.unusedCache, b[pos+len(`"`):len(b)-len(`"`)]...) 
- b, err = jsonwire.AppendQuote(b[:pos], string(b2), validateUTF8, e.EscapeRunes) + b, err = jsonwire.AppendQuote(b[:pos], string(b2), &e.Flags) e.unusedCache = b2[:0] if err != nil { return e.injectSyntacticErrorWithPosition(err, pos) @@ -635,11 +627,11 @@ func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, } return append(dst, "true"...), len("true"), nil case '"': - if n := jsonwire.ConsumeSimpleString(src); n > 0 && !e.EscapeRunes.HasEscapeFunc() { + if n := jsonwire.ConsumeSimpleString(src); n > 0 { dst, src = append(dst, src[:n]...), src[n:] // copy simple strings verbatim return dst, n, nil } - return jsonwire.ReformatString(dst, src, !e.Flags.Get(jsonflags.AllowInvalidUTF8), e.Flags.Get(jsonflags.PreserveRawStrings), e.EscapeRunes) + return jsonwire.ReformatString(dst, src, &e.Flags) case '0': if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) { dst, src = append(dst, src[:n]...), src[n:] // copy simple numbers verbatim @@ -699,10 +691,10 @@ func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, return dst, n, io.ErrUnexpectedEOF } m := jsonwire.ConsumeSimpleString(src[n:]) - if m > 0 && !e.EscapeRunes.HasEscapeFunc() { + if m > 0 { dst = append(dst, src[n:n+m]...) } else { - dst, m, err = jsonwire.ReformatString(dst, src[n:], !e.Flags.Get(jsonflags.AllowInvalidUTF8), e.Flags.Get(jsonflags.PreserveRawStrings), e.EscapeRunes) + dst, m, err = jsonwire.ReformatString(dst, src[n:], &e.Flags) if err != nil { return dst, n + m, err } diff --git a/jsontext/encode_test.go b/jsontext/encode_test.go index da75f30..fd20dcb 100644 --- a/jsontext/encode_test.go +++ b/jsontext/encode_test.go @@ -20,7 +20,7 @@ import ( // TestEncoder tests whether we can produce JSON with either tokens or raw values. 
func TestEncoder(t *testing.T) { for _, td := range coderTestdata { - for _, formatName := range []string{"Compact", "Escaped", "Indented"} { + for _, formatName := range []string{"Compact", "Indented"} { for _, typeName := range []string{"Token", "Value", "TokenDelims"} { t.Run(path.Join(td.name.Name, typeName, formatName), func(t *testing.T) { testEncoder(t, td.name.Where, formatName, typeName, td) @@ -36,11 +36,6 @@ func testEncoder(t *testing.T, where jsontest.CasePos, formatName, typeName stri opts = append(opts, jsonflags.OmitTopLevelNewline|1) want = td.outCompacted switch formatName { - case "Escaped": - opts = append(opts, WithEscapeFunc(func(rune) bool { return true })) - if td.outEscaped != "" { - want = td.outEscaped - } case "Indented": opts = append(opts, Expand(true)) opts = append(opts, WithIndentPrefix("\t")) @@ -80,7 +75,7 @@ func testEncoder(t *testing.T, where jsontest.CasePos, formatName, typeName stri default: val := Value(tok.String()) if tok.Kind() == '"' { - val, _ = jsonwire.AppendQuote(nil, tok.String(), false, nil) + val, _ = jsonwire.AppendQuote(nil, tok.String(), &jsonflags.Flags{}) } if err := enc.WriteValue(val); err != nil { t.Fatalf("%s: Encoder.WriteValue error: %v", where, err) diff --git a/jsontext/options.go b/jsontext/options.go index 134943e..056a703 100644 --- a/jsontext/options.go +++ b/jsontext/options.go @@ -78,22 +78,6 @@ func EscapeForJS(v bool) Options { } } -// WithEscapeFunc specifies a function to determine whether to escape characters -// within JSON strings as a hexadecimal Unicode codepoint (e.g., \ufffd). -// The function must be deterministic and is not guaranteed to be called -// for every encoded character in a string. -// -// This can be composed with [EscapeForHTML] and [EscapeForJS] such that -// any option may specify that a particular character should be escaped. 
-// If nil and neither [EscapeForHTML] nor [EscapeForJS] are specified, -// then the shortest and simplest encoding will be used, -// which is also the formatting specified by RFC 8785, section 3.2.2.2. -// -// This only affects encoding and is ignored when decoding. -func WithEscapeFunc(fn func(rune) bool) Options { - return jsonopts.EscapeFunc(fn) -} - // Expand specifies that the JSON output should be expanded, // where every JSON object member or JSON array element // appears on a new, indented line according to the nesting depth. diff --git a/jsontext/quote.go b/jsontext/quote.go index 9fe7aa2..27f846d 100644 --- a/jsontext/quote.go +++ b/jsontext/quote.go @@ -4,7 +4,10 @@ package jsontext -import "github.com/go-json-experiment/json/internal/jsonwire" +import ( + "github.com/go-json-experiment/json/internal/jsonflags" + "github.com/go-json-experiment/json/internal/jsonwire" +) var errInvalidUTF8 = &SyntacticError{str: "invalid UTF-8 within string"} @@ -14,7 +17,7 @@ var errInvalidUTF8 = &SyntacticError{str: "invalid UTF-8 within string"} // Invalid UTF-8 bytes are replaced with the Unicode replacement character // and an error is returned at the end indicating the presence of invalid UTF-8. func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) { - return jsonwire.AppendQuote(dst, src, true, nil) + return jsonwire.AppendQuote(dst, src, &jsonflags.Flags{}) } // AppendUnquote appends the decoded interpretation of src as a diff --git a/jsontext/token.go b/jsontext/token.go index c770fbb..157a5bd 100644 --- a/jsontext/token.go +++ b/jsontext/token.go @@ -8,6 +8,7 @@ import ( "math" "strconv" + "github.com/go-json-experiment/json/internal/jsonflags" "github.com/go-json-experiment/json/internal/jsonwire" ) @@ -207,20 +208,20 @@ func (t Token) Bool() bool { // appendString appends a JSON string to dst and returns it. // It panics if t is not a JSON string. 
-func (t Token) appendString(dst []byte, validateUTF8, preserveRaw bool, escape *jsonwire.EscapeRunes) ([]byte, error) { +func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) { if raw := t.raw; raw != nil { // Handle raw string value. buf := raw.PreviousBuffer() if Kind(buf[0]) == '"' { - if !escape.HasEscapeFunc() && jsonwire.ConsumeSimpleString(buf) == len(buf) { + if jsonwire.ConsumeSimpleString(buf) == len(buf) { return append(dst, buf...), nil } - dst, _, err := jsonwire.ReformatString(dst, buf, validateUTF8, preserveRaw, escape) + dst, _, err := jsonwire.ReformatString(dst, buf, flags) return dst, err } } else if len(t.str) != 0 && t.num == 0 { // Handle exact string value. - return jsonwire.AppendQuote(dst, t.str, validateUTF8, escape) + return jsonwire.AppendQuote(dst, t.str, flags) } panic("invalid JSON token kind: " + t.Kind().String()) diff --git a/jsontext/value.go b/jsontext/value.go index cd3cf51..361c1fc 100644 --- a/jsontext/value.go +++ b/jsontext/value.go @@ -150,7 +150,6 @@ func (v *Value) reformat(canonical, multiline bool, prefix, indent string) error eo.Flags.Set(jsonflags.PreserveRawStrings | 0) // per RFC 8785, section 3.2.2.2 eo.Flags.Set(jsonflags.EscapeForHTML | 0) // per RFC 8785, section 3.2.2.2 eo.Flags.Set(jsonflags.EscapeForJS | 0) // per RFC 8785, section 3.2.2.2 - eo.Flags.Set(jsonflags.EscapeFunc | 0) // per RFC 8785, section 3.2.2.2 eo.Flags.Set(jsonflags.Expand | 0) // per RFC 8785, section 3.2.1 } else { if s := strings.TrimLeft(prefix, " \t"); len(s) > 0 {