Skip to content

Commit

Permalink
Implement legacy semantics for string escaping behavior
Browse files Browse the repository at this point in the history
For doubly escaped strings, the v1 behavior was to apply
escaping for EscapeForHTML and EscapeForJS on the first pass,
rather than on the second pass. This does lead to unnecessarily
longer outputs since escaping on the first pass means that
the newly included '\\' characters have to be escaped again
in the second pass. Doubly escaped strings are already an
esoteric feature of v1, so we should preserve the semantic.

Also, when encoding the raw output of MarshalJSON,
the v1 behavior was to preserve any pre-existing escape sequences,
while still escaping any characters that might need escaping
per EscapeForHTML and EscapeForJS. Replicate this behavior.
  • Loading branch information
dsnet committed Dec 23, 2024
1 parent 5815faa commit 78c0b90
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 15 deletions.
8 changes: 4 additions & 4 deletions arshal_default.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,11 @@ func makeStringArshaler(t reflect.Type) *arshaler {
}

if mo.Flags.Get(jsonflags.StringifyBoolsAndStrings) {
b, err := jsontext.AppendQuote(nil, s) // only fails for invalid UTF-8
q, _ := jsontext.AppendQuote(nil, b) // cannot fail since b is valid UTF-8
if err != nil && !xe.Flags.Get(jsonflags.AllowInvalidUTF8) {
return newMarshalErrorBefore(enc, t, err)
b, err := jsonwire.AppendQuote(nil, s, &mo.Flags)
if err != nil {
return newMarshalErrorBefore(enc, t, &jsontext.SyntacticError{Err: err})
}
q, _ := jsontext.AppendQuote(nil, b) // cannot fail since b is valid UTF-8
return enc.WriteValue(q)
}
return enc.WriteToken(jsontext.String(s))
Expand Down
3 changes: 2 additions & 1 deletion internal/jsonflags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ const (
EscapeForHTML |
EscapeForJS |
EscapeInvalidUTF8 |
PreserveRawStrings |
Deterministic |
FormatNilMapAsNull |
FormatNilSliceAsNull |
Expand Down Expand Up @@ -84,7 +85,7 @@ const (
AllowInvalidUTF8 // encode or decode
WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal
OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite
PreserveRawStrings // encode only; for internal use by jsontext.Value.Canonicalize
PreserveRawStrings // encode only; exposed in v1 and also used by jsontext.Value.Canonicalize
CanonicalizeNumbers // encode only; for internal use by jsontext.Value.Canonicalize
EscapeForHTML // encode only
EscapeForJS // encode only
Expand Down
45 changes: 38 additions & 7 deletions internal/jsonwire/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,18 +154,49 @@ func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error
if err != nil {
return dst, n, err
}
isCanonical := !flags.Get(jsonflags.EscapeForHTML | jsonflags.EscapeForJS | jsonflags.EscapeInvalidUTF8)
if flags.Get(jsonflags.PreserveRawStrings) || (isCanonical && valFlags.IsCanonical()) {

// If the output requires no special escapes, and the input
// is already in canonical form or should be preserved verbatim,
// then directly copy the input to the output.
if !flags.Get(jsonflags.EscapeForHTML|jsonflags.EscapeForJS) &&
(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
dst = append(dst, src[:n]...) // copy the string verbatim
return dst, n, nil
}

// TODO: Implement a direct, raw-to-raw reformat for strings.
// If the escapeRune option would have resulted in no changes to the output,
// it would be faster to simply append src to dst without going through
// an intermediary representation in a separate buffer.
// If the input should be preserved verbatim, we still need to
// respect the EscapeForHTML and EscapeForJS options.
// Note that EscapeInvalidUTF8 is not respected.
// This logic ensures that pre-escaped sequences remained escaped.
if flags.Get(jsonflags.PreserveRawStrings) {
var i, lastAppendIndex int
for i < n {
if c := src[i]; c < utf8.RuneSelf {
if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
dst = append(dst, src[lastAppendIndex:i]...)
dst = appendEscapedASCII(dst, c)
lastAppendIndex = i + 1
}
i++
} else {
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:])))
if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
dst = append(dst, src[lastAppendIndex:i]...)
dst = appendEscapedUnicode(dst, r)
lastAppendIndex = i + rn
}
i += rn
}
}
return append(dst, src[lastAppendIndex:n]...), n, nil
}

// The input contains characters that might need escaping,
// unnecessary escape sequences, or invalid UTF-8.
// Perform a round-trip unquote and quote to properly reformat
// these sequences according the current flags.
b, _ := AppendUnquote(nil, src[:n])
dst, _ = AppendQuote(dst, string(b), flags)
dst, _ = AppendQuote(dst, b, flags)
return dst, n, nil
}

Expand Down
2 changes: 2 additions & 0 deletions jsontext/value.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ func (v *Value) reformat(canonical, multiline bool, prefix, indent string) error
eo.Flags.Set(jsonflags.AllowInvalidUTF8 | 1)
eo.Flags.Set(jsonflags.AllowDuplicateNames | 1)
eo.Flags.Set(jsonflags.PreserveRawStrings | 1)
eo.Flags.Set(jsonflags.EscapeForHTML | 0) // ensure strings are preserved
eo.Flags.Set(jsonflags.EscapeForJS | 0) // ensure strings are preserved
if multiline {
eo.Flags.Set(jsonflags.Multiline | 1)
eo.Flags.Set(jsonflags.SpaceAfterColon | 1)
Expand Down
3 changes: 0 additions & 3 deletions v1/failing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ TestNilMarshal
TestNilMarshal/#08
TestNilMarshal/#11
TestNilMarshalerTextMapKey
TestEncoderSetEscapeHTML
TestEncoderSetEscapeHTML/stringOption
TestRawMessage
TestStringOption
TestStringOption/Unmarshal/Null/v1
TestStringOption/Unmarshal/Deep/v1
Expand Down
17 changes: 17 additions & 0 deletions v1/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type Options = jsonopts.Options
// - [IgnoreStructErrors]
// - [MatchCaseSensitiveDelimiter]
// - [OmitEmptyWithLegacyDefinition]
// - [PreserveRawStrings]
// - [RejectFloatOverflow]
// - [ReportLegacyErrorValues]
// - [StringifyWithLegacySemantics]
Expand Down Expand Up @@ -168,6 +169,22 @@ func OmitEmptyWithLegacyDefinition(v bool) Options {
}
}

// PreserveRawStrings specifies that raw JSON string values passed to
// [jsontext.Encoder.WriteValue] and [jsontext.Encoder.WriteToken]
// preserve their original encoding.
// However, characters that still need escaping according to
// [jsontext.EscapeForHTML] and [jsontext.EscapeForJS] are escaped.
//
// This only affects encoding and is ignored when decoding.
// The v1 default is true.
func PreserveRawStrings(v bool) Options {
if v {
return jsonflags.PreserveRawStrings | 1
} else {
return jsonflags.PreserveRawStrings | 0
}
}

// RejectFloatOverflow specifies that unmarshaling a JSON number that
// exceeds the maximum representation of a Go float32 or float64
// results in an error, rather than succeeding with the floating-point values
Expand Down

0 comments on commit 78c0b90

Please sign in to comment.