diff --git a/arshal_default.go b/arshal_default.go index c6c5c96..5fa055f 100644 --- a/arshal_default.go +++ b/arshal_default.go @@ -207,11 +207,11 @@ func makeStringArshaler(t reflect.Type) *arshaler { } if mo.Flags.Get(jsonflags.StringifyBoolsAndStrings) { - b, err := jsontext.AppendQuote(nil, s) // only fails for invalid UTF-8 - q, _ := jsontext.AppendQuote(nil, b) // cannot fail since b is valid UTF-8 - if err != nil && !xe.Flags.Get(jsonflags.AllowInvalidUTF8) { - return newMarshalErrorBefore(enc, t, err) + b, err := jsonwire.AppendQuote(nil, s, &mo.Flags) + if err != nil { + return newMarshalErrorBefore(enc, t, &jsontext.SyntacticError{Err: err}) } + q, _ := jsontext.AppendQuote(nil, b) // cannot fail since b is valid UTF-8 return enc.WriteValue(q) } return enc.WriteToken(jsontext.String(s)) diff --git a/internal/jsonflags/flags.go b/internal/jsonflags/flags.go index 7f094e6..d32debf 100644 --- a/internal/jsonflags/flags.go +++ b/internal/jsonflags/flags.go @@ -51,6 +51,7 @@ const ( EscapeForHTML | EscapeForJS | EscapeInvalidUTF8 | + PreserveRawStrings | Deterministic | FormatNilMapAsNull | FormatNilSliceAsNull | @@ -84,7 +85,7 @@ const ( AllowInvalidUTF8 // encode or decode WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite - PreserveRawStrings // encode only; for internal use by jsontext.Value.Canonicalize + PreserveRawStrings // encode only; exposed in v1 and also used by jsontext.Value.Canonicalize CanonicalizeNumbers // encode only; for internal use by jsontext.Value.Canonicalize EscapeForHTML // encode only EscapeForJS // encode only diff --git a/internal/jsonwire/encode.go b/internal/jsonwire/encode.go index 66aae06..3a235a2 100644 --- a/internal/jsonwire/encode.go +++ b/internal/jsonwire/encode.go @@ -154,18 +154,49 @@ func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error if err != nil { return 
dst, n, err } - isCanonical := !flags.Get(jsonflags.EscapeForHTML | jsonflags.EscapeForJS | jsonflags.EscapeInvalidUTF8) - if flags.Get(jsonflags.PreserveRawStrings) || (isCanonical && valFlags.IsCanonical()) { + + // If the output requires no special escapes, and the input + // is already in canonical form or should be preserved verbatim, + // then directly copy the input to the output. + if !flags.Get(jsonflags.EscapeForHTML|jsonflags.EscapeForJS) && + (valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) { dst = append(dst, src[:n]...) // copy the string verbatim return dst, n, nil } - // TODO: Implement a direct, raw-to-raw reformat for strings. - // If the escapeRune option would have resulted in no changes to the output, - // it would be faster to simply append src to dst without going through - // an intermediary representation in a separate buffer. + // If the input should be preserved verbatim, we still need to + // respect the EscapeForHTML and EscapeForJS options. + // Note that EscapeInvalidUTF8 is not respected. + // This logic ensures that pre-escaped sequences remain escaped. + if flags.Get(jsonflags.PreserveRawStrings) { + var i, lastAppendIndex int + for i < n { + if c := src[i]; c < utf8.RuneSelf { + if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) { + dst = append(dst, src[lastAppendIndex:i]...) + dst = appendEscapedASCII(dst, c) + lastAppendIndex = i + 1 + } + i++ + } else { + r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[i:]))) + if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) { + dst = append(dst, src[lastAppendIndex:i]...) + dst = appendEscapedUnicode(dst, r) + lastAppendIndex = i + rn + } + i += rn + } + } + return append(dst, src[lastAppendIndex:n]...), n, nil + } + + // The input contains characters that might need escaping, + // unnecessary escape sequences, or invalid UTF-8. 
+ // Perform a round-trip unquote and quote to properly reformat + // these sequences according to the current flags. b, _ := AppendUnquote(nil, src[:n]) - dst, _ = AppendQuote(dst, string(b), flags) + dst, _ = AppendQuote(dst, b, flags) return dst, n, nil } diff --git a/jsontext/value.go b/jsontext/value.go index e294ee0..97c97b1 100644 --- a/jsontext/value.go +++ b/jsontext/value.go @@ -162,6 +162,8 @@ func (v *Value) reformat(canonical, multiline bool, prefix, indent string) error eo.Flags.Set(jsonflags.AllowInvalidUTF8 | 1) eo.Flags.Set(jsonflags.AllowDuplicateNames | 1) eo.Flags.Set(jsonflags.PreserveRawStrings | 1) + eo.Flags.Set(jsonflags.EscapeForHTML | 0) // ensure strings are preserved + eo.Flags.Set(jsonflags.EscapeForJS | 0) // ensure strings are preserved if multiline { eo.Flags.Set(jsonflags.Multiline | 1) eo.Flags.Set(jsonflags.SpaceAfterColon | 1) diff --git a/v1/failing.txt b/v1/failing.txt index 623a34f..b40a9d9 100644 --- a/v1/failing.txt +++ b/v1/failing.txt @@ -21,9 +21,6 @@ TestNilMarshal TestNilMarshal/#08 TestNilMarshal/#11 TestNilMarshalerTextMapKey -TestEncoderSetEscapeHTML -TestEncoderSetEscapeHTML/stringOption -TestRawMessage TestStringOption TestStringOption/Unmarshal/Null/v1 TestStringOption/Unmarshal/Deep/v1 diff --git a/v1/options.go b/v1/options.go index a626723..a7ee4d1 100644 --- a/v1/options.go +++ b/v1/options.go @@ -39,6 +39,7 @@ type Options = jsonopts.Options // - [IgnoreStructErrors] // - [MatchCaseSensitiveDelimiter] // - [OmitEmptyWithLegacyDefinition] +// - [PreserveRawStrings] // - [RejectFloatOverflow] // - [ReportLegacyErrorValues] // - [StringifyWithLegacySemantics] @@ -168,6 +169,22 @@ func OmitEmptyWithLegacyDefinition(v bool) Options { } } +// PreserveRawStrings specifies that raw JSON string values passed to +// [jsontext.Encoder.WriteValue] and [jsontext.Encoder.WriteToken] +// preserve their original encoding. 
+// However, characters that still need escaping according to +// [jsontext.EscapeForHTML] and [jsontext.EscapeForJS] are escaped. +// +// This only affects encoding and is ignored when decoding. +// The v1 default is true. +func PreserveRawStrings(v bool) Options { + if v { + return jsonflags.PreserveRawStrings | 1 + } else { + return jsonflags.PreserveRawStrings | 0 + } +} + // RejectFloatOverflow specifies that unmarshaling a JSON number that // exceeds the maximum representation of a Go float32 or float64 // results in an error, rather than succeeding with the floating-point values