diff --git a/v1/bench_test.go b/v1/bench_test.go index 43a7963..fae5bc9 100644 --- a/v1/bench_test.go +++ b/v1/bench_test.go @@ -12,13 +12,8 @@ package json import ( "bytes" - "fmt" "io" - "reflect" - "regexp" - "runtime" "strings" - "sync" "testing" "github.com/go-json-experiment/json/internal/jsontest" @@ -451,67 +446,6 @@ func BenchmarkUnmapped(b *testing.B) { }) } -func BenchmarkTypeFieldsCache(b *testing.B) { - b.ReportAllocs() - var maxTypes int = 1e6 - - // Dynamically generate many new types. - types := make([]reflect.Type, maxTypes) - fs := []reflect.StructField{{ - Type: reflect.TypeFor[string](), - Index: []int{0}, - }} - for i := range types { - fs[0].Name = fmt.Sprintf("TypeFieldsCache%d", i) - types[i] = reflect.StructOf(fs) - } - - // clearClear clears the cache. Other JSON operations, must not be running. - clearCache := func() { - fieldCache = sync.Map{} - } - - // MissTypes tests the performance of repeated cache misses. - // This measures the time to rebuild a cache of size nt. - for nt := 1; nt <= maxTypes; nt *= 10 { - ts := types[:nt] - b.Run(fmt.Sprintf("MissTypes%d", nt), func(b *testing.B) { - nc := runtime.GOMAXPROCS(0) - for i := 0; i < b.N; i++ { - clearCache() - var wg sync.WaitGroup - for j := 0; j < nc; j++ { - wg.Add(1) - go func(j int) { - for _, t := range ts[(j*len(ts))/nc : ((j+1)*len(ts))/nc] { - cachedTypeFields(t) - } - wg.Done() - }(j) - } - wg.Wait() - } - }) - } - - // HitTypes tests the performance of repeated cache hits. - // This measures the average time of each cache lookup. - for nt := 1; nt <= maxTypes; nt *= 10 { - // Pre-warm a cache of size nt. - clearCache() - for _, t := range types[:nt] { - cachedTypeFields(t) - } - b.Run(fmt.Sprintf("HitTypes%d", nt), func(b *testing.B) { - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - cachedTypeFields(types[0]) - } - }) - }) - } -} - func BenchmarkEncodeMarshaler(b *testing.B) { b.ReportAllocs() @@ -545,29 +479,3 @@ func BenchmarkEncoderEncode(b *testing.B) { } }) } - -func BenchmarkNumberIsValid(b *testing.B) { - s := "-61657.61667E+61673" - for i := 0; i < b.N; i++ { - isValidNumber(s) - } -} - -func BenchmarkNumberIsValidRegexp(b *testing.B) { - var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) - s := "-61657.61667E+61673" - for i := 0; i < b.N; i++ { - jsonNumberRegexp.MatchString(s) - } -} - -func BenchmarkUnmarshalNumber(b *testing.B) { - b.ReportAllocs() - data := []byte(`"-61657.61667E+61673"`) - var number Number - for i := 0; i < b.N; i++ { - if err := Unmarshal(data, &number); err != nil { - b.Fatal("Unmarshal:", err) - } - } -} diff --git a/v1/decode.go b/v1/decode.go index 9810229..d66936f 100644 --- a/v1/decode.go +++ b/v1/decode.go @@ -8,16 +8,13 @@ package json import ( - "encoding" - "encoding/base64" "fmt" "reflect" "strconv" - "strings" - "unicode" - "unicode/utf16" - "unicode/utf8" - _ "unsafe" // for linkname + + jsonv2 "github.com/go-json-experiment/json" + "github.com/go-json-experiment/json/internal/jsonwire" + "github.com/go-json-experiment/json/jsontext" ) // Unmarshal parses the JSON-encoded data and stores the result @@ -95,17 +92,7 @@ import ( // Instead, they are replaced by the Unicode replacement // character U+FFFD. func Unmarshal(data []byte, v any) error { - // Check for well-formedness. - // Avoids filling out half a data structure - // before discovering a JSON syntax error. - var d decodeState - err := checkValid(data, &d.scan) - if err != nil { - return err - } - - d.init(data) - return d.unmarshal(v) + return jsonv2.Unmarshal(data, v, DefaultOptionsV1()) } // Unmarshaler is the interface implemented by types @@ -116,9 +103,7 @@ func Unmarshal(data []byte, v any) error { // // By convention, to approximate the behavior of [Unmarshal] itself, // Unmarshalers implement UnmarshalJSON([]byte("null")) as a no-op. -type Unmarshaler interface { - UnmarshalJSON([]byte) error -} +type Unmarshaler = jsonv2.UnmarshalerV1 // An UnmarshalTypeError describes a JSON value that was // not appropriate for a value of a specific Go type. @@ -168,23 +153,6 @@ func (e *InvalidUnmarshalError) Error() string { return "json: Unmarshal(nil " + e.Type.String() + ")" } -func (d *decodeState) unmarshal(v any) error { - rv := reflect.ValueOf(v) - if rv.Kind() != reflect.Pointer || rv.IsNil() { - return &InvalidUnmarshalError{reflect.TypeOf(v)} - } - - d.scan.reset() - d.scanWhile(scanSkipSpace) - // We decode rv not rv.Elem because the Unmarshaler interface - // test must be applied at the top level of the value. - err := d.value(rv) - if err != nil { - return d.addErrorContext(err) - } - return d.savedError -} - // A Number represents a JSON number literal. type Number string @@ -201,1112 +169,54 @@ func (n Number) Int64() (int64, error) { return strconv.ParseInt(string(n), 10, 64) } -// An errorContext provides context for type errors during decoding. -type errorContext struct { - Struct reflect.Type - FieldStack []string -} - -// decodeState represents the state while decoding a JSON value. -type decodeState struct { - data []byte - off int // next read offset in data - opcode int // last read result - scan scanner - errorContext *errorContext - savedError error - useNumber bool - disallowUnknownFields bool -} - -// readIndex returns the position of the last byte read. -func (d *decodeState) readIndex() int { - return d.off - 1 -} - -// phasePanicMsg is used as a panic message when we end up with something that -// shouldn't happen. It can indicate a bug in the JSON decoder, or that -// something is editing the data slice while the decoder executes. -const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?" - -func (d *decodeState) init(data []byte) *decodeState { - d.data = data - d.off = 0 - d.savedError = nil - if d.errorContext != nil { - d.errorContext.Struct = nil - // Reuse the allocated space for the FieldStack slice. - d.errorContext.FieldStack = d.errorContext.FieldStack[:0] - } - return d -} - -// saveError saves the first err it is called with, -// for reporting at the end of the unmarshal. -func (d *decodeState) saveError(err error) { - if d.savedError == nil { - d.savedError = d.addErrorContext(err) - } -} - -// addErrorContext returns a new error enhanced with information from d.errorContext -func (d *decodeState) addErrorContext(err error) error { - if d.errorContext != nil && (d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0) { - switch err := err.(type) { - case *UnmarshalTypeError: - err.Struct = d.errorContext.Struct.Name() - fieldStack := d.errorContext.FieldStack - if err.Field != "" { - fieldStack = append(fieldStack, err.Field) - } - err.Field = strings.Join(fieldStack, ".") - } - } - return err -} - -// skip scans to the end of what was started. -func (d *decodeState) skip() { - s, data, i := &d.scan, d.data, d.off - depth := len(s.parseState) - for { - op := s.step(s, data[i]) - i++ - if len(s.parseState) < depth { - d.off = i - d.opcode = op - return - } - } -} - -// scanNext processes the byte at d.data[d.off]. -func (d *decodeState) scanNext() { - if d.off < len(d.data) { - d.opcode = d.scan.step(&d.scan, d.data[d.off]) - d.off++ - } else { - d.opcode = d.scan.eof() - d.off = len(d.data) + 1 // mark processed EOF with len+1 - } -} - -// scanWhile processes bytes in d.data[d.off:] until it -// receives a scan code not equal to op. -func (d *decodeState) scanWhile(op int) { - s, data, i := &d.scan, d.data, d.off - for i < len(data) { - newOp := s.step(s, data[i]) - i++ - if newOp != op { - d.opcode = newOp - d.off = i - return - } - } - - d.off = len(data) + 1 // mark processed EOF with len+1 - d.opcode = d.scan.eof() -} +var numberType = reflect.TypeFor[Number]() -// rescanLiteral is similar to scanWhile(scanContinue), but it specialises the -// common case where we're decoding a literal. The decoder scans the input -// twice, once for syntax errors and to check the length of the value, and the -// second to perform the decoding. -// -// Only in the second step do we use decodeState to tokenize literals, so we -// know there aren't any syntax errors. We can take advantage of that knowledge, -// and scan a literal's bytes much more quickly. -func (d *decodeState) rescanLiteral() { - data, i := d.data, d.off -Switch: - switch data[i-1] { - case '"': // string - for ; i < len(data); i++ { - switch data[i] { - case '\\': - i++ // escaped char - case '"': - i++ // tokenize the closing quote too - break Switch - } - } - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number - for ; i < len(data); i++ { - switch data[i] { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - '.', 'e', 'E', '+', '-': - default: - break Switch - } - } - case 't': // true - i += len("rue") - case 'f': // false - i += len("alse") - case 'n': // null - i += len("ull") - } - if i < len(data) { - d.opcode = stateEndValue(&d.scan, data[i]) +// MarshalJSONV2 implements [jsonv2.MarshalerV2]. +func (n Number) MarshalJSONV2(enc *jsontext.Encoder, opts jsonv2.Options) error { + if n == "" { + return enc.WriteToken(jsontext.Int(0)) + } + var num []byte + val := enc.UnusedBuffer() + if stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers); stringify { + val = append(val, '"') + val = append(val, n...) + val = append(val, '"') + num = val[len(`"`) : len(val)-len(`"`)] } else { - d.opcode = scanEnd + val = append(val, n...) + num = val } - d.off = i + 1 -} - -// value consumes a JSON value from d.data[d.off-1:], decoding into v, and -// reads the following byte ahead. If v is invalid, the value is discarded. -// The first byte of the value has been read already. -func (d *decodeState) value(v reflect.Value) error { - switch d.opcode { - default: - panic(phasePanicMsg) - - case scanBeginArray: - if v.IsValid() { - if err := d.array(v); err != nil { - return err - } - } else { - d.skip() - } - d.scanNext() - - case scanBeginObject: - if v.IsValid() { - if err := d.object(v); err != nil { - return err - } - } else { - d.skip() - } - d.scanNext() - - case scanBeginLiteral: - // All bytes inside literal return scanContinue op code. - start := d.readIndex() - d.rescanLiteral() - - if v.IsValid() { - if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil { - return err - } - } - } - return nil -} - -type unquotedValue struct{} - -// valueQuoted is like value but decodes a -// quoted string literal or literal null into an interface value. -// If it finds anything other than a quoted string literal or null, -// valueQuoted returns unquotedValue{}. -func (d *decodeState) valueQuoted() any { - switch d.opcode { - default: - panic(phasePanicMsg) - - case scanBeginArray, scanBeginObject: - d.skip() - d.scanNext() - - case scanBeginLiteral: - v := d.literalInterface() - switch v.(type) { - case nil, string: - return v - } + if n, err := jsonwire.ConsumeNumber(num); n != len(num) || err != nil { + return fmt.Errorf("cannot parse %q as JSON number: %w", val, strconv.ErrSyntax) } - return unquotedValue{} + return enc.WriteValue(val) } -// indirect walks down v allocating pointers as needed, -// until it gets to a non-pointer. -// If it encounters an Unmarshaler, indirect stops and returns that. -// If decodingNull is true, indirect stops at the first settable pointer so it -// can be set to nil. -func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { - // Issue #24153 indicates that it is generally not a guaranteed property - // that you may round-trip a reflect.Value by calling Value.Addr().Elem() - // and expect the value to still be settable for values derived from - // unexported embedded struct fields. - // - // The logic below effectively does this when it first addresses the value - // (to satisfy possible pointer methods) and continues to dereference - // subsequent pointers as necessary. - // - // After the first round-trip, we set v back to the original value to - // preserve the original RW flags contained in reflect.Value. - v0 := v - haveAddr := false - - // If v is a named type and is addressable, - // start with its address, so that if the type has pointer methods, - // we find them. - if v.Kind() != reflect.Pointer && v.Type().Name() != "" && v.CanAddr() { - haveAddr = true - v = v.Addr() - } - for { - // Load value from interface, but only if the result will be - // usefully addressable. - if v.Kind() == reflect.Interface && !v.IsNil() { - e := v.Elem() - if e.Kind() == reflect.Pointer && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Pointer) { - haveAddr = false - v = e - continue - } - } - - if v.Kind() != reflect.Pointer { - break - } - - if decodingNull && v.CanSet() { - break - } - - // Prevent infinite loop if v is an interface pointing to its own address: - // var v any - // v = &v - if v.Elem().Kind() == reflect.Interface && v.Elem().Elem().Equal(v) { - v = v.Elem() - break - } - if v.IsNil() { - v.Set(reflect.New(v.Type().Elem())) - } - if v.Type().NumMethod() > 0 && v.CanInterface() { - if u, ok := v.Interface().(Unmarshaler); ok { - return u, nil, reflect.Value{} - } - if !decodingNull { - if u, ok := v.Interface().(encoding.TextUnmarshaler); ok { - return nil, u, reflect.Value{} - } - } - } - - if haveAddr { - v = v0 // restore original value after round-trip Value.Addr().Elem() - haveAddr = false - } else { - v = v.Elem() - } - } - return nil, nil, v -} - -// array consumes an array from d.data[d.off-1:], decoding into v. -// The first byte of the array ('[') has been read already. -func (d *decodeState) array(v reflect.Value) error { - // Check for unmarshaler. - u, ut, pv := indirect(v, false) - if u != nil { - start := d.readIndex() - d.skip() - return u.UnmarshalJSON(d.data[start:d.off]) - } - if ut != nil { - d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) - d.skip() - return nil - } - v = pv - - // Check type of target. - switch v.Kind() { - case reflect.Interface: - if v.NumMethod() == 0 { - // Decoding into nil interface? Switch to non-reflect code. - ai := d.arrayInterface() - v.Set(reflect.ValueOf(ai)) - return nil - } - // Otherwise it's invalid. - fallthrough - default: - d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) - d.skip() - return nil - case reflect.Array, reflect.Slice: - break - } - - i := 0 - for { - // Look ahead for ] - can only happen on first iteration. - d.scanWhile(scanSkipSpace) - if d.opcode == scanEndArray { - break - } - - // Expand slice length, growing the slice if necessary. - if v.Kind() == reflect.Slice { - if i >= v.Cap() { - v.Grow(1) - } - if i >= v.Len() { - v.SetLen(i + 1) - } - } - - if i < v.Len() { - // Decode into element. - if err := d.value(v.Index(i)); err != nil { - return err - } - } else { - // Ran out of fixed array: skip. - if err := d.value(reflect.Value{}); err != nil { - return err - } - } - i++ - - // Next token must be , or ]. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode == scanEndArray { - break - } - if d.opcode != scanArrayValue { - panic(phasePanicMsg) - } - } - - if i < v.Len() { - if v.Kind() == reflect.Array { - for ; i < v.Len(); i++ { - v.Index(i).SetZero() // zero remainder of array - } - } else { - v.SetLen(i) // truncate the slice - } - } - if i == 0 && v.Kind() == reflect.Slice { - v.Set(reflect.MakeSlice(v.Type(), 0, 0)) - } - return nil -} - -var nullLiteral = []byte("null") -var textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() - -// object consumes an object from d.data[d.off-1:], decoding into v. -// The first byte ('{') of the object has been read already. -func (d *decodeState) object(v reflect.Value) error { - // Check for unmarshaler. - u, ut, pv := indirect(v, false) - if u != nil { - start := d.readIndex() - d.skip() - return u.UnmarshalJSON(d.data[start:d.off]) - } - if ut != nil { - d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) - d.skip() - return nil - } - v = pv - t := v.Type() - - // Decoding into nil interface? Switch to non-reflect code. - if v.Kind() == reflect.Interface && v.NumMethod() == 0 { - oi := d.objectInterface() - v.Set(reflect.ValueOf(oi)) - return nil - } - - var fields structFields - - // Check type of target: - // struct or - // map[T1]T2 where T1 is string, an integer type, - // or an encoding.TextUnmarshaler - switch v.Kind() { - case reflect.Map: - // Map key must either have string kind, have an integer kind, - // or be an encoding.TextUnmarshaler. - switch t.Key().Kind() { - case reflect.String, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - default: - if !reflect.PointerTo(t.Key()).Implements(textUnmarshalerType) { - d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) - d.skip() - return nil - } - } - if v.IsNil() { - v.Set(reflect.MakeMap(t)) - } - case reflect.Struct: - fields = cachedTypeFields(t) - // ok - default: - d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) - d.skip() - return nil - } - - var mapElem reflect.Value - var origErrorContext errorContext - if d.errorContext != nil { - origErrorContext = *d.errorContext - } - - for { - // Read opening " of string key or closing }. - d.scanWhile(scanSkipSpace) - if d.opcode == scanEndObject { - // closing } - can only happen on first iteration. - break - } - if d.opcode != scanBeginLiteral { - panic(phasePanicMsg) - } - - // Read key. - start := d.readIndex() - d.rescanLiteral() - item := d.data[start:d.readIndex()] - key, ok := unquoteBytes(item) - if !ok { - panic(phasePanicMsg) - } - - // Figure out field corresponding to key. - var subv reflect.Value - destring := false // whether the value is wrapped in a string to be decoded first - - if v.Kind() == reflect.Map { - elemType := t.Elem() - if !mapElem.IsValid() { - mapElem = reflect.New(elemType).Elem() - } else { - mapElem.SetZero() - } - subv = mapElem - } else { - f := fields.byExactName[string(key)] - if f == nil { - f = fields.byFoldedName[string(foldName(key))] - } - if f != nil { - subv = v - destring = f.quoted - if d.errorContext == nil { - d.errorContext = new(errorContext) - } - for i, ind := range f.index { - if subv.Kind() == reflect.Pointer { - if subv.IsNil() { - // If a struct embeds a pointer to an unexported type, - // it is not possible to set a newly allocated value - // since the field is unexported. - // - // See https://golang.org/issue/21357 - if !subv.CanSet() { - d.saveError(fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", subv.Type().Elem())) - // Invalidate subv to ensure d.value(subv) skips over - // the JSON value without assigning it to subv. - subv = reflect.Value{} - destring = false - break - } - subv.Set(reflect.New(subv.Type().Elem())) - } - subv = subv.Elem() - } - if i < len(f.index)-1 { - d.errorContext.FieldStack = append( - d.errorContext.FieldStack, - subv.Type().Field(ind).Name, - ) - } - subv = subv.Field(ind) - } - d.errorContext.Struct = t - d.errorContext.FieldStack = append(d.errorContext.FieldStack, f.name) - } else if d.disallowUnknownFields { - d.saveError(fmt.Errorf("json: unknown field %q", key)) - } - } - - // Read : before value. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode != scanObjectKey { - panic(phasePanicMsg) - } - d.scanWhile(scanSkipSpace) - - if destring { - switch qv := d.valueQuoted().(type) { - case nil: - if err := d.literalStore(nullLiteral, subv, false); err != nil { - return err - } - case string: - if err := d.literalStore([]byte(qv), subv, true); err != nil { - return err - } - default: - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type())) - } - } else { - if err := d.value(subv); err != nil { - return err - } - } - - // Write value back to map; - // if using struct, subv points into struct already. - if v.Kind() == reflect.Map { - kt := t.Key() - var kv reflect.Value - if reflect.PointerTo(kt).Implements(textUnmarshalerType) { - kv = reflect.New(kt) - if err := d.literalStore(item, kv, true); err != nil { - return err - } - kv = kv.Elem() - } else { - switch kt.Kind() { - case reflect.String: - kv = reflect.New(kt).Elem() - kv.SetString(string(key)) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - s := string(key) - n, err := strconv.ParseInt(s, 10, 64) - if err != nil || kt.OverflowInt(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) - break - } - kv = reflect.New(kt).Elem() - kv.SetInt(n) - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - s := string(key) - n, err := strconv.ParseUint(s, 10, 64) - if err != nil || kt.OverflowUint(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) - break - } - kv = reflect.New(kt).Elem() - kv.SetUint(n) - default: - panic("json: Unexpected key type") // should never occur - } - } - if kv.IsValid() { - v.SetMapIndex(kv, subv) - } - } - - // Next token must be , or }. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.errorContext != nil { - // Reset errorContext to its original state. - // Keep the same underlying array for FieldStack, to reuse the - // space and avoid unnecessary allocs. - d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)] - d.errorContext.Struct = origErrorContext.Struct - } - if d.opcode == scanEndObject { - break - } - if d.opcode != scanObjectValue { - panic(phasePanicMsg) - } - } - return nil -} - -// convertNumber converts the number literal s to a float64 or a Number -// depending on the setting of d.useNumber. -func (d *decodeState) convertNumber(s string) (any, error) { - if d.useNumber { - return Number(s), nil - } - f, err := strconv.ParseFloat(s, 64) +// UnmarshalJSONV2 implements [jsonv2.UnmarshalerV2]. +func (n *Number) UnmarshalJSONV2(dec *jsontext.Decoder, opts jsonv2.Options) error { + val, err := dec.ReadValue() if err != nil { - return nil, &UnmarshalTypeError{Value: "number " + s, Type: reflect.TypeFor[float64](), Offset: int64(d.off)} + return err } - return f, nil -} - -var numberType = reflect.TypeFor[Number]() - -// literalStore decodes a literal stored in item into v. -// -// fromQuoted indicates whether this literal came from unwrapping a -// string from the ",string" struct tag option. this is used only to -// produce more helpful error messages. -func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool) error { - // Check for unmarshaler. - if len(item) == 0 { - // Empty string given. - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + k := val.Kind() + switch k { + case 'n': + *n = "" // TODO: Should we merge with legacy semantics? return nil - } - isNull := item[0] == 'n' // null - u, ut, pv := indirect(v, isNull) - if u != nil { - return u.UnmarshalJSON(item) - } - if ut != nil { - if item[0] != '"' { - if fromQuoted { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - return nil - } - val := "number" - switch item[0] { - case 'n': - val = "null" - case 't', 'f': - val = "bool" - } - d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) - return nil - } - s, ok := unquoteBytes(item) - if !ok { - if fromQuoted { - return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) - } - panic(phasePanicMsg) - } - return ut.UnmarshalText(s) - } - - v = pv - - switch c := item[0]; c { - case 'n': // null - // The main parser checks that only true and false can reach here, - // but if this was a quoted string input, it could be anything. - if fromQuoted && string(item) != "null" { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - break - } - switch v.Kind() { - case reflect.Interface, reflect.Pointer, reflect.Map, reflect.Slice: - v.SetZero() - // otherwise, ignore null for primitives/string - } - case 't', 'f': // true, false - value := item[0] == 't' - // The main parser checks that only true and false can reach here, - // but if this was a quoted string input, it could be anything. - if fromQuoted && string(item) != "true" && string(item) != "false" { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - break - } - switch v.Kind() { - default: - if fromQuoted { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - } else { - d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) - } - case reflect.Bool: - v.SetBool(value) - case reflect.Interface: - if v.NumMethod() == 0 { - v.Set(reflect.ValueOf(value)) - } else { - d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) - } - } - - case '"': // string - s, ok := unquoteBytes(item) - if !ok { - if fromQuoted { - return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) - } - panic(phasePanicMsg) - } - switch v.Kind() { - default: - d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) - case reflect.Slice: - if v.Type().Elem().Kind() != reflect.Uint8 { - d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) - n, err := base64.StdEncoding.Decode(b, s) - if err != nil { - d.saveError(err) - break - } - v.SetBytes(b[:n]) - case reflect.String: - t := string(s) - if v.Type() == numberType && !isValidNumber(t) { - return fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item) - } - v.SetString(t) - case reflect.Interface: - if v.NumMethod() == 0 { - v.Set(reflect.ValueOf(string(s))) - } else { - d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) - } - } - - default: // number - if c != '-' && (c < '0' || c > '9') { - if fromQuoted { - return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) - } - panic(phasePanicMsg) - } - switch v.Kind() { - default: - if v.Kind() == reflect.String && v.Type() == numberType { - // s must be a valid number, because it's - // already been tokenized. - v.SetString(string(item)) - break - } - if fromQuoted { - return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) - } - d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) - case reflect.Interface: - n, err := d.convertNumber(string(item)) - if err != nil { - d.saveError(err) - break - } - if v.NumMethod() != 0 { - d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - v.Set(reflect.ValueOf(n)) - - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - n, err := strconv.ParseInt(string(item), 10, 64) - if err != nil || v.OverflowInt(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - v.SetInt(n) - - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - n, err := strconv.ParseUint(string(item), 10, 64) - if err != nil || v.OverflowUint(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - v.SetUint(n) - - case reflect.Float32, reflect.Float64: - n, err := strconv.ParseFloat(string(item), v.Type().Bits()) - if err != nil || v.OverflowFloat(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - v.SetFloat(n) - } - } - return nil -} - -// The xxxInterface routines build up a value to be stored -// in an empty interface. They are not strictly necessary, -// but they avoid the weight of reflection in this common case. - -// valueInterface is like value but returns any. -func (d *decodeState) valueInterface() (val any) { - switch d.opcode { - default: - panic(phasePanicMsg) - case scanBeginArray: - val = d.arrayInterface() - d.scanNext() - case scanBeginObject: - val = d.objectInterface() - d.scanNext() - case scanBeginLiteral: - val = d.literalInterface() - } - return -} - -// arrayInterface is like array but returns []any. -func (d *decodeState) arrayInterface() []any { - var v = make([]any, 0) - for { - // Look ahead for ] - can only happen on first iteration. - d.scanWhile(scanSkipSpace) - if d.opcode == scanEndArray { + case '"': + if stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers); !stringify { break } - - v = append(v, d.valueInterface()) - - // Next token must be , or ]. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode == scanEndArray { - break - } - if d.opcode != scanArrayValue { - panic(phasePanicMsg) - } - } - return v -} - -// objectInterface is like object but returns map[string]any. -func (d *decodeState) objectInterface() map[string]any { - m := make(map[string]any) - for { - // Read opening " of string key or closing }. - d.scanWhile(scanSkipSpace) - if d.opcode == scanEndObject { - // closing } - can only happen on first iteration. - break - } - if d.opcode != scanBeginLiteral { - panic(phasePanicMsg) - } - - // Read string key. - start := d.readIndex() - d.rescanLiteral() - item := d.data[start:d.readIndex()] - key, ok := unquote(item) - if !ok { - panic(phasePanicMsg) - } - - // Read : before value. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode != scanObjectKey { - panic(phasePanicMsg) + verbatim := jsonwire.ConsumeSimpleString(val) == len(val) + val = jsonwire.UnquoteMayCopy(val, verbatim) + if n, err := jsonwire.ConsumeNumber(val); n != len(val) || err != nil { + return fmt.Errorf("cannot parse %q as JSON number: %w", val, strconv.ErrSyntax) } - d.scanWhile(scanSkipSpace) - - // Read value. - m[key] = d.valueInterface() - - // Next token must be , or }. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode == scanEndObject { - break - } - if d.opcode != scanObjectValue { - panic(phasePanicMsg) - } - } - return m -} - -// literalInterface consumes and returns a literal from d.data[d.off-1:] and -// it reads the following byte ahead. The first byte of the literal has been -// read already (that's how the caller knows it's a literal). -func (d *decodeState) literalInterface() any { - // All bytes inside literal return scanContinue op code. - start := d.readIndex() - d.rescanLiteral() - - item := d.data[start:d.readIndex()] - - switch c := item[0]; c { - case 'n': // null + fallthrough + case '0': + *n = Number(val) return nil - - case 't', 'f': // true, false - return c == 't' - - case '"': // string - s, ok := unquote(item) - if !ok { - panic(phasePanicMsg) - } - return s - - default: // number - if c != '-' && (c < '0' || c > '9') { - panic(phasePanicMsg) - } - n, err := d.convertNumber(string(item)) - if err != nil { - d.saveError(err) - } - return n - } -} - -// getu4 decodes \uXXXX from the beginning of s, returning the hex value, -// or it returns -1. -func getu4(s []byte) rune { - if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { - return -1 - } - var r rune - for _, c := range s[2:6] { - switch { - case '0' <= c && c <= '9': - c = c - '0' - case 'a' <= c && c <= 'f': - c = c - 'a' + 10 - case 'A' <= c && c <= 'F': - c = c - 'A' + 10 - default: - return -1 - } - r = r*16 + rune(c) - } - return r -} - -// unquote converts a quoted JSON string literal s into an actual string t. -// The rules are different than for Go, so cannot use strconv.Unquote. -func unquote(s []byte) (t string, ok bool) { - s, ok = unquoteBytes(s) - t = string(s) - return -} - -// unquoteBytes should be an internal detail, -// but widely used packages access it using linkname. -// Notable members of the hall of shame include: -// - github.com/bytedance/sonic -// -// Do not remove or change the type signature. -// See go.dev/issue/67401. -// -//go:linkname unquoteBytes -func unquoteBytes(s []byte) (t []byte, ok bool) { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { - return - } - s = s[1 : len(s)-1] - - // Check for unusual characters. If there are none, - // then no unquoting is needed, so return a slice of the - // original bytes. - r := 0 - for r < len(s) { - c := s[r] - if c == '\\' || c == '"' || c < ' ' { - break - } - if c < utf8.RuneSelf { - r++ - continue - } - rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { - break - } - r += size - } - if r == len(s) { - return s, true - } - - b := make([]byte, len(s)+2*utf8.UTFMax) - w := copy(b, s[0:r]) - for r < len(s) { - // Out of room? Can only happen if s is full of - // malformed UTF-8 and we're replacing each - // byte with RuneError. - if w >= len(b)-2*utf8.UTFMax { - nb := make([]byte, (len(b)+utf8.UTFMax)*2) - copy(nb, b[0:w]) - b = nb - } - switch c := s[r]; { - case c == '\\': - r++ - if r >= len(s) { - return - } - switch s[r] { - default: - return - case '"', '\\', '/', '\'': - b[w] = s[r] - r++ - w++ - case 'b': - b[w] = '\b' - r++ - w++ - case 'f': - b[w] = '\f' - r++ - w++ - case 'n': - b[w] = '\n' - r++ - w++ - case 'r': - b[w] = '\r' - r++ - w++ - case 't': - b[w] = '\t' - r++ - w++ - case 'u': - r-- - rr := getu4(s[r:]) - if rr < 0 { - return - } - r += 6 - if utf16.IsSurrogate(rr) { - rr1 := getu4(s[r:]) - if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { - // A valid pair; consume. - r += 6 - w += utf8.EncodeRune(b[w:], dec) - break - } - // Invalid surrogate; fall back to replacement rune. - rr = unicode.ReplacementChar - } - w += utf8.EncodeRune(b[w:], rr) - } - - // Quote, control characters are invalid. - case c == '"', c < ' ': - return - - // ASCII - case c < utf8.RuneSelf: - b[w] = c - r++ - w++ - - // Coerce to well-formed UTF-8. - default: - rr, size := utf8.DecodeRune(s[r:]) - r += size - w += utf8.EncodeRune(b[w:], rr) - } } - return b[0:w], true + return &jsonv2.SemanticError{JSONKind: k, GoType: numberType} } diff --git a/v1/decode_test.go b/v1/decode_test.go index de09fae..a1a4e58 100644 --- a/v1/decode_test.go +++ b/v1/decode_test.go @@ -1107,6 +1107,7 @@ func TestMarshalInvalidUTF8(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) got, err := Marshal(tt.in) if string(got) != tt.want || err != nil { t.Errorf("%s: Marshal(%q):\n\tgot: (%q, %v)\n\twant: (%q, nil)", tt.Where, tt.in, got, err, tt.want) @@ -1128,6 +1129,7 @@ func TestMarshalNumberZeroVal(t *testing.T) { } func TestMarshalEmbeds(t *testing.T) { + skipKnownFailure(t) top := &Top{ Level0: 1, Embed0: Embed0{ @@ -1198,9 +1200,9 @@ func equalError(a, b error) bool { func TestUnmarshal(t *testing.T) { for _, tt := range unmarshalTests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) in := []byte(tt.in) - var scan scanner - if err := checkValid(in, &scan); err != nil { + if err := checkValid(in); err != nil { if !equalError(err, tt.err) { t.Fatalf("%s: checkValid error: %#v", tt.Where, err) } @@ -1401,6 +1403,7 @@ func TestErrorMessageFromMisusedString(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) r := strings.NewReader(tt.in) var s WrongString err := NewDecoder(r).Decode(&s) @@ -1777,6 +1780,7 @@ func TestEmptyString(t *testing.T) { // Test that a null for ,string is not replaced with the previous quoted string (issue 7046). // It should also not be an error (issue 2540, issue 8587). func TestNullString(t *testing.T) { + skipKnownFailure(t) type T struct { A int `json:",string"` B int `json:",string"` @@ -1832,6 +1836,7 @@ func TestInterfaceSet(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) b := struct{ X any }{tt.pre} blob := `{"X":` + tt.json + `}` if err := Unmarshal([]byte(blob), &b); err != nil { @@ -1882,6 +1887,7 @@ type NullTest struct { // JSON null values should be ignored for primitives and string values instead of resulting in an error. // Issue 2540 func TestUnmarshalNulls(t *testing.T) { + skipKnownFailure(t) // Unmarshal docs: // The JSON null value unmarshals into an interface, map, pointer, or slice // by setting that Go value to nil. Because null is often used in JSON to mean @@ -2087,6 +2093,7 @@ func TestUnmarshalTypeError(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) err := Unmarshal([]byte(tt.in), tt.dest) if _, ok := err.(*UnmarshalTypeError); !ok { t.Errorf("%s: Unmarshal(%#q, %T):\n\tgot: %T\n\twant: %T", @@ -2113,6 +2120,7 @@ func TestUnmarshalSyntax(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) err := Unmarshal([]byte(tt.in), &x) if _, ok := err.(*SyntaxError); !ok { t.Errorf("%s: Unmarshal(%#q, any):\n\tgot: %T\n\twant: %T", @@ -2133,6 +2141,7 @@ type unexportedFields struct { } func TestUnmarshalUnexported(t *testing.T) { + skipKnownFailure(t) input := `{"Name": "Bob", "m": {"x": 123}, "m2": {"y": 456}, "abcd": {"z": 789}, "s": [2, 3]}` want := &unexportedFields{Name: "Bob"} @@ -2228,6 +2237,7 @@ func TestPrefilled(t *testing.T) { }} for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) ptrstr := fmt.Sprintf("%v", tt.ptr) err := Unmarshal([]byte(tt.in), tt.ptr) // tt.ptr edited here if err != nil { @@ -2257,6 +2267,7 @@ func TestInvalidUnmarshal(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) switch gotErr := Unmarshal([]byte(tt.in), tt.v); { case gotErr == nil: t.Fatalf("%s: Unmarshal error: got nil, want non-nil", tt.Where) @@ -2422,6 +2433,7 @@ func TestUnmarshalEmbeddedUnexported(t *testing.T) { }} for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) err := Unmarshal([]byte(tt.in), tt.ptr) if !equalError(err, tt.err) { t.Errorf("%s: Unmarshal error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err) @@ -2461,6 +2473,7 @@ func TestUnmarshalErrorAfterMultipleJSON(t *testing.T) { }} for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) dec := NewDecoder(strings.NewReader(tt.in)) var err error for err == nil { @@ -2491,6 +2504,7 @@ func TestUnmarshalPanic(t *testing.T) { // The decoder used to hang if decoding into an interface pointing to its own address. // See golang.org/issues/31740. func TestUnmarshalRecursivePointer(t *testing.T) { + t.Skip("TODO: implement cycle detection in v2?") var v any v = &v data := []byte(`{"a": "b"}`) diff --git a/v1/encode.go b/v1/encode.go index d53e862..85a8186 100644 --- a/v1/encode.go +++ b/v1/encode.go @@ -11,20 +11,10 @@ package json import ( - "bytes" - "cmp" - "encoding" - "encoding/base64" - "fmt" - "math" "reflect" - "slices" "strconv" - "strings" - "sync" - "unicode" - "unicode/utf8" - _ "unsafe" // for linkname + + jsonv2 "github.com/go-json-experiment/json" ) // Marshal returns the JSON encoding of v. @@ -169,16 +159,7 @@ import ( // handle them. Passing cyclic structures to Marshal will result in // an error. func Marshal(v any) ([]byte, error) { - e := newEncodeState() - defer encodeStatePool.Put(e) - - err := e.marshal(v, encOpts{escapeHTML: true}) - if err != nil { - return nil, err - } - buf := append([]byte(nil), e.Bytes()...) - - return buf, nil + return jsonv2.Marshal(v, DefaultOptionsV1()) } // MarshalIndent is like [Marshal] but applies [Indent] to format the output. @@ -189,19 +170,16 @@ func MarshalIndent(v any, prefix, indent string) ([]byte, error) { if err != nil { return nil, err } - b2 := make([]byte, 0, indentGrowthFactor*len(b)) - b2, err = appendIndent(b2, b, prefix, indent) + b, err = appendIndent(nil, b, prefix, indent) if err != nil { return nil, err } - return b2, nil + return b, nil } // Marshaler is the interface implemented by types that // can marshal themselves into valid JSON. -type Marshaler interface { - MarshalJSON() ([]byte, error) -} +type Marshaler = jsonv2.MarshalerV1 // An UnsupportedTypeError is returned by [Marshal] when attempting // to encode an unsupported value type. @@ -258,1084 +236,3 @@ func (e *MarshalerError) Error() string { // Unwrap returns the underlying error. func (e *MarshalerError) Unwrap() error { return e.Err } - -const hex = "0123456789abcdef" - -// An encodeState encodes JSON into a bytes.Buffer. -type encodeState struct { - bytes.Buffer // accumulated output - - // Keep track of what pointers we've seen in the current recursive call - // path, to avoid cycles that could lead to a stack overflow. Only do - // the relatively expensive map operations if ptrLevel is larger than - // startDetectingCyclesAfter, so that we skip the work if we're within a - // reasonable amount of nested pointers deep. - ptrLevel uint - ptrSeen map[any]struct{} -} - -const startDetectingCyclesAfter = 1000 - -var encodeStatePool sync.Pool - -func newEncodeState() *encodeState { - if v := encodeStatePool.Get(); v != nil { - e := v.(*encodeState) - e.Reset() - if len(e.ptrSeen) > 0 { - panic("ptrEncoder.encode should have emptied ptrSeen via defers") - } - e.ptrLevel = 0 - return e - } - return &encodeState{ptrSeen: make(map[any]struct{})} -} - -// jsonError is an error wrapper type for internal use only. -// Panics with errors are wrapped in jsonError so that the top-level recover -// can distinguish intentional panics from this package. -type jsonError struct{ error } - -func (e *encodeState) marshal(v any, opts encOpts) (err error) { - defer func() { - if r := recover(); r != nil { - if je, ok := r.(jsonError); ok { - err = je.error - } else { - panic(r) - } - } - }() - e.reflectValue(reflect.ValueOf(v), opts) - return nil -} - -// error aborts the encoding by panicking with err wrapped in jsonError. -func (e *encodeState) error(err error) { - panic(jsonError{err}) -} - -func isEmptyValue(v reflect.Value) bool { - switch v.Kind() { - case reflect.Array, reflect.Map, reflect.Slice, reflect.String: - return v.Len() == 0 - case reflect.Bool, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, - reflect.Float32, reflect.Float64, - reflect.Interface, reflect.Pointer: - return v.IsZero() - } - return false -} - -func (e *encodeState) reflectValue(v reflect.Value, opts encOpts) { - valueEncoder(v)(e, v, opts) -} - -type encOpts struct { - // quoted causes primitive fields to be encoded inside JSON strings. - quoted bool - // escapeHTML causes '<', '>', and '&' to be escaped in JSON strings. - escapeHTML bool -} - -type encoderFunc func(e *encodeState, v reflect.Value, opts encOpts) - -var encoderCache sync.Map // map[reflect.Type]encoderFunc - -func valueEncoder(v reflect.Value) encoderFunc { - if !v.IsValid() { - return invalidValueEncoder - } - return typeEncoder(v.Type()) -} - -func typeEncoder(t reflect.Type) encoderFunc { - if fi, ok := encoderCache.Load(t); ok { - return fi.(encoderFunc) - } - - // To deal with recursive types, populate the map with an - // indirect func before we build it. This type waits on the - // real func (f) to be ready and then calls it. This indirect - // func is only used for recursive types. - var ( - wg sync.WaitGroup - f encoderFunc - ) - wg.Add(1) - fi, loaded := encoderCache.LoadOrStore(t, encoderFunc(func(e *encodeState, v reflect.Value, opts encOpts) { - wg.Wait() - f(e, v, opts) - })) - if loaded { - return fi.(encoderFunc) - } - - // Compute the real encoder and replace the indirect func with it. - f = newTypeEncoder(t, true) - wg.Done() - encoderCache.Store(t, f) - return f -} - -var ( - marshalerType = reflect.TypeFor[Marshaler]() - textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]() -) - -// newTypeEncoder constructs an encoderFunc for a type. -// The returned encoder only checks CanAddr when allowAddr is true. -func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc { - // If we have a non-pointer value whose type implements - // Marshaler with a value receiver, then we're better off taking - // the address of the value - otherwise we end up with an - // allocation as we cast the value to an interface. - if t.Kind() != reflect.Pointer && allowAddr && reflect.PointerTo(t).Implements(marshalerType) { - return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false)) - } - if t.Implements(marshalerType) { - return marshalerEncoder - } - if t.Kind() != reflect.Pointer && allowAddr && reflect.PointerTo(t).Implements(textMarshalerType) { - return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false)) - } - if t.Implements(textMarshalerType) { - return textMarshalerEncoder - } - - switch t.Kind() { - case reflect.Bool: - return boolEncoder - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return intEncoder - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return uintEncoder - case reflect.Float32: - return float32Encoder - case reflect.Float64: - return float64Encoder - case reflect.String: - return stringEncoder - case reflect.Interface: - return interfaceEncoder - case reflect.Struct: - return newStructEncoder(t) - case reflect.Map: - return newMapEncoder(t) - case reflect.Slice: - return newSliceEncoder(t) - case reflect.Array: - return newArrayEncoder(t) - case reflect.Pointer: - return newPtrEncoder(t) - default: - return unsupportedTypeEncoder - } -} - -func invalidValueEncoder(e *encodeState, v reflect.Value, _ encOpts) { - e.WriteString("null") -} - -func marshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { - if v.Kind() == reflect.Pointer && v.IsNil() { - e.WriteString("null") - return - } - m, ok := v.Interface().(Marshaler) - if !ok { - e.WriteString("null") - return - } - b, err := m.MarshalJSON() - if err == nil { - e.Grow(len(b)) - out := e.AvailableBuffer() - out, err = appendCompact(out, b, opts.escapeHTML) - e.Buffer.Write(out) - } - if err != nil { - e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) - } -} - -func addrMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { - va := v.Addr() - if va.IsNil() { - e.WriteString("null") - return - } - m := va.Interface().(Marshaler) - b, err := m.MarshalJSON() - if err == nil { - e.Grow(len(b)) - out := e.AvailableBuffer() - out, err = appendCompact(out, b, opts.escapeHTML) - e.Buffer.Write(out) - } - if err != nil { - e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) - } -} - -func textMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { - if v.Kind() == reflect.Pointer && v.IsNil() { - e.WriteString("null") - return - } - m, ok := v.Interface().(encoding.TextMarshaler) - if !ok { - e.WriteString("null") - return - } - b, err := m.MarshalText() - if err != nil { - e.error(&MarshalerError{v.Type(), err, "MarshalText"}) - } - e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML)) -} - -func addrTextMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { - va := v.Addr() - if va.IsNil() { - e.WriteString("null") - return - } - m := va.Interface().(encoding.TextMarshaler) - b, err := m.MarshalText() - if err != nil { - e.error(&MarshalerError{v.Type(), err, "MarshalText"}) - } - e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML)) -} - -func boolEncoder(e *encodeState, v reflect.Value, opts encOpts) { - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - b = strconv.AppendBool(b, v.Bool()) - b = mayAppendQuote(b, opts.quoted) - e.Write(b) -} - -func intEncoder(e *encodeState, v reflect.Value, opts encOpts) { - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - b = strconv.AppendInt(b, v.Int(), 10) - b = mayAppendQuote(b, opts.quoted) - e.Write(b) -} - -func uintEncoder(e *encodeState, v reflect.Value, opts encOpts) { - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - b = strconv.AppendUint(b, v.Uint(), 10) - b = mayAppendQuote(b, opts.quoted) - e.Write(b) -} - -type floatEncoder int // number of bits - -func (bits floatEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - f := v.Float() - if math.IsInf(f, 0) || math.IsNaN(f) { - e.error(&UnsupportedValueError{v, strconv.FormatFloat(f, 'g', -1, int(bits))}) - } - - // Convert as if by ES6 number to string conversion. - // This matches most other JSON generators. - // See golang.org/issue/6384 and golang.org/issue/14135. - // Like fmt %g, but the exponent cutoffs are different - // and exponents themselves are not padded to two digits. - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - abs := math.Abs(f) - fmt := byte('f') - // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. - if abs != 0 { - if bits == 64 && (abs < 1e-6 || abs >= 1e21) || bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { - fmt = 'e' - } - } - b = strconv.AppendFloat(b, f, fmt, -1, int(bits)) - if fmt == 'e' { - // clean up e-09 to e-9 - n := len(b) - if n >= 4 && b[n-4] == 'e' && b[n-3] == '-' && b[n-2] == '0' { - b[n-2] = b[n-1] - b = b[:n-1] - } - } - b = mayAppendQuote(b, opts.quoted) - e.Write(b) -} - -var ( - float32Encoder = (floatEncoder(32)).encode - float64Encoder = (floatEncoder(64)).encode -) - -func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) { - if v.Type() == numberType { - numStr := v.String() - // In Go1.5 the empty string encodes to "0", while this is not a valid number literal - // we keep compatibility so check validity after this. - if numStr == "" { - numStr = "0" // Number's zero-val - } - if !isValidNumber(numStr) { - e.error(fmt.Errorf("json: invalid number literal %q", numStr)) - } - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - b = append(b, numStr...) - b = mayAppendQuote(b, opts.quoted) - e.Write(b) - return - } - if opts.quoted { - b := appendString(nil, v.String(), opts.escapeHTML) - e.Write(appendString(e.AvailableBuffer(), b, false)) // no need to escape again since it is already escaped - } else { - e.Write(appendString(e.AvailableBuffer(), v.String(), opts.escapeHTML)) - } -} - -// isValidNumber reports whether s is a valid JSON number literal. -// -// isValidNumber should be an internal detail, -// but widely used packages access it using linkname. -// Notable members of the hall of shame include: -// - github.com/bytedance/sonic -// -// Do not remove or change the type signature. -// See go.dev/issue/67401. -// -//go:linkname isValidNumber -func isValidNumber(s string) bool { - // This function implements the JSON numbers grammar. - // See https://tools.ietf.org/html/rfc7159#section-6 - // and https://www.json.org/img/number.png - - if s == "" { - return false - } - - // Optional - - if s[0] == '-' { - s = s[1:] - if s == "" { - return false - } - } - - // Digits - switch { - default: - return false - - case s[0] == '0': - s = s[1:] - - case '1' <= s[0] && s[0] <= '9': - s = s[1:] - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // . followed by 1 or more digits. - if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { - s = s[2:] - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // e or E followed by an optional - or + and - // 1 or more digits. - if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { - s = s[1:] - if s[0] == '+' || s[0] == '-' { - s = s[1:] - if s == "" { - return false - } - } - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // Make sure we are at the end. - return s == "" -} - -func interfaceEncoder(e *encodeState, v reflect.Value, opts encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - e.reflectValue(v.Elem(), opts) -} - -func unsupportedTypeEncoder(e *encodeState, v reflect.Value, _ encOpts) { - e.error(&UnsupportedTypeError{v.Type()}) -} - -type structEncoder struct { - fields structFields -} - -type structFields struct { - list []field - byExactName map[string]*field - byFoldedName map[string]*field -} - -func (se structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - next := byte('{') -FieldLoop: - for i := range se.fields.list { - f := &se.fields.list[i] - - // Find the nested struct field by following f.index. - fv := v - for _, i := range f.index { - if fv.Kind() == reflect.Pointer { - if fv.IsNil() { - continue FieldLoop - } - fv = fv.Elem() - } - fv = fv.Field(i) - } - - if (f.omitEmpty && isEmptyValue(fv)) || - (f.omitZero && (f.isZero == nil && fv.IsZero() || (f.isZero != nil && f.isZero(fv)))) { - continue - } - e.WriteByte(next) - next = ',' - if opts.escapeHTML { - e.WriteString(f.nameEscHTML) - } else { - e.WriteString(f.nameNonEsc) - } - opts.quoted = f.quoted - f.encoder(e, fv, opts) - } - if next == '{' { - e.WriteString("{}") - } else { - e.WriteByte('}') - } -} - -func newStructEncoder(t reflect.Type) encoderFunc { - se := structEncoder{fields: cachedTypeFields(t)} - return se.encode -} - -type mapEncoder struct { - elemEnc encoderFunc -} - -func (me mapEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { - // We're a large number of nested ptrEncoder.encode calls deep; - // start checking if we've run into a pointer cycle. - ptr := v.UnsafePointer() - if _, ok := e.ptrSeen[ptr]; ok { - e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) - } - e.ptrSeen[ptr] = struct{}{} - defer delete(e.ptrSeen, ptr) - } - e.WriteByte('{') - - // Extract and sort the keys. - var ( - sv = make([]reflectWithString, v.Len()) - mi = v.MapRange() - err error - ) - for i := 0; mi.Next(); i++ { - if sv[i].ks, err = resolveKeyName(mi.Key()); err != nil { - e.error(fmt.Errorf("json: encoding error for type %q: %q", v.Type().String(), err.Error())) - } - sv[i].v = mi.Value() - } - slices.SortFunc(sv, func(i, j reflectWithString) int { - return strings.Compare(i.ks, j.ks) - }) - - for i, kv := range sv { - if i > 0 { - e.WriteByte(',') - } - e.Write(appendString(e.AvailableBuffer(), kv.ks, opts.escapeHTML)) - e.WriteByte(':') - me.elemEnc(e, kv.v, opts) - } - e.WriteByte('}') - e.ptrLevel-- -} - -func newMapEncoder(t reflect.Type) encoderFunc { - switch t.Key().Kind() { - case reflect.String, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - default: - if !t.Key().Implements(textMarshalerType) { - return unsupportedTypeEncoder - } - } - me := mapEncoder{typeEncoder(t.Elem())} - return me.encode -} - -func encodeByteSlice(e *encodeState, v reflect.Value, _ encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - - s := v.Bytes() - b := e.AvailableBuffer() - b = append(b, '"') - b = base64.StdEncoding.AppendEncode(b, s) - b = append(b, '"') - e.Write(b) -} - -// sliceEncoder just wraps an arrayEncoder, checking to make sure the value isn't nil. -type sliceEncoder struct { - arrayEnc encoderFunc -} - -func (se sliceEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { - // We're a large number of nested ptrEncoder.encode calls deep; - // start checking if we've run into a pointer cycle. - // Here we use a struct to memorize the pointer to the first element of the slice - // and its length. - ptr := struct { - ptr any // always an unsafe.Pointer, but avoids a dependency on package unsafe - len int - }{v.UnsafePointer(), v.Len()} - if _, ok := e.ptrSeen[ptr]; ok { - e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) - } - e.ptrSeen[ptr] = struct{}{} - defer delete(e.ptrSeen, ptr) - } - se.arrayEnc(e, v, opts) - e.ptrLevel-- -} - -func newSliceEncoder(t reflect.Type) encoderFunc { - // Byte slices get special treatment; arrays don't. - if t.Elem().Kind() == reflect.Uint8 { - p := reflect.PointerTo(t.Elem()) - if !p.Implements(marshalerType) && !p.Implements(textMarshalerType) { - return encodeByteSlice - } - } - enc := sliceEncoder{newArrayEncoder(t)} - return enc.encode -} - -type arrayEncoder struct { - elemEnc encoderFunc -} - -func (ae arrayEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - e.WriteByte('[') - n := v.Len() - for i := 0; i < n; i++ { - if i > 0 { - e.WriteByte(',') - } - ae.elemEnc(e, v.Index(i), opts) - } - e.WriteByte(']') -} - -func newArrayEncoder(t reflect.Type) encoderFunc { - enc := arrayEncoder{typeEncoder(t.Elem())} - return enc.encode -} - -type ptrEncoder struct { - elemEnc encoderFunc -} - -func (pe ptrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { - // We're a large number of nested ptrEncoder.encode calls deep; - // start checking if we've run into a pointer cycle. - ptr := v.Interface() - if _, ok := e.ptrSeen[ptr]; ok { - e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) - } - e.ptrSeen[ptr] = struct{}{} - defer delete(e.ptrSeen, ptr) - } - pe.elemEnc(e, v.Elem(), opts) - e.ptrLevel-- -} - -func newPtrEncoder(t reflect.Type) encoderFunc { - enc := ptrEncoder{typeEncoder(t.Elem())} - return enc.encode -} - -type condAddrEncoder struct { - canAddrEnc, elseEnc encoderFunc -} - -func (ce condAddrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - if v.CanAddr() { - ce.canAddrEnc(e, v, opts) - } else { - ce.elseEnc(e, v, opts) - } -} - -// newCondAddrEncoder returns an encoder that checks whether its value -// CanAddr and delegates to canAddrEnc if so, else to elseEnc. -func newCondAddrEncoder(canAddrEnc, elseEnc encoderFunc) encoderFunc { - enc := condAddrEncoder{canAddrEnc: canAddrEnc, elseEnc: elseEnc} - return enc.encode -} - -func isValidTag(s string) bool { - if s == "" { - return false - } - for _, c := range s { - switch { - case strings.ContainsRune("!#$%&()*+-./:;<=>?@[]^_{|}~ ", c): - // Backslash and quote chars are reserved, but - // otherwise any punctuation chars are allowed - // in a tag name. - case !unicode.IsLetter(c) && !unicode.IsDigit(c): - return false - } - } - return true -} - -func typeByIndex(t reflect.Type, index []int) reflect.Type { - for _, i := range index { - if t.Kind() == reflect.Pointer { - t = t.Elem() - } - t = t.Field(i).Type - } - return t -} - -type reflectWithString struct { - v reflect.Value - ks string -} - -func resolveKeyName(k reflect.Value) (string, error) { - if k.Kind() == reflect.String { - return k.String(), nil - } - if tm, ok := k.Interface().(encoding.TextMarshaler); ok { - if k.Kind() == reflect.Pointer && k.IsNil() { - return "", nil - } - buf, err := tm.MarshalText() - return string(buf), err - } - switch k.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return strconv.FormatInt(k.Int(), 10), nil - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return strconv.FormatUint(k.Uint(), 10), nil - } - panic("unexpected map key type") -} - -func appendString[Bytes []byte | string](dst []byte, src Bytes, escapeHTML bool) []byte { - dst = append(dst, '"') - start := 0 - for i := 0; i < len(src); { - if b := src[i]; b < utf8.RuneSelf { - if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) { - i++ - continue - } - dst = append(dst, src[start:i]...) - switch b { - case '\\', '"': - dst = append(dst, '\\', b) - case '\b': - dst = append(dst, '\\', 'b') - case '\f': - dst = append(dst, '\\', 'f') - case '\n': - dst = append(dst, '\\', 'n') - case '\r': - dst = append(dst, '\\', 'r') - case '\t': - dst = append(dst, '\\', 't') - default: - // This encodes bytes < 0x20 except for \b, \f, \n, \r and \t. - // If escapeHTML is set, it also escapes <, >, and & - // because they can lead to security holes when - // user-controlled strings are rendered into JSON - // and served to some browsers. - dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF]) - } - i++ - start = i - continue - } - // TODO(https://go.dev/issue/56948): Use generic utf8 functionality. - // For now, cast only a small portion of byte slices to a string - // so that it can be stack allocated. This slows down []byte slightly - // due to the extra copy, but keeps string performance roughly the same. - n := len(src) - i - if n > utf8.UTFMax { - n = utf8.UTFMax - } - c, size := utf8.DecodeRuneInString(string(src[i : i+n])) - if c == utf8.RuneError && size == 1 { - dst = append(dst, src[start:i]...) - dst = append(dst, `\ufffd`...) - i += size - start = i - continue - } - // U+2028 is LINE SEPARATOR. - // U+2029 is PARAGRAPH SEPARATOR. - // They are both technically valid characters in JSON strings, - // but don't work in JSONP, which has to be evaluated as JavaScript, - // and can lead to security holes there. It is valid JSON to - // escape them, so we do so unconditionally. - // See https://en.wikipedia.org/wiki/JSON#Safety. - if c == '\u2028' || c == '\u2029' { - dst = append(dst, src[start:i]...) - dst = append(dst, '\\', 'u', '2', '0', '2', hex[c&0xF]) - i += size - start = i - continue - } - i += size - } - dst = append(dst, src[start:]...) - dst = append(dst, '"') - return dst -} - -// A field represents a single field found in a struct. -type field struct { - name string - nameBytes []byte // []byte(name) - - nameNonEsc string // `"` + name + `":` - nameEscHTML string // `"` + HTMLEscape(name) + `":` - - tag bool - index []int - typ reflect.Type - omitEmpty bool - omitZero bool - isZero func(reflect.Value) bool - quoted bool - - encoder encoderFunc -} - -type isZeroer interface { - IsZero() bool -} - -var isZeroerType = reflect.TypeFor[isZeroer]() - -// typeFields returns a list of fields that JSON should recognize for the given type. -// The algorithm is breadth-first search over the set of structs to include - the top struct -// and then any reachable anonymous structs. -// -// typeFields should be an internal detail, -// but widely used packages access it using linkname. -// Notable members of the hall of shame include: -// - github.com/bytedance/sonic -// -// Do not remove or change the type signature. -// See go.dev/issue/67401. -// -//go:linkname typeFields -func typeFields(t reflect.Type) structFields { - // Anonymous fields to explore at the current level and the next. - current := []field{} - next := []field{{typ: t}} - - // Count of queued names for current level and the next. - var count, nextCount map[reflect.Type]int - - // Types already visited at an earlier level. - visited := map[reflect.Type]bool{} - - // Fields found. - var fields []field - - // Buffer to run appendHTMLEscape on field names. - var nameEscBuf []byte - - for len(next) > 0 { - current, next = next, current[:0] - count, nextCount = nextCount, map[reflect.Type]int{} - - for _, f := range current { - if visited[f.typ] { - continue - } - visited[f.typ] = true - - // Scan f.typ for fields to include. - for i := 0; i < f.typ.NumField(); i++ { - sf := f.typ.Field(i) - if sf.Anonymous { - t := sf.Type - if t.Kind() == reflect.Pointer { - t = t.Elem() - } - if !sf.IsExported() && t.Kind() != reflect.Struct { - // Ignore embedded fields of unexported non-struct types. - continue - } - // Do not ignore embedded fields of unexported struct types - // since they may have exported fields. - } else if !sf.IsExported() { - // Ignore unexported non-embedded fields. - continue - } - tag := sf.Tag.Get("json") - if tag == "-" { - continue - } - name, opts := parseTag(tag) - if !isValidTag(name) { - name = "" - } - index := make([]int, len(f.index)+1) - copy(index, f.index) - index[len(f.index)] = i - - ft := sf.Type - if ft.Name() == "" && ft.Kind() == reflect.Pointer { - // Follow pointer. - ft = ft.Elem() - } - - // Only strings, floats, integers, and booleans can be quoted. - quoted := false - if opts.Contains("string") { - switch ft.Kind() { - case reflect.Bool, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, - reflect.Float32, reflect.Float64, - reflect.String: - quoted = true - } - } - - // Record found field and index sequence. - if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct { - tagged := name != "" - if name == "" { - name = sf.Name - } - field := field{ - name: name, - tag: tagged, - index: index, - typ: ft, - omitEmpty: opts.Contains("omitempty"), - omitZero: opts.Contains("omitzero"), - quoted: quoted, - } - field.nameBytes = []byte(field.name) - - // Build nameEscHTML and nameNonEsc ahead of time. - nameEscBuf = appendHTMLEscape(nameEscBuf[:0], field.nameBytes) - field.nameEscHTML = `"` + string(nameEscBuf) + `":` - field.nameNonEsc = `"` + field.name + `":` - - if field.omitZero { - t := sf.Type - // Provide a function that uses a type's IsZero method. - switch { - case t.Kind() == reflect.Interface && t.Implements(isZeroerType): - field.isZero = func(v reflect.Value) bool { - // Avoid panics calling IsZero on a nil interface or - // non-nil interface with nil pointer. - return v.IsNil() || - (v.Elem().Kind() == reflect.Pointer && v.Elem().IsNil()) || - v.Interface().(isZeroer).IsZero() - } - case t.Kind() == reflect.Pointer && t.Implements(isZeroerType): - field.isZero = func(v reflect.Value) bool { - // Avoid panics calling IsZero on nil pointer. - return v.IsNil() || v.Interface().(isZeroer).IsZero() - } - case t.Implements(isZeroerType): - field.isZero = func(v reflect.Value) bool { - return v.Interface().(isZeroer).IsZero() - } - case reflect.PointerTo(t).Implements(isZeroerType): - field.isZero = func(v reflect.Value) bool { - if !v.CanAddr() { - // Temporarily box v so we can take the address. - v2 := reflect.New(v.Type()).Elem() - v2.Set(v) - v = v2 - } - return v.Addr().Interface().(isZeroer).IsZero() - } - } - } - - fields = append(fields, field) - if count[f.typ] > 1 { - // If there were multiple instances, add a second, - // so that the annihilation code will see a duplicate. - // It only cares about the distinction between 1 and 2, - // so don't bother generating any more copies. - fields = append(fields, fields[len(fields)-1]) - } - continue - } - - // Record new anonymous struct to explore in next round. - nextCount[ft]++ - if nextCount[ft] == 1 { - next = append(next, field{name: ft.Name(), index: index, typ: ft}) - } - } - } - } - - slices.SortFunc(fields, func(a, b field) int { - // sort field by name, breaking ties with depth, then - // breaking ties with "name came from json tag", then - // breaking ties with index sequence. - if c := strings.Compare(a.name, b.name); c != 0 { - return c - } - if c := cmp.Compare(len(a.index), len(b.index)); c != 0 { - return c - } - if a.tag != b.tag { - if a.tag { - return -1 - } - return +1 - } - return slices.Compare(a.index, b.index) - }) - - // Delete all fields that are hidden by the Go rules for embedded fields, - // except that fields with JSON tags are promoted. - - // The fields are sorted in primary order of name, secondary order - // of field index length. Loop over names; for each name, delete - // hidden fields by choosing the one dominant field that survives. - out := fields[:0] - for advance, i := 0, 0; i < len(fields); i += advance { - // One iteration per name. - // Find the sequence of fields with the name of this first field. - fi := fields[i] - name := fi.name - for advance = 1; i+advance < len(fields); advance++ { - fj := fields[i+advance] - if fj.name != name { - break - } - } - if advance == 1 { // Only one field with this name - out = append(out, fi) - continue - } - dominant, ok := dominantField(fields[i : i+advance]) - if ok { - out = append(out, dominant) - } - } - - fields = out - slices.SortFunc(fields, func(i, j field) int { - return slices.Compare(i.index, j.index) - }) - - for i := range fields { - f := &fields[i] - f.encoder = typeEncoder(typeByIndex(t, f.index)) - } - exactNameIndex := make(map[string]*field, len(fields)) - foldedNameIndex := make(map[string]*field, len(fields)) - for i, field := range fields { - exactNameIndex[field.name] = &fields[i] - // For historical reasons, first folded match takes precedence. - if _, ok := foldedNameIndex[string(foldName(field.nameBytes))]; !ok { - foldedNameIndex[string(foldName(field.nameBytes))] = &fields[i] - } - } - return structFields{fields, exactNameIndex, foldedNameIndex} -} - -// dominantField looks through the fields, all of which are known to -// have the same name, to find the single field that dominates the -// others using Go's embedding rules, modified by the presence of -// JSON tags. If there are multiple top-level fields, the boolean -// will be false: This condition is an error in Go and we skip all -// the fields. -func dominantField(fields []field) (field, bool) { - // The fields are sorted in increasing index-length order, then by presence of tag. - // That means that the first field is the dominant one. We need only check - // for error cases: two fields at top level, either both tagged or neither tagged. - if len(fields) > 1 && len(fields[0].index) == len(fields[1].index) && fields[0].tag == fields[1].tag { - return field{}, false - } - return fields[0], true -} - -var fieldCache sync.Map // map[reflect.Type]structFields - -// cachedTypeFields is like typeFields but uses a cache to avoid repeated work. -func cachedTypeFields(t reflect.Type) structFields { - if f, ok := fieldCache.Load(t); ok { - return f.(structFields) - } - f, _ := fieldCache.LoadOrStore(t, typeFields(t)) - return f.(structFields) -} - -func mayAppendQuote(b []byte, quoted bool) []byte { - if quoted { - b = append(b, '"') - } - return b -} diff --git a/v1/encode_test.go b/v1/encode_test.go index 79c4817..3ed847f 100644 --- a/v1/encode_test.go +++ b/v1/encode_test.go @@ -85,6 +85,10 @@ func (nps *NoPanicStruct) IsZero() bool { return nps.Int != 0 } +type isZeroer interface { + IsZero() bool +} + type OptionalsZero struct { Sr string `json:"sr"` So string `json:"so,omitzero"` @@ -324,6 +328,7 @@ type renamedByteSlice []byte type renamedRenamedByteSlice []renamedByte func TestEncodeRenamedByteSlice(t *testing.T) { + skipKnownFailure(t) s := renamedByteSlice("abc") got, err := Marshal(s) if err != nil { @@ -380,6 +385,7 @@ func init() { mapCycle["x"] = mapCycle sliceCycle[0] = sliceCycle sliceNoCycle[1] = sliceNoCycle[:1] + const startDetectingCyclesAfter = 1e3 for i := startDetectingCyclesAfter; i > 0; i-- { sliceNoCycle = []any{sliceNoCycle} } @@ -414,6 +420,7 @@ func TestUnsupportedValues(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) if _, err := Marshal(tt.in); err != nil { if _, ok := err.(*UnsupportedValueError); !ok { t.Errorf("%s: Marshal error:\n\tgot: %T\n\twant: %T", tt.Where, err, new(UnsupportedValueError)) @@ -711,6 +718,7 @@ func TestAnonymousFields(t *testing.T) { for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) b, err := Marshal(tt.makeInput()) if err != nil { t.Fatalf("%s: Marshal error: %v", tt.Where, err) @@ -789,6 +797,7 @@ func TestNilMarshal(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) switch got, err := Marshal(tt.in); { case err != nil: t.Fatalf("%s: Marshal error: %v", tt.Where, err) @@ -1094,6 +1103,7 @@ func TestTextMarshalerMapKeysAreSorted(t *testing.T) { // https://golang.org/issue/33675 func TestNilMarshalerTextMapKey(t *testing.T) { + skipKnownFailure(t) got, err := Marshal(map[*unmarshalerText]int{ (*unmarshalerText)(nil): 1, {"A", "B"}: 2, @@ -1303,6 +1313,7 @@ func TestMarshalRawMessageValue(t *testing.T) { for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) b, err := Marshal(tt.in) if ok := (err == nil); ok != tt.ok { if err != nil { diff --git a/v1/failing.txt b/v1/failing.txt new file mode 100644 index 0000000..dd89e37 --- /dev/null +++ b/v1/failing.txt @@ -0,0 +1,179 @@ +TestMarshalInvalidUTF8 +TestMarshalInvalidUTF8/#00 +TestMarshalInvalidUTF8/#02 +TestMarshalInvalidUTF8/#03 +TestMarshalInvalidUTF8/#04 +TestMarshalInvalidUTF8/#05 +TestMarshalEmbeds +TestUnmarshal +TestUnmarshal/#07 +TestUnmarshal/#13 +TestUnmarshal/#14 +TestUnmarshal/#15 +TestUnmarshal/#16 +TestUnmarshal/#17 +TestUnmarshal/#18 +TestUnmarshal/#19 +TestUnmarshal/#21 +TestUnmarshal/#23 +TestUnmarshal/#30 +TestUnmarshal/#32 +TestUnmarshal/#35 +TestUnmarshal/#36 +TestUnmarshal/#37 +TestUnmarshal/#38 +TestUnmarshal/#39 +TestUnmarshal/#40 +TestUnmarshal/#41 +TestUnmarshal/#42 +TestUnmarshal/#43 +TestUnmarshal/#44 +TestUnmarshal/#45 +TestUnmarshal/#82 +TestUnmarshal/#83 +TestUnmarshal/#84 +TestUnmarshal/#85 +TestUnmarshal/#86 +TestUnmarshal/#87 +TestUnmarshal/#90 +TestUnmarshal/#93 +TestUnmarshal/#95 +TestUnmarshal/#105 +TestUnmarshal/#106 +TestUnmarshal/#107 +TestUnmarshal/#109 +TestUnmarshal/#111 +TestUnmarshal/#113 +TestUnmarshal/#130 +TestUnmarshal/#131 +TestUnmarshal/#132 +TestUnmarshal/#135 +TestUnmarshal/#136 +TestUnmarshal/#137 +TestUnmarshal/#138 +TestUnmarshal/#139 +TestUnmarshal/#140 +TestUnmarshal/#141 +TestUnmarshal/#142 +TestUnmarshal/#143 +TestUnmarshal/#144 +TestUnmarshal/#145 +TestUnmarshal/#146 +TestUnmarshal/#147 +TestUnmarshal/#148 +TestUnmarshal/#149 +TestUnmarshal/#150 +TestUnmarshal/#151 +TestUnmarshal/#152 +TestUnmarshal/#153 +TestErrorMessageFromMisusedString +TestErrorMessageFromMisusedString/#00 +TestErrorMessageFromMisusedString/#01 +TestErrorMessageFromMisusedString/#02 +TestErrorMessageFromMisusedString/#03 +TestErrorMessageFromMisusedString/#04 +TestErrorMessageFromMisusedString/#05 +TestNullString +TestInterfaceSet +TestInterfaceSet/#01 +TestInterfaceSet/#02 +TestInterfaceSet/#07 +TestInterfaceSet/#10 +TestInterfaceSet/#11 +TestUnmarshalNulls +TestUnmarshalTypeError +TestUnmarshalTypeError/#00 +TestUnmarshalTypeError/#01 +TestUnmarshalTypeError/#02 +TestUnmarshalTypeError/#03 +TestUnmarshalTypeError/#04 +TestUnmarshalTypeError/#05 +TestUnmarshalSyntax +TestUnmarshalSyntax/#00 +TestUnmarshalSyntax/#01 +TestUnmarshalSyntax/#02 +TestUnmarshalSyntax/#03 +TestUnmarshalSyntax/#04 +TestUnmarshalSyntax/#05 +TestUnmarshalSyntax/#06 +TestUnmarshalSyntax/#07 +TestUnmarshalUnexported +TestPrefilled +TestPrefilled/#00 +TestPrefilled/#01 +TestInvalidUnmarshal +TestInvalidUnmarshal/#00 +TestInvalidUnmarshal/#01 +TestInvalidUnmarshal/#02 +TestInvalidUnmarshal/#03 +TestInvalidUnmarshal/#04 +TestInvalidUnmarshal/#05 +TestInvalidUnmarshal/#06 +TestUnmarshalEmbeddedUnexported +TestUnmarshalEmbeddedUnexported/#00 +TestUnmarshalEmbeddedUnexported/#01 +TestUnmarshalEmbeddedUnexported/#02 +TestUnmarshalEmbeddedUnexported/#03 +TestUnmarshalEmbeddedUnexported/#04 +TestUnmarshalEmbeddedUnexported/#05 +TestUnmarshalEmbeddedUnexported/#06 +TestUnmarshalEmbeddedUnexported/#07 +TestUnmarshalEmbeddedUnexported/#08 +TestUnmarshalErrorAfterMultipleJSON +TestUnmarshalErrorAfterMultipleJSON/#00 +TestUnmarshalErrorAfterMultipleJSON/#01 +TestUnmarshalErrorAfterMultipleJSON/#02 +TestUnmarshalErrorAfterMultipleJSON/#03 +TestUnmarshalErrorAfterMultipleJSON/#04 +TestEncodeRenamedByteSlice +TestUnsupportedValues +TestUnsupportedValues/#00 +TestUnsupportedValues/#01 +TestUnsupportedValues/#02 +TestUnsupportedValues/#03 +TestUnsupportedValues/#04 +TestUnsupportedValues/#05 +TestUnsupportedValues/#06 +TestUnsupportedValues/#07 +TestAnonymousFields +TestAnonymousFields/UnexportedEmbeddedInt +TestAnonymousFields/ExportedEmbeddedInt +TestAnonymousFields/UnexportedEmbeddedIntPointer +TestAnonymousFields/ExportedEmbeddedIntPointer +TestAnonymousFields/EmbeddedStruct +TestAnonymousFields/EmbeddedStructPointer +TestAnonymousFields/NestedStructAndInts +TestNilMarshal +TestNilMarshal/#08 +TestNilMarshal/#11 +TestNilMarshalerTextMapKey +TestMarshalRawMessageValue +TestMarshalRawMessageValue/#20 +TestMarshalRawMessageValue/#21 +TestMarshalRawMessageValue/#22 +TestMarshalRawMessageValue/#23 +TestMarshalRawMessageValue/#24 +TestMarshalRawMessageValue/#25 +TestMarshalRawMessageValue/#26 +TestMarshalRawMessageValue/#27 +TestMarshalRawMessageValue/#28 +TestMarshalRawMessageValue/#29 +TestMarshalRawMessageValue/#30 +TestMarshalRawMessageValue/#31 +TestMarshalRawMessageValue/#33 +TestMarshalRawMessageValue/#35 +TestIndentErrors +TestIndentErrors/#00 +TestIndentErrors/#01 +TestEncoderSetEscapeHTML +TestEncoderSetEscapeHTML/tagStruct +TestEncoderSetEscapeHTML/stringOption +TestRawMessage +TestDecodeInStream +TestDecodeInStream/#14 +TestDecodeInStream/#15 +TestDecodeInStream/#16 +TestDecodeInStream/#17 +TestStructTagObjectKey +TestStructTagObjectKey/#07 +TestStructTagObjectKey/#11 diff --git a/v1/failing_test.go b/v1/failing_test.go new file mode 100644 index 0000000..6312ed3 --- /dev/null +++ b/v1/failing_test.go @@ -0,0 +1,89 @@ +package json + +import ( + _ "embed" + "flag" + "fmt" + "os" + "os/exec" + "slices" + "strings" + "sync" + "testing" +) + +var skipKnownFailures = flag.Bool("skip-known-failures", true, "skip tests that are known to already be failing") +var updateKnownFailures = flag.Bool("update-known-failures", false, "update the list of known failures") + +//go:embed failing.txt +var knownFailuresText string +var knownFailures = sync.OnceValue(func() map[string]bool { + failures := make(map[string]bool) + for _, s := range strings.Split(knownFailuresText, "\n") { + failures[s] = true + } + return failures +}) + +// skipKnownFailure skips the current test if it is in the failing.old list. +func skipKnownFailure(t *testing.T) { + if *skipKnownFailures && knownFailures()[t.Name()] { + t.SkipNow() + } +} + +// TestKnownFailures tests whether the failing.old is up-to-date. +func TestKnownFailures(t *testing.T) { + if !*skipKnownFailures { + return // avoid infinite recursion calling the same test + } + + // Produce a sorted list of currently known failures. + b, _ := exec.Command("go", "test", "-skip-known-failures=false", ".").CombinedOutput() + var newFailing []string + for _, line := range strings.Split(string(b), "\n") { + if _, suffix, ok := strings.Cut(line, "--- FAIL: "); ok { + suffix = strings.TrimSuffix(suffix, ")") + suffix = strings.TrimRight(suffix, ".0123456789s") + suffix = strings.TrimSuffix(suffix, " (") + newFailing = append(newFailing, suffix) + } + } + newFailingSorted := slices.Clone(newFailing) + slices.Sort(newFailingSorted) + + // Produce a sorted list of previously known failures. + oldFailing := strings.Split(strings.TrimSuffix(knownFailuresText, "\n"), "\n") + oldFailingSorted := slices.Clone(oldFailing) + slices.Sort(oldFailingSorted) + + // Check whether the two lists match. + if !slices.Equal(newFailingSorted, oldFailingSorted) { + var diff []string + before, after := oldFailingSorted, newFailingSorted + for len(before)|len(after) > 0 { + switch { + case len(before) == 0: + diff = append(diff, fmt.Sprintf("+ %s\n", after[0])) + after = after[1:] + case len(after) == 0: + diff = append(diff, fmt.Sprintf("- %s\n", before[0])) + before = before[1:] + case after[0] < before[0]: + diff = append(diff, fmt.Sprintf("+ %s\n", after[0])) + after = after[1:] + case before[0] < after[0]: + diff = append(diff, fmt.Sprintf("- %s\n", before[0])) + before = before[1:] + default: + before, after = before[1:], after[1:] + } + } + t.Errorf("known failures mismatch (-old +new):\n%s", strings.Join(diff, "")) + if *updateKnownFailures { + if err := os.WriteFile("failing.txt", []byte(strings.Join(newFailing, "\n")+"\n"), 0664); err != nil { + t.Errorf("os.WriteFile error: %v", err) + } + } + } +} diff --git a/v1/fold.go b/v1/fold.go deleted file mode 100644 index c4c671b..0000000 --- a/v1/fold.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package json - -import ( - "unicode" - "unicode/utf8" -) - -// foldName returns a folded string such that foldName(x) == foldName(y) -// is identical to bytes.EqualFold(x, y). -func foldName(in []byte) []byte { - // This is inlinable to take advantage of "function outlining". - var arr [32]byte // large enough for most JSON names - return appendFoldedName(arr[:0], in) -} - -func appendFoldedName(out, in []byte) []byte { - for i := 0; i < len(in); { - // Handle single-byte ASCII. - if c := in[i]; c < utf8.RuneSelf { - if 'a' <= c && c <= 'z' { - c -= 'a' - 'A' - } - out = append(out, c) - i++ - continue - } - // Handle multi-byte Unicode. - r, n := utf8.DecodeRune(in[i:]) - out = utf8.AppendRune(out, foldRune(r)) - i += n - } - return out -} - -// foldRune is returns the smallest rune for all runes in the same fold set. -func foldRune(r rune) rune { - for { - r2 := unicode.SimpleFold(r) - if r2 <= r { - return r2 - } - r = r2 - } -} diff --git a/v1/fold_test.go b/v1/fold_test.go deleted file mode 100644 index 9d6fd05..0000000 --- a/v1/fold_test.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package json - -import ( - "bytes" - "testing" -) - -func FuzzEqualFold(f *testing.F) { - for _, ss := range [][2]string{ - {"", ""}, - {"123abc", "123ABC"}, - {"αβδ", "ΑΒΔ"}, - {"abc", "xyz"}, - {"abc", "XYZ"}, - {"1", "2"}, - {"hello, world!", "hello, world!"}, - {"hello, world!", "Hello, World!"}, - {"hello, world!", "HELLO, WORLD!"}, - {"hello, world!", "jello, world!"}, - {"γειά, κόσμε!", "γειά, κόσμε!"}, - {"γειά, κόσμε!", "Γειά, Κόσμε!"}, - {"γειά, κόσμε!", "ΓΕΙΆ, ΚΌΣΜΕ!"}, - {"γειά, κόσμε!", "ΛΕΙΆ, ΚΌΣΜΕ!"}, - {"AESKey", "aesKey"}, - {"AESKEY", "aes_key"}, - {"aes_key", "AES_KEY"}, - {"AES_KEY", "aes-key"}, - {"aes-key", "AES-KEY"}, - {"AES-KEY", "aesKey"}, - {"aesKey", "AesKey"}, - {"AesKey", "AESKey"}, - {"AESKey", "aeskey"}, - {"DESKey", "aeskey"}, - {"AES Key", "aeskey"}, - } { - f.Add([]byte(ss[0]), []byte(ss[1])) - } - equalFold := func(x, y []byte) bool { return string(foldName(x)) == string(foldName(y)) } - f.Fuzz(func(t *testing.T, x, y []byte) { - got := equalFold(x, y) - want := bytes.EqualFold(x, y) - if got != want { - t.Errorf("equalFold(%q, %q) = %v, want %v", x, y, got, want) - } - }) -} diff --git a/v1/indent.go b/v1/indent.go index 01bfdf6..90fe480 100644 --- a/v1/indent.go +++ b/v1/indent.go @@ -4,7 +4,12 @@ package json -import "bytes" +import ( + "bytes" + "strings" + + "github.com/go-json-experiment/json/jsontext" +) // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 @@ -17,6 +22,7 @@ func HTMLEscape(dst *bytes.Buffer, src []byte) { } func appendHTMLEscape(dst, src []byte) []byte { + const hex = "0123456789abcdef" // The characters can only appear in string literals, // so just scan the string one byte at a time. start := 0 @@ -41,59 +47,12 @@ func appendHTMLEscape(dst, src []byte) []byte { func Compact(dst *bytes.Buffer, src []byte) error { dst.Grow(len(src)) b := dst.AvailableBuffer() - b, err := appendCompact(b, src, false) - dst.Write(b) - return err -} - -func appendCompact(dst, src []byte, escape bool) ([]byte, error) { - origLen := len(dst) - scan := newScanner() - defer freeScanner(scan) - start := 0 - for i, c := range src { - if escape && (c == '<' || c == '>' || c == '&') { - if start < i { - dst = append(dst, src[start:i]...) - } - dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF]) - start = i + 1 - } - // Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9). - if escape && c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 { - if start < i { - dst = append(dst, src[start:i]...) - } - dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF]) - start = i + 3 - } - v := scan.step(scan, c) - if v >= scanSkipSpace { - if v == scanError { - break - } - if start < i { - dst = append(dst, src[start:i]...) - } - start = i + 1 - } - } - if scan.eof() == scanError { - return dst[:origLen], scan.err - } - if start < len(src) { - dst = append(dst, src[start:]...) + b = append(b, src...) + if err := (*jsontext.Value)(&b).Compact(); err != nil { + return transformSyntacticError(err) } - return dst, nil -} - -func appendNewline(dst []byte, prefix, indent string, depth int) []byte { - dst = append(dst, '\n') - dst = append(dst, prefix...) - for i := 0; i < depth; i++ { - dst = append(dst, indent...) - } - return dst + dst.Write(b) + return nil } // indentGrowthFactor specifies the growth factor of indenting JSON input. @@ -124,59 +83,40 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { } func appendIndent(dst, src []byte, prefix, indent string) ([]byte, error) { - origLen := len(dst) - scan := newScanner() - defer freeScanner(scan) - needIndent := false - depth := 0 - for _, c := range src { - scan.bytes++ - v := scan.step(scan, c) - if v == scanSkipSpace { - continue - } - if v == scanError { - break - } - if needIndent && v != scanEndObject && v != scanEndArray { - needIndent = false - depth++ - dst = appendNewline(dst, prefix, indent, depth) - } - - // Emit semantically uninteresting bytes - // (in particular, punctuation in strings) unmodified. - if v == scanContinue { - dst = append(dst, c) - continue - } - - // Add spacing around real punctuation. - switch c { - case '{', '[': - // delay indent so that empty object and array are formatted as {} and []. - needIndent = true - dst = append(dst, c) - case ',': - dst = append(dst, c) - dst = appendNewline(dst, prefix, indent, depth) - case ':': - dst = append(dst, c, ' ') - case '}', ']': - if needIndent { - // suppress indent in empty object/array - needIndent = false - } else { - depth-- - dst = appendNewline(dst, prefix, indent, depth) + // In v2, trailing whitespace is discarded, while v1 preserved it. + dstLen := len(dst) + if n := len(src) - len(bytes.TrimRight(src, " \n\r\t")); n > 0 { + // Append the trailing whitespace afterwards. + defer func() { + if len(dst) > dstLen { + dst = append(dst, src[len(src)-n:]...) } - dst = append(dst, c) - default: - dst = append(dst, c) - } + }() + } + // In v2, only spaces and tabs are allowed, while v1 allowed any character. + if len(strings.Trim(prefix, " \t"))+len(strings.Trim(indent, " \t")) > 0 { + // Use placeholder spaces of correct length, and replace afterwards. + invalidPrefix, invalidIndent := prefix, indent + prefix = strings.Repeat(" ", len(prefix)) + indent = strings.Repeat(" ", len(indent)) + defer func() { + b := dst[dstLen:] + for i := bytes.IndexByte(b, '\n'); i >= 0; i = bytes.IndexByte(b, '\n') { + b = b[i+len("\n"):] + n := len(b) - len(bytes.TrimLeft(b, " ")) // len(prefix)+n*len(indent) + spaces := b[:n] + spaces = spaces[copy(spaces, invalidPrefix):] + for len(spaces) > 0 { + spaces = spaces[copy(spaces, invalidIndent):] + } + b = b[n:] + } + }() } - if scan.eof() == scanError { - return dst[:origLen], scan.err + + dst = append(dst, src...) + if err := (*jsontext.Value)(&dst).Indent(prefix, indent); err != nil { + return dst[:dstLen], transformSyntacticError(err) } return dst, nil } diff --git a/v1/number_test.go b/v1/number_test.go deleted file mode 100644 index c82e6de..0000000 --- a/v1/number_test.go +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package json - -import ( - "regexp" - "testing" -) - -func TestNumberIsValid(t *testing.T) { - // From: https://stackoverflow.com/a/13340826 - var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) - - validTests := []string{ - "0", - "-0", - "1", - "-1", - "0.1", - "-0.1", - "1234", - "-1234", - "12.34", - "-12.34", - "12E0", - "12E1", - "12e34", - "12E-0", - "12e+1", - "12e-34", - "-12E0", - "-12E1", - "-12e34", - "-12E-0", - "-12e+1", - "-12e-34", - "1.2E0", - "1.2E1", - "1.2e34", - "1.2E-0", - "1.2e+1", - "1.2e-34", - "-1.2E0", - "-1.2E1", - "-1.2e34", - "-1.2E-0", - "-1.2e+1", - "-1.2e-34", - "0E0", - "0E1", - "0e34", - "0E-0", - "0e+1", - "0e-34", - "-0E0", - "-0E1", - "-0e34", - "-0E-0", - "-0e+1", - "-0e-34", - } - - for _, test := range validTests { - if !isValidNumber(test) { - t.Errorf("%s should be valid", test) - } - - var f float64 - if err := Unmarshal([]byte(test), &f); err != nil { - t.Errorf("%s should be valid but Unmarshal failed: %v", test, err) - } - - if !jsonNumberRegexp.MatchString(test) { - t.Errorf("%s should be valid but regexp does not match", test) - } - } - - invalidTests := []string{ - "", - "invalid", - "1.0.1", - "1..1", - "-1-2", - "012a42", - "01.2", - "012", - "12E12.12", - "1e2e3", - "1e+-2", - "1e--23", - "1e", - "e1", - "1e+", - "1ea", - "1a", - "1.a", - "1.", - "01", - "1.e1", - } - - for _, test := range invalidTests { - if isValidNumber(test) { - t.Errorf("%s should be invalid", test) - } - - var f float64 - if err := Unmarshal([]byte(test), &f); err == nil { - t.Errorf("%s should be invalid but unmarshal wrote %v", test, f) - } - - if jsonNumberRegexp.MatchString(test) { - t.Errorf("%s should be invalid but matches regexp", test) - } - } -} diff --git a/v1/options.go b/v1/options.go index 57b8d09..26dde71 100644 --- a/v1/options.go +++ b/v1/options.go @@ -181,3 +181,13 @@ func UnmarshalArrayFromAnyLength(v bool) Options { return jsonflags.UnmarshalArrayFromAnyLength | 0 } } + +// unmarshalAnyWithRawNumber specifies that unmarshaling a JSON number into +// an empty Go interface should use the Number type instead of a float64. +func unmarshalAnyWithRawNumber(v bool) Options { + if v { + return jsonflags.UnmarshalAnyWithRawNumber | 1 + } else { + return jsonflags.UnmarshalAnyWithRawNumber | 0 + } +} diff --git a/v1/scanner.go b/v1/scanner.go index da6ea2a..1e97ea5 100644 --- a/v1/scanner.go +++ b/v1/scanner.go @@ -4,40 +4,32 @@ package json -// JSON value parser state machine. -// Just about at the limit of what is reasonable to write by hand. -// Some parts are a bit tedious, but overall it nicely factors out the -// otherwise common code from the multiple scanning functions -// in this package (Compact, Indent, checkValid, etc). -// -// This file starts with two simple examples using the scanner -// before diving into the scanner itself. - import ( - "strconv" - "sync" + "errors" + + "github.com/go-json-experiment/json/internal" + "github.com/go-json-experiment/json/internal/jsonflags" + "github.com/go-json-experiment/json/jsontext" ) +// export exposes internal functionality of the "jsontext" package. +var export = jsontext.Internal.Export(&internal.AllowInternalUse) + // Valid reports whether data is a valid JSON encoding. func Valid(data []byte) bool { - scan := newScanner() - defer freeScanner(scan) - return checkValid(data, scan) == nil + return checkValid(data) == nil } -// checkValid verifies that data is valid JSON-encoded data. -// scan is passed in for use by checkValid to avoid an allocation. -// checkValid returns nil or a SyntaxError. -func checkValid(data []byte, scan *scanner) error { - scan.reset() - for _, c := range data { - scan.bytes++ - if scan.step(scan, c) == scanError { - return scan.err - } +func checkValid(data []byte) error { + d := export.GetBufferedDecoder(data) + defer export.PutBufferedDecoder(d) + xd := export.Decoder(d) + xd.Struct.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) + if _, err := d.ReadValue(); err != nil { + return transformSyntacticError(err) } - if scan.eof() == scanError { - return scan.err + if err := xd.CheckEOF(); err != nil { + return transformSyntacticError(err) } return nil } @@ -51,560 +43,15 @@ type SyntaxError struct { func (e *SyntaxError) Error() string { return e.msg } -// A scanner is a JSON scanning state machine. -// Callers call scan.reset and then pass bytes in one at a time -// by calling scan.step(&scan, c) for each byte. -// The return value, referred to as an opcode, tells the -// caller about significant parsing events like beginning -// and ending literals, objects, and arrays, so that the -// caller can follow along if it wishes. -// The return value scanEnd indicates that a single top-level -// JSON value has been completed, *before* the byte that -// just got passed in. (The indication must be delayed in order -// to recognize the end of numbers: is 123 a whole value or -// the beginning of 12345e+6?). -type scanner struct { - // The step is a func to be called to execute the next transition. - // Also tried using an integer constant and a single func - // with a switch, but using the func directly was 10% faster - // on a 64-bit Mac Mini, and it's nicer to read. - step func(*scanner, byte) int - - // Reached end of top-level value. - endTop bool - - // Stack of what we're in the middle of - array values, object keys, object values. - parseState []int - - // Error that happened, if any. - err error - - // total bytes consumed, updated by decoder.Decode (and deliberately - // not set to zero by scan.reset) - bytes int64 -} - -var scannerPool = sync.Pool{ - New: func() any { - return &scanner{} - }, -} - -func newScanner() *scanner { - scan := scannerPool.Get().(*scanner) - // scan.reset by design doesn't set bytes to zero - scan.bytes = 0 - scan.reset() - return scan -} - -func freeScanner(scan *scanner) { - // Avoid hanging on to too much memory in extreme cases. - if len(scan.parseState) > 1024 { - scan.parseState = nil - } - scannerPool.Put(scan) -} - -// These values are returned by the state transition functions -// assigned to scanner.state and the method scanner.eof. -// They give details about the current state of the scan that -// callers might be interested to know about. -// It is okay to ignore the return value of any particular -// call to scanner.state: if one call returns scanError, -// every subsequent call will return scanError too. -const ( - // Continue. - scanContinue = iota // uninteresting byte - scanBeginLiteral // end implied by next result != scanContinue - scanBeginObject // begin object - scanObjectKey // just finished object key (string) - scanObjectValue // just finished non-last object value - scanEndObject // end object (implies scanObjectValue if possible) - scanBeginArray // begin array - scanArrayValue // just finished array value - scanEndArray // end array (implies scanArrayValue if possible) - scanSkipSpace // space byte; can skip; known to be last "continue" result - - // Stop. - scanEnd // top-level value ended *before* this byte; known to be first "stop" result - scanError // hit an error, scanner.err. -) - -// These values are stored in the parseState stack. -// They give the current state of a composite value -// being scanned. If the parser is inside a nested value -// the parseState describes the nested state, outermost at entry 0. -const ( - parseObjectKey = iota // parsing object key (before colon) - parseObjectValue // parsing object value (after colon) - parseArrayValue // parsing array value -) - -// This limits the max nesting depth to prevent stack overflow. -// This is permitted by https://tools.ietf.org/html/rfc7159#section-9 -const maxNestingDepth = 10000 - -// reset prepares the scanner for use. -// It must be called before calling s.step. -func (s *scanner) reset() { - s.step = stateBeginValue - s.parseState = s.parseState[0:0] - s.err = nil - s.endTop = false -} - -// eof tells the scanner that the end of input has been reached. -// It returns a scan status just as s.step does. -func (s *scanner) eof() int { - if s.err != nil { - return scanError - } - if s.endTop { - return scanEnd - } - s.step(s, ' ') - if s.endTop { - return scanEnd - } - if s.err == nil { - s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} - } - return scanError -} - -// pushParseState pushes a new parse state p onto the parse stack. -// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned. -func (s *scanner) pushParseState(c byte, newParseState int, successState int) int { - s.parseState = append(s.parseState, newParseState) - if len(s.parseState) <= maxNestingDepth { - return successState - } - return s.error(c, "exceeded max depth") -} - -// popParseState pops a parse state (already obtained) off the stack -// and updates s.step accordingly. -func (s *scanner) popParseState() { - n := len(s.parseState) - 1 - s.parseState = s.parseState[0:n] - if n == 0 { - s.step = stateEndTop - s.endTop = true - } else { - s.step = stateEndValue - } -} - -func isSpace(c byte) bool { - return c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') -} - -// stateBeginValueOrEmpty is the state after reading `[`. -func stateBeginValueOrEmpty(s *scanner, c byte) int { - if isSpace(c) { - return scanSkipSpace - } - if c == ']' { - return stateEndValue(s, c) - } - return stateBeginValue(s, c) -} - -// stateBeginValue is the state at the beginning of the input. -func stateBeginValue(s *scanner, c byte) int { - if isSpace(c) { - return scanSkipSpace - } - switch c { - case '{': - s.step = stateBeginStringOrEmpty - return s.pushParseState(c, parseObjectKey, scanBeginObject) - case '[': - s.step = stateBeginValueOrEmpty - return s.pushParseState(c, parseArrayValue, scanBeginArray) - case '"': - s.step = stateInString - return scanBeginLiteral - case '-': - s.step = stateNeg - return scanBeginLiteral - case '0': // beginning of 0.123 - s.step = state0 - return scanBeginLiteral - case 't': // beginning of true - s.step = stateT - return scanBeginLiteral - case 'f': // beginning of false - s.step = stateF - return scanBeginLiteral - case 'n': // beginning of null - s.step = stateN - return scanBeginLiteral - } - if '1' <= c && c <= '9' { // beginning of 1234.5 - s.step = state1 - return scanBeginLiteral - } - return s.error(c, "looking for beginning of value") -} - -// stateBeginStringOrEmpty is the state after reading `{`. -func stateBeginStringOrEmpty(s *scanner, c byte) int { - if isSpace(c) { - return scanSkipSpace - } - if c == '}' { - n := len(s.parseState) - s.parseState[n-1] = parseObjectValue - return stateEndValue(s, c) - } - return stateBeginString(s, c) -} - -// stateBeginString is the state after reading `{"key": value,`. -func stateBeginString(s *scanner, c byte) int { - if isSpace(c) { - return scanSkipSpace - } - if c == '"' { - s.step = stateInString - return scanBeginLiteral - } - return s.error(c, "looking for beginning of object key string") -} - -// stateEndValue is the state after completing a value, -// such as after reading `{}` or `true` or `["x"`. -func stateEndValue(s *scanner, c byte) int { - n := len(s.parseState) - if n == 0 { - // Completed top-level before the current byte. - s.step = stateEndTop - s.endTop = true - return stateEndTop(s, c) - } - if isSpace(c) { - s.step = stateEndValue - return scanSkipSpace - } - ps := s.parseState[n-1] - switch ps { - case parseObjectKey: - if c == ':' { - s.parseState[n-1] = parseObjectValue - s.step = stateBeginValue - return scanObjectKey - } - return s.error(c, "after object key") - case parseObjectValue: - if c == ',' { - s.parseState[n-1] = parseObjectKey - s.step = stateBeginString - return scanObjectValue - } - if c == '}' { - s.popParseState() - return scanEndObject - } - return s.error(c, "after object key:value pair") - case parseArrayValue: - if c == ',' { - s.step = stateBeginValue - return scanArrayValue - } - if c == ']' { - s.popParseState() - return scanEndArray - } - return s.error(c, "after array element") - } - return s.error(c, "") -} - -// stateEndTop is the state after finishing the top-level value, -// such as after reading `{}` or `[1,2,3]`. -// Only space characters should be seen now. -func stateEndTop(s *scanner, c byte) int { - if !isSpace(c) { - // Complain about non-space byte on next call. - s.error(c, "after top-level value") - } - return scanEnd -} - -// stateInString is the state after reading `"`. -func stateInString(s *scanner, c byte) int { - if c == '"' { - s.step = stateEndValue - return scanContinue - } - if c == '\\' { - s.step = stateInStringEsc - return scanContinue - } - if c < 0x20 { - return s.error(c, "in string literal") - } - return scanContinue -} - -// stateInStringEsc is the state after reading `"\` during a quoted string. -func stateInStringEsc(s *scanner, c byte) int { - switch c { - case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': - s.step = stateInString - return scanContinue - case 'u': - s.step = stateInStringEscU - return scanContinue - } - return s.error(c, "in string escape code") -} - -// stateInStringEscU is the state after reading `"\u` during a quoted string. -func stateInStringEscU(s *scanner, c byte) int { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInStringEscU1 - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. -func stateInStringEscU1(s *scanner, c byte) int { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInStringEscU12 - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. -func stateInStringEscU12(s *scanner, c byte) int { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInStringEscU123 - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. -func stateInStringEscU123(s *scanner, c byte) int { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInString - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateNeg is the state after reading `-` during a number. -func stateNeg(s *scanner, c byte) int { - if c == '0' { - s.step = state0 - return scanContinue - } - if '1' <= c && c <= '9' { - s.step = state1 - return scanContinue - } - return s.error(c, "in numeric literal") -} - -// state1 is the state after reading a non-zero integer during a number, -// such as after reading `1` or `100` but not `0`. -func state1(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - s.step = state1 - return scanContinue - } - return state0(s, c) -} - -// state0 is the state after reading `0` during a number. -func state0(s *scanner, c byte) int { - if c == '.' { - s.step = stateDot - return scanContinue - } - if c == 'e' || c == 'E' { - s.step = stateE - return scanContinue - } - return stateEndValue(s, c) -} - -// stateDot is the state after reading the integer and decimal point in a number, -// such as after reading `1.`. -func stateDot(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - s.step = stateDot0 - return scanContinue - } - return s.error(c, "after decimal point in numeric literal") -} - -// stateDot0 is the state after reading the integer, decimal point, and subsequent -// digits of a number, such as after reading `3.14`. -func stateDot0(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - return scanContinue - } - if c == 'e' || c == 'E' { - s.step = stateE - return scanContinue - } - return stateEndValue(s, c) -} - -// stateE is the state after reading the mantissa and e in a number, -// such as after reading `314e` or `0.314e`. -func stateE(s *scanner, c byte) int { - if c == '+' || c == '-' { - s.step = stateESign - return scanContinue - } - return stateESign(s, c) -} - -// stateESign is the state after reading the mantissa, e, and sign in a number, -// such as after reading `314e-` or `0.314e+`. -func stateESign(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - s.step = stateE0 - return scanContinue - } - return s.error(c, "in exponent of numeric literal") -} - -// stateE0 is the state after reading the mantissa, e, optional sign, -// and at least one digit of the exponent in a number, -// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. -func stateE0(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - return scanContinue - } - return stateEndValue(s, c) -} - -// stateT is the state after reading `t`. -func stateT(s *scanner, c byte) int { - if c == 'r' { - s.step = stateTr - return scanContinue +func transformSyntacticError(err error) error { + switch serr, ok := err.(*jsontext.SyntacticError); { + case serr != nil: + return &SyntaxError{Offset: serr.ByteOffset, msg: serr.Error()} + case ok: + return (*SyntaxError)(nil) + case export.IsIOError(err): + return errors.Unwrap(err) // v1 historically did not wrap IO errors + default: + return err } - return s.error(c, "in literal true (expecting 'r')") -} - -// stateTr is the state after reading `tr`. -func stateTr(s *scanner, c byte) int { - if c == 'u' { - s.step = stateTru - return scanContinue - } - return s.error(c, "in literal true (expecting 'u')") -} - -// stateTru is the state after reading `tru`. -func stateTru(s *scanner, c byte) int { - if c == 'e' { - s.step = stateEndValue - return scanContinue - } - return s.error(c, "in literal true (expecting 'e')") -} - -// stateF is the state after reading `f`. -func stateF(s *scanner, c byte) int { - if c == 'a' { - s.step = stateFa - return scanContinue - } - return s.error(c, "in literal false (expecting 'a')") -} - -// stateFa is the state after reading `fa`. -func stateFa(s *scanner, c byte) int { - if c == 'l' { - s.step = stateFal - return scanContinue - } - return s.error(c, "in literal false (expecting 'l')") -} - -// stateFal is the state after reading `fal`. -func stateFal(s *scanner, c byte) int { - if c == 's' { - s.step = stateFals - return scanContinue - } - return s.error(c, "in literal false (expecting 's')") -} - -// stateFals is the state after reading `fals`. -func stateFals(s *scanner, c byte) int { - if c == 'e' { - s.step = stateEndValue - return scanContinue - } - return s.error(c, "in literal false (expecting 'e')") -} - -// stateN is the state after reading `n`. -func stateN(s *scanner, c byte) int { - if c == 'u' { - s.step = stateNu - return scanContinue - } - return s.error(c, "in literal null (expecting 'u')") -} - -// stateNu is the state after reading `nu`. -func stateNu(s *scanner, c byte) int { - if c == 'l' { - s.step = stateNul - return scanContinue - } - return s.error(c, "in literal null (expecting 'l')") -} - -// stateNul is the state after reading `nul`. -func stateNul(s *scanner, c byte) int { - if c == 'l' { - s.step = stateEndValue - return scanContinue - } - return s.error(c, "in literal null (expecting 'l')") -} - -// stateError is the state after reaching a syntax error, -// such as after reading `[1}` or `5.1.2`. -func stateError(s *scanner, c byte) int { - return scanError -} - -// error records an error and switches to the error state. -func (s *scanner) error(c byte, context string) int { - s.step = stateError - s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} - return scanError -} - -// quoteChar formats c as a quoted character literal. -func quoteChar(c byte) string { - // special cases - different from quoted strings - if c == '\'' { - return `'\''` - } - if c == '"' { - return `'"'` - } - - // use quoted string with different quotation marks - s := strconv.Quote(string(c)) - return "'" + s[1:len(s)-1] + "'" } diff --git a/v1/scanner_test.go b/v1/scanner_test.go index 068439d..2694b2c 100644 --- a/v1/scanner_test.go +++ b/v1/scanner_test.go @@ -195,6 +195,7 @@ func TestIndentErrors(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) slice := make([]uint8, 0) buf := bytes.NewBuffer(slice) if err := Indent(buf, []uint8(tt.in), "", ""); err != nil { diff --git a/v1/stream.go b/v1/stream.go index e2d9470..d7f06d1 100644 --- a/v1/stream.go +++ b/v1/stream.go @@ -6,22 +6,17 @@ package json import ( "bytes" - "errors" "io" + + jsonv2 "github.com/go-json-experiment/json" + "github.com/go-json-experiment/json/jsontext" ) // A Decoder reads and decodes JSON values from an input stream. type Decoder struct { - r io.Reader - buf []byte - d decodeState - scanp int // start of unread data in buf - scanned int64 // amount of data already scanned - scan scanner - err error - - tokenState int - tokenStack []int + dec *jsontext.Decoder + opts jsonv2.Options + err error } // NewDecoder returns a new decoder that reads from r. @@ -29,17 +24,35 @@ type Decoder struct { // The decoder introduces its own buffering and may // read data from r beyond the JSON values requested. func NewDecoder(r io.Reader) *Decoder { - return &Decoder{r: r} + // Hide bytes.Buffer from jsontext since it implements optimizations that + // also limits certain ways it could be used. For example, one cannot write + // to the bytes.Buffer while it is in use by jsontext.Decoder. + if _, ok := r.(*bytes.Buffer); ok { + r = struct{ io.Reader }{r} + } + + dec := new(Decoder) + dec.opts = DefaultOptionsV1() + dec.dec = jsontext.NewDecoder(r, dec.opts) + return dec } // UseNumber causes the Decoder to unmarshal a number into an // interface value as a [Number] instead of as a float64. -func (dec *Decoder) UseNumber() { dec.d.useNumber = true } +func (dec *Decoder) UseNumber() { + if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); !useNumber { + dec.opts = jsonv2.JoinOptions(dec.opts, unmarshalAnyWithRawNumber(true)) + } +} // DisallowUnknownFields causes the Decoder to return an error when the destination // is a struct and the input contains object keys which do not match any // non-ignored, exported fields in the destination. -func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } +func (dec *Decoder) DisallowUnknownFields() { + if reject, _ := jsonv2.GetOption(dec.opts, jsonv2.RejectUnknownMembers); !reject { + dec.opts = jsonv2.JoinOptions(dec.opts, jsonv2.RejectUnknownMembers(true)) + } +} // Decode reads the next JSON-encoded value from its // input and stores it in the value pointed to by v. @@ -50,151 +63,43 @@ func (dec *Decoder) Decode(v any) error { if dec.err != nil { return dec.err } - - if err := dec.tokenPrepareForDecode(); err != nil { - return err - } - - if !dec.tokenValueAllowed() { - return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()} - } - - // Read whole value into buffer. - n, err := dec.readValue() + data, err := dec.dec.ReadValue() if err != nil { + err = transformSyntacticError(err) + dec.err = err return err } - dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) - dec.scanp += n - - // Don't save err from unmarshal into dec.err: - // the connection is still usable since we read a complete JSON - // object from it before the error happened. - err = dec.d.unmarshal(v) - - // fixup token streaming state - dec.tokenValueEnd() - - return err + return jsonv2.Unmarshal(data, v, dec.opts) } // Buffered returns a reader of the data remaining in the Decoder's // buffer. The reader is valid until the next call to [Decoder.Decode]. func (dec *Decoder) Buffered() io.Reader { - return bytes.NewReader(dec.buf[dec.scanp:]) -} - -// readValue reads a JSON value into dec.buf. -// It returns the length of the encoding. -func (dec *Decoder) readValue() (int, error) { - dec.scan.reset() - - scanp := dec.scanp - var err error -Input: - // help the compiler see that scanp is never negative, so it can remove - // some bounds checks below. - for scanp >= 0 { - - // Look in the buffer for a new value. - for ; scanp < len(dec.buf); scanp++ { - c := dec.buf[scanp] - dec.scan.bytes++ - switch dec.scan.step(&dec.scan, c) { - case scanEnd: - // scanEnd is delayed one byte so we decrement - // the scanner bytes count by 1 to ensure that - // this value is correct in the next call of Decode. - dec.scan.bytes-- - break Input - case scanEndObject, scanEndArray: - // scanEnd is delayed one byte. - // We might block trying to get that byte from src, - // so instead invent a space byte. - if stateEndValue(&dec.scan, ' ') == scanEnd { - scanp++ - break Input - } - case scanError: - dec.err = dec.scan.err - return 0, dec.scan.err - } - } - - // Did the last read have an error? - // Delayed until now to allow buffer scan. - if err != nil { - if err == io.EOF { - if dec.scan.step(&dec.scan, ' ') == scanEnd { - break Input - } - if nonSpace(dec.buf) { - err = io.ErrUnexpectedEOF - } - } - dec.err = err - return 0, err - } - - n := scanp - dec.scanp - err = dec.refill() - scanp = dec.scanp + n - } - return scanp - dec.scanp, nil -} - -func (dec *Decoder) refill() error { - // Make room to read more into the buffer. - // First slide down data already consumed. - if dec.scanp > 0 { - dec.scanned += int64(dec.scanp) - n := copy(dec.buf, dec.buf[dec.scanp:]) - dec.buf = dec.buf[:n] - dec.scanp = 0 - } - - // Grow buffer if not large enough. - const minRead = 512 - if cap(dec.buf)-len(dec.buf) < minRead { - newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) - copy(newBuf, dec.buf) - dec.buf = newBuf - } - - // Read. Delay error for next iteration (after scan). - n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) - dec.buf = dec.buf[0 : len(dec.buf)+n] - - return err -} - -func nonSpace(b []byte) bool { - for _, c := range b { - if !isSpace(c) { - return true - } - } - return false + return bytes.NewReader(dec.dec.UnreadBuffer()) } // An Encoder writes JSON values to an output stream. type Encoder struct { - w io.Writer - err error - escapeHTML bool + w io.Writer + opts jsonv2.Options + err error + + buf bytes.Buffer + indentBuf bytes.Buffer - indentBuf []byte indentPrefix string indentValue string } // NewEncoder returns a new encoder that writes to w. func NewEncoder(w io.Writer) *Encoder { - return &Encoder{w: w, escapeHTML: true} + enc := new(Encoder) + enc.w = w + enc.opts = DefaultOptionsV1() + return enc } // Encode writes the JSON encoding of v to the stream, -// with insignificant space characters elided, // followed by a newline character. // // See the documentation for [Marshal] for details about the @@ -204,34 +109,25 @@ func (enc *Encoder) Encode(v any) error { return enc.err } - e := newEncodeState() - defer encodeStatePool.Put(e) - - err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) - if err != nil { + buf := &enc.buf + buf.Reset() + if err := jsonv2.MarshalWrite(buf, v, enc.opts); err != nil { return err } - - // Terminate each value with a newline. - // This makes the output look a little nicer - // when debugging, and some kind of space - // is required if the encoded value was a number, - // so that the reader knows there aren't more - // digits coming. - e.WriteByte('\n') - - b := e.Bytes() - if enc.indentPrefix != "" || enc.indentValue != "" { - enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue) - if err != nil { + if len(enc.indentPrefix)+len(enc.indentValue) > 0 { + enc.indentBuf.Reset() + if err := Indent(&enc.indentBuf, buf.Bytes(), enc.indentPrefix, enc.indentValue); err != nil { return err } - b = enc.indentBuf + buf = &enc.indentBuf } - if _, err = enc.w.Write(b); err != nil { + buf.WriteByte('\n') + + if _, err := enc.w.Write(buf.Bytes()); err != nil { enc.err = err + return err } - return err + return nil } // SetIndent instructs the encoder to format each subsequent encoded @@ -250,33 +146,15 @@ func (enc *Encoder) SetIndent(prefix, indent string) { // In non-HTML settings where the escaping interferes with the readability // of the output, SetEscapeHTML(false) disables this behavior. func (enc *Encoder) SetEscapeHTML(on bool) { - enc.escapeHTML = on + if escape, _ := jsonv2.GetOption(enc.opts, jsontext.EscapeForHTML); escape != on { + enc.opts = jsonv2.JoinOptions(enc.opts, jsontext.EscapeForHTML(on)) + } } // RawMessage is a raw encoded JSON value. // It implements [Marshaler] and [Unmarshaler] and can // be used to delay JSON decoding or precompute a JSON encoding. -type RawMessage []byte - -// MarshalJSON returns m as the JSON encoding of m. -func (m RawMessage) MarshalJSON() ([]byte, error) { - if m == nil { - return []byte("null"), nil - } - return m, nil -} - -// UnmarshalJSON sets *m to a copy of data. -func (m *RawMessage) UnmarshalJSON(data []byte) error { - if m == nil { - return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") - } - *m = append((*m)[0:0], data...) - return nil -} - -var _ Marshaler = (*RawMessage)(nil) -var _ Unmarshaler = (*RawMessage)(nil) +type RawMessage = jsontext.Value // A Token holds a value of one of these types: // @@ -288,65 +166,6 @@ var _ Unmarshaler = (*RawMessage)(nil) // - nil, for JSON null type Token any -const ( - tokenTopValue = iota - tokenArrayStart - tokenArrayValue - tokenArrayComma - tokenObjectStart - tokenObjectKey - tokenObjectColon - tokenObjectValue - tokenObjectComma -) - -// advance tokenstate from a separator state to a value state -func (dec *Decoder) tokenPrepareForDecode() error { - // Note: Not calling peek before switch, to avoid - // putting peek into the standard Decode path. - // peek is only called when using the Token API. - switch dec.tokenState { - case tokenArrayComma: - c, err := dec.peek() - if err != nil { - return err - } - if c != ',' { - return &SyntaxError{"expected comma after array element", dec.InputOffset()} - } - dec.scanp++ - dec.tokenState = tokenArrayValue - case tokenObjectColon: - c, err := dec.peek() - if err != nil { - return err - } - if c != ':' { - return &SyntaxError{"expected colon after object key", dec.InputOffset()} - } - dec.scanp++ - dec.tokenState = tokenObjectValue - } - return nil -} - -func (dec *Decoder) tokenValueAllowed() bool { - switch dec.tokenState { - case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: - return true - } - return false -} - -func (dec *Decoder) tokenValueEnd() { - switch dec.tokenState { - case tokenArrayStart, tokenArrayValue: - dec.tokenState = tokenArrayComma - case tokenObjectValue: - dec.tokenState = tokenObjectComma - } -} - // A Delim is a JSON array or object delimiter, one of [ ] { or }. type Delim rune @@ -366,147 +185,47 @@ func (d Delim) String() string { // to mark the start and end of arrays and objects. // Commas and colons are elided. func (dec *Decoder) Token() (Token, error) { - for { - c, err := dec.peek() - if err != nil { - return nil, err - } - switch c { - case '[': - if !dec.tokenValueAllowed() { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenStack = append(dec.tokenStack, dec.tokenState) - dec.tokenState = tokenArrayStart - return Delim('['), nil - - case ']': - if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] - dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] - dec.tokenValueEnd() - return Delim(']'), nil - - case '{': - if !dec.tokenValueAllowed() { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenStack = append(dec.tokenStack, dec.tokenState) - dec.tokenState = tokenObjectStart - return Delim('{'), nil - - case '}': - if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] - dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] - dec.tokenValueEnd() - return Delim('}'), nil - - case ':': - if dec.tokenState != tokenObjectColon { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenState = tokenObjectValue - continue - - case ',': - if dec.tokenState == tokenArrayComma { - dec.scanp++ - dec.tokenState = tokenArrayValue - continue - } - if dec.tokenState == tokenObjectComma { - dec.scanp++ - dec.tokenState = tokenObjectKey - continue - } - return dec.tokenError(c) - - case '"': - if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { - var x string - old := dec.tokenState - dec.tokenState = tokenTopValue - err := dec.Decode(&x) - dec.tokenState = old - if err != nil { - return nil, err - } - dec.tokenState = tokenObjectColon - return x, nil - } - fallthrough - - default: - if !dec.tokenValueAllowed() { - return dec.tokenError(c) - } - var x any - if err := dec.Decode(&x); err != nil { - return nil, err - } - return x, nil + tok, err := dec.dec.ReadToken() + if err != nil { + return nil, transformSyntacticError(err) + } + switch tok.Kind() { + case 'n': + return nil, nil + case 'f': + return false, nil + case 't': + return true, nil + case '"': + return tok.String(), nil + case '0': + if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); useNumber { + return Number(tok.String()), nil } + return tok.Float(), nil + case '{': + return Delim('{'), nil + case '}': + return Delim('}'), nil + case '[': + return Delim('['), nil + case ']': + return Delim(']'), nil + default: + panic("unreachable") } } -func (dec *Decoder) tokenError(c byte) (Token, error) { - var context string - switch dec.tokenState { - case tokenTopValue: - context = " looking for beginning of value" - case tokenArrayStart, tokenArrayValue, tokenObjectValue: - context = " looking for beginning of value" - case tokenArrayComma: - context = " after array element" - case tokenObjectKey: - context = " looking for beginning of object key string" - case tokenObjectColon: - context = " after object key" - case tokenObjectComma: - context = " after object key:value pair" - } - return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} -} - // More reports whether there is another element in the // current array or object being parsed. func (dec *Decoder) More() bool { - c, err := dec.peek() - return err == nil && c != ']' && c != '}' -} - -func (dec *Decoder) peek() (byte, error) { - var err error - for { - for i := dec.scanp; i < len(dec.buf); i++ { - c := dec.buf[i] - if isSpace(c) { - continue - } - dec.scanp = i - return c, nil - } - // buffer has been scanned, now report any error - if err != nil { - return 0, err - } - err = dec.refill() - } + k := dec.dec.PeekKind() + return k > 0 && k != ']' && k != '}' } // InputOffset returns the input stream byte offset of the current decoder position. // The offset gives the location of the end of the most recently returned token // and the beginning of the next token. func (dec *Decoder) InputOffset() int64 { - return dec.scanned + int64(dec.scanp) + return dec.dec.InputOffset() } diff --git a/v1/stream_test.go b/v1/stream_test.go index 32ede8c..53e3a99 100644 --- a/v1/stream_test.go +++ b/v1/stream_test.go @@ -6,43 +6,23 @@ package json import ( "bytes" - "fmt" "io" "log" "net" "net/http" "net/http/httptest" - "path" "reflect" - "runtime" "runtime/debug" "strings" "testing" -) - -// TODO(https://go.dev/issue/52751): Replace with native testing support. - -// CaseName is a case name annotated with a file and line. -type CaseName struct { - Name string - Where CasePos -} -// Name annotates a case name with the file and line of the caller. -func Name(s string) (c CaseName) { - c.Name = s - runtime.Callers(2, c.Where.pc[:]) - return c -} + "github.com/go-json-experiment/json/internal/jsontest" +) -// CasePos represents a file and line number. -type CasePos struct{ pc [1]uintptr } +type CaseName = jsontest.CaseName +type CasePos = jsontest.CasePos -func (pos CasePos) String() string { - frames := runtime.CallersFrames(pos.pc[:]) - frame, _ := frames.Next() - return fmt.Sprintf("%s:%d", path.Base(frame.File), frame.Line) -} +var Name = jsontest.Name // Test values for the stream test. // One of each JSON kind. @@ -79,9 +59,9 @@ func TestEncoder(t *testing.T) { t.Fatalf("#%d.%d Encode error: %v", i, j, err) } } - if have, want := buf.String(), nlines(streamEncoded, i); have != want { + if got, want := buf.String(), nlines(streamEncoded, i); got != want { t.Errorf("encoding %d items: mismatch:", i) - diff(t, []byte(have), []byte(want)) + diff(t, []byte(got), []byte(want)) break } } @@ -148,9 +128,9 @@ func TestEncoderIndent(t *testing.T) { for _, v := range streamTest { enc.Encode(v) } - if have, want := buf.String(), streamEncodedIndent; have != want { - t.Error("Encode mismatch:") - diff(t, []byte(have), []byte(want)) + if got, want := buf.String(), streamEncodedIndent; got != want { + t.Errorf("Encode mismatch:\ngot:\n%s\n\nwant:\n%s", got, want) + diff(t, []byte(got), []byte(want)) } } @@ -214,6 +194,7 @@ func TestEncoderSetEscapeHTML(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) var buf strings.Builder enc := NewEncoder(&buf) if err := enc.Encode(tt.v); err != nil { @@ -304,6 +285,7 @@ func nlines(s string, n int) string { } func TestRawMessage(t *testing.T) { + skipKnownFailure(t) var data struct { X float64 Id RawMessage @@ -460,6 +442,7 @@ func TestDecodeInStream(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) dec := NewDecoder(strings.NewReader(tt.json)) for i, want := range tt.expTokens { var got any diff --git a/v1/tables.go b/v1/tables.go deleted file mode 100644 index 10acdc1..0000000 --- a/v1/tables.go +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package json - -import "unicode/utf8" - -// safeSet holds the value true if the ASCII character with the given array -// position can be represented inside a JSON string without any further -// escaping. -// -// All values are true except for the ASCII control characters (0-31), the -// double quote ("), and the backslash character ("\"). -var safeSet = [utf8.RuneSelf]bool{ - ' ': true, - '!': true, - '"': false, - '#': true, - '$': true, - '%': true, - '&': true, - '\'': true, - '(': true, - ')': true, - '*': true, - '+': true, - ',': true, - '-': true, - '.': true, - '/': true, - '0': true, - '1': true, - '2': true, - '3': true, - '4': true, - '5': true, - '6': true, - '7': true, - '8': true, - '9': true, - ':': true, - ';': true, - '<': true, - '=': true, - '>': true, - '?': true, - '@': true, - 'A': true, - 'B': true, - 'C': true, - 'D': true, - 'E': true, - 'F': true, - 'G': true, - 'H': true, - 'I': true, - 'J': true, - 'K': true, - 'L': true, - 'M': true, - 'N': true, - 'O': true, - 'P': true, - 'Q': true, - 'R': true, - 'S': true, - 'T': true, - 'U': true, - 'V': true, - 'W': true, - 'X': true, - 'Y': true, - 'Z': true, - '[': true, - '\\': false, - ']': true, - '^': true, - '_': true, - '`': true, - 'a': true, - 'b': true, - 'c': true, - 'd': true, - 'e': true, - 'f': true, - 'g': true, - 'h': true, - 'i': true, - 'j': true, - 'k': true, - 'l': true, - 'm': true, - 'n': true, - 'o': true, - 'p': true, - 'q': true, - 'r': true, - 's': true, - 't': true, - 'u': true, - 'v': true, - 'w': true, - 'x': true, - 'y': true, - 'z': true, - '{': true, - '|': true, - '}': true, - '~': true, - '\u007f': true, -} - -// htmlSafeSet holds the value true if the ASCII character with the given -// array position can be safely represented inside a JSON string, embedded -// inside of HTML