Skip to content

Commit

Permalink
Optimize JSON parsing
Browse files Browse the repository at this point in the history
Co-authored-by: Neil Alexander <[email protected]>
  • Loading branch information
S7evinK and neilalexander committed Oct 3, 2024
1 parent 2c76491 commit c6e4bfe
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 50 deletions.
2 changes: 1 addition & 1 deletion eventV1.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ func (e *eventV1) SetUnsignedField(path string, value interface{}) error {
eventJSON = CanonicalJSONAssumeValid(eventJSON)

res := gjson.GetBytes(eventJSON, "unsigned")
unsigned := RawJSONFromResult(res, eventJSON)
unsigned := []byte(res.Raw)
e.eventFields.Unsigned = unsigned

e.eventJSON = eventJSON
Expand Down
82 changes: 36 additions & 46 deletions json.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package gomatrixserverlib
import (
"encoding/binary"
"errors"
"sort"
"slices"

Check failure on line 21 in json.go

View workflow job for this annotation

GitHub Actions / Unit tests (Go 1.20)

package slices is not in GOROOT (/opt/hostedtoolcache/go/1.20.14/x64/src/slices)
"strings"
"unicode/utf16"
"unicode/utf8"
Expand Down Expand Up @@ -112,7 +112,7 @@ func EnforcedCanonicalJSON(input []byte, roomVersion RoomVersion) ([]byte, error

var ErrCanonicalJSON = errors.New("value is outside of safe range")

func noVerifyCanonicalJSON(input []byte) error { return nil }
func noVerifyCanonicalJSON(_ []byte) error { return nil }

func verifyEnforcedCanonicalJSON(input []byte) error {
valid := true
Expand Down Expand Up @@ -151,47 +151,43 @@ func verifyEnforcedCanonicalJSON(input []byte) error {
// CanonicalJSONAssumeValid is the same as CanonicalJSON, but assumes the
// input is valid JSON
func CanonicalJSONAssumeValid(input []byte) []byte {
input = CompactJSON(input, make([]byte, 0, len(input)))
return SortJSON(input, make([]byte, 0, len(input)))
return SortJSON(CompactJSON(input))
}

// SortJSON reencodes the JSON with the object keys sorted by lexicographically
// by codepoint. The input must be valid JSON.
func SortJSON(input, output []byte) []byte {
func SortJSON(input []byte) []byte {
result := gjson.ParseBytes(input)

RawJSON := RawJSONFromResult(result, input)
return sortJSONValue(result, RawJSON, output)
return sortJSONValue(result, input[:0])
}

// sortJSONValue takes a gjson.Result and sorts it. inputJSON must be the
// raw JSON bytes that gjson.Result points to.
func sortJSONValue(input gjson.Result, inputJSON, output []byte) []byte {
func sortJSONValue(input gjson.Result, output []byte) []byte {
if input.IsArray() {
return sortJSONArray(input, inputJSON, output)
return sortJSONArray(input, output)
}

if input.IsObject() {
return sortJSONObject(input, inputJSON, output)
return sortJSONObject(input, output)
}

// If its neither an object nor an array then there is no sub structure
// to sort, so just append the raw bytes.
return append(output, inputJSON...)
return append(output, input.Raw...)
}

// sortJSONArray takes a gjson.Result and sorts it, assuming its an array.
// inputJSON must be the raw JSON bytes that gjson.Result points to.
func sortJSONArray(input gjson.Result, inputJSON, output []byte) []byte {
func sortJSONArray(input gjson.Result, output []byte) []byte {
sep := byte('[')

// Iterate over each value in the array and sort it.
input.ForEach(func(_, value gjson.Result) bool {
output = append(output, sep)
sep = ','

RawJSON := RawJSONFromResult(value, inputJSON)
output = sortJSONValue(value, RawJSON, output)
output = sortJSONValue(value, output)

return true // keep iterating
})
Expand All @@ -207,31 +203,31 @@ func sortJSONArray(input gjson.Result, inputJSON, output []byte) []byte {
return output
}

type entry struct {
key string // The parsed key string
value gjson.Result
}

// sortJSONObject takes a gjson.Result and sorts it, assuming its an object.
// inputJSON must be the raw JSON bytes that gjson.Result points to.
func sortJSONObject(input gjson.Result, inputJSON, output []byte) []byte {
type entry struct {
key string // The parsed key string
rawKey []byte // The raw, unparsed key JSON string
value gjson.Result
}
func sortJSONObject(input gjson.Result, output []byte) []byte {

var entries []entry
var _entries [128]*entry
entries := _entries[:0]

// Iterate over each key/value pair and add it to a slice
// that we can sort
input.ForEach(func(key, value gjson.Result) bool {
entries = append(entries, entry{
key: key.String(),
rawKey: RawJSONFromResult(key, inputJSON),
value: value,
entries = append(entries, &entry{
key: key.String(),
value: value,
})
return true // keep iterating
})

// Sort the slice based on the *parsed* key
sort.Slice(entries, func(a, b int) bool {
return entries[a].key < entries[b].key
slices.SortFunc(entries, func(a, b *entry) int {
return strings.Compare(a.key, b.key)
})

sep := byte('{')
Expand All @@ -241,12 +237,10 @@ func sortJSONObject(input gjson.Result, inputJSON, output []byte) []byte {
sep = ','

// Append the raw unparsed JSON key, *not* the parsed key
output = append(output, entry.rawKey...)
output = append(output, ':')

RawJSON := RawJSONFromResult(entry.value, inputJSON)

output = sortJSONValue(entry.value, RawJSON, output)
output = append(output, '"')
output = append(output, entry.key...)
output = append(output, '"', ':')
output = sortJSONValue(entry.value, output)
}
if sep == '{' {
// If sep is still '{' then the object was empty and we never wrote the
Expand All @@ -261,8 +255,9 @@ func sortJSONObject(input gjson.Result, inputJSON, output []byte) []byte {

// CompactJSON makes the encoded JSON as small as possible by removing
// whitespace and unneeded unicode escapes
func CompactJSON(input, output []byte) []byte {
func CompactJSON(input []byte) []byte {
var i int
output := input[:0]
for i < len(input) {
c := input[i]
i++
Expand Down Expand Up @@ -311,6 +306,11 @@ func CompactJSON(input, output []byte) []byte {
return output
}

const (
ESCAPES = "uuuuuuuubtnufruuuuuuuuuuuuuuuuuu"
HEX = "0123456789abcdef"
)

// compactUnicodeEscape unpacks a 4 byte unicode escape starting at index.
// Returns the output slice and a new input index.
func compactUnicodeEscape(input, output []byte, index int) ([]byte, int) {
Expand All @@ -319,10 +319,7 @@ func compactUnicodeEscape(input, output []byte, index int) ([]byte, int) {
n := utf8.EncodeRune(buffer[:], c)
output = append(output, buffer[:n]...)
}
const (
ESCAPES = "uuuuuuuubtnufruuuuuuuuuuuuuuuuuu"
HEX = "0123456789abcdef"
)

// If there aren't enough bytes to decode the hex escape then return.
if len(input)-index < 4 {
return output, len(input)
Expand Down Expand Up @@ -375,10 +372,3 @@ func readHexDigits(input []byte) rune {
hex |= hex >> 8
return rune(hex & 0xFFFF)
}

// RawJSONFromResult extracts the raw JSON bytes pointed to by result.
// input must be the json bytes that were used to generate result
// TODO: Why do we do this?
func RawJSONFromResult(result gjson.Result, _ []byte) []byte {
return []byte(result.Raw)
}
6 changes: 3 additions & 3 deletions json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ func TestJSONFloats(t *testing.T) {
}

func testSortJSON(t *testing.T, input, want string) {
got := SortJSON([]byte(input), nil)
got := SortJSON([]byte(input))

// Squash out the whitespace before comparing the JSON in case SortJSON had inserted whitespace.
if string(CompactJSON(got, nil)) != want {
if string(CompactJSON(got)) != want {
t.Errorf("SortJSON(%q): want %q got %q", input, want, got)
}
}
Expand All @@ -79,7 +79,7 @@ func TestSortJSON(t *testing.T) {
}

func testCompactJSON(t *testing.T, input, want string) {
bytes := CompactJSON([]byte(input), nil)
bytes := CompactJSON([]byte(input))
got := string(bytes)
if got != want {
t.Errorf("CompactJSON(%q):\n want: %q\n got: %q\n bytes: % X", input, want, got, bytes)
Expand Down

0 comments on commit c6e4bfe

Please sign in to comment.