Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize JSON parsing #439

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
strategy:
fail-fast: false
matrix:
go: ["1.20", "1.21"]
go: ["1.21", "stable"]
steps:
- uses: actions/checkout@v3
- name: Setup go
Expand Down Expand Up @@ -80,7 +80,7 @@ jobs:
strategy:
fail-fast: false
matrix:
go: ["1.20", "1.21"]
go: ["1.21", "stable"]
steps:
- uses: actions/checkout@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ gomatrixserverlib

[![GoDoc](https://godoc.org/github.com/matrix-org/gomatrixserverlib?status.svg)](https://godoc.org/github.com/matrix-org/gomatrixserverlib)

Go library for common functions needed by matrix servers. This library assumes Go 1.18+.
Go library for common functions needed by matrix servers. This library assumes Go 1.21+.
2 changes: 1 addition & 1 deletion eventV1.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ func (e *eventV1) SetUnsignedField(path string, value interface{}) error {
eventJSON = CanonicalJSONAssumeValid(eventJSON)

res := gjson.GetBytes(eventJSON, "unsigned")
unsigned := RawJSONFromResult(res, eventJSON)
unsigned := []byte(res.Raw)
e.eventFields.Unsigned = unsigned

e.eventJSON = eventJSON
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
)

go 1.18
go 1.21.0
72 changes: 31 additions & 41 deletions json.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package gomatrixserverlib
import (
"encoding/binary"
"errors"
"sort"
"slices"
"strings"
"unicode/utf16"
"unicode/utf8"
Expand Down Expand Up @@ -112,7 +112,7 @@ func EnforcedCanonicalJSON(input []byte, roomVersion RoomVersion) ([]byte, error

var ErrCanonicalJSON = errors.New("value is outside of safe range")

func noVerifyCanonicalJSON(input []byte) error { return nil }
func noVerifyCanonicalJSON(_ []byte) error { return nil }

func verifyEnforcedCanonicalJSON(input []byte) error {
valid := true
Expand Down Expand Up @@ -151,47 +151,43 @@ func verifyEnforcedCanonicalJSON(input []byte) error {
// CanonicalJSONAssumeValid is the same as CanonicalJSON, but assumes the
// input is valid JSON
func CanonicalJSONAssumeValid(input []byte) []byte {
input = CompactJSON(input, make([]byte, 0, len(input)))
return SortJSON(input, make([]byte, 0, len(input)))
return SortJSON(CompactJSON(input))
}

// SortJSON re-encodes the JSON with the object keys sorted lexicographically
// by codepoint. The input must be valid JSON.
func SortJSON(input, output []byte) []byte {
func SortJSON(input []byte) []byte {
result := gjson.ParseBytes(input)

RawJSON := RawJSONFromResult(result, input)
return sortJSONValue(result, RawJSON, output)
return sortJSONValue(result, input[:0])
}

// sortJSONValue takes a gjson.Result and sorts it. inputJSON must be the
// raw JSON bytes that gjson.Result points to.
func sortJSONValue(input gjson.Result, inputJSON, output []byte) []byte {
func sortJSONValue(input gjson.Result, output []byte) []byte {
if input.IsArray() {
return sortJSONArray(input, inputJSON, output)
return sortJSONArray(input, output)
}

if input.IsObject() {
return sortJSONObject(input, inputJSON, output)
return sortJSONObject(input, output)
}

// If it's neither an object nor an array then there is no substructure
// to sort, so just append the raw bytes.
return append(output, inputJSON...)
return append(output, input.Raw...)
}

// sortJSONArray takes a gjson.Result and sorts it, assuming it's an array.
// inputJSON must be the raw JSON bytes that gjson.Result points to.
func sortJSONArray(input gjson.Result, inputJSON, output []byte) []byte {
func sortJSONArray(input gjson.Result, output []byte) []byte {
sep := byte('[')

// Iterate over each value in the array and sort it.
input.ForEach(func(_, value gjson.Result) bool {
output = append(output, sep)
sep = ','

RawJSON := RawJSONFromResult(value, inputJSON)
output = sortJSONValue(value, RawJSON, output)
output = sortJSONValue(value, output)

return true // keep iterating
})
Expand All @@ -209,29 +205,30 @@ func sortJSONArray(input gjson.Result, inputJSON, output []byte) []byte {

// sortJSONObject takes a gjson.Result and sorts it, assuming it's an object.
// inputJSON must be the raw JSON bytes that gjson.Result points to.
func sortJSONObject(input gjson.Result, inputJSON, output []byte) []byte {
func sortJSONObject(input gjson.Result, output []byte) []byte {
type entry struct {
key string // The parsed key string
rawKey []byte // The raw, unparsed key JSON string
value gjson.Result
key string // The parsed key string
value gjson.Result
}

var entries []entry
// Try to stay on the stack here if we can.
var _entries [128]*entry
entries := _entries[:0]

// Iterate over each key/value pair and add it to a slice
// that we can sort
input.ForEach(func(key, value gjson.Result) bool {
entries = append(entries, entry{
key: key.String(),
rawKey: RawJSONFromResult(key, inputJSON),
value: value,
entries = append(entries, &entry{
key: key.String(),
value: value,
})
return true // keep iterating
})

// Sort the slice based on the *parsed* key
sort.Slice(entries, func(a, b int) bool {
return entries[a].key < entries[b].key
// Using slices.SortFunc here instead of sort.Slice avoids
// heap escapes due to reflection.
slices.SortFunc(entries, func(a, b *entry) int {
return strings.Compare(a.key, b.key)
})

sep := byte('{')
Expand All @@ -241,12 +238,10 @@ func sortJSONObject(input gjson.Result, inputJSON, output []byte) []byte {
sep = ','

// Append the raw unparsed JSON key, *not* the parsed key
output = append(output, entry.rawKey...)
output = append(output, ':')

RawJSON := RawJSONFromResult(entry.value, inputJSON)

output = sortJSONValue(entry.value, RawJSON, output)
output = append(output, '"')
output = append(output, entry.key...)
output = append(output, '"', ':')
output = sortJSONValue(entry.value, output)
}
if sep == '{' {
// If sep is still '{' then the object was empty and we never wrote the
Expand All @@ -261,8 +256,9 @@ func sortJSONObject(input gjson.Result, inputJSON, output []byte) []byte {

// CompactJSON makes the encoded JSON as small as possible by removing
// whitespace and unneeded unicode escapes
func CompactJSON(input, output []byte) []byte {
func CompactJSON(input []byte) []byte {
var i int
output := input[:0]
for i < len(input) {
c := input[i]
i++
Expand Down Expand Up @@ -323,6 +319,7 @@ func compactUnicodeEscape(input, output []byte, index int) ([]byte, int) {
ESCAPES = "uuuuuuuubtnufruuuuuuuuuuuuuuuuuu"
HEX = "0123456789abcdef"
)

// If there aren't enough bytes to decode the hex escape then return.
if len(input)-index < 4 {
return output, len(input)
Expand Down Expand Up @@ -375,10 +372,3 @@ func readHexDigits(input []byte) rune {
hex |= hex >> 8
return rune(hex & 0xFFFF)
}

// RawJSONFromResult extracts the raw JSON bytes pointed to by result.
// input must be the json bytes that were used to generate result
// TODO: Why do we do this?
func RawJSONFromResult(result gjson.Result, _ []byte) []byte {
return []byte(result.Raw)
}
6 changes: 3 additions & 3 deletions json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ func TestJSONFloats(t *testing.T) {
}

func testSortJSON(t *testing.T, input, want string) {
got := SortJSON([]byte(input), nil)
got := SortJSON([]byte(input))

// Squash out the whitespace before comparing the JSON in case SortJSON had inserted whitespace.
if string(CompactJSON(got, nil)) != want {
if string(CompactJSON(got)) != want {
t.Errorf("SortJSON(%q): want %q got %q", input, want, got)
}
}
Expand All @@ -79,7 +79,7 @@ func TestSortJSON(t *testing.T) {
}

func testCompactJSON(t *testing.T, input, want string) {
bytes := CompactJSON([]byte(input), nil)
bytes := CompactJSON([]byte(input))
got := string(bytes)
if got != want {
t.Errorf("CompactJSON(%q):\n want: %q\n got: %q\n bytes: % X", input, want, got, bytes)
Expand Down
Loading