Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add special encoding of json array elements #3368

Merged
Merged
Show file tree
Hide file tree
Changes from 49 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
32c2440
Add json traversal functions
islamaliev Nov 24, 2024
53cb4cb
Add GetPath method to JSON
islamaliev Nov 25, 2024
f703e3a
Include array element index in path
islamaliev Nov 25, 2024
403c587
Fix json traversal
islamaliev Nov 28, 2024
1c059fb
Add JSON and Bool encoding
islamaliev Nov 28, 2024
c784f61
Correctly handle paths to json nodes
islamaliev Nov 29, 2024
9855f30
Base JSON index implementation
islamaliev Nov 30, 2024
0faa02b
Move match-related code to a file
islamaliev Dec 1, 2024
92f7958
Make index work for bool and string
islamaliev Dec 1, 2024
516d290
Add filter by json null value
islamaliev Dec 1, 2024
af5eba2
Add MD file for secondary indexes
islamaliev Dec 1, 2024
3dcb838
Add note about indexing of related docs
islamaliev Dec 2, 2024
e27d5db
Add note about json indexing
islamaliev Dec 2, 2024
7e00694
Enable filtering by json bool and string
islamaliev Dec 3, 2024
40341a0
Add unique json index
islamaliev Dec 6, 2024
61a7b90
Filter by array elements
islamaliev Dec 8, 2024
32ef7bc
Fix _in/_nin filter for json docs
islamaliev Dec 8, 2024
fc0eb2b
Add filtering on arrays of json docs
islamaliev Dec 9, 2024
70f8651
Remove filtering without array elements
islamaliev Dec 10, 2024
cdb9d34
Add tests for composite index with json
islamaliev Dec 14, 2024
adb71d4
Enable indexing of array within json docs
islamaliev Dec 16, 2024
bb67d2f
Enable json array traversal to only top level elements
islamaliev Dec 16, 2024
8f24c04
Fix lint
islamaliev Dec 16, 2024
279bb69
Update docs
islamaliev Dec 16, 2024
343f5fc
Fix test expectations
islamaliev Dec 16, 2024
a56d3bf
Add change detector note
islamaliev Dec 16, 2024
b31c6c0
Polish
islamaliev Dec 17, 2024
69a429b
Update documentation
islamaliev Dec 20, 2024
74605db
Update documentation
islamaliev Jan 2, 2025
efef1b1
Rename
islamaliev Jan 2, 2025
85b5e50
Update documentation
islamaliev Jan 2, 2025
c181d9e
Add encoding/decoding tests
islamaliev Jan 2, 2025
669cc85
Fix import
islamaliev Jan 2, 2025
122bc24
Improve coverage
islamaliev Jan 2, 2025
8308672
PR fixup
islamaliev Jan 2, 2025
f237f2f
Update copyright
islamaliev Jan 2, 2025
575a469
Merge remote-tracking branch 'upstream/develop' into feat/sec-index-o…
islamaliev Jan 7, 2025
edb8386
Add json array path encoding
islamaliev Jan 7, 2025
b6c6da5
follow up change
islamaliev Jan 9, 2025
1908a18
Merge remote-tracking branch 'upstream/develop' into feat/sec-index-o…
islamaliev Jan 9, 2025
507ad20
Scope _none to only arrays
islamaliev Jan 11, 2025
5bd11d8
Merge remote-tracking branch 'upstream/develop' into feat/json-array-…
islamaliev Jan 13, 2025
10d3e30
Add a note for data change
islamaliev Jan 13, 2025
e6153ce
Add comments
islamaliev Jan 14, 2025
213bba5
Merge remote-tracking branch 'upstream/develop' into feat/json-array-…
islamaliev Jan 14, 2025
29a0c7f
Move MakeJSONPath to tests
islamaliev Jan 15, 2025
d20e921
PR fixup
islamaliev Jan 16, 2025
e63fd8c
Turn json part interface in struct
islamaliev Jan 17, 2025
71dad5d
PR fixup
islamaliev Jan 17, 2025
3acb53d
Merge remote-tracking branch 'upstream/develop' into feat/json-array-…
islamaliev Jan 17, 2025
def1754
Add more comments
islamaliev Jan 20, 2025
528ee75
Merge remote-tracking branch 'upstream/develop' into feat/json-array-…
islamaliev Jan 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 120 additions & 41 deletions client/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,91 @@
"encoding/json"
"io"
"strconv"
"strings"

"github.com/valyala/fastjson"
"golang.org/x/exp/constraints"
)

// JSONPathPart represents a single part (step) of a JSON path.
// A part is either the name of an object property or the position (index) of an
// element within an array.
//
// For example, consider the following two JSON documents:
//
//	{
//		"custom": {
//			"name": "John"
//		},
//		"0": {
//			"val": 1
//		}
//	}
//
//	[
//		{
//			"val": 2
//		}
//	]
//
// The path to a top-level document is empty.
// In the first document the path to subtree { "name": "John" } can be described as "custom"
// and the path to value "John" can be described as "custom.name".
// If parts were plain strings, the paths to both values 1 and 2 would read "0.val":
//   - for value 1 it's the "0" property of the object followed by the "val" property
//   - for value 2 it's element 0 of the array followed by the "val" property
//
// That's why a part records whether it is a property name or an array index.
// Use Property and Index to find out which kind a part is.
type JSONPathPart struct {
// value holds a string for a property part or a uint64 for an index part.
value any
}

// Property reports the property name carried by this part.
// The boolean is true only when the part is a property; otherwise the
// returned name is the empty string.
func (p JSONPathPart) Property() (string, bool) {
	if name, isProperty := p.value.(string); isProperty {
		return name, true
	}
	return "", false
}

// Index reports the array position carried by this part.
// The boolean is true only when the part is an array index; otherwise the
// returned position is 0.
func (p JSONPathPart) Index() (uint64, bool) {
	switch pos := p.value.(type) {
	case uint64:
		return pos, true
	default:
		return 0, false
	}
}

// JSONPath represents a path to a JSON value in a JSON tree as an ordered list of parts.
// An empty (or nil) path refers to the top-level document.
type JSONPath []JSONPathPart
AndrewSisley marked this conversation as resolved.
Show resolved Hide resolved

// Parts returns the parts of the JSON path in order, starting from the root.
// The returned slice is the path itself rather than a copy, so changes made to its
// elements are visible through the path.
func (p JSONPath) Parts() []JSONPathPart {
return p

Check warning on line 69 in client/json.go

View check run for this annotation

Codecov / codecov/patch

client/json.go#L68-L69

Added lines #L68 - L69 were not covered by tests
}

// AppendProperty returns a new JSON path consisting of p followed by a property part
// with the given name. The receiver is left unchanged.
//
// The result never shares its backing array with p: the receiver is re-sliced with
// its capacity clamped to its length, which forces append to allocate. Without this,
// appending different parts to the same parent path (as happens for sibling nodes)
// could write into the same spare slot and overwrite each other's last part.
func (p JSONPath) AppendProperty(part string) JSONPath {
	return append(p[:len(p):len(p)], JSONPathPart{value: part})
}

// AppendIndex returns a new JSON path consisting of p followed by an array-index part
// with the given position. The receiver is left unchanged.
//
// The result never shares its backing array with p: the receiver is re-sliced with
// its capacity clamped to its length, which forces append to allocate. Without this,
// appending different positions to the same parent path (as happens for the elements
// of one array) could write into the same spare slot and overwrite each other's last part.
func (p JSONPath) AppendIndex(part uint64) JSONPath {
	return append(p[:len(p):len(p)], JSONPathPart{value: part})
}

// String renders the JSON path in a human-readable form.
// Property parts are written by name and separated from whatever precedes them
// with a dot (no dot is written for the very first part); index parts are written
// as the decimal position in square brackets, e.g. "custom.items[2].val".
// Parts that are neither a property nor an index produce no output.
func (p JSONPath) String() string {
	var out strings.Builder
	for pos := range p {
		switch v := p[pos].value.(type) {
		case string:
			if pos != 0 {
				out.WriteByte('.')
			}
			out.WriteString(v)
		case uint64:
			out.WriteByte('[')
			out.WriteString(strconv.FormatUint(v, 10))
			out.WriteByte(']')
		}
	}
	return out.String()
}

// JSON represents a JSON value that can be any valid JSON type: object, array, number, string, boolean, or null.
// It can also represent a subtree of a JSON tree.
// It provides type-safe access to the underlying value through various accessor methods.
type JSON interface {
json.Marshaler
Expand Down Expand Up @@ -58,18 +137,18 @@
// Returns an error if marshaling fails.
Marshal(w io.Writer) error

// GetPath returns the path of the JSON value in the JSON tree.
GetPath() []string
// GetPath returns the path of the JSON value (or subtree) in the JSON tree.
GetPath() JSONPath
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

todo: Please note that this is the index path only, and does not represent the path of the base (datastore-doc) data.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as I said earlier, although at the moment it's used only by the indexer, it is not index-specific, hence doesn't need to mention it.
And there is no point in saying anything about storage, as it's irrelevant. It says "in the JSON tree".

I thought we agreed that TODOs are going to be used only if a reviewer considers the code (or absence of it) a blocker. I don't think any request for comment or documentation can ever be considered a blocker. To me it feels like we failed the experiment at its very start.

Copy link
Contributor

@AndrewSisley AndrewSisley Jan 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think any request for comment or documentation can ever be considered a blocker.

I very strongly disagree with this. Anyone can write greenfield code that gets the machine to do what they want. The hard, and valuable, bit is writing maintainable code that is readable by your peers, including your future self. Code documentation is a valuable tool when it comes to improving code readability, and IMO reviewers, with their fresh, unfocused eyes are far better judges than authors when it comes to guessing where information is missing.

The fact that there is still confusion around what this function and its types represent suggests very strongly to me that the code is not self describing and requires further documentation in order for maintainers to safely and efficiently maintain it.

This particular doc request

I think perhaps a large contribution to the continued confusion here is a name clash! The existing code/documentation uses the word index a lot, and most of the time it is not referring to secondary indexes (the only place where the code is consumed), but to the index of elements in an array.

It is further confused by the magic 0, that forms part of the encoded result of this path, that has the appearance of an array-location, yet is not. The existence/behaviour of this confuses the example in the documentation for JSONPathPart for myself, although without looking into the codebase/raw-store this problem will not affect users.

Perhaps we can reduce this confusion by:

  1. suggestion: Avoid the use of the word index in the documentation for JSONPathPart, for example perhaps talk about the location in the array instead of array index.
  2. suggestion: Change the example in the documentation for JSONPathPart to avoid talking about the first index, so that we avoid the ambiguity of the hardcoded 0. Perhaps add a second element to the example array and talk about that instead. And/or note the existence of the magic 0 in that documentation and what it represents (that might be out of place though).
  3. suggestion: Add a line to the documentation for GetPath noting that it is currently only used internally for the encoding of secondary indexes (please make sure you include the word secondary, to minimise the whole index vs index confusion).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case I think the current documentation is relevant and precise enough. This is not index specific and not related to datastore keys so I don't think it's relevant to state that it does not represent a datastore doc path.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we agreed that TODOs are going to be used only if a reviewer considers the code (or absence of it) a blocker. I don't think any request for comment or documentation can ever be considered a blocker. To me it feels like we failed the experiment at it's very start.

Reviewers are free to put todo on whatever they feel warrants it, although reviewers should currently be more strict with what they mark as todo. As I said, it's on the author to be able to declare the todo out of scope (importantly, this doesn't mean it's wrong or shouldn't be done, just out of scope of this PR).

For this particular instance, a todo requesting a comment/documentation is a very small request and would take a total of 30 seconds to do, unless you feel strongly that adding this comment is blatantly wrong or counterproductive (which I personally don't see how it would be in this case).


// visit calls the visitor function for the JSON value at the given path.
visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error
visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error
}

// MakeVoidJSON creates a JSON value that represents a void value with just a path.
// This is necessary purely for creating a json path prefix for storage queries.
// All other json values will be encoded with some value after the path which makes
// them unsuitable to build a path prefix.
func MakeVoidJSON(path []string) JSON {
func MakeVoidJSON(path JSONPath) JSON {
return jsonBase[any]{path: path}
}

Expand All @@ -81,7 +160,7 @@
opt(&options)
}
if shouldVisitPath(options.pathPrefix, nil) {
return j.visit(visitor, []string{}, options)
return j.visit(visitor, JSONPath{}, options)
}
return nil
}
Expand All @@ -90,7 +169,7 @@

// TraverseJSONWithPrefix returns a traverseJSONOption that sets the path prefix for the traversal.
// Only nodes with paths that start with the prefix will be visited.
func TraverseJSONWithPrefix(prefix []string) traverseJSONOption {
func TraverseJSONWithPrefix(prefix JSONPath) traverseJSONOption {
return func(opts *traverseJSONOptions) {
opts.pathPrefix = prefix
}
Expand Down Expand Up @@ -131,7 +210,7 @@
// onlyLeaves when true visits only leaf nodes (not objects or arrays)
onlyLeaves bool
// pathPrefix when set visits only paths that start with this prefix
pathPrefix []string
pathPrefix JSONPath
// visitArrayElements when true visits array elements
visitArrayElements bool
// recurseVisitedArrayElements when true visits array elements recursively
Expand Down Expand Up @@ -166,14 +245,14 @@
return false
}

func (v jsonVoid) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error {
func (v jsonVoid) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error {

Check warning on line 248 in client/json.go

View check run for this annotation

Codecov / codecov/patch

client/json.go#L248

Added line #L248 was not covered by tests
return nil
}

type jsonBase[T any] struct {
jsonVoid
val T
path []string
path JSONPath
}

func (v jsonBase[T]) Value() any {
Expand All @@ -192,7 +271,7 @@
return json.Marshal(v.val)
}

func (v jsonBase[T]) GetPath() []string {
func (v jsonBase[T]) GetPath() JSONPath {
return v.path
}

Expand All @@ -218,7 +297,7 @@
return result
}

func (obj jsonObject) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error {
func (obj jsonObject) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error {
obj.path = path
if !opts.onlyLeaves && len(path) >= len(opts.pathPrefix) {
if err := visitor(obj); err != nil {
Expand All @@ -227,7 +306,7 @@
}

for k, v := range obj.val {
newPath := append(path, k)
newPath := path.AppendProperty(k)
if !shouldVisitPath(opts.pathPrefix, newPath) {
continue
}
Expand Down Expand Up @@ -261,7 +340,7 @@
return result
}

func (arr jsonArray) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error {
func (arr jsonArray) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error {
arr.path = path
if !opts.onlyLeaves {
if err := visitor(arr); err != nil {
Expand All @@ -274,9 +353,9 @@
if !opts.recurseVisitedArrayElements && isCompositeJSON(arr.val[i]) {
continue
}
var newPath []string
var newPath JSONPath
if opts.includeArrayIndexInPath {
newPath = append(path, strconv.Itoa(i))
newPath = path.AppendIndex(uint64(i))
} else {
newPath = path
}
Expand All @@ -302,7 +381,7 @@
return n.val, true
}

func (n jsonNumber) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error {
func (n jsonNumber) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error {
n.path = path
return visitor(n)
}
Expand All @@ -317,7 +396,7 @@
return s.val, true
}

func (n jsonString) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error {
func (n jsonString) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error {
n.path = path
return visitor(n)
}
Expand All @@ -332,7 +411,7 @@
return b.val, true
}

func (n jsonBool) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error {
func (n jsonBool) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error {
n.path = path
return visitor(n)
}
Expand All @@ -347,32 +426,32 @@
return true
}

func (n jsonNull) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error {
func (n jsonNull) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error {
n.path = path
return visitor(n)
}

func newJSONObject(val map[string]JSON, path []string) jsonObject {
func newJSONObject(val map[string]JSON, path JSONPath) jsonObject {
return jsonObject{jsonBase[map[string]JSON]{val: val, path: path}}
}

func newJSONArray(val []JSON, path []string) jsonArray {
func newJSONArray(val []JSON, path JSONPath) jsonArray {
return jsonArray{jsonBase[[]JSON]{val: val, path: path}}
}

func newJSONNumber(val float64, path []string) jsonNumber {
func newJSONNumber(val float64, path JSONPath) jsonNumber {
return jsonNumber{jsonBase[float64]{val: val, path: path}}
}

func newJSONString(val string, path []string) jsonString {
func newJSONString(val string, path JSONPath) jsonString {
return jsonString{jsonBase[string]{val: val, path: path}}
}

func newJSONBool(val bool, path []string) jsonBool {
func newJSONBool(val bool, path JSONPath) jsonBool {
return jsonBool{jsonBase[bool]{val: val, path: path}}
}

func newJSONNull(path []string) jsonNull {
func newJSONNull(path JSONPath) jsonNull {
return jsonNull{jsonBase[any]{path: path}}
}

Expand Down Expand Up @@ -426,12 +505,12 @@
// - slice of any above type
// - []any
// Returns error if the input cannot be converted to JSON.
func NewJSONWithPath(v any, path []string) (JSON, error) {
func NewJSONWithPath(v any, path JSONPath) (JSON, error) {
return newJSON(v, path)
}

// newJSON is an internal function that creates a new JSON value located at the given path.
func newJSON(v any, path []string) (JSON, error) {
func newJSON(v any, path JSONPath) (JSON, error) {
if v == nil {
return newJSONNull(path), nil
} else {
Expand Down Expand Up @@ -505,10 +584,10 @@
return nil, NewErrInvalidJSONPayload(v)
}

func newJsonArrayFromAnyArray(arr []any, path []string) (JSON, error) {
func newJsonArrayFromAnyArray(arr []any, path JSONPath) (JSON, error) {
result := make([]JSON, len(arr))
for i := range arr {
jsonVal, err := newJSON(arr[i], path)
jsonVal, err := newJSON(arr[i], path.AppendIndex(uint64(i)))
if err != nil {
return nil, err
}
Expand All @@ -517,46 +596,46 @@
return newJSONArray(result, path), nil
}

func newJSONBoolArray(v []bool, path []string) JSON {
func newJSONBoolArray(v []bool, path JSONPath) JSON {
arr := make([]JSON, len(v))
for i := range v {
arr[i] = newJSONBool(v[i], path)
arr[i] = newJSONBool(v[i], path.AppendIndex(uint64(i)))
}
return newJSONArray(arr, path)
}

func newJSONNumberArray[T constraints.Integer | constraints.Float](v []T, path []string) JSON {
func newJSONNumberArray[T constraints.Integer | constraints.Float](v []T, path JSONPath) JSON {
arr := make([]JSON, len(v))
for i := range v {
arr[i] = newJSONNumber(float64(v[i]), path)
arr[i] = newJSONNumber(float64(v[i]), path.AppendIndex(uint64(i)))
}
return newJSONArray(arr, path)
}

func newJSONStringArray(v []string, path []string) JSON {
func newJSONStringArray(v []string, path JSONPath) JSON {
arr := make([]JSON, len(v))
for i := range v {
arr[i] = newJSONString(v[i], path)
arr[i] = newJSONString(v[i], path.AppendIndex(uint64(i)))
}
return newJSONArray(arr, path)
}

// newJSONFromFastJSON is an internal function that creates a new JSON value from a fastjson value, located at the given path.
func newJSONFromFastJSON(v *fastjson.Value, path []string) JSON {
func newJSONFromFastJSON(v *fastjson.Value, path JSONPath) JSON {
switch v.Type() {
case fastjson.TypeObject:
fastObj := v.GetObject()
obj := make(map[string]JSON, fastObj.Len())
fastObj.Visit(func(k []byte, v *fastjson.Value) {
key := string(k)
obj[key] = newJSONFromFastJSON(v, append(path, key))
obj[key] = newJSONFromFastJSON(v, path.AppendProperty(key))
})
return newJSONObject(obj, path)
case fastjson.TypeArray:
fastArr := v.GetArray()
arr := make([]JSON, len(fastArr))
for i := range fastArr {
arr[i] = NewJSONFromFastJSON(fastArr[i])
arr[i] = newJSONFromFastJSON(fastArr[i], path.AppendIndex(uint64(i)))
}
return newJSONArray(arr, path)
case fastjson.TypeNumber:
Expand Down Expand Up @@ -585,10 +664,10 @@
return newJSONFromMap(data, nil)
}

func newJSONFromMap(data map[string]any, path []string) (JSON, error) {
func newJSONFromMap(data map[string]any, path JSONPath) (JSON, error) {
obj := make(map[string]JSON, len(data))
for k, v := range data {
jsonVal, err := newJSON(v, append(path, k))
jsonVal, err := newJSON(v, path.AppendProperty(k))
if err != nil {
return nil, err
}
Expand All @@ -597,7 +676,7 @@
return newJSONObject(obj, path), nil
}

func shouldVisitPath(prefix, path []string) bool {
func shouldVisitPath(prefix, path JSONPath) bool {
if len(prefix) == 0 {
return true
}
Expand Down
Loading
Loading