From 32c2440cfc34e7c05ff96c2886b75cf46114782c Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sun, 24 Nov 2024 14:18:33 +0100 Subject: [PATCH 01/46] Add json traversal functions --- client/json.go | 139 +++++++++++-- client/json_traverse_test.go | 381 +++++++++++++++++++++++++++++++++++ 2 files changed, 508 insertions(+), 12 deletions(-) create mode 100644 client/json_traverse_test.go diff --git a/client/json.go b/client/json.go index 23a23de2b1..a2e5b083f1 100644 --- a/client/json.go +++ b/client/json.go @@ -13,6 +13,7 @@ package client import ( "encoding/json" "io" + "strconv" "github.com/valyala/fastjson" "golang.org/x/exp/constraints" @@ -56,6 +57,64 @@ type JSON interface { // Marshal writes the JSON value to the writer. // Returns an error if marshaling fails. Marshal(w io.Writer) error + + // accept calls the visitor function for the JSON value at the given path. + accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error +} + +// TraverseJSON traverses a JSON value and calls the visitor function for each node. +// opts controls how the traversal is performed. +func TraverseJSON(j JSON, visitor JSONVisitor, opts ...traverseJSONOption) error { + var options traverseJSONOptions + for _, opt := range opts { + opt(&options) + } + if shouldVisitPath(options.PathPrefix, nil) { + return j.accept(visitor, []string{}, options) + } + return nil +} + +type traverseJSONOption func(*traverseJSONOptions) + +// TraverseJSONWithPrefix returns a traverseJSONOption that sets the path prefix for the traversal. +// Only nodes with paths that start with the prefix will be visited. +func TraverseJSONWithPrefix(prefix []string) traverseJSONOption { + return func(opts *traverseJSONOptions) { + opts.PathPrefix = prefix + } +} + +// TraverseJSONOnlyLeaves returns a traverseJSONOption that sets the traversal to visit only leaf nodes. +// Leaf nodes are nodes that do not have any children. 
This means that visitor function will not +// be called for objects or arrays and proceed with theirs children. +func TraverseJSONOnlyLeaves() traverseJSONOption { + return func(opts *traverseJSONOptions) { + opts.OnlyLeaves = true + } +} + +// TraverseJSONVisitArrayElements returns a traverseJSONOption that sets the traversal to visit array elements. +// When this option is set, the visitor function will be called for each element of an array. +func TraverseJSONVisitArrayElements() traverseJSONOption { + return func(opts *traverseJSONOptions) { + opts.VisitArrayElements = true + } +} + +// JSONVisitor is a function that processes a JSON value at a given path. +// path represents the location of the value in the JSON tree. +// Returns an error if the processing fails. +type JSONVisitor func(path []string, value JSON) error + +// traverseJSONOptions configures how the JSON tree is traversed. +type traverseJSONOptions struct { + // OnlyLeaves when true visits only leaf nodes (not objects or arrays) + OnlyLeaves bool + // PathPrefix when set visits only paths that start with this prefix + PathPrefix []string + // VisitArrayElements when true visits array elements + VisitArrayElements bool } type jsonVoid struct{} @@ -105,6 +164,13 @@ func (v jsonBase[T]) MarshalJSON() ([]byte, error) { return json.Marshal(v.val) } +func (n jsonBase[T]) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + if shouldVisitPath(opts.PathPrefix, path) { + return visitor(path, n) + } + return nil +} + type jsonObject struct { jsonBase[map[string]JSON] } @@ -127,6 +193,26 @@ func (obj jsonObject) Unwrap() any { return result } +func (obj jsonObject) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + if !opts.OnlyLeaves && len(path) >= len(opts.PathPrefix) { + if err := visitor(path, obj); err != nil { + return err + } + } + + for k, v := range obj.val { + newPath := append(path, k) + if !shouldVisitPath(opts.PathPrefix, newPath) { + 
continue + } + + if err := v.accept(visitor, newPath, opts); err != nil { + return err + } + } + return nil +} + type jsonArray struct { jsonBase[[]JSON] } @@ -149,6 +235,28 @@ func (arr jsonArray) Unwrap() any { return result } +func (arr jsonArray) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + if !opts.OnlyLeaves { + if err := visitor(path, arr); err != nil { + return err + } + } + + if opts.VisitArrayElements { + for i := range arr.val { + newPath := append(path, strconv.Itoa(i)) + if !shouldVisitPath(opts.PathPrefix, newPath) { + continue + } + + if err := arr.val[i].accept(visitor, newPath, opts); err != nil { + return err + } + } + } + return nil +} + type jsonNumber struct { jsonBase[float64] } @@ -159,10 +267,6 @@ func (n jsonNumber) Number() (float64, bool) { return n.val, true } -func (n jsonNumber) MarshalJSON() ([]byte, error) { - return json.Marshal(n.val) -} - type jsonString struct { jsonBase[string] } @@ -173,10 +277,6 @@ func (s jsonString) String() (string, bool) { return s.val, true } -func (s jsonString) MarshalJSON() ([]byte, error) { - return json.Marshal(s.val) -} - type jsonBool struct { jsonBase[bool] } @@ -187,10 +287,6 @@ func (b jsonBool) Bool() (bool, bool) { return b.val, true } -func (b jsonBool) MarshalJSON() ([]byte, error) { - return json.Marshal(b.val) -} - type jsonNull struct { jsonVoid } @@ -217,6 +313,10 @@ func (n jsonNull) MarshalJSON() ([]byte, error) { return json.Marshal(nil) } +func (n jsonNull) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + return visitor(path, n) +} + func newJSONObject(val map[string]JSON) JSON { return jsonObject{jsonBase[map[string]JSON]{val: val}} } @@ -431,3 +531,18 @@ func NewJSONFromMap(data map[string]any) (JSON, error) { } return newJSONObject(obj), nil } + +func shouldVisitPath(prefix, path []string) bool { + if len(prefix) == 0 { + return true + } + for i := range prefix { + if len(path) <= i { + return true + } + if prefix[i] 
!= path[i] { + return false + } + } + return true +} diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go new file mode 100644 index 0000000000..675897c2fd --- /dev/null +++ b/client/json_traverse_test.go @@ -0,0 +1,381 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package client + +import ( + "fmt" + "sort" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { + // Create a complex JSON structure for testing + json := newJSONObject(map[string]JSON{ + "string": newJSONString("value"), + "number": newJSONNumber(42), + "bool": newJSONBool(true), + "null": newJSONNull(), + "object": newJSONObject(map[string]JSON{ + "nested": newJSONString("inside"), + "deep": newJSONObject(map[string]JSON{ + "level": newJSONNumber(3), + }), + }), + "array": newJSONArray([]JSON{ + newJSONNumber(1), + newJSONString("two"), + newJSONObject(map[string]JSON{ + "key": newJSONString("value"), + }), + newJSONArray([]JSON{ + newJSONNumber(4), + newJSONNumber(5), + }), + }), + }) + + tests := []struct { + name string + options []traverseJSONOption + expected map[string]JSON // path -> value + }{ + { + name: "VisitAll", + options: nil, + expected: map[string]JSON{ + "": json, + "string": newJSONString("value"), + "number": newJSONNumber(42), + "bool": newJSONBool(true), + "null": newJSONNull(), + "object": json.Value().(map[string]JSON)["object"], + "object/nested": newJSONString("inside"), + "object/deep": json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"], + "object/deep/level": newJSONNumber(3), + "array": 
json.Value().(map[string]JSON)["array"], + }, + }, + { + name: "OnlyLeaves", + options: []traverseJSONOption{ + TraverseJSONOnlyLeaves(), + }, + expected: map[string]JSON{ + "string": newJSONString("value"), + "number": newJSONNumber(42), + "bool": newJSONBool(true), + "null": newJSONNull(), + "object/nested": newJSONString("inside"), + "object/deep/level": newJSONNumber(3), + }, + }, + { + name: "WithPrefix_Object", + options: []traverseJSONOption{ + TraverseJSONWithPrefix([]string{"object"}), + }, + expected: map[string]JSON{ + "object": json.Value().(map[string]JSON)["object"], + "object/nested": newJSONString("inside"), + "object/deep": json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"], + "object/deep/level": newJSONNumber(3), + }, + }, + { + name: "WithPrefix_Deep", + options: []traverseJSONOption{ + TraverseJSONWithPrefix([]string{"object", "deep"}), + }, + expected: map[string]JSON{ + "object/deep": json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"], + "object/deep/level": newJSONNumber(3), + }, + }, + { + name: "VisitArrayElements", + options: []traverseJSONOption{ + TraverseJSONVisitArrayElements(), + }, + expected: map[string]JSON{ + "": json, + "string": newJSONString("value"), + "number": newJSONNumber(42), + "bool": newJSONBool(true), + "null": newJSONNull(), + "object": json.Value().(map[string]JSON)["object"], + "object/nested": newJSONString("inside"), + "object/deep": json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"], + "object/deep/level": newJSONNumber(3), + "array": json.Value().(map[string]JSON)["array"], + "array/0": newJSONNumber(1), + "array/1": newJSONString("two"), + "array/2": json.Value().(map[string]JSON)["array"].Value().([]JSON)[2], + "array/2/key": newJSONString("value"), + "array/3": json.Value().(map[string]JSON)["array"].Value().([]JSON)[3], + "array/3/0": newJSONNumber(4), + "array/3/1": newJSONNumber(5), + }, + }, + { + name: "CombinedOptions", + options: 
[]traverseJSONOption{ + TraverseJSONOnlyLeaves(), + TraverseJSONVisitArrayElements(), + TraverseJSONWithPrefix([]string{"array"}), + }, + expected: map[string]JSON{ + "array/0": newJSONNumber(1), + "array/1": newJSONString("two"), + "array/2/key": newJSONString("value"), + "array/3/0": newJSONNumber(4), + "array/3/1": newJSONNumber(5), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + visited := make(map[string]JSON) + err := TraverseJSON(json, func(path []string, value JSON) error { + key := joinPath(path) + visited[key] = value + return nil + }, tt.options...) + + require.NoError(t, err) + if diff := compareJSONMaps(tt.expected, visited); diff != "" { + t.Errorf("Maps are different:\n%s", diff) + } + }) + } +} + +// compareJSONMaps compares two maps of JSON values and returns a detailed difference report. +func compareJSONMaps(expected, actual map[string]JSON) string { + var diffs []string + + // Check for missing keys in actual + var expectedKeys []string + for k := range expected { + expectedKeys = append(expectedKeys, k) + } + sort.Strings(expectedKeys) + + var actualKeys []string + for k := range actual { + actualKeys = append(actualKeys, k) + } + sort.Strings(actualKeys) + + // Find missing keys + for _, k := range expectedKeys { + if _, ok := actual[k]; !ok { + diffs = append(diffs, fmt.Sprintf("- Missing key %q", k)) + } + } + + // Find extra keys + for _, k := range actualKeys { + if _, ok := expected[k]; !ok { + diffs = append(diffs, fmt.Sprintf("+ Extra key %q", k)) + } + } + + // Compare values for common keys + for _, k := range expectedKeys { + if actualVal, ok := actual[k]; ok { + expectedVal := expected[k] + if !compareJSON(expectedVal, actualVal) { + diffs = append(diffs, fmt.Sprintf("! 
Value mismatch for key %q:\n\tExpected: %s\n\tActual: %s", + k, formatJSON(expectedVal), formatJSON(actualVal))) + } + } + } + + if len(diffs) == 0 { + return "" + } + + return fmt.Sprintf("Found %d differences:\n%s", len(diffs), strings.Join(diffs, "\n")) +} + +// compareJSON compares two JSON values for equality +func compareJSON(expected, actual JSON) bool { + if expected.IsNull() != actual.IsNull() { + return false + } + + // Compare based on type + switch { + case expected.IsNull(): + return true // Both are null (checked above) + case isObject(expected): + return compareJSONObjects(expected, actual) + case isArray(expected): + return compareJSONArrays(expected, actual) + default: + // For primitive types, compare their marshaled form + expectedBytes, err1 := expected.MarshalJSON() + actualBytes, err2 := actual.MarshalJSON() + if err1 != nil || err2 != nil { + return false + } + return string(expectedBytes) == string(actualBytes) + } +} + +func compareJSONObjects(expected, actual JSON) bool { + expectedObj, ok1 := expected.Object() + actualObj, ok2 := actual.Object() + if !ok1 || !ok2 || len(expectedObj) != len(actualObj) { + return false + } + + for k, v1 := range expectedObj { + v2, exists := actualObj[k] + if !exists || !compareJSON(v1, v2) { + return false + } + } + return true +} + +func compareJSONArrays(expected, actual JSON) bool { + expectedArr, ok1 := expected.Array() + actualArr, ok2 := actual.Array() + if !ok1 || !ok2 || len(expectedArr) != len(actualArr) { + return false + } + + for i := range expectedArr { + if !compareJSON(expectedArr[i], actualArr[i]) { + return false + } + } + return true +} + +// formatJSON returns a human-readable string representation of a JSON value +func formatJSON(j JSON) string { + switch { + case j.IsNull(): + return "null" + case isObject(j): + obj, _ := j.Object() + pairs := make([]string, 0, len(obj)) + for k, v := range obj { + pairs = append(pairs, fmt.Sprintf("%q: %s", k, formatJSON(v))) + } + sort.Strings(pairs) 
+ return "{" + strings.Join(pairs, ", ") + "}" + case isArray(j): + arr, _ := j.Array() + items := make([]string, len(arr)) + for i, v := range arr { + items[i] = formatJSON(v) + } + return "[" + strings.Join(items, ", ") + "]" + default: + bytes, _ := j.MarshalJSON() + return string(bytes) + } +} + +func isObject(j JSON) bool { + _, ok := j.Object() + return ok +} + +func isArray(j JSON) bool { + _, ok := j.Array() + return ok +} + +func TestTraverseJSON_WithError(t *testing.T) { + json := newJSONObject(map[string]JSON{ + "key": newJSONString("value"), + }) + + expectedErr := fmt.Errorf("test error") + err := TraverseJSON(json, func(path []string, value JSON) error { + return expectedErr + }) + + require.Equal(t, expectedErr, err) +} + +func TestShouldVisitPath(t *testing.T) { + tests := []struct { + name string + prefix []string + path []string + expected bool + }{ + { + name: "EmptyPrefix", + prefix: []string{}, + path: []string{"a", "b"}, + expected: true, + }, + { + name: "ExactMatch", + prefix: []string{"a", "b"}, + path: []string{"a", "b"}, + expected: true, + }, + { + name: "PrefixMatch", + prefix: []string{"a"}, + path: []string{"a", "b"}, + expected: true, + }, + { + name: "NoMatch", + prefix: []string{"a", "b"}, + path: []string{"a", "c"}, + expected: false, + }, + { + name: "PathTooShort", + prefix: []string{"a", "b"}, + path: []string{"a"}, + expected: true, + }, + { + name: "PathLonger", + prefix: []string{"a", "b"}, + path: []string{"a", "b", "c"}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := shouldVisitPath(tt.prefix, tt.path) + require.Equal(t, tt.expected, result) + }) + } +} + +// Helper function to join path segments +func joinPath(path []string) string { + if len(path) == 0 { + return "" + } + result := path[0] + for i := 1; i < len(path); i++ { + result += "/" + path[i] + } + return result +} From 53cb4cbd708f1c358b9d913cbad4ad5c33cfc246 Mon Sep 17 00:00:00 2001 From: Islam Aleiv 
Date: Mon, 25 Nov 2024 12:15:24 +0100 Subject: [PATCH 02/46] Add GetPath method to JSON --- client/json.go | 88 ++++++++++++++++++++---------------- client/json_traverse_test.go | 6 +-- 2 files changed, 51 insertions(+), 43 deletions(-) diff --git a/client/json.go b/client/json.go index a2e5b083f1..273418b35c 100644 --- a/client/json.go +++ b/client/json.go @@ -58,6 +58,9 @@ type JSON interface { // Returns an error if marshaling fails. Marshal(w io.Writer) error + // GetPath returns the path of the JSON value in the JSON tree. + GetPath() []string + // accept calls the visitor function for the JSON value at the given path. accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error } @@ -105,7 +108,7 @@ func TraverseJSONVisitArrayElements() traverseJSONOption { // JSONVisitor is a function that processes a JSON value at a given path. // path represents the location of the value in the JSON tree. // Returns an error if the processing fails. -type JSONVisitor func(path []string, value JSON) error +type JSONVisitor func(value JSON) error // traverseJSONOptions configures how the JSON tree is traversed. 
type traverseJSONOptions struct { @@ -145,7 +148,8 @@ func (v jsonVoid) IsNull() bool { type jsonBase[T any] struct { jsonVoid - val T + val T + path []string } func (v jsonBase[T]) Value() any { @@ -165,10 +169,12 @@ func (v jsonBase[T]) MarshalJSON() ([]byte, error) { } func (n jsonBase[T]) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { - if shouldVisitPath(opts.PathPrefix, path) { - return visitor(path, n) - } - return nil + n.path = path + return visitor(n) +} + +func (v jsonBase[T]) GetPath() []string { + return v.path } type jsonObject struct { @@ -194,8 +200,9 @@ func (obj jsonObject) Unwrap() any { } func (obj jsonObject) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + obj.path = path if !opts.OnlyLeaves && len(path) >= len(opts.PathPrefix) { - if err := visitor(path, obj); err != nil { + if err := visitor(obj); err != nil { return err } } @@ -236,8 +243,9 @@ func (arr jsonArray) Unwrap() any { } func (arr jsonArray) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + arr.path = path if !opts.OnlyLeaves { - if err := visitor(path, arr); err != nil { + if err := visitor(arr); err != nil { return err } } @@ -288,7 +296,7 @@ func (b jsonBool) Bool() (bool, bool) { } type jsonNull struct { - jsonVoid + jsonBase[any] } var _ JSON = jsonNull{} @@ -297,48 +305,33 @@ func (n jsonNull) IsNull() bool { return true } -func (n jsonNull) Value() any { - return nil -} - -func (n jsonNull) Unwrap() any { - return nil -} - -func (n jsonNull) Marshal(w io.Writer) error { - return json.NewEncoder(w).Encode(nil) -} - -func (n jsonNull) MarshalJSON() ([]byte, error) { - return json.Marshal(nil) -} - func (n jsonNull) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { - return visitor(path, n) + n.path = path + return visitor(n) } -func newJSONObject(val map[string]JSON) JSON { +func newJSONObject(val map[string]JSON) jsonObject { return 
jsonObject{jsonBase[map[string]JSON]{val: val}} } -func newJSONArray(val []JSON) JSON { +func newJSONArray(val []JSON) jsonArray { return jsonArray{jsonBase[[]JSON]{val: val}} } -func newJSONNumber(val float64) JSON { +func newJSONNumber(val float64) jsonNumber { return jsonNumber{jsonBase[float64]{val: val}} } -func newJSONString(val string) JSON { +func newJSONString(val string) jsonString { return jsonString{jsonBase[string]{val: val}} } -func newJSONBool(val bool) JSON { +func newJSONBool(val bool) jsonBool { return jsonBool{jsonBase[bool]{val: val}} } -func newJSONNull() JSON { - return jsonNull{} +func newJSONNull() jsonNull { + return jsonNull{jsonBase[any]{}} } // ParseJSONBytes parses the given JSON bytes into a JSON value. @@ -377,16 +370,21 @@ func ParseJSONString(data string) (JSON, error) { // - []any // Returns error if the input cannot be converted to JSON. func NewJSON(v any) (JSON, error) { + return newJSON(v) +} + +// newJSON is an internal function that creates a new JSON value with parent and property name +func newJSON(v any) (JSON, error) { if v == nil { return newJSONNull(), nil } switch val := v.(type) { case *fastjson.Value: - return NewJSONFromFastJSON(val), nil + return newJSONFromFastJSON(val), nil case string: return newJSONString(val), nil case map[string]any: - return NewJSONFromMap(val) + return newJSONFromMap(val) case bool: return newJSONBool(val), nil case int8: @@ -453,7 +451,7 @@ func NewJSON(v any) (JSON, error) { func newJsonArrayFromAnyArray(arr []any) (JSON, error) { result := make([]JSON, len(arr)) for i := range arr { - jsonVal, err := NewJSON(arr[i]) + jsonVal, err := newJSON(arr[i]) if err != nil { return nil, err } @@ -486,14 +484,15 @@ func newJSONStringArray(v []string) JSON { return newJSONArray(arr) } -// NewJSONFromFastJSON creates a JSON value from a fastjson.Value. 
-func NewJSONFromFastJSON(v *fastjson.Value) JSON { +// newJSONFromFastJSON is an internal function that creates a new JSON value with parent and property name +func newJSONFromFastJSON(v *fastjson.Value) JSON { switch v.Type() { case fastjson.TypeObject: fastObj := v.GetObject() obj := make(map[string]JSON, fastObj.Len()) fastObj.Visit(func(k []byte, v *fastjson.Value) { - obj[string(k)] = NewJSONFromFastJSON(v) + key := string(k) + obj[key] = newJSONFromFastJSON(v) }) return newJSONObject(obj) case fastjson.TypeArray: @@ -517,13 +516,22 @@ func NewJSONFromFastJSON(v *fastjson.Value) JSON { return nil } +// NewJSONFromFastJSON creates a JSON value from a fastjson.Value. +func NewJSONFromFastJSON(v *fastjson.Value) JSON { + return newJSONFromFastJSON(v) +} + // NewJSONFromMap creates a JSON object from a map[string]any. // The map values must be valid Go values that can be converted to JSON. // Returns error if any map value cannot be converted to JSON. func NewJSONFromMap(data map[string]any) (JSON, error) { + return newJSONFromMap(data) +} + +func newJSONFromMap(data map[string]any) (JSON, error) { obj := make(map[string]JSON, len(data)) for k, v := range data { - jsonVal, err := NewJSON(v) + jsonVal, err := newJSON(v) if err != nil { return nil, err } diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index 675897c2fd..96e662a817 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -147,8 +147,8 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { visited := make(map[string]JSON) - err := TraverseJSON(json, func(path []string, value JSON) error { - key := joinPath(path) + err := TraverseJSON(json, func(value JSON) error { + key := joinPath(value.GetPath()) visited[key] = value return nil }, tt.options...) 
@@ -308,7 +308,7 @@ func TestTraverseJSON_WithError(t *testing.T) { }) expectedErr := fmt.Errorf("test error") - err := TraverseJSON(json, func(path []string, value JSON) error { + err := TraverseJSON(json, func(value JSON) error { return expectedErr }) From f703e3a18fa713257e1b5a86edc25df1e446e411 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 25 Nov 2024 13:51:57 +0100 Subject: [PATCH 03/46] Include array element index in path --- client/json.go | 16 ++- client/json_traverse_test.go | 247 +++++++++++++++++++---------------- 2 files changed, 150 insertions(+), 113 deletions(-) diff --git a/client/json.go b/client/json.go index 273418b35c..2ec5a61574 100644 --- a/client/json.go +++ b/client/json.go @@ -105,6 +105,13 @@ func TraverseJSONVisitArrayElements() traverseJSONOption { } } +// TraverseJSONWithArrayIndexInPath returns a traverseJSONOption that includes array indices in the path. +func TraverseJSONWithArrayIndexInPath() traverseJSONOption { + return func(opts *traverseJSONOptions) { + opts.IncludeArrayIndexInPath = true + } +} + // JSONVisitor is a function that processes a JSON value at a given path. // path represents the location of the value in the JSON tree. // Returns an error if the processing fails. 
@@ -118,6 +125,8 @@ type traverseJSONOptions struct { PathPrefix []string // VisitArrayElements when true visits array elements VisitArrayElements bool + // IncludeArrayIndexInPath when true includes array indices in the path + IncludeArrayIndexInPath bool } type jsonVoid struct{} @@ -252,7 +261,12 @@ func (arr jsonArray) accept(visitor JSONVisitor, path []string, opts traverseJSO if opts.VisitArrayElements { for i := range arr.val { - newPath := append(path, strconv.Itoa(i)) + var newPath []string + if opts.IncludeArrayIndexInPath { + newPath = append(path, strconv.Itoa(i)) + } else { + newPath = path + } if !shouldVisitPath(opts.PathPrefix, newPath) { continue } diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index 96e662a817..189ad7c170 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -12,13 +12,17 @@ package client import ( "fmt" - "sort" "strings" "testing" "github.com/stretchr/testify/require" ) +type traverseNode struct { + value JSON + path string +} + func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { // Create a complex JSON structure for testing json := newJSONObject(map[string]JSON{ @@ -48,22 +52,22 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { tests := []struct { name string options []traverseJSONOption - expected map[string]JSON // path -> value + expected []traverseNode // path -> value }{ { name: "VisitAll", options: nil, - expected: map[string]JSON{ - "": json, - "string": newJSONString("value"), - "number": newJSONNumber(42), - "bool": newJSONBool(true), - "null": newJSONNull(), - "object": json.Value().(map[string]JSON)["object"], - "object/nested": newJSONString("inside"), - "object/deep": json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"], - "object/deep/level": newJSONNumber(3), - "array": json.Value().(map[string]JSON)["array"], + expected: []traverseNode{ + {path: "", value: json}, + {path: "string", value: 
newJSONString("value")}, + {path: "number", value: newJSONNumber(42)}, + {path: "bool", value: newJSONBool(true)}, + {path: "null", value: newJSONNull()}, + {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object/nested", value: newJSONString("inside")}, + {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep/level", value: newJSONNumber(3)}, + {path: "array", value: json.Value().(map[string]JSON)["array"]}, }, }, { @@ -71,13 +75,13 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { options: []traverseJSONOption{ TraverseJSONOnlyLeaves(), }, - expected: map[string]JSON{ - "string": newJSONString("value"), - "number": newJSONNumber(42), - "bool": newJSONBool(true), - "null": newJSONNull(), - "object/nested": newJSONString("inside"), - "object/deep/level": newJSONNumber(3), + expected: []traverseNode{ + {path: "string", value: newJSONString("value")}, + {path: "number", value: newJSONNumber(42)}, + {path: "bool", value: newJSONBool(true)}, + {path: "null", value: newJSONNull()}, + {path: "object/nested", value: newJSONString("inside")}, + {path: "object/deep/level", value: newJSONNumber(3)}, }, }, { @@ -85,11 +89,11 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { options: []traverseJSONOption{ TraverseJSONWithPrefix([]string{"object"}), }, - expected: map[string]JSON{ - "object": json.Value().(map[string]JSON)["object"], - "object/nested": newJSONString("inside"), - "object/deep": json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"], - "object/deep/level": newJSONNumber(3), + expected: []traverseNode{ + {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object/nested", value: newJSONString("inside")}, + {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep/level", value: newJSONNumber(3)}, }, }, { @@ -97,9 
+101,9 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { options: []traverseJSONOption{ TraverseJSONWithPrefix([]string{"object", "deep"}), }, - expected: map[string]JSON{ - "object/deep": json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"], - "object/deep/level": newJSONNumber(3), + expected: []traverseNode{ + {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep/level", value: newJSONNumber(3)}, }, }, { @@ -107,24 +111,50 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { options: []traverseJSONOption{ TraverseJSONVisitArrayElements(), }, - expected: map[string]JSON{ - "": json, - "string": newJSONString("value"), - "number": newJSONNumber(42), - "bool": newJSONBool(true), - "null": newJSONNull(), - "object": json.Value().(map[string]JSON)["object"], - "object/nested": newJSONString("inside"), - "object/deep": json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"], - "object/deep/level": newJSONNumber(3), - "array": json.Value().(map[string]JSON)["array"], - "array/0": newJSONNumber(1), - "array/1": newJSONString("two"), - "array/2": json.Value().(map[string]JSON)["array"].Value().([]JSON)[2], - "array/2/key": newJSONString("value"), - "array/3": json.Value().(map[string]JSON)["array"].Value().([]JSON)[3], - "array/3/0": newJSONNumber(4), - "array/3/1": newJSONNumber(5), + expected: []traverseNode{ + {path: "", value: json}, + {path: "string", value: newJSONString("value")}, + {path: "number", value: newJSONNumber(42)}, + {path: "bool", value: newJSONBool(true)}, + {path: "null", value: newJSONNull()}, + {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object/nested", value: newJSONString("inside")}, + {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep/level", value: newJSONNumber(3)}, + {path: "array", 
value: json.Value().(map[string]JSON)["array"]}, + {path: "array", value: newJSONNumber(1)}, + {path: "array", value: newJSONString("two")}, + {path: "array", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[2]}, + {path: "array/key", value: newJSONString("value")}, + {path: "array", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[3]}, + {path: "array", value: newJSONNumber(4)}, + {path: "array", value: newJSONNumber(5)}, + }, + }, + { + name: "VisitArrayElementsWithIndex", + options: []traverseJSONOption{ + TraverseJSONVisitArrayElements(), + TraverseJSONWithArrayIndexInPath(), + }, + expected: []traverseNode{ + {path: "", value: json}, + {path: "string", value: newJSONString("value")}, + {path: "number", value: newJSONNumber(42)}, + {path: "bool", value: newJSONBool(true)}, + {path: "null", value: newJSONNull()}, + {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object/nested", value: newJSONString("inside")}, + {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep/level", value: newJSONNumber(3)}, + {path: "array", value: json.Value().(map[string]JSON)["array"]}, + {path: "array/0", value: newJSONNumber(1)}, + {path: "array/1", value: newJSONString("two")}, + {path: "array/2", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[2]}, + {path: "array/2/key", value: newJSONString("value")}, + {path: "array/3", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[3]}, + {path: "array/3/0", value: newJSONNumber(4)}, + {path: "array/3/1", value: newJSONNumber(5)}, }, }, { @@ -133,73 +163,82 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { TraverseJSONOnlyLeaves(), TraverseJSONVisitArrayElements(), TraverseJSONWithPrefix([]string{"array"}), + TraverseJSONWithArrayIndexInPath(), }, - expected: map[string]JSON{ - "array/0": newJSONNumber(1), - "array/1": newJSONString("two"), - "array/2/key": 
newJSONString("value"), - "array/3/0": newJSONNumber(4), - "array/3/1": newJSONNumber(5), + expected: []traverseNode{ + {path: "array/0", value: newJSONNumber(1)}, + {path: "array/1", value: newJSONString("two")}, + {path: "array/2/key", value: newJSONString("value")}, + {path: "array/3/0", value: newJSONNumber(4)}, + {path: "array/3/1", value: newJSONNumber(5)}, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - visited := make(map[string]JSON) + visited := []traverseNode{} err := TraverseJSON(json, func(value JSON) error { key := joinPath(value.GetPath()) - visited[key] = value + visited = append(visited, traverseNode{path: key, value: value}) return nil }, tt.options...) require.NoError(t, err) - if diff := compareJSONMaps(tt.expected, visited); diff != "" { - t.Errorf("Maps are different:\n%s", diff) + if diff := compareTraverseNodes(tt.expected, visited); diff != "" { + t.Errorf("Slices are different:\n%s", diff) } }) } } -// compareJSONMaps compares two maps of JSON values and returns a detailed difference report. -func compareJSONMaps(expected, actual map[string]JSON) string { +// compareTraverseNodes compares two slices of traverseNode without relying on the order. +// It matches nodes based on their paths and compares their values. +// Handles multiple nodes with the same path and removes processed items. 
+func compareTraverseNodes(expected, actual []traverseNode) string { var diffs []string - // Check for missing keys in actual - var expectedKeys []string - for k := range expected { - expectedKeys = append(expectedKeys, k) - } - sort.Strings(expectedKeys) + // Group expected and actual nodes by path + expectedMap := groupNodesByPath(expected) + actualMap := groupNodesByPath(actual) - var actualKeys []string - for k := range actual { - actualKeys = append(actualKeys, k) - } - sort.Strings(actualKeys) + // Compare nodes with matching paths + for path, expNodes := range expectedMap { + actNodes, exists := actualMap[path] + if !exists { + diffs = append(diffs, fmt.Sprintf("Missing path %q in actual nodes", path)) + continue + } - // Find missing keys - for _, k := range expectedKeys { - if _, ok := actual[k]; !ok { - diffs = append(diffs, fmt.Sprintf("- Missing key %q", k)) + // Compare each expected node with actual nodes + for _, expNode := range expNodes { + matchFound := false + for i, actNode := range actNodes { + if compareJSON(expNode.value, actNode.value) { + // Remove matched node to prevent duplicate matching + actNodes = append(actNodes[:i], actNodes[i+1:]...) + actualMap[path] = actNodes + matchFound = true + break + } + } + if !matchFound { + diffs = append(diffs, fmt.Sprintf("No matching value found for path %q", path)) + } } - } - // Find extra keys - for _, k := range actualKeys { - if _, ok := expected[k]; !ok { - diffs = append(diffs, fmt.Sprintf("+ Extra key %q", k)) + // Remove path from actualMap if all nodes have been matched + if len(actNodes) == 0 { + delete(actualMap, path) + } else { + actualMap[path] = actNodes } } - // Compare values for common keys - for _, k := range expectedKeys { - if actualVal, ok := actual[k]; ok { - expectedVal := expected[k] - if !compareJSON(expectedVal, actualVal) { - diffs = append(diffs, fmt.Sprintf("! 
Value mismatch for key %q:\n\tExpected: %s\n\tActual: %s", - k, formatJSON(expectedVal), formatJSON(actualVal))) - } + // Any remaining actual nodes are extra + for path, actNodes := range actualMap { + for range actNodes { + diffs = append(diffs, fmt.Sprintf("Extra node found at path %q", path)) } } @@ -210,6 +249,16 @@ func compareJSONMaps(expected, actual map[string]JSON) string { return fmt.Sprintf("Found %d differences:\n%s", len(diffs), strings.Join(diffs, "\n")) } +// groupNodesByPath groups traverseNodes by their paths. +// It returns a map from path to a slice of nodes with that path. +func groupNodesByPath(nodes []traverseNode) map[string][]traverseNode { + nodeMap := make(map[string][]traverseNode) + for _, node := range nodes { + nodeMap[node.path] = append(nodeMap[node.path], node) + } + return nodeMap +} + // compareJSON compares two JSON values for equality func compareJSON(expected, actual JSON) bool { if expected.IsNull() != actual.IsNull() { @@ -266,32 +315,6 @@ func compareJSONArrays(expected, actual JSON) bool { return true } -// formatJSON returns a human-readable string representation of a JSON value -func formatJSON(j JSON) string { - switch { - case j.IsNull(): - return "null" - case isObject(j): - obj, _ := j.Object() - pairs := make([]string, 0, len(obj)) - for k, v := range obj { - pairs = append(pairs, fmt.Sprintf("%q: %s", k, formatJSON(v))) - } - sort.Strings(pairs) - return "{" + strings.Join(pairs, ", ") + "}" - case isArray(j): - arr, _ := j.Array() - items := make([]string, len(arr)) - for i, v := range arr { - items[i] = formatJSON(v) - } - return "[" + strings.Join(items, ", ") + "]" - default: - bytes, _ := j.MarshalJSON() - return string(bytes) - } -} - func isObject(j JSON) bool { _, ok := j.Object() return ok From 403c5879fdcb65a41da89b89a3b4bef7079d4d22 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 28 Nov 2024 15:24:32 +0100 Subject: [PATCH 04/46] Fix json traversal --- client/json.go | 20 +++++++++++++++----- 
client/json_traverse_test.go | 23 +++++++++++++++++++++++ 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/client/json.go b/client/json.go index 2ec5a61574..c74b9eebe9 100644 --- a/client/json.go +++ b/client/json.go @@ -177,11 +177,6 @@ func (v jsonBase[T]) MarshalJSON() ([]byte, error) { return json.Marshal(v.val) } -func (n jsonBase[T]) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { - n.path = path - return visitor(n) -} - func (v jsonBase[T]) GetPath() []string { return v.path } @@ -289,6 +284,11 @@ func (n jsonNumber) Number() (float64, bool) { return n.val, true } +func (n jsonNumber) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + n.path = path + return visitor(n) +} + type jsonString struct { jsonBase[string] } @@ -299,6 +299,11 @@ func (s jsonString) String() (string, bool) { return s.val, true } +func (n jsonString) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + n.path = path + return visitor(n) +} + type jsonBool struct { jsonBase[bool] } @@ -309,6 +314,11 @@ func (b jsonBool) Bool() (bool, bool) { return b.val, true } +func (n jsonBool) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + n.path = path + return visitor(n) +} + type jsonNull struct { jsonBase[any] } diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index 189ad7c170..3806af798c 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -184,6 +184,29 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { return nil }, tt.options...) 
+ for _, node := range visited { + if _, ok := node.value.Bool(); ok { + break + } + if _, ok := node.value.Number(); ok { + break + } + if _, ok := node.value.String(); ok { + break + } + if _, ok := node.value.Object(); ok { + break + } + if _, ok := node.value.Array(); ok { + break + } + if node.value.IsNull() { + break + } + + t.Errorf("Unexpected JSON value type: %T, for path: %s", node.value, node.path) + } + require.NoError(t, err) if diff := compareTraverseNodes(tt.expected, visited); diff != "" { t.Errorf("Slices are different:\n%s", diff) From 1c059fbf5b4e1870a4a03a64eb44d0689139a17d Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 28 Nov 2024 16:36:42 +0100 Subject: [PATCH 05/46] Add JSON and Bool encoding --- internal/encoding/bool.go | 43 +++++++ internal/encoding/bool_test.go | 175 ++++++++++++++++++++++++++ internal/encoding/encoding.go | 3 + internal/encoding/errors.go | 18 +++ internal/encoding/field_value.go | 34 +++-- internal/encoding/field_value_test.go | 12 +- internal/encoding/json.go | 148 ++++++++++++++++++++++ internal/encoding/json_test.go | 147 ++++++++++++++++++++++ internal/encoding/null.go | 9 +- internal/encoding/type.go | 6 + 10 files changed, 576 insertions(+), 19 deletions(-) create mode 100644 internal/encoding/bool.go create mode 100644 internal/encoding/bool_test.go create mode 100644 internal/encoding/json.go create mode 100644 internal/encoding/json_test.go diff --git a/internal/encoding/bool.go b/internal/encoding/bool.go new file mode 100644 index 0000000000..989b9081c9 --- /dev/null +++ b/internal/encoding/bool.go @@ -0,0 +1,43 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package encoding + +// EncodeBoolAscending encodes a boolean value in ascending order. +func EncodeBoolAscending(b []byte, v bool) []byte { + if v { + b = append(b, trueMarker) + } else { + b = append(b, falseMarker) + } + + return b +} + +// EncodeBoolDescending encodes a boolean value in descending order. +func EncodeBoolDescending(b []byte, v bool) []byte { + return EncodeBoolAscending(b, !v) +} + +// DecodeBoolAscending decodes a boolean value encoded in ascending order. +func DecodeBoolAscending(b []byte) ([]byte, bool, error) { + if PeekType(b) != Bool { + return b, false, NewErrMarkersNotFound(b, falseMarker, trueMarker) + } + + byte0 := b[0] + return b[1:], byte0 == trueMarker, nil +} + +// DecodeBoolDescending decodes a boolean value encoded in descending order. +func DecodeBoolDescending(b []byte) ([]byte, bool, error) { + leftover, v, err := DecodeBoolAscending(b) + return leftover, !v, err +} diff --git a/internal/encoding/bool_test.go b/internal/encoding/bool_test.go new file mode 100644 index 0000000000..1a21898449 --- /dev/null +++ b/internal/encoding/bool_test.go @@ -0,0 +1,175 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package encoding + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestEncodeBoolAscending(t *testing.T) { + tests := []struct { + name string + input bool + expected []byte + }{ + { + name: "true value", + input: true, + expected: []byte{trueMarker}, + }, + { + name: "false value", + input: false, + expected: []byte{falseMarker}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := EncodeBoolAscending(nil, tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestEncodeBoolDescending(t *testing.T) { + tests := []struct { + name string + input bool + expected []byte + }{ + { + name: "true value", + input: true, + expected: []byte{falseMarker}, // inverted due to descending order + }, + { + name: "false value", + input: false, + expected: []byte{trueMarker}, // inverted due to descending order + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := EncodeBoolDescending(nil, tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestDecodeBoolAscending(t *testing.T) { + tests := []struct { + name string + input []byte + expected bool + expectedErr error + remaining []byte + }{ + { + name: "decode true", + input: []byte{trueMarker}, + expected: true, + expectedErr: nil, + remaining: []byte{}, + }, + { + name: "decode false", + input: []byte{falseMarker}, + expected: false, + expectedErr: nil, + remaining: []byte{}, + }, + { + name: "invalid marker", + input: []byte{0x99}, + expected: false, + expectedErr: NewErrMarkersNotFound([]byte{0x99}, falseMarker, trueMarker), + remaining: []byte{0x99}, + }, + { + name: "with remaining bytes", + input: []byte{trueMarker, 0x01, 0x02}, + expected: true, + expectedErr: nil, + remaining: []byte{0x01, 0x02}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + remaining, result, err := DecodeBoolAscending(tt.input) + if tt.expectedErr != nil { + assert.Error(t, err) + 
assert.ErrorContains(t, err, tt.expectedErr.Error()) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expected, result) + } + assert.Equal(t, tt.remaining, remaining) + }) + } +} + +func TestDecodeBoolDescending(t *testing.T) { + tests := []struct { + name string + input []byte + expected bool + expectedErr error + remaining []byte + }{ + { + name: "decode true", + input: []byte{falseMarker}, // inverted due to descending order + expected: true, + expectedErr: nil, + remaining: []byte{}, + }, + { + name: "decode false", + input: []byte{trueMarker}, // inverted due to descending order + expected: false, + expectedErr: nil, + remaining: []byte{}, + }, + { + name: "invalid marker", + input: []byte{0x99}, + expected: false, + expectedErr: NewErrMarkersNotFound([]byte{0x99}, falseMarker, trueMarker), + remaining: []byte{0x99}, + }, + { + name: "with remaining bytes", + input: []byte{falseMarker, 0x01, 0x02}, + expected: true, + expectedErr: nil, + remaining: []byte{0x01, 0x02}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + remaining, result, err := DecodeBoolDescending(tt.input) + if tt.expectedErr != nil { + assert.Error(t, err) + assert.ErrorContains(t, err, tt.expectedErr.Error()) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expected, result) + } + assert.Equal(t, tt.remaining, remaining) + }) + } +} diff --git a/internal/encoding/encoding.go b/internal/encoding/encoding.go index 188cf73ad3..0dd78525d4 100644 --- a/internal/encoding/encoding.go +++ b/internal/encoding/encoding.go @@ -28,6 +28,9 @@ const ( bytesMarker bytesDescMarker timeMarker + falseMarker + trueMarker + jsonMarker // These constants define a range of values and are used to determine how many bytes are // needed to represent the given uint64 value. 
The constants IntMin and IntMax define the diff --git a/internal/encoding/errors.go b/internal/encoding/errors.go index 38b4671633..63c13e3a14 100644 --- a/internal/encoding/errors.go +++ b/internal/encoding/errors.go @@ -24,6 +24,8 @@ const ( errUnknownEscapeSequence = "unknown escape sequence" errInvalidUvarintLength = "invalid length for uvarint" errVarintOverflow = "varint overflows a 64-bit integer" + errInvalidJSONPayload = "invalid JSON payload" + errInvalidJSONPath = "invalid JSON path" ) var ( @@ -35,6 +37,8 @@ var ( ErrUnknownEscapeSequence = errors.New(errUnknownEscapeSequence) ErrInvalidUvarintLength = errors.New(errInvalidUvarintLength) ErrVarintOverflow = errors.New(errVarintOverflow) + ErrInvalidJSONPayload = errors.New(errInvalidJSONPayload) + ErrInvalidJSONPath = errors.New(errInvalidJSONPath) ) // NewErrInsufficientBytesToDecode returns a new error indicating that the provided @@ -89,3 +93,17 @@ func NewErrInvalidUvarintLength(b []byte, length int) error { func NewErrVarintOverflow(b []byte, value uint64) error { return errors.New(errVarintOverflow, errors.NewKV("Buffer", b), errors.NewKV("Value", value)) } + +// NewErrInvalidJSONPayload returns a new error indicating that the buffer +func NewErrInvalidJSONPayload(b []byte, path []string, err ...error) error { + kvs := []errors.KV{errors.NewKV("Buffer", b), errors.NewKV("Path", path)} + if len(err) > 0 { + kvs = append(kvs, errors.NewKV("Error", err[0])) + } + return errors.New(errInvalidJSONPayload, kvs...) 
+} + +// NewErrInvalidJSONPath returns a new error indicating that the buffer +func NewErrInvalidJSONPath(b []byte, err error) error { + return errors.New(errInvalidJSONPath, errors.NewKV("Buffer", b), errors.NewKV("Error", err)) +} diff --git a/internal/encoding/field_value.go b/internal/encoding/field_value.go index 34bbdd99df..745a07d717 100644 --- a/internal/encoding/field_value.go +++ b/internal/encoding/field_value.go @@ -27,24 +27,16 @@ func EncodeFieldValue(b []byte, val client.NormalValue, descending bool) []byte } } if v, ok := val.Bool(); ok { - var boolInt int64 = 0 - if v { - boolInt = 1 - } if descending { - return EncodeVarintDescending(b, boolInt) + return EncodeBoolDescending(b, v) } - return EncodeVarintAscending(b, boolInt) + return EncodeBoolAscending(b, v) } if v, ok := val.NillableBool(); ok { - var boolInt int64 = 0 - if v.Value() { - boolInt = 1 - } if descending { - return EncodeVarintDescending(b, boolInt) + return EncodeBoolDescending(b, v.Value()) } - return EncodeVarintAscending(b, boolInt) + return EncodeBoolAscending(b, v.Value()) } if v, ok := val.Int(); ok { if descending { @@ -94,6 +86,12 @@ func EncodeFieldValue(b []byte, val client.NormalValue, descending bool) []byte } return EncodeTimeAscending(b, v.Value()) } + if v, ok := val.JSON(); ok { + if descending { + return EncodeJSONDescending(b, v) + } + return EncodeJSONAscending(b, v) + } return b } @@ -107,6 +105,18 @@ func DecodeFieldValue(b []byte, descending bool, kind client.FieldKind) ([]byte, b, _ = DecodeIfNull(b) nilVal, err := client.NewNormalNil(kind) return b, nilVal, err + case Bool: + var v bool + var err error + if descending { + b, v, err = DecodeBoolDescending(b) + } else { + b, v, err = DecodeBoolAscending(b) + } + if err != nil { + return nil, nil, NewErrCanNotDecodeFieldValue(b, kind, err) + } + return b, client.NewNormalBool(v), nil case Int: var v int64 var err error diff --git a/internal/encoding/field_value_test.go b/internal/encoding/field_value_test.go 
index 69a8096f85..c1ae904fc3 100644 --- a/internal/encoding/field_value_test.go +++ b/internal/encoding/field_value_test.go @@ -41,16 +41,16 @@ func TestEncodeDecodeFieldValue(t *testing.T) { { name: "bool true", inputVal: client.NewNormalBool(true), - expectedBytes: EncodeVarintAscending(nil, 1), - expectedBytesDesc: EncodeVarintDescending(nil, 1), - expectedDecodedVal: client.NewNormalInt(1), + expectedBytes: EncodeBoolAscending(nil, true), + expectedBytesDesc: EncodeBoolDescending(nil, true), + expectedDecodedVal: client.NewNormalBool(true), }, { name: "bool false", inputVal: client.NewNormalBool(false), - expectedBytes: EncodeVarintAscending(nil, 0), - expectedBytesDesc: EncodeVarintDescending(nil, 0), - expectedDecodedVal: client.NewNormalInt(0), + expectedBytes: EncodeBoolAscending(nil, false), + expectedBytesDesc: EncodeBoolDescending(nil, false), + expectedDecodedVal: client.NewNormalBool(false), }, { name: "int", diff --git a/internal/encoding/json.go b/internal/encoding/json.go new file mode 100644 index 0000000000..b66164d2d0 --- /dev/null +++ b/internal/encoding/json.go @@ -0,0 +1,148 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package encoding + +import "github.com/sourcenetwork/defradb/client" + +// EncodeJSONAscending encodes a JSON value in ascending order. 
+func EncodeJSONAscending(b []byte, v client.JSON) []byte { + b = encodeJSONPath(b, v) + + if str, ok := v.String(); ok { + b = EncodeStringAscending(b, str) + } else if num, ok := v.Number(); ok { + b = EncodeFloatAscending(b, num) + } else if boolVal, ok := v.Bool(); ok { + b = EncodeBoolAscending(b, boolVal) + } else if v.IsNull() { + b = EncodeNullAscending(b) + } else { + return nil + } + + return b +} + +// EncodeJSONDescending encodes a JSON value in descending order. +func EncodeJSONDescending(b []byte, v client.JSON) []byte { + b = encodeJSONPath(b, v) + + if str, ok := v.String(); ok { + b = EncodeStringDescending(b, str) + } else if num, ok := v.Number(); ok { + b = EncodeFloatDescending(b, num) + } else if boolVal, ok := v.Bool(); ok { + b = EncodeBoolDescending(b, boolVal) + } else if v.IsNull() { + b = EncodeNullDescending(b) + } else { + return nil + } + + return b +} + +// DecodeJSONAscending decodes a JSON value encoded in ascending order. +func DecodeJSONAscending(b []byte) ([]byte, client.JSON, error) { + return decodeJSON(b, true) +} + +// DecodeJSONDescending decodes a JSON value encoded in descending order. 
+func DecodeJSONDescending(b []byte) ([]byte, client.JSON, error) { + return decodeJSON(b, false) +} + +func decodeJSON(b []byte, ascending bool) ([]byte, client.JSON, error) { + if PeekType(b) != JSON { + return b, nil, NewErrMarkersNotFound(b, jsonMarker) + } + + b = b[1:] // Skip the JSON marker + b, path, err := decodeJSONPath(b) + if err != nil { + return b, nil, err + } + + var jsonValue any + + switch PeekType(b) { + case Bytes, BytesDesc: + var v []byte + if ascending { + b, v, err = DecodeBytesAscending(b) + } else { + b, v, err = DecodeBytesDescending(b) + } + if err != nil { + return nil, nil, err + } + jsonValue = string(v) + case Float: + if ascending { + b, jsonValue, err = DecodeFloatAscending(b) + } else { + b, jsonValue, err = DecodeFloatDescending(b) + } + case Bool: + if ascending { + b, jsonValue, err = DecodeBoolAscending(b) + } else { + b, jsonValue, err = DecodeBoolDescending(b) + } + case Null: + b = decodeNull(b) + default: + err = NewErrInvalidJSONPayload(b, path) + } + + if err != nil { + return b, nil, err + } + + result, err := client.NewJSON(jsonValue) + + if err != nil { + return b, nil, err + } + + return b, result, nil +} + +func decodeJSONPath(b []byte) ([]byte, []string, error) { + var path []string + for { + if len(b) == 0 { + break + } + if b[0] == ascendingBytesEscapes.escapedTerm { + b = b[1:] + break + } + rem, part, err := DecodeBytesAscending(b) + if err != nil { + return b, nil, NewErrInvalidJSONPath(b, err) + } + path = append(path, string(part)) + b = rem + } + return b, path, nil +} + +func encodeJSONPath(b []byte, v client.JSON) []byte { + b = append(b, jsonMarker) + for _, part := range v.GetPath() { + pathBytes := unsafeConvertStringToBytes(part) + //b = encodeBytesAscendingWithTerminator(b, pathBytes, ascendingBytesEscapes.escapedTerm) + b = EncodeBytesAscending(b, pathBytes) + } + b = append(b, ascendingBytesEscapes.escapedTerm) + return b +} diff --git a/internal/encoding/json_test.go 
b/internal/encoding/json_test.go new file mode 100644 index 0000000000..c27d22c09d --- /dev/null +++ b/internal/encoding/json_test.go @@ -0,0 +1,147 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package encoding + +import ( + "fmt" + "strings" + "testing" + + "github.com/sourcenetwork/defradb/client" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestJSONEncodingAndDecoding_ShouldEncodeAndDecodeBack(t *testing.T) { + jsonMap := map[string]any{ + "str": "value", + "num": 123.5, + "bool": true, + "null": nil, + "obj": map[string]any{ + "obj_str": "obj_val", + "obj_num": 42, + "obj_bool": false, + "obj_null": nil, + "obj_obj": map[string]any{ + "obj_obj_str": "obj_obj_val", + }, + "obj_arr": []any{"obj_arr_val", 100}, + }, + "arr": []any{ + "arr_val", + 23, + false, + nil, + map[string]any{ + "arr_obj": "arr_obj_val", + }, + []any{"arr_arr_val", 1000}, + }, + } + + testJSON, err := client.NewJSON(jsonMap) + assert.NoError(t, err) + + pathMap := make(map[string][]client.JSON) + + err = client.TraverseJSON(testJSON, func(value client.JSON) error { + p := strings.Join(value.GetPath(), "/") + jsons := pathMap[p] + jsons = append(jsons, value) + pathMap[p] = jsons + return nil + }, client.TraverseJSONOnlyLeaves(), client.TraverseJSONVisitArrayElements()) + assert.NoError(t, err) + + for path, jsons := range pathMap { + for i, value := range jsons { + for _, ascending := range []bool{true, false} { + t.Run(fmt.Sprintf("Path %s, index: %d, ascending: %v", path, i, ascending), func(t *testing.T) { + var encoded []byte + if ascending { + encoded = EncodeJSONAscending(nil, value) + } else { + 
encoded = EncodeJSONDescending(nil, value) + } + + var remaining []byte + var decoded client.JSON + var err error + + if ascending { + remaining, decoded, err = DecodeJSONAscending(encoded) + } else { + remaining, decoded, err = DecodeJSONDescending(encoded) + } + + require.NoError(t, err) + assert.Empty(t, remaining) + assert.Equal(t, value.Value(), decoded.Value()) + }) + } + } + } +} + +func TestJSONDecoding_MalformedData(t *testing.T) { + tests := []struct { + name string + input []byte + ascending bool + expectedErr string + }{ + { + name: "malformed json path", + input: []byte{jsonMarker, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, + ascending: true, + }, + { + name: "malformed json num", + input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, floatPos, 0xFF, 0xFF, 0xFF}, + ascending: true, + }, + { + name: "malformed json num", + input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, floatPos, 0xFF, 0xFF, 0xFF}, + ascending: false, + }, + { + name: "malformed json num", + input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, bytesMarker, 0xFF, 0xFF, 0xFF}, + ascending: true, + }, + { + name: "malformed json num", + input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, bytesDescMarker, 0xFF, 0xFF, 0xFF}, + ascending: false, + }, + { + name: "wrong type marker", + input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, timeMarker, 0xFF, 0xFF, 0xFF}, + ascending: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var err error + + if tt.ascending { + _, _, err = DecodeJSONAscending(tt.input) + } else { + _, _, err = DecodeJSONDescending(tt.input) + } + + assert.Error(t, err) + }) + } +} diff --git a/internal/encoding/null.go b/internal/encoding/null.go index 067c348122..5e66dae37c 100644 --- a/internal/encoding/null.go +++ b/internal/encoding/null.go @@ -35,7 +35,14 @@ func EncodeNullDescending(b []byte) []byte { // This function handles both ascendingly and descendingly encoded NULLs. 
func DecodeIfNull(b []byte) ([]byte, bool) { if PeekType(b) == Null { - return b[1:], true + return decodeNull(b), true } return b, false } + +// decodeNull decodes a NULL value from the input buffer. The input buffer is +// expected to start with the NULL value encoding. The decoded NULL value is +// removed from the input buffer and the remaining buffer is returned. +func decodeNull(b []byte) []byte { + return b[1:] +} diff --git a/internal/encoding/type.go b/internal/encoding/type.go index a551ca93e6..2210e1caa1 100644 --- a/internal/encoding/type.go +++ b/internal/encoding/type.go @@ -19,11 +19,13 @@ type Type int const ( Unknown Type = 0 Null Type = 1 + Bool Type = 2 Int Type = 3 Float Type = 4 Bytes Type = 6 BytesDesc Type = 7 Time Type = 8 + JSON Type = 9 ) // PeekType peeks at the type of the value encoded at the start of b. @@ -43,6 +45,10 @@ func PeekType(b []byte) Type { return Float case m == timeMarker: return Time + case m == falseMarker, m == trueMarker: + return Bool + case m == jsonMarker: + return JSON } } return Unknown From c784f619690319528ab131d646bdb680d375c026 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Fri, 29 Nov 2024 07:38:51 +0100 Subject: [PATCH 06/46] Correctly handle paths to json nodes --- client/json.go | 229 +++++++++++++------------- client/json_test.go | 300 ++++++++++++++++++++++++----------- client/json_traverse_test.go | 122 +++++++------- client/normal_value_test.go | 2 +- 4 files changed, 394 insertions(+), 259 deletions(-) diff --git a/client/json.go b/client/json.go index c74b9eebe9..dd3d427c8d 100644 --- a/client/json.go +++ b/client/json.go @@ -334,28 +334,28 @@ func (n jsonNull) accept(visitor JSONVisitor, path []string, opts traverseJSONOp return visitor(n) } -func newJSONObject(val map[string]JSON) jsonObject { - return jsonObject{jsonBase[map[string]JSON]{val: val}} +func newJSONObject(val map[string]JSON, path []string) jsonObject { + return jsonObject{jsonBase[map[string]JSON]{val: val, path: path}} } -func 
newJSONArray(val []JSON) jsonArray { - return jsonArray{jsonBase[[]JSON]{val: val}} +func newJSONArray(val []JSON, path []string) jsonArray { + return jsonArray{jsonBase[[]JSON]{val: val, path: path}} } -func newJSONNumber(val float64) jsonNumber { - return jsonNumber{jsonBase[float64]{val: val}} +func newJSONNumber(val float64, path []string) jsonNumber { + return jsonNumber{jsonBase[float64]{val: val, path: path}} } -func newJSONString(val string) jsonString { - return jsonString{jsonBase[string]{val: val}} +func newJSONString(val string, path []string) jsonString { + return jsonString{jsonBase[string]{val: val, path: path}} } -func newJSONBool(val bool) jsonBool { - return jsonBool{jsonBase[bool]{val: val}} +func newJSONBool(val bool, path []string) jsonBool { + return jsonBool{jsonBase[bool]{val: val, path: path}} } -func newJSONNull() jsonNull { - return jsonNull{jsonBase[any]{}} +func newJSONNull(path []string) jsonNull { + return jsonNull{jsonBase[any]{path: path}} } // ParseJSONBytes parses the given JSON bytes into a JSON value. @@ -394,174 +394,189 @@ func ParseJSONString(data string) (JSON, error) { // - []any // Returns error if the input cannot be converted to JSON. func NewJSON(v any) (JSON, error) { - return newJSON(v) + return newJSON(v, nil) +} + +// NewJSONWithPath creates a JSON value from a Go value with stored path to the value. +// The Go value must be one of: +// - nil (becomes JSON null) +// - *fastjson.Value +// - string +// - map[string]any +// - bool +// - numeric types (int8 through int64, uint8 through uint64, float32, float64) +// - slice of any above type +// - []any +// Returns error if the input cannot be converted to JSON. 
+func NewJSONWithPath(v any, path []string) (JSON, error) { + return newJSON(v, path) } // newJSON is an internal function that creates a new JSON value with parent and property name -func newJSON(v any) (JSON, error) { +func newJSON(v any, path []string) (JSON, error) { if v == nil { - return newJSONNull(), nil - } - switch val := v.(type) { - case *fastjson.Value: - return newJSONFromFastJSON(val), nil - case string: - return newJSONString(val), nil - case map[string]any: - return newJSONFromMap(val) - case bool: - return newJSONBool(val), nil - case int8: - return newJSONNumber(float64(val)), nil - case int16: - return newJSONNumber(float64(val)), nil - case int32: - return newJSONNumber(float64(val)), nil - case int64: - return newJSONNumber(float64(val)), nil - case int: - return newJSONNumber(float64(val)), nil - case uint8: - return newJSONNumber(float64(val)), nil - case uint16: - return newJSONNumber(float64(val)), nil - case uint32: - return newJSONNumber(float64(val)), nil - case uint64: - return newJSONNumber(float64(val)), nil - case uint: - return newJSONNumber(float64(val)), nil - case float32: - return newJSONNumber(float64(val)), nil - case float64: - return newJSONNumber(val), nil - - case []bool: - return newJSONBoolArray(val), nil - case []int8: - return newJSONNumberArray(val), nil - case []int16: - return newJSONNumberArray(val), nil - case []int32: - return newJSONNumberArray(val), nil - case []int64: - return newJSONNumberArray(val), nil - case []int: - return newJSONNumberArray(val), nil - case []uint8: - return newJSONNumberArray(val), nil - case []uint16: - return newJSONNumberArray(val), nil - case []uint32: - return newJSONNumberArray(val), nil - case []uint64: - return newJSONNumberArray(val), nil - case []uint: - return newJSONNumberArray(val), nil - case []float32: - return newJSONNumberArray(val), nil - case []float64: - return newJSONNumberArray(val), nil - case []string: - return newJSONStringArray(val), nil - - case []any: - 
return newJsonArrayFromAnyArray(val) + return newJSONNull(path), nil + } else { + switch val := v.(type) { + case *fastjson.Value: + return newJSONFromFastJSON(val, path), nil + case string: + return newJSONString(val, path), nil + case map[string]any: + return newJSONFromMap(val, path) + case bool: + return newJSONBool(val, path), nil + case int8: + return newJSONNumber(float64(val), path), nil + case int16: + return newJSONNumber(float64(val), path), nil + case int32: + return newJSONNumber(float64(val), path), nil + case int64: + return newJSONNumber(float64(val), path), nil + case int: + return newJSONNumber(float64(val), path), nil + case uint8: + return newJSONNumber(float64(val), path), nil + case uint16: + return newJSONNumber(float64(val), path), nil + case uint32: + return newJSONNumber(float64(val), path), nil + case uint64: + return newJSONNumber(float64(val), path), nil + case uint: + return newJSONNumber(float64(val), path), nil + case float32: + return newJSONNumber(float64(val), path), nil + case float64: + return newJSONNumber(val, path), nil + + case []bool: + return newJSONBoolArray(val, path), nil + case []int8: + return newJSONNumberArray(val, path), nil + case []int16: + return newJSONNumberArray(val, path), nil + case []int32: + return newJSONNumberArray(val, path), nil + case []int64: + return newJSONNumberArray(val, path), nil + case []int: + return newJSONNumberArray(val, path), nil + case []uint8: + return newJSONNumberArray(val, path), nil + case []uint16: + return newJSONNumberArray(val, path), nil + case []uint32: + return newJSONNumberArray(val, path), nil + case []uint64: + return newJSONNumberArray(val, path), nil + case []uint: + return newJSONNumberArray(val, path), nil + case []float32: + return newJSONNumberArray(val, path), nil + case []float64: + return newJSONNumberArray(val, path), nil + case []string: + return newJSONStringArray(val, path), nil + case []any: + return newJsonArrayFromAnyArray(val, path) + } } return nil, 
NewErrInvalidJSONPayload(v) } -func newJsonArrayFromAnyArray(arr []any) (JSON, error) { +func newJsonArrayFromAnyArray(arr []any, path []string) (JSON, error) { result := make([]JSON, len(arr)) for i := range arr { - jsonVal, err := newJSON(arr[i]) + jsonVal, err := newJSON(arr[i], path) if err != nil { return nil, err } result[i] = jsonVal } - return newJSONArray(result), nil + return newJSONArray(result, path), nil } -func newJSONBoolArray(v []bool) JSON { +func newJSONBoolArray(v []bool, path []string) JSON { arr := make([]JSON, len(v)) for i := range v { - arr[i] = newJSONBool(v[i]) + arr[i] = newJSONBool(v[i], path) } - return newJSONArray(arr) + return newJSONArray(arr, path) } -func newJSONNumberArray[T constraints.Integer | constraints.Float](v []T) JSON { +func newJSONNumberArray[T constraints.Integer | constraints.Float](v []T, path []string) JSON { arr := make([]JSON, len(v)) for i := range v { - arr[i] = newJSONNumber(float64(v[i])) + arr[i] = newJSONNumber(float64(v[i]), path) } - return newJSONArray(arr) + return newJSONArray(arr, path) } -func newJSONStringArray(v []string) JSON { +func newJSONStringArray(v []string, path []string) JSON { arr := make([]JSON, len(v)) for i := range v { - arr[i] = newJSONString(v[i]) + arr[i] = newJSONString(v[i], path) } - return newJSONArray(arr) + return newJSONArray(arr, path) } // newJSONFromFastJSON is an internal function that creates a new JSON value with parent and property name -func newJSONFromFastJSON(v *fastjson.Value) JSON { +func newJSONFromFastJSON(v *fastjson.Value, path []string) JSON { switch v.Type() { case fastjson.TypeObject: fastObj := v.GetObject() obj := make(map[string]JSON, fastObj.Len()) fastObj.Visit(func(k []byte, v *fastjson.Value) { key := string(k) - obj[key] = newJSONFromFastJSON(v) + obj[key] = newJSONFromFastJSON(v, append(path, key)) }) - return newJSONObject(obj) + return newJSONObject(obj, path) case fastjson.TypeArray: fastArr := v.GetArray() arr := make([]JSON, len(fastArr)) for 
i := range fastArr { arr[i] = NewJSONFromFastJSON(fastArr[i]) } - return newJSONArray(arr) + return newJSONArray(arr, path) case fastjson.TypeNumber: - return newJSONNumber(v.GetFloat64()) + return newJSONNumber(v.GetFloat64(), path) case fastjson.TypeString: - return newJSONString(string(v.GetStringBytes())) + return newJSONString(string(v.GetStringBytes()), path) case fastjson.TypeTrue: - return newJSONBool(true) + return newJSONBool(true, path) case fastjson.TypeFalse: - return newJSONBool(false) + return newJSONBool(false, path) case fastjson.TypeNull: - return newJSONNull() + return newJSONNull(path) } return nil } // NewJSONFromFastJSON creates a JSON value from a fastjson.Value. func NewJSONFromFastJSON(v *fastjson.Value) JSON { - return newJSONFromFastJSON(v) + return newJSONFromFastJSON(v, nil) } // NewJSONFromMap creates a JSON object from a map[string]any. // The map values must be valid Go values that can be converted to JSON. // Returns error if any map value cannot be converted to JSON. 
func NewJSONFromMap(data map[string]any) (JSON, error) { - return newJSONFromMap(data) + return newJSONFromMap(data, nil) } -func newJSONFromMap(data map[string]any) (JSON, error) { +func newJSONFromMap(data map[string]any, path []string) (JSON, error) { obj := make(map[string]JSON, len(data)) for k, v := range data { - jsonVal, err := newJSON(v) + jsonVal, err := newJSON(v, append(path, k)) if err != nil { return nil, err } obj[k] = jsonVal } - return newJSONObject(obj), nil + return newJSONObject(obj, path), nil } func shouldVisitPath(prefix, path []string) bool { diff --git a/client/json_test.go b/client/json_test.go index 9ac4d3b781..512742047d 100644 --- a/client/json_test.go +++ b/client/json_test.go @@ -13,9 +13,11 @@ package client import ( "bytes" "encoding/json" + "fmt" "strings" "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/valyala/fastjson" ) @@ -119,13 +121,13 @@ func TestNewJSONFomString_WithInvalidInput_Error(t *testing.T) { func TestJSONObject_Methods_ShouldWorkAsExpected(t *testing.T) { m := map[string]JSON{ - "key": newJSONString("value"), + "key": newJSONString("value", nil), "nested": newJSONObject(map[string]JSON{ - "inner": newJSONNumber(42), - "array": newJSONArray([]JSON{newJSONString("test"), newJSONBool(true)}), - }), + "inner": newJSONNumber(42, nil), + "array": newJSONArray([]JSON{newJSONString("test", nil), newJSONBool(true, nil)}, nil), + }, nil), } - obj := newJSONObject(m) + obj := newJSONObject(m, nil) expectedUnwrapped := map[string]any{ "key": "value", "nested": map[string]any{ @@ -155,14 +157,14 @@ func TestJSONObject_Methods_ShouldWorkAsExpected(t *testing.T) { func TestJSONArray_Methods_ShouldWorkAsExpected(t *testing.T) { arr := []JSON{ - newJSONString("item1"), + newJSONString("item1", nil), newJSONObject(map[string]JSON{ - "key": newJSONString("value"), - "num": newJSONNumber(42), - }), - newJSONNumber(2), + "key": newJSONString("value", nil), + "num": newJSONNumber(42, 
nil), + }, nil), + newJSONNumber(2, nil), } - jsonArr := newJSONArray(arr) + jsonArr := newJSONArray(arr, nil) expectedUnwrapped := []any{ "item1", map[string]any{ @@ -192,7 +194,7 @@ func TestJSONArray_Methods_ShouldWorkAsExpected(t *testing.T) { } func TestJSONNumber_Methods_ShouldWorkAsExpected(t *testing.T) { - num := newJSONNumber(2.5) + num := newJSONNumber(2.5, nil) expected := 2.5 // Positive tests @@ -215,7 +217,7 @@ func TestJSONNumber_Methods_ShouldWorkAsExpected(t *testing.T) { } func TestJSONString_Methods_ShouldWorkAsExpected(t *testing.T) { - str := newJSONString("value") + str := newJSONString("value", nil) expected := "value" // Positive tests @@ -238,7 +240,7 @@ func TestJSONString_Methods_ShouldWorkAsExpected(t *testing.T) { } func TestJSONBool_Methods_ShouldWorkAsExpected(t *testing.T) { - b := newJSONBool(true) + b := newJSONBool(true, nil) expected := true // Positive tests @@ -261,7 +263,7 @@ func TestJSONBool_Methods_ShouldWorkAsExpected(t *testing.T) { } func TestJSONNull_Methods_ShouldWorkAsExpected(t *testing.T) { - null := newJSONNull() + null := newJSONNull(nil) // Positive tests require.True(t, null.IsNull()) @@ -292,193 +294,207 @@ func TestNewJSONAndMarshalJSON(t *testing.T) { { name: "Nil", input: nil, - expected: newJSONNull(), + expected: newJSONNull(nil), expectedJSON: "null", }, { name: "FastJSON", input: fastjson.MustParse(`{"key": "value"}`), - expected: newJSONObject(map[string]JSON{"key": newJSONString("value")}), + expected: newJSONObject(map[string]JSON{"key": newJSONString("value", nil)}, nil), expectedJSON: `{"key":"value"}`, }, { name: "Map", input: map[string]any{"key": "value"}, - expected: newJSONObject(map[string]JSON{"key": newJSONString("value")}), + expected: newJSONObject(map[string]JSON{"key": newJSONString("value", nil)}, nil), expectedJSON: `{"key":"value"}`, }, { name: "Bool", input: true, - expected: newJSONBool(true), + expected: newJSONBool(true, nil), expectedJSON: "true", }, { name: "String", input: 
"str", - expected: newJSONString("str"), + expected: newJSONString("str", nil), expectedJSON: `"str"`, }, { name: "Int8", input: int8(42), - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Int16", input: int16(42), - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Int32", input: int32(42), - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Int64", input: int64(42), - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Int", input: 42, - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Uint8", input: uint8(42), - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Uint16", input: uint16(42), - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Uint32", input: uint32(42), - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Uint64", input: uint64(42), - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Uint", input: uint(42), - expected: newJSONNumber(42), + expected: newJSONNumber(42, nil), expectedJSON: "42", }, { name: "Float32", input: float32(2.5), - expected: newJSONNumber(2.5), + expected: newJSONNumber(2.5, nil), expectedJSON: "2.5", }, { name: "Float64", input: float64(2.5), - expected: newJSONNumber(2.5), + expected: newJSONNumber(2.5, nil), expectedJSON: "2.5", }, { name: "BoolArray", input: []bool{true, false}, - expected: newJSONArray([]JSON{newJSONBool(true), newJSONBool(false)}), + expected: newJSONArray([]JSON{newJSONBool(true, nil), newJSONBool(false, nil)}, nil), expectedJSON: "[true,false]", }, { - name: "StringArray", - input: []string{"a", "b", "c"}, - expected: newJSONArray([]JSON{newJSONString("a"), newJSONString("b"), 
newJSONString("c")}), + name: "StringArray", + input: []string{"a", "b", "c"}, + expected: newJSONArray([]JSON{newJSONString("a", nil), newJSONString("b", nil), + newJSONString("c", nil)}, nil), expectedJSON: `["a","b","c"]`, }, { - name: "AnyArray", - input: []any{"a", 1, true}, - expected: newJSONArray([]JSON{newJSONString("a"), newJSONNumber(1), newJSONBool(true)}), + name: "AnyArray", + input: []any{"a", 1, true}, + expected: newJSONArray([]JSON{newJSONString("a", nil), newJSONNumber(1, nil), + newJSONBool(true, nil)}, nil), expectedJSON: `["a",1,true]`, }, { - name: "Int8Array", - input: []int8{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), newJSONNumber(3)}), + name: "Int8Array", + input: []int8{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "Int16Array", - input: []int16{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), newJSONNumber(3)}), + name: "Int16Array", + input: []int16{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "Int32Array", - input: []int32{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), newJSONNumber(3)}), + name: "Int32Array", + input: []int32{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "Int64Array", - input: []int64{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), newJSONNumber(3)}), + name: "Int64Array", + input: []int64{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "IntArray", - input: []int{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), 
newJSONNumber(3)}), + name: "IntArray", + input: []int{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "Uint8Array", - input: []uint8{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), newJSONNumber(3)}), + name: "Uint8Array", + input: []uint8{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "Uint16Array", - input: []uint16{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), newJSONNumber(3)}), + name: "Uint16Array", + input: []uint16{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "Uint32Array", - input: []uint32{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), newJSONNumber(3)}), + name: "Uint32Array", + input: []uint32{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "Uint64Array", - input: []uint64{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), newJSONNumber(3)}), + name: "Uint64Array", + input: []uint64{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "UintArray", - input: []uint{1, 2, 3}, - expected: newJSONArray([]JSON{newJSONNumber(1), newJSONNumber(2), newJSONNumber(3)}), + name: "UintArray", + input: []uint{1, 2, 3}, + expected: newJSONArray([]JSON{newJSONNumber(1, nil), newJSONNumber(2, nil), + newJSONNumber(3, nil)}, nil), expectedJSON: "[1,2,3]", }, { - name: "Float32Array", - input: []float32{1.0, 2.25, 3.5}, - expected: newJSONArray([]JSON{newJSONNumber(1.0), newJSONNumber(2.25), newJSONNumber(3.5)}), + 
name: "Float32Array", + input: []float32{1.0, 2.25, 3.5}, + expected: newJSONArray([]JSON{newJSONNumber(1.0, nil), newJSONNumber(2.25, nil), + newJSONNumber(3.5, nil)}, nil), expectedJSON: "[1,2.25,3.5]", }, { - name: "Float64Array", - input: []float64{1.0, 2.25, 3.5}, - expected: newJSONArray([]JSON{newJSONNumber(1.0), newJSONNumber(2.25), newJSONNumber(3.5)}), + name: "Float64Array", + input: []float64{1.0, 2.25, 3.5}, + expected: newJSONArray([]JSON{newJSONNumber(1.0, nil), newJSONNumber(2.25, nil), + newJSONNumber(3.5, nil)}, nil), expectedJSON: "[1,2.25,3.5]", }, { @@ -488,22 +504,41 @@ func TestNewJSONAndMarshalJSON(t *testing.T) { }, } + path := []string{"some", "path"} + for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, err := NewJSON(tt.input) - if tt.expectError { - require.Error(t, err, "Expected error, but got nil") - return - } - require.NoError(t, err, "NewJSON failed with error %v", err) - require.Equal(t, result, tt.expected) - - if !tt.expectError { - jsonBytes, err := result.MarshalJSON() - require.NoError(t, err, "MarshalJSON failed with error %v", err) - require.Equal(t, tt.expectedJSON, string(jsonBytes)) - } - }) + for _, withPath := range []bool{true, false} { + t.Run(fmt.Sprintf("Test: %s, withPath: %v", tt.name, withPath), func(t *testing.T) { + var result JSON + var err error + if withPath { + result, err = NewJSONWithPath(tt.input, path) + } else { + result, err = NewJSON(tt.input) + } + if tt.expectError { + require.Error(t, err, "Expected error, but got nil") + return + } + require.NoError(t, err, "NewJSON failed with error %v", err) + + if withPath { + traverseAndAssertPaths(t, result, path) + require.Equal(t, result.Unwrap(), tt.expected.Unwrap()) + require.Equal(t, path, result.GetPath()) + } else { + traverseAndAssertPaths(t, result, nil) + require.Equal(t, result.Unwrap(), tt.expected.Unwrap()) + require.Empty(t, result.GetPath()) + } + + if !tt.expectError { + jsonBytes, err := result.MarshalJSON() + 
require.NoError(t, err, "MarshalJSON failed with error %v", err) + require.Equal(t, tt.expectedJSON, string(jsonBytes)) + } + }) + } } } @@ -517,3 +552,88 @@ func TestNewJSONFromMap_WithInvalidValue_ShouldFail(t *testing.T) { _, err := NewJSONFromMap(input) require.Error(t, err) } + +func TestNewJSONFromMap_WithPaths(t *testing.T) { + tests := []struct { + name string + input map[string]any + expected []struct { + path []string + value any + } + }{ + { + name: "flat object", + input: map[string]any{ + "string": "value", + "number": 42, + "bool": true, + "null": nil, + }, + expected: []struct { + path []string + value any + }{ + {path: []string{"string"}, value: "value"}, + {path: []string{"number"}, value: float64(42)}, + {path: []string{"bool"}, value: true}, + {path: []string{"null"}, value: nil}, + }, + }, + { + name: "nested object", + input: map[string]any{ + "obj": map[string]any{ + "nested": "value", + "deep": map[string]any{ + "number": 42, + }, + }, + "arr": []any{ + "first", + map[string]any{ + "inside_arr": true, + }, + []any{1, "nested"}, + }, + }, + expected: []struct { + path []string + value any + }{ + {path: []string{"obj", "nested"}, value: "value"}, + {path: []string{"obj", "deep", "number"}, value: float64(42)}, + {path: []string{"arr"}, value: "first"}, + {path: []string{"arr", "inside_arr"}, value: true}, + {path: []string{"arr"}, value: float64(1)}, + {path: []string{"arr"}, value: "nested"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + json, err := NewJSONFromMap(tt.input) + require.NoError(t, err) + + traverseAndAssertPaths(t, json, nil) + }) + } +} + +func traverseAndAssertPaths(t *testing.T, j JSON, parentPath []string) { + assert.Equal(t, parentPath, j.GetPath(), "Expected path %v, got %v", parentPath, j.GetPath()) + + if obj, isObj := j.Object(); isObj { + for k, v := range obj { + newPath := append(parentPath, k) + traverseAndAssertPaths(t, v, newPath) + } + } + + if arr, isArr := j.Array(); isArr 
{ + for _, v := range arr { + traverseAndAssertPaths(t, v, parentPath) + } + } +} diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index 3806af798c..a764a24973 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -26,28 +26,28 @@ type traverseNode struct { func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { // Create a complex JSON structure for testing json := newJSONObject(map[string]JSON{ - "string": newJSONString("value"), - "number": newJSONNumber(42), - "bool": newJSONBool(true), - "null": newJSONNull(), + "string": newJSONString("value", nil), + "number": newJSONNumber(42, nil), + "bool": newJSONBool(true, nil), + "null": newJSONNull(nil), "object": newJSONObject(map[string]JSON{ - "nested": newJSONString("inside"), + "nested": newJSONString("inside", nil), "deep": newJSONObject(map[string]JSON{ - "level": newJSONNumber(3), - }), - }), + "level": newJSONNumber(3, nil), + }, nil), + }, nil), "array": newJSONArray([]JSON{ - newJSONNumber(1), - newJSONString("two"), + newJSONNumber(1, nil), + newJSONString("two", nil), newJSONObject(map[string]JSON{ - "key": newJSONString("value"), - }), + "key": newJSONString("value", nil), + }, nil), newJSONArray([]JSON{ - newJSONNumber(4), - newJSONNumber(5), - }), - }), - }) + newJSONNumber(4, nil), + newJSONNumber(5, nil), + }, nil), + }, nil), + }, nil) tests := []struct { name string @@ -59,14 +59,14 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { options: nil, expected: []traverseNode{ {path: "", value: json}, - {path: "string", value: newJSONString("value")}, - {path: "number", value: newJSONNumber(42)}, - {path: "bool", value: newJSONBool(true)}, - {path: "null", value: newJSONNull()}, + {path: "string", value: newJSONString("value", nil)}, + {path: "number", value: newJSONNumber(42, nil)}, + {path: "bool", value: newJSONBool(true, nil)}, + {path: "null", value: newJSONNull(nil)}, {path: "object", value: 
json.Value().(map[string]JSON)["object"]}, - {path: "object/nested", value: newJSONString("inside")}, + {path: "object/nested", value: newJSONString("inside", nil)}, {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3)}, + {path: "object/deep/level", value: newJSONNumber(3, nil)}, {path: "array", value: json.Value().(map[string]JSON)["array"]}, }, }, @@ -76,12 +76,12 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { TraverseJSONOnlyLeaves(), }, expected: []traverseNode{ - {path: "string", value: newJSONString("value")}, - {path: "number", value: newJSONNumber(42)}, - {path: "bool", value: newJSONBool(true)}, - {path: "null", value: newJSONNull()}, - {path: "object/nested", value: newJSONString("inside")}, - {path: "object/deep/level", value: newJSONNumber(3)}, + {path: "string", value: newJSONString("value", nil)}, + {path: "number", value: newJSONNumber(42, nil)}, + {path: "bool", value: newJSONBool(true, nil)}, + {path: "null", value: newJSONNull(nil)}, + {path: "object/nested", value: newJSONString("inside", nil)}, + {path: "object/deep/level", value: newJSONNumber(3, nil)}, }, }, { @@ -91,9 +91,9 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { }, expected: []traverseNode{ {path: "object", value: json.Value().(map[string]JSON)["object"]}, - {path: "object/nested", value: newJSONString("inside")}, + {path: "object/nested", value: newJSONString("inside", nil)}, {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3)}, + {path: "object/deep/level", value: newJSONNumber(3, nil)}, }, }, { @@ -103,7 +103,7 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { }, expected: []traverseNode{ {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, - {path: 
"object/deep/level", value: newJSONNumber(3)}, + {path: "object/deep/level", value: newJSONNumber(3, nil)}, }, }, { @@ -113,22 +113,22 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { }, expected: []traverseNode{ {path: "", value: json}, - {path: "string", value: newJSONString("value")}, - {path: "number", value: newJSONNumber(42)}, - {path: "bool", value: newJSONBool(true)}, - {path: "null", value: newJSONNull()}, + {path: "string", value: newJSONString("value", nil)}, + {path: "number", value: newJSONNumber(42, nil)}, + {path: "bool", value: newJSONBool(true, nil)}, + {path: "null", value: newJSONNull(nil)}, {path: "object", value: json.Value().(map[string]JSON)["object"]}, - {path: "object/nested", value: newJSONString("inside")}, + {path: "object/nested", value: newJSONString("inside", nil)}, {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3)}, + {path: "object/deep/level", value: newJSONNumber(3, nil)}, {path: "array", value: json.Value().(map[string]JSON)["array"]}, - {path: "array", value: newJSONNumber(1)}, - {path: "array", value: newJSONString("two")}, + {path: "array", value: newJSONNumber(1, nil)}, + {path: "array", value: newJSONString("two", nil)}, {path: "array", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[2]}, - {path: "array/key", value: newJSONString("value")}, + {path: "array/key", value: newJSONString("value", nil)}, {path: "array", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[3]}, - {path: "array", value: newJSONNumber(4)}, - {path: "array", value: newJSONNumber(5)}, + {path: "array", value: newJSONNumber(4, nil)}, + {path: "array", value: newJSONNumber(5, nil)}, }, }, { @@ -139,22 +139,22 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { }, expected: []traverseNode{ {path: "", value: json}, - {path: "string", value: newJSONString("value")}, - {path: 
"number", value: newJSONNumber(42)}, - {path: "bool", value: newJSONBool(true)}, - {path: "null", value: newJSONNull()}, + {path: "string", value: newJSONString("value", nil)}, + {path: "number", value: newJSONNumber(42, nil)}, + {path: "bool", value: newJSONBool(true, nil)}, + {path: "null", value: newJSONNull(nil)}, {path: "object", value: json.Value().(map[string]JSON)["object"]}, - {path: "object/nested", value: newJSONString("inside")}, + {path: "object/nested", value: newJSONString("inside", nil)}, {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3)}, + {path: "object/deep/level", value: newJSONNumber(3, nil)}, {path: "array", value: json.Value().(map[string]JSON)["array"]}, - {path: "array/0", value: newJSONNumber(1)}, - {path: "array/1", value: newJSONString("two")}, + {path: "array/0", value: newJSONNumber(1, nil)}, + {path: "array/1", value: newJSONString("two", nil)}, {path: "array/2", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[2]}, - {path: "array/2/key", value: newJSONString("value")}, + {path: "array/2/key", value: newJSONString("value", nil)}, {path: "array/3", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[3]}, - {path: "array/3/0", value: newJSONNumber(4)}, - {path: "array/3/1", value: newJSONNumber(5)}, + {path: "array/3/0", value: newJSONNumber(4, nil)}, + {path: "array/3/1", value: newJSONNumber(5, nil)}, }, }, { @@ -166,11 +166,11 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { TraverseJSONWithArrayIndexInPath(), }, expected: []traverseNode{ - {path: "array/0", value: newJSONNumber(1)}, - {path: "array/1", value: newJSONString("two")}, - {path: "array/2/key", value: newJSONString("value")}, - {path: "array/3/0", value: newJSONNumber(4)}, - {path: "array/3/1", value: newJSONNumber(5)}, + {path: "array/0", value: newJSONNumber(1, nil)}, + {path: "array/1", value: 
newJSONString("two", nil)}, + {path: "array/2/key", value: newJSONString("value", nil)}, + {path: "array/3/0", value: newJSONNumber(4, nil)}, + {path: "array/3/1", value: newJSONNumber(5, nil)}, }, }, } @@ -350,8 +350,8 @@ func isArray(j JSON) bool { func TestTraverseJSON_WithError(t *testing.T) { json := newJSONObject(map[string]JSON{ - "key": newJSONString("value"), - }) + "key": newJSONString("value", nil), + }, nil) expectedErr := fmt.Errorf("test error") err := TraverseJSON(json, func(value JSON) error { diff --git a/client/normal_value_test.go b/client/normal_value_test.go index 773727c72a..03c0daff0b 100644 --- a/client/normal_value_test.go +++ b/client/normal_value_test.go @@ -293,7 +293,7 @@ func TestNormalValue_NewValueAndTypeAssertion(t *testing.T) { }, { nType: JSONType, - input: newJSONNumber(2), + input: newJSONNumber(2, nil), }, { nType: NillableBoolType, From 9855f30d7cd4f5b963ca0074a2b0b23bdb569717 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sat, 30 Nov 2024 23:49:39 +0100 Subject: [PATCH 07/46] Base JSON index implementation --- internal/db/fetcher/indexer_iterators.go | 126 +++++++++++++++++++++-- internal/db/index.go | 126 ++++++++++++++++++++++- internal/encoding/field_value.go | 12 +++ internal/encoding/field_value_test.go | 50 +++++++++ tests/integration/index/json_test.go | 70 +++++++++++++ 5 files changed, 375 insertions(+), 9 deletions(-) create mode 100644 tests/integration/index/json_test.go diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 8f266a693b..62539c05bc 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -17,6 +17,7 @@ import ( "time" ds "github.com/ipfs/go-datastore" + "golang.org/x/exp/slices" "github.com/sourcenetwork/defradb/client" "github.com/sourcenetwork/defradb/datastore" @@ -305,6 +306,57 @@ func (iter *arrayIndexIterator) Close() error { return iter.inner.Close() } +type jsonIndexIterator struct { + inner 
indexIterator + + fetchedDocs map[string]struct{} + jsonPath []string + + ctx context.Context + store datastore.DSReaderWriter +} + +var _ indexIterator = (*jsonIndexIterator)(nil) + +func (iter *jsonIndexIterator) Init(ctx context.Context, store datastore.DSReaderWriter) error { + iter.ctx = ctx + iter.store = store + iter.fetchedDocs = make(map[string]struct{}) + return iter.inner.Init(ctx, store) +} + +func (iter *jsonIndexIterator) Next() (indexIterResult, error) { + for { + res, err := iter.inner.Next() + if err != nil { + return indexIterResult{}, err + } + if !res.foundKey { + return res, nil + } + var docID string + if len(res.value) > 0 { + docID = string(res.value) + } else { + lastField := &res.key.Fields[len(res.key.Fields)-1] + var ok bool + docID, ok = lastField.Value.String() + if !ok { + return indexIterResult{}, NewErrUnexpectedTypeValue[string](lastField.Value) + } + } + if _, ok := iter.fetchedDocs[docID]; ok { + continue + } + iter.fetchedDocs[docID] = struct{}{} + return res, nil + } +} + +func (iter *jsonIndexIterator) Close() error { + return iter.inner.Close() +} + func executeValueMatchers(matchers []valueMatcher, fields []keys.IndexedField) (bool, error) { for i := range matchers { res, err := matchers[i].Match(fields[i].Value) @@ -546,6 +598,20 @@ func (m *invertedMatcher) Match(val client.NormalValue) (bool, error) { return !res, nil } +type jsonMatcher struct { + value float64 + evalFunc func(float64, float64) bool +} + +func (m *jsonMatcher) Match(value client.NormalValue) (bool, error) { + if jsonVal, ok := value.JSON(); ok { + if floatVal, ok := jsonVal.Number(); ok { + return m.evalFunc(floatVal, m.value), nil + } + } + return false, NewErrUnexpectedTypeValue[float64](value) +} + // newPrefixIteratorFromConditions creates a new eqPrefixIndexIterator for fetching indexed data. // It can modify the input matchers slice. 
func (f *IndexFetcher) newPrefixIteratorFromConditions( @@ -674,6 +740,14 @@ func (f *IndexFetcher) createIndexIterator() (indexIterator, error) { } } + hasJSON := false + for i := range fieldConditions { + if fieldConditions[i].kind == client.FieldKind_NILLABLE_JSON { + hasJSON = true + break + } + } + var iter indexIterator if fieldConditions[0].op == opEq { @@ -702,6 +776,10 @@ func (f *IndexFetcher) createIndexIterator() (indexIterator, error) { return nil, NewErrInvalidFilterOperator(fieldConditions[0].op) } + if hasJSON { + iter = &jsonIndexIterator{inner: iter, jsonPath: fieldConditions[0].jsonPath} + } + if hasArray { iter = &arrayIndexIterator{inner: iter} } @@ -714,6 +792,7 @@ func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { return &anyMatcher{}, nil } + // TODO: test json null if condition.val.IsNil() { return &nilMatcher{matchNil: condition.op == opEq}, nil } @@ -735,6 +814,11 @@ func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { if v, ok := condition.val.Bool(); ok { return &boolMatcher{value: v, isEq: condition.op == opEq}, nil } + if v, ok := condition.val.JSON(); ok { + if jsonVal, ok := v.Number(); ok { + return &jsonMatcher{value: jsonVal, evalFunc: getCompareValsFunc[float64](condition.op)}, nil + } + } case opIn, opNin: inVals, err := client.ToArrayOfNormalValues(condition.val) if err != nil { @@ -773,10 +857,11 @@ func createValueMatchers(conditions []fieldFilterCond) ([]valueMatcher, error) { } type fieldFilterCond struct { - op string - arrOp string - val client.NormalValue - kind client.FieldKind + op string + arrOp string + jsonPath []string + val client.NormalValue + kind client.FieldKind } // determineFieldFilterConditions determines the conditions and their corresponding operation @@ -796,15 +881,42 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro found = true + fieldDef := f.indexedFields[slices.IndexFunc(f.indexedFields, func(f 
client.FieldDefinition) bool { + return int(f.ID) == fieldInd + })] + condMap := indexFilterCond.(map[connor.FilterKey]any) + + jsonPath := []string{} + if fieldDef.Kind == client.FieldKind_NILLABLE_JSON { + + jsonPathLoop: + for { + for key, filterVal := range condMap { + prop, ok := key.(*mapper.ObjectProperty) + if !ok { + break jsonPathLoop + } + jsonPath = append(jsonPath, prop.Name) + condMap = filterVal.(map[connor.FilterKey]any) + } + } + } + for key, filterVal := range condMap { cond := fieldFilterCond{ - op: key.(*mapper.Operator).Operation, - kind: f.indexedFields[i].Kind, + op: key.(*mapper.Operator).Operation, + jsonPath: jsonPath, + kind: f.indexedFields[i].Kind, } var err error - if filterVal == nil { + if len(jsonPath) > 0 { + jsonVal, err := client.NewJSONWithPath(filterVal, jsonPath) + if err == nil { + cond.val = client.NewNormalJSON(jsonVal) + } + } else if filterVal == nil { cond.val, err = client.NewNormalNil(cond.kind) } else if !f.indexedFields[i].Kind.IsArray() { cond.val, err = client.NewNormalValue(filterVal) diff --git a/internal/db/index.go b/internal/db/index.go index 638f0b923b..d8dc10284f 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -41,6 +41,7 @@ func isSupportedKind(kind client.FieldKind) bool { client.FieldKind_INT_ARRAY, client.FieldKind_BOOL_ARRAY, client.FieldKind_FLOAT_ARRAY, + client.FieldKind_NILLABLE_JSON, client.FieldKind_NILLABLE_STRING, client.FieldKind_NILLABLE_INT, client.FieldKind_NILLABLE_FLOAT, @@ -68,6 +69,7 @@ func NewCollectionIndex( base := collectionBaseIndex{collection: collection, desc: desc} base.fieldsDescs = make([]client.SchemaFieldDescription, len(desc.Fields)) isArray := false + isJSON := false for i := range desc.Fields { field, foundField := collection.Schema().GetFieldByName(desc.Fields[i].Name) if !foundField { @@ -78,6 +80,7 @@ func NewCollectionIndex( return nil, NewErrUnsupportedIndexFieldType(field.Kind) } isArray = isArray || field.Kind.IsArray() + isJSON = isJSON || 
field.Kind == client.FieldKind_NILLABLE_JSON } if isArray { if desc.Unique { @@ -85,6 +88,8 @@ func NewCollectionIndex( } else { return newCollectionArrayIndex(base), nil } + } else if isJSON { + return newCollectionJSONIndex(base), nil } else if desc.Unique { return &collectionUniqueIndex{collectionBaseIndex: base}, nil } else { @@ -93,11 +98,14 @@ func NewCollectionIndex( } type collectionBaseIndex struct { - collection client.Collection - desc client.IndexDescription + collection client.Collection + desc client.IndexDescription + // fieldsDescs is a slice of field descriptions for the fields that are indexed by the index + // If there is more than 1 field, the index is composite fieldsDescs []client.SchemaFieldDescription } +// getDocFieldValues retrieves the values of the indexed fields from the given document. func (index *collectionBaseIndex) getDocFieldValues(doc *client.Document) ([]client.NormalValue, error) { result := make([]client.NormalValue, 0, len(index.fieldsDescs)) for iter := range index.fieldsDescs { @@ -743,3 +751,117 @@ func (index *collectionArrayUniqueIndex) Delete( } return nil } + +type collectionJSONBaseIndex struct { + collectionBaseIndex + jsonFieldsIndexes []int +} + +func newCollectionJSONBaseIndex(base collectionBaseIndex) collectionJSONBaseIndex { + ind := collectionJSONBaseIndex{collectionBaseIndex: base} + for i := range base.fieldsDescs { + if base.fieldsDescs[i].Kind == client.FieldKind_NILLABLE_JSON { + ind.jsonFieldsIndexes = append(ind.jsonFieldsIndexes, i) + } + } + if len(ind.jsonFieldsIndexes) == 0 { + return collectionJSONBaseIndex{} + } + return ind +} + +type collectionJSONIndex struct { + collectionJSONBaseIndex +} + +var _ CollectionIndex = (*collectionJSONIndex)(nil) + +func newCollectionJSONIndex(base collectionBaseIndex) *collectionJSONIndex { + return &collectionJSONIndex{collectionJSONBaseIndex: newCollectionJSONBaseIndex(base)} +} + +// Save indexes a document by storing the indexed field value. 
+func (index *collectionJSONIndex) Save( + ctx context.Context, + txn datastore.Txn, + doc *client.Document, +) error { + key, err := index.getDocumentsIndexKey(doc, true) + if err != nil { + return err + } + + for _, jsonFieldIndex := range index.jsonFieldsIndexes { + json, _ := key.Fields[jsonFieldIndex].Value.JSON() + + err = client.TraverseJSON(json, func(value client.JSON) error { + val, err := client.NewNormalValue(value) + if err != nil { + return err + } + + leafKey := key + copy(leafKey.Fields, key.Fields) + leafKey.Fields[jsonFieldIndex].Value = val + + dsKey := leafKey.ToDS() + err = txn.Datastore().Put(ctx, dsKey, []byte{}) + if err != nil { + return NewErrFailedToStoreIndexedField(key.ToString(), err) + } + + return nil + }, client.TraverseJSONOnlyLeaves()) + + if err != nil { + return err + } + + } + + return nil +} + +func (index *collectionJSONIndex) Update( + ctx context.Context, + txn datastore.Txn, + oldDoc *client.Document, + newDoc *client.Document, +) error { + /*newKeys, err := index.deleteRetiredKeysAndReturnNew(ctx, txn, oldDoc, newDoc, true) + if err != nil { + return err + } + + for _, key := range newKeys { + err = txn.Datastore().Put(ctx, key.ToDS(), []byte{}) + if err != nil { + return NewErrFailedToStoreIndexedField(key.ToString(), err) + } + }*/ + + return nil +} + +func (index *collectionJSONIndex) Delete( + ctx context.Context, + txn datastore.Txn, + doc *client.Document, +) error { + /*getNextKey, err := index.newIndexKeyGenerator(doc, true) + if err != nil { + return err + } + + for { + key, ok := getNextKey() + if !ok { + break + } + err = index.deleteIndexKey(ctx, txn, key) + if err != nil { + return err + } + }*/ + return nil +} diff --git a/internal/encoding/field_value.go b/internal/encoding/field_value.go index 745a07d717..2c051d0a37 100644 --- a/internal/encoding/field_value.go +++ b/internal/encoding/field_value.go @@ -165,6 +165,18 @@ func DecodeFieldValue(b []byte, descending bool, kind client.FieldKind) ([]byte, return 
nil, nil, NewErrCanNotDecodeFieldValue(b, kind, err) } return b, client.NewNormalTime(v), nil + case JSON: + var v client.JSON + var err error + if descending { + b, v, err = DecodeJSONDescending(b) + } else { + b, v, err = DecodeJSONAscending(b) + } + if err != nil { + return nil, nil, NewErrCanNotDecodeFieldValue(b, kind, err) + } + return b, client.NewNormalJSON(v), nil } return nil, nil, NewErrCanNotDecodeFieldValue(b, kind) diff --git a/internal/encoding/field_value_test.go b/internal/encoding/field_value_test.go index c1ae904fc3..607e3f74a1 100644 --- a/internal/encoding/field_value_test.go +++ b/internal/encoding/field_value_test.go @@ -24,6 +24,23 @@ func TestEncodeDecodeFieldValue(t *testing.T) { normalNil, err := client.NewNormalNil(client.FieldKind_NILLABLE_INT) require.NoError(t, err) + // Create test JSON values + simpleJSON, err := client.NewJSON("simple string") + require.NoError(t, err) + normalSimpleJSON := client.NewNormalJSON(simpleJSON) + + numberJSON, err := client.NewJSON(42.5) + require.NoError(t, err) + normalNumberJSON := client.NewNormalJSON(numberJSON) + + boolJSON, err := client.NewJSON(true) + require.NoError(t, err) + normalBoolJSON := client.NewNormalJSON(boolJSON) + + nullJSON, err := client.NewJSON(nil) + require.NoError(t, err) + normalNullJSON := client.NewNormalJSON(nullJSON) + tests := []struct { name string inputVal client.NormalValue @@ -73,6 +90,34 @@ func TestEncodeDecodeFieldValue(t *testing.T) { expectedBytesDesc: EncodeBytesDescending(nil, []byte("str")), expectedDecodedVal: client.NewNormalString("str"), }, + { + name: "json string", + inputVal: normalSimpleJSON, + expectedBytes: EncodeJSONAscending(nil, simpleJSON), + expectedBytesDesc: EncodeJSONDescending(nil, simpleJSON), + expectedDecodedVal: normalSimpleJSON, + }, + { + name: "json number", + inputVal: normalNumberJSON, + expectedBytes: EncodeJSONAscending(nil, numberJSON), + expectedBytesDesc: EncodeJSONDescending(nil, numberJSON), + expectedDecodedVal: 
normalNumberJSON, + }, + { + name: "json bool", + inputVal: normalBoolJSON, + expectedBytes: EncodeJSONAscending(nil, boolJSON), + expectedBytesDesc: EncodeJSONDescending(nil, boolJSON), + expectedDecodedVal: normalBoolJSON, + }, + { + name: "json null", + inputVal: normalNullJSON, + expectedBytes: EncodeJSONAscending(nil, nullJSON), + expectedBytesDesc: EncodeJSONDescending(nil, nullJSON), + expectedDecodedVal: normalNullJSON, + }, } for _, tt := range tests { @@ -127,6 +172,11 @@ func TestDecodeInvalidFieldValue(t *testing.T) { inputBytes: []byte{IntMin - 1, 2}, inputBytesDesc: []byte{^byte(IntMin - 1), 2}, }, + { + name: "invalid json value", + inputBytes: []byte{jsonMarker, 0xFF}, + inputBytesDesc: []byte{jsonMarker, 0xFF}, + }, } for _, tt := range tests { diff --git a/tests/integration/index/json_test.go b/tests/integration/index/json_test.go new file mode 100644 index 0000000000..025ad3fab1 --- /dev/null +++ b/tests/integration/index/json_test.go @@ -0,0 +1,70 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package index + +import ( + "testing" + + testUtils "github.com/sourcenetwork/defradb/tests/integration" +) + +func TestJSONIndex_WithFilterOnIndexedJSON_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_eq: 168}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"height": 190} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + { + "name": "John", + }, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} From 0faa02b84fd45e53b8d3f10f4e75042b4aa60d1f Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sun, 1 Dec 2024 12:13:53 +0100 Subject: [PATCH 08/46] Move match-related code to a file --- internal/db/fetcher/indexer_iterators.go | 353 +--------------------- internal/db/fetcher/indexer_matchers.go | 369 +++++++++++++++++++++++ tests/integration/index/json_test.go | 15 +- 3 files changed, 381 insertions(+), 356 deletions(-) create mode 100644 internal/db/fetcher/indexer_matchers.go diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 62539c05bc..a5a041aaa7 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -1,4 +1,4 @@ -// Copyright 2023 Democratized Data Foundation +// Copyright 2024 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. 
@@ -11,10 +11,10 @@ package fetcher import ( - "cmp" "context" "strings" "time" + "errors" ds "github.com/ipfs/go-datastore" "golang.org/x/exp/slices" @@ -357,261 +357,6 @@ func (iter *jsonIndexIterator) Close() error { return iter.inner.Close() } -func executeValueMatchers(matchers []valueMatcher, fields []keys.IndexedField) (bool, error) { - for i := range matchers { - res, err := matchers[i].Match(fields[i].Value) - if err != nil { - return false, err - } - if !res { - return false, nil - } - } - return true, nil -} - -// checks if the value satisfies the condition -type valueMatcher interface { - Match(client.NormalValue) (bool, error) -} - -type intMatcher struct { - value int64 - evalFunc func(int64, int64) bool -} - -func (m *intMatcher) Match(value client.NormalValue) (bool, error) { - if intVal, ok := value.Int(); ok { - return m.evalFunc(intVal, m.value), nil - } - if intOptVal, ok := value.NillableInt(); ok { - if !intOptVal.HasValue() { - return false, nil - } - return m.evalFunc(intOptVal.Value(), m.value), nil - } - return false, NewErrUnexpectedTypeValue[int64](value) -} - -type floatMatcher struct { - value float64 - evalFunc func(float64, float64) bool -} - -func (m *floatMatcher) Match(value client.NormalValue) (bool, error) { - if floatVal, ok := value.Float(); ok { - return m.evalFunc(floatVal, m.value), nil - } - if floatOptVal, ok := value.NillableFloat(); ok { - if !floatOptVal.HasValue() { - return false, nil - } - return m.evalFunc(floatOptVal.Value(), m.value), nil - } - return false, NewErrUnexpectedTypeValue[float64](value) -} - -type stringMatcher struct { - value string - evalFunc func(string, string) bool -} - -func (m *stringMatcher) Match(value client.NormalValue) (bool, error) { - if strVal, ok := value.String(); ok { - return m.evalFunc(strVal, m.value), nil - } - if strOptVal, ok := value.NillableString(); ok { - if !strOptVal.HasValue() { - return false, nil - } - return m.evalFunc(strOptVal.Value(), m.value), nil - } - return 
false, NewErrUnexpectedTypeValue[string](value) -} - -type timeMatcher struct { - op string - value time.Time -} - -func (m *timeMatcher) Match(value client.NormalValue) (bool, error) { - timeVal, ok := value.Time() - if !ok { - if timeOptVal, ok := value.NillableTime(); ok { - timeVal = timeOptVal.Value() - } else { - return false, NewErrUnexpectedTypeValue[time.Time](value) - } - } - switch m.op { - case opEq: - return timeVal.Equal(m.value), nil - case opGt: - return timeVal.After(m.value), nil - case opGe: - return !timeVal.Before(m.value), nil - case opLt: - return timeVal.Before(m.value), nil - case opLe: - return !timeVal.After(m.value), nil - case opNe: - return !timeVal.Equal(m.value), nil - } - return false, NewErrInvalidFilterOperator(m.op) -} - -type boolMatcher struct { - value bool - isEq bool -} - -func (m *boolMatcher) Match(value client.NormalValue) (bool, error) { - boolVal, ok := value.Bool() - if !ok { - if boolOptVal, ok := value.NillableBool(); ok { - boolVal = boolOptVal.Value() - } else { - intVal, ok := value.Int() - if !ok { - if intOptVal, ok := value.NillableInt(); ok { - intVal = intOptVal.Value() - } else { - return false, NewErrUnexpectedTypeValue[bool](value) - } - } - boolVal = intVal != 0 - } - } - return boolVal == m.value == m.isEq, nil -} - -type nilMatcher struct { - matchNil bool -} - -func (m *nilMatcher) Match(value client.NormalValue) (bool, error) { - return value.IsNil() == m.matchNil, nil -} - -// checks if the index value is or is not in the given array -type indexInArrayMatcher struct { - inValues []client.NormalValue - isIn bool -} - -func (m *indexInArrayMatcher) Match(value client.NormalValue) (bool, error) { - for _, inVal := range m.inValues { - if inVal.Unwrap() == value.Unwrap() { - return m.isIn, nil - } - } - return !m.isIn, nil -} - -// checks if the index value satisfies the LIKE condition -type indexLikeMatcher struct { - hasPrefix bool - hasSuffix bool - startAndEnd []string - isLike bool - 
isCaseInsensitive bool - value string -} - -func newLikeIndexCmp(filterValue string, isLike bool, isCaseInsensitive bool) (*indexLikeMatcher, error) { - matcher := &indexLikeMatcher{ - isLike: isLike, - isCaseInsensitive: isCaseInsensitive, - } - if len(filterValue) >= 2 { - if filterValue[0] == '%' { - matcher.hasPrefix = true - filterValue = strings.TrimPrefix(filterValue, "%") - } - if filterValue[len(filterValue)-1] == '%' { - matcher.hasSuffix = true - filterValue = strings.TrimSuffix(filterValue, "%") - } - if !matcher.hasPrefix && !matcher.hasSuffix { - matcher.startAndEnd = strings.Split(filterValue, "%") - } - } - if isCaseInsensitive { - matcher.value = strings.ToLower(filterValue) - } else { - matcher.value = filterValue - } - - return matcher, nil -} - -func (m *indexLikeMatcher) Match(value client.NormalValue) (bool, error) { - strVal, ok := value.String() - if !ok { - strOptVal, ok := value.NillableString() - if !ok { - return false, NewErrUnexpectedTypeValue[string](value) - } - if !strOptVal.HasValue() { - return false, nil - } - strVal = strOptVal.Value() - } - if m.isCaseInsensitive { - strVal = strings.ToLower(strVal) - } - - return m.doesMatch(strVal) == m.isLike, nil -} - -func (m *indexLikeMatcher) doesMatch(currentVal string) bool { - switch { - case m.hasPrefix && m.hasSuffix: - return strings.Contains(currentVal, m.value) - case m.hasPrefix: - return strings.HasSuffix(currentVal, m.value) - case m.hasSuffix: - return strings.HasPrefix(currentVal, m.value) - // there might be 2 ends only for LIKE with 1 % in the middle "ab%cd" - case len(m.startAndEnd) == 2: - return strings.HasPrefix(currentVal, m.startAndEnd[0]) && - strings.HasSuffix(currentVal, m.startAndEnd[1]) - default: - return m.value == currentVal - } -} - -type anyMatcher struct{} - -func (m *anyMatcher) Match(client.NormalValue) (bool, error) { return true, nil } - -// invertedMatcher inverts the result of the inner matcher. 
-type invertedMatcher struct { - matcher valueMatcher -} - -func (m *invertedMatcher) Match(val client.NormalValue) (bool, error) { - res, err := m.matcher.Match(val) - if err != nil { - return false, err - } - return !res, nil -} - -type jsonMatcher struct { - value float64 - evalFunc func(float64, float64) bool -} - -func (m *jsonMatcher) Match(value client.NormalValue) (bool, error) { - if jsonVal, ok := value.JSON(); ok { - if floatVal, ok := jsonVal.Number(); ok { - return m.evalFunc(floatVal, m.value), nil - } - } - return false, NewErrUnexpectedTypeValue[float64](value) -} - // newPrefixIteratorFromConditions creates a new eqPrefixIndexIterator for fetching indexed data. // It can modify the input matchers slice. func (f *IndexFetcher) newPrefixIteratorFromConditions( @@ -787,75 +532,6 @@ func (f *IndexFetcher) createIndexIterator() (indexIterator, error) { return iter, nil } -func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { - if condition.op == "" { - return &anyMatcher{}, nil - } - - // TODO: test json null - if condition.val.IsNil() { - return &nilMatcher{matchNil: condition.op == opEq}, nil - } - - switch condition.op { - case opEq, opGt, opGe, opLt, opLe, opNe: - if v, ok := condition.val.Int(); ok { - return &intMatcher{value: v, evalFunc: getCompareValsFunc[int64](condition.op)}, nil - } - if v, ok := condition.val.Float(); ok { - return &floatMatcher{value: v, evalFunc: getCompareValsFunc[float64](condition.op)}, nil - } - if v, ok := condition.val.String(); ok { - return &stringMatcher{value: v, evalFunc: getCompareValsFunc[string](condition.op)}, nil - } - if v, ok := condition.val.Time(); ok { - return &timeMatcher{value: v, op: condition.op}, nil - } - if v, ok := condition.val.Bool(); ok { - return &boolMatcher{value: v, isEq: condition.op == opEq}, nil - } - if v, ok := condition.val.JSON(); ok { - if jsonVal, ok := v.Number(); ok { - return &jsonMatcher{value: jsonVal, evalFunc: 
getCompareValsFunc[float64](condition.op)}, nil - } - } - case opIn, opNin: - inVals, err := client.ToArrayOfNormalValues(condition.val) - if err != nil { - return nil, err - } - return &indexInArrayMatcher{inValues: inVals, isIn: condition.op == opIn}, nil - case opLike, opNlike, opILike, opNILike: - strVal, ok := condition.val.String() - if !ok { - strOptVal, ok := condition.val.NillableString() - if !ok { - return nil, NewErrUnexpectedTypeValue[string](condition.val) - } - strVal = strOptVal.Value() - } - isLike := condition.op == opLike || condition.op == opILike - isCaseInsensitive := condition.op == opILike || condition.op == opNILike - return newLikeIndexCmp(strVal, isLike, isCaseInsensitive) - case opAny: - return &anyMatcher{}, nil - } - - return nil, NewErrInvalidFilterOperator(condition.op) -} - -func createValueMatchers(conditions []fieldFilterCond) ([]valueMatcher, error) { - matchers := make([]valueMatcher, 0, len(conditions)) - for i := range conditions { - m, err := createValueMatcher(&conditions[i]) - if err != nil { - return nil, err - } - matchers = append(matchers, m) - } - return matchers, nil -} - type fieldFilterCond struct { op string arrOp string @@ -974,28 +650,3 @@ func isUniqueFetchByFullKey(indexDesc *client.IndexDescription, conditions []fie } return res } - -func getCompareValsFunc[T cmp.Ordered](op string) func(T, T) bool { - switch op { - case opGt: - return checkGT - case opGe: - return checkGE - case opLt: - return checkLT - case opLe: - return checkLE - case opEq: - return checkEQ - case opNe: - return checkNE - } - return nil -} - -func checkGE[T cmp.Ordered](a, b T) bool { return a >= b } -func checkGT[T cmp.Ordered](a, b T) bool { return a > b } -func checkLE[T cmp.Ordered](a, b T) bool { return a <= b } -func checkLT[T cmp.Ordered](a, b T) bool { return a < b } -func checkEQ[T cmp.Ordered](a, b T) bool { return a == b } -func checkNE[T cmp.Ordered](a, b T) bool { return a != b } diff --git 
a/internal/db/fetcher/indexer_matchers.go b/internal/db/fetcher/indexer_matchers.go new file mode 100644 index 0000000000..cf4138fe4a --- /dev/null +++ b/internal/db/fetcher/indexer_matchers.go @@ -0,0 +1,369 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package fetcher + +import ( + "cmp" + "strings" + "time" + + "github.com/sourcenetwork/defradb/client" + "github.com/sourcenetwork/defradb/internal/keys" +) + +func executeValueMatchers(matchers []valueMatcher, fields []keys.IndexedField) (bool, error) { + for i := range matchers { + res, err := matchers[i].Match(fields[i].Value) + if err != nil { + return false, err + } + if !res { + return false, nil + } + } + return true, nil +} + +// checks if the value satisfies the condition +type valueMatcher interface { + Match(client.NormalValue) (bool, error) +} + +type intMatcher struct { + value int64 + evalFunc func(int64, int64) bool +} + +func (m *intMatcher) Match(value client.NormalValue) (bool, error) { + if intVal, ok := value.Int(); ok { + return m.evalFunc(intVal, m.value), nil + } + if intOptVal, ok := value.NillableInt(); ok { + if !intOptVal.HasValue() { + return false, nil + } + return m.evalFunc(intOptVal.Value(), m.value), nil + } + return false, NewErrUnexpectedTypeValue[int64](value) +} + +type floatMatcher struct { + value float64 + evalFunc func(float64, float64) bool +} + +func (m *floatMatcher) Match(value client.NormalValue) (bool, error) { + if floatVal, ok := value.Float(); ok { + return m.evalFunc(floatVal, m.value), nil + } + if floatOptVal, ok := value.NillableFloat(); ok { + if !floatOptVal.HasValue() { + return false, nil + } + return 
m.evalFunc(floatOptVal.Value(), m.value), nil + } + return false, NewErrUnexpectedTypeValue[float64](value) +} + +type stringMatcher struct { + value string + evalFunc func(string, string) bool +} + +func (m *stringMatcher) Match(value client.NormalValue) (bool, error) { + if strVal, ok := value.String(); ok { + return m.evalFunc(strVal, m.value), nil + } + if strOptVal, ok := value.NillableString(); ok { + if !strOptVal.HasValue() { + return false, nil + } + return m.evalFunc(strOptVal.Value(), m.value), nil + } + return false, NewErrUnexpectedTypeValue[string](value) +} + +type timeMatcher struct { + op string + value time.Time +} + +func (m *timeMatcher) Match(value client.NormalValue) (bool, error) { + timeVal, ok := value.Time() + if !ok { + if timeOptVal, ok := value.NillableTime(); ok { + timeVal = timeOptVal.Value() + } else { + return false, NewErrUnexpectedTypeValue[time.Time](value) + } + } + switch m.op { + case opEq: + return timeVal.Equal(m.value), nil + case opGt: + return timeVal.After(m.value), nil + case opGe: + return !timeVal.Before(m.value), nil + case opLt: + return timeVal.Before(m.value), nil + case opLe: + return !timeVal.After(m.value), nil + case opNe: + return !timeVal.Equal(m.value), nil + } + return false, NewErrInvalidFilterOperator(m.op) +} + +type boolMatcher struct { + value bool + isEq bool +} + +func (m *boolMatcher) Match(value client.NormalValue) (bool, error) { + boolVal, ok := value.Bool() + if !ok { + if boolOptVal, ok := value.NillableBool(); ok { + boolVal = boolOptVal.Value() + } else { + intVal, ok := value.Int() + if !ok { + if intOptVal, ok := value.NillableInt(); ok { + intVal = intOptVal.Value() + } else { + return false, NewErrUnexpectedTypeValue[bool](value) + } + } + boolVal = intVal != 0 + } + } + return boolVal == m.value == m.isEq, nil +} + +type nilMatcher struct { + matchNil bool +} + +func (m *nilMatcher) Match(value client.NormalValue) (bool, error) { + return value.IsNil() == m.matchNil, nil +} + +// 
checks if the index value is or is not in the given array +type indexInArrayMatcher struct { + inValues []client.NormalValue + isIn bool +} + +func (m *indexInArrayMatcher) Match(value client.NormalValue) (bool, error) { + for _, inVal := range m.inValues { + if inVal.Unwrap() == value.Unwrap() { + return m.isIn, nil + } + } + return !m.isIn, nil +} + +// checks if the index value satisfies the LIKE condition +type indexLikeMatcher struct { + hasPrefix bool + hasSuffix bool + startAndEnd []string + isLike bool + isCaseInsensitive bool + value string +} + +func newLikeIndexCmp(filterValue string, isLike bool, isCaseInsensitive bool) (*indexLikeMatcher, error) { + matcher := &indexLikeMatcher{ + isLike: isLike, + isCaseInsensitive: isCaseInsensitive, + } + if len(filterValue) >= 2 { + if filterValue[0] == '%' { + matcher.hasPrefix = true + filterValue = strings.TrimPrefix(filterValue, "%") + } + if filterValue[len(filterValue)-1] == '%' { + matcher.hasSuffix = true + filterValue = strings.TrimSuffix(filterValue, "%") + } + if !matcher.hasPrefix && !matcher.hasSuffix { + matcher.startAndEnd = strings.Split(filterValue, "%") + } + } + if isCaseInsensitive { + matcher.value = strings.ToLower(filterValue) + } else { + matcher.value = filterValue + } + + return matcher, nil +} + +func (m *indexLikeMatcher) Match(value client.NormalValue) (bool, error) { + strVal, ok := value.String() + if !ok { + strOptVal, ok := value.NillableString() + if !ok { + return false, NewErrUnexpectedTypeValue[string](value) + } + if !strOptVal.HasValue() { + return false, nil + } + strVal = strOptVal.Value() + } + if m.isCaseInsensitive { + strVal = strings.ToLower(strVal) + } + + return m.doesMatch(strVal) == m.isLike, nil +} + +func (m *indexLikeMatcher) doesMatch(currentVal string) bool { + switch { + case m.hasPrefix && m.hasSuffix: + return strings.Contains(currentVal, m.value) + case m.hasPrefix: + return strings.HasSuffix(currentVal, m.value) + case m.hasSuffix: + return 
strings.HasPrefix(currentVal, m.value) + // there might be 2 ends only for LIKE with 1 % in the middle "ab%cd" + case len(m.startAndEnd) == 2: + return strings.HasPrefix(currentVal, m.startAndEnd[0]) && + strings.HasSuffix(currentVal, m.startAndEnd[1]) + default: + return m.value == currentVal + } +} + +type anyMatcher struct{} + +func (m *anyMatcher) Match(client.NormalValue) (bool, error) { return true, nil } + +// invertedMatcher inverts the result of the inner matcher. +type invertedMatcher struct { + matcher valueMatcher +} + +func (m *invertedMatcher) Match(val client.NormalValue) (bool, error) { + res, err := m.matcher.Match(val) + if err != nil { + return false, err + } + return !res, nil +} + +type jsonMatcher struct { + value float64 + evalFunc func(float64, float64) bool +} + +func (m *jsonMatcher) Match(value client.NormalValue) (bool, error) { + if jsonVal, ok := value.JSON(); ok { + if floatVal, ok := jsonVal.Number(); ok { + return m.evalFunc(floatVal, m.value), nil + } + } + return false, NewErrUnexpectedTypeValue[float64](value) +} + +func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { + if condition.op == "" { + return &anyMatcher{}, nil + } + + // TODO: test json null + if condition.val.IsNil() { + return &nilMatcher{matchNil: condition.op == opEq}, nil + } + + switch condition.op { + case opEq, opGt, opGe, opLt, opLe, opNe: + if v, ok := condition.val.Int(); ok { + return &intMatcher{value: v, evalFunc: getCompareValsFunc[int64](condition.op)}, nil + } + if v, ok := condition.val.Float(); ok { + return &floatMatcher{value: v, evalFunc: getCompareValsFunc[float64](condition.op)}, nil + } + if v, ok := condition.val.String(); ok { + return &stringMatcher{value: v, evalFunc: getCompareValsFunc[string](condition.op)}, nil + } + if v, ok := condition.val.Time(); ok { + return &timeMatcher{value: v, op: condition.op}, nil + } + if v, ok := condition.val.Bool(); ok { + return &boolMatcher{value: v, isEq: condition.op == opEq}, 
nil + } + if v, ok := condition.val.JSON(); ok { + if jsonVal, ok := v.Number(); ok { + return &jsonMatcher{value: jsonVal, evalFunc: getCompareValsFunc[float64](condition.op)}, nil + } + } + case opIn, opNin: + inVals, err := client.ToArrayOfNormalValues(condition.val) + if err != nil { + return nil, err + } + return &indexInArrayMatcher{inValues: inVals, isIn: condition.op == opIn}, nil + case opLike, opNlike, opILike, opNILike: + strVal, ok := condition.val.String() + if !ok { + strOptVal, ok := condition.val.NillableString() + if !ok { + return nil, NewErrUnexpectedTypeValue[string](condition.val) + } + strVal = strOptVal.Value() + } + isLike := condition.op == opLike || condition.op == opILike + isCaseInsensitive := condition.op == opILike || condition.op == opNILike + return newLikeIndexCmp(strVal, isLike, isCaseInsensitive) + case opAny: + return &anyMatcher{}, nil + } + + return nil, NewErrInvalidFilterOperator(condition.op) +} + +func createValueMatchers(conditions []fieldFilterCond) ([]valueMatcher, error) { + matchers := make([]valueMatcher, 0, len(conditions)) + for i := range conditions { + m, err := createValueMatcher(&conditions[i]) + if err != nil { + return nil, err + } + matchers = append(matchers, m) + } + return matchers, nil +} + +func getCompareValsFunc[T cmp.Ordered](op string) func(T, T) bool { + switch op { + case opGt: + return checkGT + case opGe: + return checkGE + case opLt: + return checkLT + case opLe: + return checkLE + case opEq: + return checkEQ + case opNe: + return checkNE + } + return nil +} + +func checkGE[T cmp.Ordered](a, b T) bool { return a >= b } +func checkGT[T cmp.Ordered](a, b T) bool { return a > b } +func checkLE[T cmp.Ordered](a, b T) bool { return a <= b } +func checkLT[T cmp.Ordered](a, b T) bool { return a < b } +func checkEQ[T cmp.Ordered](a, b T) bool { return a == b } +func checkNE[T cmp.Ordered](a, b T) bool { return a != b } diff --git a/tests/integration/index/json_test.go 
b/tests/integration/index/json_test.go index 025ad3fab1..f3c9f4d989 100644 --- a/tests/integration/index/json_test.go +++ b/tests/integration/index/json_test.go @@ -16,7 +16,7 @@ import ( testUtils "github.com/sourcenetwork/defradb/tests/integration" ) -func TestJSONIndex_WithFilterOnIndexedJSON_ShouldUseIndex(t *testing.T) { +func TestJSONIndex_WithFilterOnNumberField_ShouldUseIndex(t *testing.T) { req := `query { User(filter: {custom: {height: {_eq: 168}}}) { name @@ -37,6 +37,12 @@ func TestJSONIndex_WithFilterOnIndexedJSON_ShouldUseIndex(t *testing.T) { "custom": {"height": 168, "weight": 70} }`, }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 168} + }`, + }, testUtils.CreateDoc{ Doc: `{ "name": "Shahzad", @@ -53,15 +59,14 @@ func TestJSONIndex_WithFilterOnIndexedJSON_ShouldUseIndex(t *testing.T) { Request: req, Results: map[string]any{ "User": []map[string]any{ - { - "name": "John", - }, + {"name": "John"}, + {"name": "Islam"}, }, }, }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), }, }, } From 92f7958ee30a558b14983f24eaa01f4a2ed93b4a Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sun, 1 Dec 2024 13:05:22 +0100 Subject: [PATCH 09/46] Make index work for bool and string --- internal/db/fetcher/indexer_matchers.go | 47 ++++++++-- tests/integration/index/json_test.go | 116 ++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 6 deletions(-) diff --git a/internal/db/fetcher/indexer_matchers.go b/internal/db/fetcher/indexer_matchers.go index cf4138fe4a..c10d3bfa37 100644 --- a/internal/db/fetcher/indexer_matchers.go +++ b/internal/db/fetcher/indexer_matchers.go @@ -133,6 +133,7 @@ func (m *boolMatcher) Match(value client.NormalValue) (bool, error) { if boolOptVal, ok := value.NillableBool(); ok { boolVal = boolOptVal.Value() } else { + // TODO: check is 
this is still needed after encoding of bool changed intVal, ok := value.Int() if !ok { if intOptVal, ok := value.NillableInt(); ok { @@ -260,20 +261,39 @@ func (m *invertedMatcher) Match(val client.NormalValue) (bool, error) { return !res, nil } -type jsonMatcher struct { - value float64 - evalFunc func(float64, float64) bool +type jsonComparableMatcher[T comparable] struct { + value T + getValueFunc func(client.JSON) (T, bool) + evalFunc func(T, T) bool } -func (m *jsonMatcher) Match(value client.NormalValue) (bool, error) { +func (m *jsonComparableMatcher[T]) Match(value client.NormalValue) (bool, error) { if jsonVal, ok := value.JSON(); ok { - if floatVal, ok := jsonVal.Number(); ok { + if floatVal, ok := m.getValueFunc(jsonVal); ok { return m.evalFunc(floatVal, m.value), nil } } return false, NewErrUnexpectedTypeValue[float64](value) } +type jsonBoolMatcher struct { + value bool + isEq bool +} + +func (m *jsonBoolMatcher) Match(value client.NormalValue) (bool, error) { + if jsonVal, ok := value.JSON(); ok { + boolVal, ok := jsonVal.Bool() + if ok { + return boolVal == m.value == m.isEq, nil + } + // TODO: test json null, or other types + //return false, nil + return true, nil + } + return false, NewErrUnexpectedTypeValue[bool](value) +} + func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { if condition.op == "" { return &anyMatcher{}, nil @@ -303,7 +323,22 @@ func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { } if v, ok := condition.val.JSON(); ok { if jsonVal, ok := v.Number(); ok { - return &jsonMatcher{value: jsonVal, evalFunc: getCompareValsFunc[float64](condition.op)}, nil + return &jsonComparableMatcher[float64]{ + value: jsonVal, + getValueFunc: func(j client.JSON) (float64, bool) { return j.Number() }, + evalFunc: getCompareValsFunc[float64](condition.op), + }, nil + } + if jsonVal, ok := v.String(); ok { + return &jsonComparableMatcher[string]{ + value: jsonVal, + getValueFunc: func(j client.JSON) 
(string, bool) { return j.String() }, + evalFunc: getCompareValsFunc[string](condition.op), + }, nil + } + if jsonVal, ok := v.Bool(); ok { + // TODO: test bool not equal + return &jsonBoolMatcher{value: jsonVal, isEq: condition.op == opEq}, nil } } case opIn, opNin: diff --git a/tests/integration/index/json_test.go b/tests/integration/index/json_test.go index f3c9f4d989..89a5a5bfeb 100644 --- a/tests/integration/index/json_test.go +++ b/tests/integration/index/json_test.go @@ -73,3 +73,119 @@ func TestJSONIndex_WithFilterOnNumberField_ShouldUseIndex(t *testing.T) { testUtils.ExecuteTestCase(t, test) } + +func TestJSONIndex_WithFilterOnStringField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {title: {_eq: "Mr"}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"title": "Mr", "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"title": "Mr"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"title": "Dr"} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + {"name": "Islam"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithFilterOnBoolField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {isStudent: {_eq: true}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + 
"name": "John", + "custom": {"isStudent": true, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"isStudent": true} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"isStudent": false} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} From 516d290fa6b4786e6d54b5f2360cb1fdd49b1d4b Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sun, 1 Dec 2024 16:14:37 +0100 Subject: [PATCH 10/46] Add filter by json null value --- internal/db/fetcher/indexer_iterators.go | 3 +- internal/db/fetcher/indexer_matchers.go | 24 +++-- tests/integration/index/json_test.go | 107 +++++++++++++++++++++++ 3 files changed, 127 insertions(+), 7 deletions(-) diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index a5a041aaa7..4f583163a5 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -446,8 +446,7 @@ func (f *IndexFetcher) newInIndexIterator( } func (f *IndexFetcher) newIndexDataStoreKey() keys.IndexDataStoreKey { - key := keys.IndexDataStoreKey{CollectionID: f.col.ID(), IndexID: f.indexDesc.ID} - return key + return keys.IndexDataStoreKey{CollectionID: f.col.ID(), IndexID: f.indexDesc.ID} } func (f *IndexFetcher) newIndexDataStoreKeyWithValues(values []client.NormalValue) keys.IndexDataStoreKey { diff --git a/internal/db/fetcher/indexer_matchers.go b/internal/db/fetcher/indexer_matchers.go index c10d3bfa37..cfff645e16 100644 --- a/internal/db/fetcher/indexer_matchers.go +++ b/internal/db/fetcher/indexer_matchers.go @@ -269,9 
+269,10 @@ type jsonComparableMatcher[T comparable] struct { func (m *jsonComparableMatcher[T]) Match(value client.NormalValue) (bool, error) { if jsonVal, ok := value.JSON(); ok { - if floatVal, ok := m.getValueFunc(jsonVal); ok { - return m.evalFunc(floatVal, m.value), nil + if val, ok := m.getValueFunc(jsonVal); ok { + return m.evalFunc(val, m.value), nil } + return false, nil } return false, NewErrUnexpectedTypeValue[float64](value) } @@ -287,13 +288,23 @@ func (m *jsonBoolMatcher) Match(value client.NormalValue) (bool, error) { if ok { return boolVal == m.value == m.isEq, nil } - // TODO: test json null, or other types - //return false, nil - return true, nil + return false, nil } return false, NewErrUnexpectedTypeValue[bool](value) } +type jsonNullMatcher struct { + // TODO: _ne null is not handled yet + matchNull bool +} + +func (m *jsonNullMatcher) Match(value client.NormalValue) (bool, error) { + if jsonVal, ok := value.JSON(); ok { + return jsonVal.IsNull() == m.matchNull, nil + } + return false, NewErrUnexpectedTypeValue[any](value) +} + func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { if condition.op == "" { return &anyMatcher{}, nil @@ -340,6 +351,9 @@ func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { // TODO: test bool not equal return &jsonBoolMatcher{value: jsonVal, isEq: condition.op == opEq}, nil } + if v.IsNull() { + return &jsonNullMatcher{matchNull: condition.op == opEq}, nil + } } case opIn, opNin: inVals, err := client.ToArrayOfNormalValues(condition.val) diff --git a/tests/integration/index/json_test.go b/tests/integration/index/json_test.go index 89a5a5bfeb..7d4fafb4a5 100644 --- a/tests/integration/index/json_test.go +++ b/tests/integration/index/json_test.go @@ -49,6 +49,18 @@ func TestJSONIndex_WithFilterOnNumberField_ShouldUseIndex(t *testing.T) { "custom": {"weight": 80, "BMI": 25} }`, }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": "168 cm"} + }`, + }, 
+ testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"height": null} + }`, + }, testUtils.CreateDoc{ Doc: `{ "name": "Andy", @@ -107,6 +119,18 @@ func TestJSONIndex_WithFilterOnStringField_ShouldUseIndex(t *testing.T) { "custom": {"weight": 80, "BMI": 25} }`, }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"title": 7} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"title": null} + }`, + }, testUtils.CreateDoc{ Doc: `{ "name": "Andy", @@ -165,6 +189,18 @@ func TestJSONIndex_WithFilterOnBoolField_ShouldUseIndex(t *testing.T) { "custom": {"weight": 80, "BMI": 25} }`, }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"isStudent": "very much true"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"isStudent": null} + }`, + }, testUtils.CreateDoc{ Doc: `{ "name": "Andy", @@ -189,3 +225,74 @@ func TestJSONIndex_WithFilterOnBoolField_ShouldUseIndex(t *testing.T) { testUtils.ExecuteTestCase(t, test) } + +func TestJSONIndex_WithEqFilterOnNullField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {title: {_eq: null}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"title": null, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"title": null} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"title": "null"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"title": 0} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"title": "Dr"} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": 
"John"}, + {"name": "Islam"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + From af5eba2aa74eb7580b51acadffde855e4b1b01e7 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sun, 1 Dec 2024 18:51:59 +0100 Subject: [PATCH 11/46] Add MD file for secondary indexes --- client/secondary_indexes.md | 70 +++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 client/secondary_indexes.md diff --git a/client/secondary_indexes.md b/client/secondary_indexes.md new file mode 100644 index 0000000000..91628e3eda --- /dev/null +++ b/client/secondary_indexes.md @@ -0,0 +1,70 @@ +# Secondary Indexing in DefraDB + +DefraDB provides a powerful and flexible secondary indexing system that enables efficient document lookups and queries. This document explains the architecture, implementation details, and usage patterns of the indexing system. + +## Overview + +The indexing system consists of two main components. The first is index storage, which handles storing and maintaining index information. The second is index-based document fetching, which manages retrieving documents using these indexes. Together, these components provide a robust foundation for efficient data access patterns. + +## Index Storage + +### Core Types + +The indexing system is built around several key types that define how indexes are structured and managed. At its heart is the IndexedFieldDescription, which describes a single field being indexed, including its name and whether it should be ordered in descending order. These field descriptions are combined into an IndexDescription, which provides a complete picture of an index including its name, ID, fields, and whether it enforces uniqueness. 
+ +```go +type IndexedFieldDescription struct { + Name string // Field name being indexed + Descending bool // Whether field is indexed in descending order +} + +type IndexDescription struct { + Name string // Index name + ID uint32 // Local index identifier + Fields []IndexedFieldDescription // Fields being indexed + Unique bool // Whether index enforces uniqueness +} +``` + +The CollectionIndex interface ties everything together by defining the core operations that any index must support. This interface is implemented by different index types such as regular indexes, unique indexes, and array indexes, allowing each to provide specific behaviors while maintaining a consistent interface. + +```go +type CollectionIndex interface { + Save(context.Context, datastore.Txn, *Document) error + Update(context.Context, datastore.Txn, *Document, *Document) error + Delete(context.Context, datastore.Txn, *Document) error + Name() string + Description() IndexDescription +} +``` + +### Key Structure + +Index keys in DefraDB follow a carefully designed format that enables efficient lookups and range scans. For regular indexes, the key format is: +`///../` -> empty value. +Unique indexes follow a similar pattern but store the document ID as the value instead: +`///..` -> ``. + +### Value Encoding + +While DefraDB primarily uses CBOR for encoding, the indexing system employs a custom encoding/decoding solution inspired by CockroachDB. This decision was made because CBOR doesn't guarantee ordering preservation, which is crucial for index functionality. Our custom encoding ensures that numeric values maintain their natural ordering, strings are properly collated, and complex types like arrays and objects have deterministic ordering. + +### Index Maintenance + +Index maintenance happens through three primary operations: document creation, updates, and deletion. 
When a new document is saved, the system indexes all configured fields, generating entries according to the key format and validating any unique constraints. During updates, the system carefully manages both the removal of old index entries and the creation of new ones, ensuring consistency through atomic transactions. For deletions, all associated index entries are cleaned up along with related metadata. + +## Index-Based Document Fetching + +The IndexFetcher is the cornerstone of document retrieval, orchestrating the process of fetching documents using indexes. It operates in two phases: first retrieving indexed fields (including document IDs), then using a standard fetcher to get any additional requested fields. + +For each query, the system creates specialized result iterators based on the document filter conditions. These iterators are smart about how they handle different types of operations. For simple equality comparisons (`_eq`) or membership tests (`_in`), the iterator can often directly build the exact keys needed. For range operations (`_gt`, `_le`, ...) or pattern matching (`_like`, ...), the system employs dedicated value matchers to validate the results. + +The performance characteristics of these operations vary. Direct match operations are typically the fastest as they can precisely target the needed keys. Range and pattern operations require more work as they must scan a range of keys and validate each result. The system is designed to minimize both key-value operations during mutations and memory usage during result streaming. + +Note: the index fetcher can not benefit at the moment from ordered indexes, as the underlying storage does not support such range queries yet. + +## Performance Considerations + +When working with indexes, it's important to understand their impact on system performance. Each index increases write amplification as every document modification must update all relevant indexes. 
However, this cost is often outweighed by the dramatic improvement in read performance for indexed queries. + +Index selection should be driven by your query patterns and data distribution. Indexing fields that are frequently used in query filters can significantly improve performance, but indexing rarely-queried fields only adds overhead. For unique indexes, the additional validation requirements make this trade-off even more important to consider. From 3dcb838c6761bc9a40d195174f44c4bff3eaee31 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 2 Dec 2024 08:28:31 +0100 Subject: [PATCH 12/46] Add note about indexing of related docs --- client/secondary_indexes.md | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/client/secondary_indexes.md b/client/secondary_indexes.md index 91628e3eda..ecc58f680d 100644 --- a/client/secondary_indexes.md +++ b/client/secondary_indexes.md @@ -68,3 +68,58 @@ Note: the index fetcher can not benefit at the moment from ordered indexes, as t When working with indexes, it's important to understand their impact on system performance. Each index increases write amplification as every document modification must update all relevant indexes. However, this cost is often outweighed by the dramatic improvement in read performance for indexed queries. Index selection should be driven by your query patterns and data distribution. Indexing fields that are frequently used in query filters can significantly improve performance, but indexing rarely-queried fields only adds overhead. For unique indexes, the additional validation requirements make this trade-off even more important to consider. + +## Indexing Related Objects + +DefraDB's indexing system provides powerful capabilities for handling relationships between documents. Let's explore how this works with a practical example. 
+ +Consider a schema defining a relationship between Users and Addresses: + +```graphql +type User { + name: String + age: Int + address: Address @primary @index +} + +type Address { + user: User + city: String @index + street: String +} +``` + +In this schema, we've defined two important indexes. First, we have an index on the `Address`'s city field, and second, we have an index on the relationship between `User` and `Address`. This setup enables extremely efficient querying across the relationship. For example: + +```graphql +query { + User(filter: { + address: {city: {_eq: "Montreal"}} + }) { + name + } +} +``` + +For requests on not indexed relations, the normal approach is from top to bottom, meaning that first all `User` documents are fetched and then for each `User` document the corresponding `Address` document is fetched. This can be very inefficient for large collections. +With indexing, we use so called inverted fetching, meaning that we first fetch the `Address` documents with the matching `city` value and then for each `Address` document the corresponding `User` document is fetched. This is much more efficient as we can use the index to directly fetch the `User` document. + +### Relationship Cardinality Through Indexes + +The indexing system also plays a crucial role in enforcing relationship cardinality. By marking an index as unique, you can enforce one-to-one relationships between documents. Here's how you would modify the schema to ensure each User has exactly one Address: + +```graphql +type User { + name: String + age: Int + address: Address @primary @index(unique: true) +} + +type Address { + user: User + city: String @index + street: String +} +``` + +The unique index constraint ensures that no two Users can reference the same Address document. Without the unique constraint, the relationship would be one-to-many by default, allowing multiple Users to reference the same Address. 
From e27d5db8b6d49ac5b5735b56fcd4ff37e3429370 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 2 Dec 2024 10:28:01 +0100 Subject: [PATCH 13/46] Add note about json indexing --- client/README.md | 2 ++ client/secondary_indexes.md | 70 +++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/client/README.md b/client/README.md index ec2cf7efcd..e89577c929 100644 --- a/client/README.md +++ b/client/README.md @@ -1,3 +1,5 @@ The `client` package is the primary access point for interacting with an embedded DefraDB instance. [Data definition overview](./data_definition.md) - How the shape of documents are defined and grouped. + +[Secondary indexes](./secondary_indexes.md) - How secondary indexes work in DefraDB and how to use them. \ No newline at end of file diff --git a/client/secondary_indexes.md b/client/secondary_indexes.md index ecc58f680d..4b345d5659 100644 --- a/client/secondary_indexes.md +++ b/client/secondary_indexes.md @@ -41,9 +41,13 @@ type CollectionIndex interface { ### Key Structure Index keys in DefraDB follow a carefully designed format that enables efficient lookups and range scans. For regular indexes, the key format is: -`///../` -> empty value. +``` +/(/)+/ -> empty value +``` Unique indexes follow a similar pattern but store the document ID as the value instead: -`///..` -> ``. +``` +/(/)+ -> +``` ### Value Encoding @@ -123,3 +127,65 @@ type Address { ``` The unique index constraint ensures that no two Users can reference the same Address document. Without the unique constraint, the relationship would be one-to-many by default, allowing multiple Users to reference the same Address. + +## JSON Field Indexing + +DefraDB implements a specialized indexing system for JSON fields that differs from how other field types are handled. While a document in DefraDB can contain various field types (Int, String, Bool, JSON, etc.), JSON fields require special treatment due to their hierarchical nature. 
+ +#### The JSON Interface + +The indexing system relies on the `JSON` interface defined in `client/json.go`. This interface is crucial for handling JSON fields as it enables traversal of all leaf nodes within a JSON document. A `JSON` value in DefraDB can represent either an entire JSON document or a single node within it. Each `JSON` value maintains its path information, which is essential for indexing. + +For example, given this JSON document: +```json +{ + "user": { + "device": { + "model": "iPhone" + } + } +} +``` + +The system can represent the "iPhone" value as a `JSON` type with its complete path `[]string{"user", "device", "model"}`. This path-aware representation is fundamental to how the indexing system works. + +#### Inverted Indexes for JSON + +For JSON fields, DefraDB uses inverted indexes with the following key format: +``` +/(/)+/ +``` + +The term "inverted" comes from how these indexes reverse the typical document-to-value relationship. Instead of starting with a document and finding its values, we start with a value and can quickly find all documents containing that value at any path. + +This approach differs from traditional secondary indexes in DefraDB. While regular fields map to single index entries, a JSON field generates multiple index entries - one for each leaf node in its structure. The system traverses the entire JSON structure during indexing, creating entries that combine the path and value information. + +#### Value Normalization and JSON + +The indexing system integrates with DefraDB's value normalization through `client.NormalValue`. While the encoding/decoding package handles scalar types directly, JSON values maintain additional path information. Each JSON node is encoded with both its normalized value and its path information, allowing the system to reconstruct the exact location of any value within the JSON structure. 
+ +Similar to how other field types are normalized (e.g., integers to int64), JSON leaf values are normalized based on their type before being included in the index. This ensures consistent ordering and comparison operations. + +#### Integration with Index Infrastructure + +When a document with a JSON field is indexed, the system: +1. Uses the JSON interface to traverse the document structure +2. Creates index entries for each leaf node, combining path information with normalized values +3. Maintains all entries in a way that enables efficient querying at any depth + +This implementation enables efficient queries like: +```graphql +query { + Collection(filter: { + jsonField: { + user: { + device: { + model: {_eq: "iPhone"} + } + } + } + }) +} +``` + +The system can directly look up matching documents using the index entries, avoiding the need to scan and parse JSON content during query execution. \ No newline at end of file From 7e00694c7640191e03f8efe41e50c31de23a3a9e Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Tue, 3 Dec 2024 12:26:46 +0100 Subject: [PATCH 14/46] Enable filtering by json bool and string --- client/json.go | 12 + internal/db/fetcher/indexer_iterators.go | 10 +- internal/db/fetcher/indexer_matchers.go | 47 +- internal/encoding/json.go | 12 +- internal/encoding/json_test.go | 27 +- tests/integration/index/json_test.go | 661 ++++++++++++++++++++++- 6 files changed, 737 insertions(+), 32 deletions(-) diff --git a/client/json.go b/client/json.go index dd3d427c8d..0f9cbe0e6e 100644 --- a/client/json.go +++ b/client/json.go @@ -65,6 +65,14 @@ type JSON interface { accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error } +// MakeVoidJSON creates a JSON value that represents a void value with just a path. +// This is necessary purely for creating a json path prefix for storage queries. +// All other json values will be encoded with some value after the path which makes +// them unsuitable to build a path prefix. 
+func MakeVoidJSON(path []string) JSON { + return jsonBase[any]{path: path} +} + // TraverseJSON traverses a JSON value and calls the visitor function for each node. // opts controls how the traversal is performed. func TraverseJSON(j JSON, visitor JSONVisitor, opts ...traverseJSONOption) error { @@ -155,6 +163,10 @@ func (v jsonVoid) IsNull() bool { return false } +func (v jsonVoid) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { + return nil +} + type jsonBase[T any] struct { jsonVoid val T diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 4f583163a5..dfaf01f278 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -509,7 +509,15 @@ func (f *IndexFetcher) createIndexIterator() (indexIterator, error) { } else if fieldConditions[0].op == opIn && fieldConditions[0].arrOp != compOpNone { iter, err = f.newInIndexIterator(fieldConditions, matchers) } else { - iter, err = f.newPrefixIterator(f.newIndexDataStoreKey(), matchers, &f.execInfo), nil + key := f.newIndexDataStoreKey() + // TODO: can we test fieldConditions[not 0]? 
+ if fieldConditions[0].kind == client.FieldKind_NILLABLE_JSON { + key.Fields = []keys.IndexedField{{ + Descending: f.indexDesc.Fields[0].Descending, + Value: client.NewNormalJSON(client.MakeVoidJSON(fieldConditions[0].jsonPath)), + }} + } + iter, err = f.newPrefixIterator(key, matchers, &f.execInfo), nil } if err != nil { diff --git a/internal/db/fetcher/indexer_matchers.go b/internal/db/fetcher/indexer_matchers.go index cfff645e16..20c647e591 100644 --- a/internal/db/fetcher/indexer_matchers.go +++ b/internal/db/fetcher/indexer_matchers.go @@ -208,17 +208,19 @@ func newLikeIndexCmp(filterValue string, isLike bool, isCaseInsensitive bool) (* return matcher, nil } -func (m *indexLikeMatcher) Match(value client.NormalValue) (bool, error) { - strVal, ok := value.String() +func (m *indexLikeMatcher) Match(val client.NormalValue) (bool, error) { + strVal, ok := val.String() if !ok { - strOptVal, ok := value.NillableString() - if !ok { - return false, NewErrUnexpectedTypeValue[string](value) - } - if !strOptVal.HasValue() { - return false, nil + if strOptVal, ok := val.NillableString(); ok { + strVal = strOptVal.Value() + } else if jsonVal, ok := val.JSON(); ok { + strVal, ok = jsonVal.String() + if !ok { + return false, nil + } + } else { + return false, NewErrUnexpectedTypeValue[string](val) } - strVal = strOptVal.Value() } if m.isCaseInsensitive { strVal = strings.ToLower(strVal) @@ -362,13 +364,9 @@ func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { } return &indexInArrayMatcher{inValues: inVals, isIn: condition.op == opIn}, nil case opLike, opNlike, opILike, opNILike: - strVal, ok := condition.val.String() - if !ok { - strOptVal, ok := condition.val.NillableString() - if !ok { - return nil, NewErrUnexpectedTypeValue[string](condition.val) - } - strVal = strOptVal.Value() + strVal, err := extractStringFromNormalValue(condition.val) + if err != nil { + return nil, err } isLike := condition.op == opLike || condition.op == opILike 
isCaseInsensitive := condition.op == opILike || condition.op == opNILike @@ -380,6 +378,23 @@ func createValueMatcher(condition *fieldFilterCond) (valueMatcher, error) { return nil, NewErrInvalidFilterOperator(condition.op) } +func extractStringFromNormalValue(val client.NormalValue) (string, error) { + strVal, ok := val.String() + if !ok { + if strOptVal, ok := val.NillableString(); ok { + strVal = strOptVal.Value() + } else if jsonVal, ok := val.JSON(); ok { + strVal, ok = jsonVal.String() + if !ok { + return "", NewErrUnexpectedTypeValue[string](jsonVal) + } + } else { + return "", NewErrUnexpectedTypeValue[string](val) + } + } + return strVal, nil +} + func createValueMatchers(conditions []fieldFilterCond) ([]valueMatcher, error) { matchers := make([]valueMatcher, 0, len(conditions)) for i := range conditions { diff --git a/internal/encoding/json.go b/internal/encoding/json.go index b66164d2d0..82014b6f15 100644 --- a/internal/encoding/json.go +++ b/internal/encoding/json.go @@ -12,10 +12,14 @@ package encoding import "github.com/sourcenetwork/defradb/client" +const jsonPathEnd = '/' + // EncodeJSONAscending encodes a JSON value in ascending order. 
func EncodeJSONAscending(b []byte, v client.JSON) []byte { b = encodeJSONPath(b, v) + b = append(b, jsonPathEnd) + if str, ok := v.String(); ok { b = EncodeStringAscending(b, str) } else if num, ok := v.Number(); ok { @@ -24,8 +28,6 @@ func EncodeJSONAscending(b []byte, v client.JSON) []byte { b = EncodeBoolAscending(b, boolVal) } else if v.IsNull() { b = EncodeNullAscending(b) - } else { - return nil } return b @@ -35,6 +37,8 @@ func EncodeJSONAscending(b []byte, v client.JSON) []byte { func EncodeJSONDescending(b []byte, v client.JSON) []byte { b = encodeJSONPath(b, v) + b = append(b, jsonPathEnd) + if str, ok := v.String(); ok { b = EncodeStringDescending(b, str) } else if num, ok := v.Number(); ok { @@ -43,8 +47,6 @@ func EncodeJSONDescending(b []byte, v client.JSON) []byte { b = EncodeBoolDescending(b, boolVal) } else if v.IsNull() { b = EncodeNullDescending(b) - } else { - return nil } return b @@ -71,6 +73,8 @@ func decodeJSON(b []byte, ascending bool) ([]byte, client.JSON, error) { return b, nil, err } + b = b[1:] // Skip the path end marker + var jsonValue any switch PeekType(b) { diff --git a/internal/encoding/json_test.go b/internal/encoding/json_test.go index c27d22c09d..0fa094a452 100644 --- a/internal/encoding/json_test.go +++ b/internal/encoding/json_test.go @@ -92,7 +92,24 @@ func TestJSONEncodingAndDecoding_ShouldEncodeAndDecodeBack(t *testing.T) { } } +func TestJSONEncodingDecoding_WithVoidValue_ShouldEncodeAndDecodeOnlyPath(t *testing.T) { + void := client.MakeVoidJSON([]string{"path", "to", "void"}) + encoded := EncodeJSONAscending(nil, void) + + remaining, decodedPath, err := decodeJSONPath(encoded[1:]) // skip the marker + require.NoError(t, err) + assert.Len(t, remaining, 1) // The path is followed by a separator + assert.Equal(t, void.GetPath(), decodedPath) + + remaining, decoded, err := DecodeJSONAscending(encoded) + require.Error(t, err) + assert.Empty(t, remaining) + assert.Nil(t, decoded) +} + func TestJSONDecoding_MalformedData(t 
*testing.T) { + term := ascendingBytesEscapes.escapedTerm + tests := []struct { name string input []byte @@ -106,27 +123,27 @@ func TestJSONDecoding_MalformedData(t *testing.T) { }, { name: "malformed json num", - input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, floatPos, 0xFF, 0xFF, 0xFF}, + input: []byte{jsonMarker, term, jsonPathEnd, floatPos, 0xFF, 0xFF, 0xFF}, ascending: true, }, { name: "malformed json num", - input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, floatPos, 0xFF, 0xFF, 0xFF}, + input: []byte{jsonMarker, term, jsonPathEnd, floatPos, 0xFF, 0xFF, 0xFF}, ascending: false, }, { name: "malformed json num", - input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, bytesMarker, 0xFF, 0xFF, 0xFF}, + input: []byte{jsonMarker, term, jsonPathEnd, bytesMarker, 0xFF, 0xFF, 0xFF}, ascending: true, }, { name: "malformed json num", - input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, bytesDescMarker, 0xFF, 0xFF, 0xFF}, + input: []byte{jsonMarker, term, jsonPathEnd, bytesDescMarker, 0xFF, 0xFF, 0xFF}, ascending: false, }, { name: "wrong type marker", - input: []byte{jsonMarker, ascendingBytesEscapes.escapedTerm, timeMarker, 0xFF, 0xFF, 0xFF}, + input: []byte{jsonMarker, term, jsonPathEnd, timeMarker, 0xFF, 0xFF, 0xFF}, ascending: true, }, } diff --git a/tests/integration/index/json_test.go b/tests/integration/index/json_test.go index 7d4fafb4a5..ba4c60c820 100644 --- a/tests/integration/index/json_test.go +++ b/tests/integration/index/json_test.go @@ -86,7 +86,355 @@ func TestJSONIndex_WithFilterOnNumberField_ShouldUseIndex(t *testing.T) { testUtils.ExecuteTestCase(t, test) } -func TestJSONIndex_WithFilterOnStringField_ShouldUseIndex(t *testing.T) { +func TestJSONIndex_WithGtFilterOnNumberField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_gt: 178}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + 
custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 178} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": "168 cm"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"height": null} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"height": 190} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Andy"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithGeFilterOnNumberField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_ge: 178}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 178} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": "168 cm"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"height": null} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"height": 190} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + {"name": "Andy"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), 
+ Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithLtFilterOnNumberField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_lt: 178}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 178} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": "168 cm"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"height": null} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"height": 190} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithLeFilterOnNumberField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_le: 178}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 178} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": "168 
cm"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"height": null} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"height": 190} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + {"name": "Islam"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithNeFilterOnNumberField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_ne: 178}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 178} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": "168 cm"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"height": null} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"height": 190} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + {"name": "Andy"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithEqFilterOnStringField_ShouldUseIndex(t *testing.T) { req := `query { User(filter: {custom: {title: {_eq: "Mr"}}}) { name @@ -110,7 +458,82 @@ func TestJSONIndex_WithFilterOnStringField_ShouldUseIndex(t *testing.T) { testUtils.CreateDoc{ Doc: 
`{ "name": "Islam", - "custom": {"title": "Mr"} + "custom": {"title": "Mr"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"title": 7} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"title": null} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"title": "Dr"} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + {"name": "Islam"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithLikeFilterOnStringField_ShouldUseIndex(t *testing.T) { + likeReq := `query { + User(filter: {custom: {title: {_like: "D%"}}}) { + name + } + }` + ilikeReq := `query { + User(filter: {custom: {title: {_ilike: "D%"}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"title": "Mr", "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"title": "dr"} }`, }, testUtils.CreateDoc{ @@ -138,7 +561,94 @@ func TestJSONIndex_WithFilterOnStringField_ShouldUseIndex(t *testing.T) { }`, }, testUtils.Request{ - Request: req, + Request: likeReq, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Andy"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(likeReq), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(5), + }, + testUtils.Request{ + Request: ilikeReq, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Andy"}, + {"name": "Islam"}, + }, + }, + }, + testUtils.Request{ + 
Request: makeExplainQuery(ilikeReq), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(5), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithNLikeFilterOnStringField_ShouldUseIndex(t *testing.T) { + nlikeReq := `query { + User(filter: {custom: {title: {_nlike: "D%"}}}) { + name + } + }` + nilikeReq := `query { + User(filter: {custom: {title: {_nilike: "D%"}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"title": "Mr", "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"title": "dr"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"title": 7} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"title": null} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"title": "Dr"} + }`, + }, + testUtils.Request{ + Request: nlikeReq, Results: map[string]any{ "User": []map[string]any{ {"name": "John"}, @@ -147,8 +657,20 @@ func TestJSONIndex_WithFilterOnStringField_ShouldUseIndex(t *testing.T) { }, }, testUtils.Request{ - Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + Request: makeExplainQuery(nlikeReq), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(5), + }, + testUtils.Request{ + Request: nilikeReq, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(nilikeReq), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(5), }, }, } @@ -156,7 +678,7 @@ func 
TestJSONIndex_WithFilterOnStringField_ShouldUseIndex(t *testing.T) { testUtils.ExecuteTestCase(t, test) } -func TestJSONIndex_WithFilterOnBoolField_ShouldUseIndex(t *testing.T) { +func TestJSONIndex_WithEqFilterOnBoolField_ShouldUseIndex(t *testing.T) { req := `query { User(filter: {custom: {isStudent: {_eq: true}}}) { name @@ -226,6 +748,76 @@ func TestJSONIndex_WithFilterOnBoolField_ShouldUseIndex(t *testing.T) { testUtils.ExecuteTestCase(t, test) } +func TestJSONIndex_WithNeFilterOnBoolField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {isStudent: {_ne: false}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"isStudent": true, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"isStudent": true} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"isStudent": "very much true"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"isStudent": null} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"isStudent": false} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(5), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + func TestJSONIndex_WithEqFilterOnNullField_ShouldUseIndex(t *testing.T) { req := `query { User(filter: {custom: {title: {_eq: null}}}) { @@ -296,3 +888,60 @@ func TestJSONIndex_WithEqFilterOnNullField_ShouldUseIndex(t *testing.T) { testUtils.ExecuteTestCase(t, test) } +func 
TestJSONIndex_WithNotNeFilterOnNullField_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {title: {_ne: null}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"title": null, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"weight": 80, "BMI": 25} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"title": "null"} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": {"title": 0} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Bruno"}, + {"name": "Keenan"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(3), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} From 40341a078369e43c50922e8102aca8105642b95e Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Fri, 6 Dec 2024 13:39:05 +0100 Subject: [PATCH 15/46] Add unique json index --- internal/db/index.go | 172 ++++++----- internal/db/index_test.go | 73 ++++- internal/db/indexed_docs_test.go | 296 ++++++++++++++++--- tests/integration/index/array_unique_test.go | 2 +- tests/integration/index/json_test.go | 68 +++++ tests/integration/index/json_unique_test.go | 162 ++++++++++ 6 files changed, 649 insertions(+), 124 deletions(-) create mode 100644 tests/integration/index/json_unique_test.go diff --git a/internal/db/index.go b/internal/db/index.go index d8dc10284f..3d3b73e5e5 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -89,7 +89,11 @@ func NewCollectionIndex( return newCollectionArrayIndex(base), nil } } else if isJSON { - return newCollectionJSONIndex(base), nil + if desc.Unique { + return newCollectionJSONUniqueIndex(base), nil + } else { 
+ return newCollectionJSONIndex(base), nil + } } else if desc.Unique { return &collectionUniqueIndex{collectionBaseIndex: base}, nil } else { @@ -372,6 +376,28 @@ func validateUniqueKeyValue( return nil } +func addNewUniqueKey( + ctx context.Context, + txn datastore.Txn, + doc *client.Document, + key keys.IndexDataStoreKey, + fieldsDescs []client.SchemaFieldDescription, +) error { + key, val, err := makeUniqueKeyValueRecord(key, doc) + if err != nil { + return err + } + err = validateUniqueKeyValue(ctx, txn, key, val, doc, fieldsDescs) + if err != nil { + return err + } + err = txn.Datastore().Put(ctx, key.ToDS(), val) + if err != nil { + return NewErrFailedToStoreIndexedField(key.ToString(), err) + } + return nil +} + func (index *collectionUniqueIndex) Delete( ctx context.Context, txn datastore.Txn, @@ -679,7 +705,7 @@ func (index *collectionArrayUniqueIndex) Save( if !ok { break } - err := index.addNewUniqueKey(ctx, txn, doc, key) + err := addNewUniqueKey(ctx, txn, doc, key, index.fieldsDescs) if err != nil { return err } @@ -687,27 +713,6 @@ func (index *collectionArrayUniqueIndex) Save( return nil } -func (index *collectionArrayUniqueIndex) addNewUniqueKey( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, - key keys.IndexDataStoreKey, -) error { - key, val, err := makeUniqueKeyValueRecord(key, doc) - if err != nil { - return err - } - err = validateUniqueKeyValue(ctx, txn, key, val, doc, index.fieldsDescs) - if err != nil { - return err - } - err = txn.Datastore().Put(ctx, key.ToDS(), val) - if err != nil { - return NewErrFailedToStoreIndexedField(key.ToString(), err) - } - return nil -} - func (index *collectionArrayUniqueIndex) Update( ctx context.Context, txn datastore.Txn, @@ -720,7 +725,7 @@ func (index *collectionArrayUniqueIndex) Update( } for _, key := range newKeys { - err := index.addNewUniqueKey(ctx, txn, newDoc, key) + err := addNewUniqueKey(ctx, txn, newDoc, key, index.fieldsDescs) if err != nil { return err } @@ -770,23 +775,12 
@@ func newCollectionJSONBaseIndex(base collectionBaseIndex) collectionJSONBaseInde return ind } -type collectionJSONIndex struct { - collectionJSONBaseIndex -} - -var _ CollectionIndex = (*collectionJSONIndex)(nil) - -func newCollectionJSONIndex(base collectionBaseIndex) *collectionJSONIndex { - return &collectionJSONIndex{collectionJSONBaseIndex: newCollectionJSONBaseIndex(base)} -} - -// Save indexes a document by storing the indexed field value. -func (index *collectionJSONIndex) Save( - ctx context.Context, - txn datastore.Txn, +func (index *collectionJSONBaseIndex) traverseJSONNodes( doc *client.Document, + appendDocID bool, + f func(keys.IndexDataStoreKey) error, ) error { - key, err := index.getDocumentsIndexKey(doc, true) + key, err := index.getDocumentsIndexKey(doc, appendDocID) if err != nil { return err } @@ -804,64 +798,106 @@ func (index *collectionJSONIndex) Save( copy(leafKey.Fields, key.Fields) leafKey.Fields[jsonFieldIndex].Value = val - dsKey := leafKey.ToDS() - err = txn.Datastore().Put(ctx, dsKey, []byte{}) - if err != nil { - return NewErrFailedToStoreIndexedField(key.ToString(), err) - } - - return nil + return f(leafKey) }, client.TraverseJSONOnlyLeaves()) if err != nil { return err } - } return nil } +type collectionJSONIndex struct { + collectionJSONBaseIndex +} + +var _ CollectionIndex = (*collectionJSONIndex)(nil) + +func newCollectionJSONIndex(base collectionBaseIndex) *collectionJSONIndex { + return &collectionJSONIndex{collectionJSONBaseIndex: newCollectionJSONBaseIndex(base)} +} + +// Save indexes a document by storing the indexed field value. 
+func (index *collectionJSONIndex) Save( + ctx context.Context, + txn datastore.Txn, + doc *client.Document, +) error { + return index.traverseJSONNodes(doc, true, func(key keys.IndexDataStoreKey) error { + err := txn.Datastore().Put(ctx, key.ToDS(), []byte{}) + if err != nil { + return NewErrFailedToStoreIndexedField(key.ToString(), err) + } + + return nil + }) +} + func (index *collectionJSONIndex) Update( ctx context.Context, txn datastore.Txn, oldDoc *client.Document, newDoc *client.Document, ) error { - /*newKeys, err := index.deleteRetiredKeysAndReturnNew(ctx, txn, oldDoc, newDoc, true) + err := index.Delete(ctx, txn, oldDoc) if err != nil { return err } + return index.Save(ctx, txn, newDoc) +} - for _, key := range newKeys { - err = txn.Datastore().Put(ctx, key.ToDS(), []byte{}) - if err != nil { - return NewErrFailedToStoreIndexedField(key.ToString(), err) - } - }*/ +func (index *collectionJSONIndex) Delete( + ctx context.Context, + txn datastore.Txn, + doc *client.Document, +) error { + return index.traverseJSONNodes(doc, true, func(key keys.IndexDataStoreKey) error { + return index.deleteIndexKey(ctx, txn, key) + }) +} - return nil +type collectionJSONUniqueIndex struct { + collectionJSONBaseIndex } -func (index *collectionJSONIndex) Delete( +var _ CollectionIndex = (*collectionJSONUniqueIndex)(nil) + +func newCollectionJSONUniqueIndex(base collectionBaseIndex) *collectionJSONUniqueIndex { + return &collectionJSONUniqueIndex{collectionJSONBaseIndex: newCollectionJSONBaseIndex(base)} +} + +// Save indexes a document by storing the indexed field value. 
+func (index *collectionJSONUniqueIndex) Save( ctx context.Context, txn datastore.Txn, doc *client.Document, ) error { - /*getNextKey, err := index.newIndexKeyGenerator(doc, true) + return index.traverseJSONNodes(doc, false, func(key keys.IndexDataStoreKey) error { + return addNewUniqueKey(ctx, txn, doc, key, index.fieldsDescs) + }) +} + +func (index *collectionJSONUniqueIndex) Update( + ctx context.Context, + txn datastore.Txn, + oldDoc *client.Document, + newDoc *client.Document, +) error { + err := index.Delete(ctx, txn, oldDoc) if err != nil { return err } + return index.Save(ctx, txn, newDoc) +} - for { - key, ok := getNextKey() - if !ok { - break - } - err = index.deleteIndexKey(ctx, txn, key) - if err != nil { - return err - } - }*/ - return nil +func (index *collectionJSONUniqueIndex) Delete( + ctx context.Context, + txn datastore.Txn, + doc *client.Document, +) error { + return index.traverseJSONNodes(doc, false, func(key keys.IndexDataStoreKey) error { + return index.deleteIndexKey(ctx, txn, key) + }) } diff --git a/internal/db/index_test.go b/internal/db/index_test.go index 1582cdfcfe..d66fcfedf2 100644 --- a/internal/db/index_test.go +++ b/internal/db/index_test.go @@ -42,15 +42,18 @@ const ( usersWeightFieldName = "weight" usersNumbersFieldName = "numbers" usersHobbiesFieldName = "hobbies" + usersCustomFieldName = "custom" productsIDFieldName = "id" productsPriceFieldName = "price" productsCategoryFieldName = "category" productsAvailableFieldName = "available" - testUsersColIndexName = "user_name" - testUsersColIndexAge = "user_age" - testUsersColIndexWeight = "user_weight" + testUsersColIndexName = "user_name_index" + testUsersColIndexAge = "user_age_index" + testUsersColIndexWeight = "user_weight_index" + testUsersColIndexNumbers = "user_numbers_index" + testUsersColIndexCustom = "user_custom_index" ) type indexTestFixture struct { @@ -75,6 +78,7 @@ func (f *indexTestFixture) addUsersCollection() client.Collection { %s: Float %s: [Int!] %s: [String!] 
+ %s: JSON }`, usersColName, usersNameFieldName, @@ -82,6 +86,7 @@ func (f *indexTestFixture) addUsersCollection() client.Collection { usersWeightFieldName, usersNumbersFieldName, usersHobbiesFieldName, + usersCustomFieldName, ), ) require.NoError(f.t, err) @@ -194,7 +199,7 @@ func (f *indexTestFixture) createUserCollectionIndexOnName() client.IndexDescrip func (f *indexTestFixture) createUserCollectionIndexOnNumbers() client.IndexDescription { indexDesc := client.IndexDescriptionCreateRequest{ - Name: "users_numbers_index", + Name: testUsersColIndexNumbers, Fields: []client.IndexedFieldDescription{ {Name: usersNumbersFieldName}, }, @@ -206,6 +211,21 @@ func (f *indexTestFixture) createUserCollectionIndexOnNumbers() client.IndexDesc return newDesc } +func (f *indexTestFixture) createUserCollectionIndexOnCustom(unique bool) client.IndexDescription { + indexDesc := client.IndexDescriptionCreateRequest{ + Name: testUsersColIndexCustom, + Fields: []client.IndexedFieldDescription{ + {Name: usersCustomFieldName}, + }, + Unique: unique, + } + + newDesc, err := f.createCollectionIndexFor(f.users.Name().Value(), indexDesc) + require.NoError(f.t, err) + + return newDesc +} + func makeUnique(indexDesc client.IndexDescriptionCreateRequest) client.IndexDescriptionCreateRequest { indexDesc.Unique = true return indexDesc @@ -243,21 +263,54 @@ func (f *indexTestFixture) dropIndex(colName, indexName string) error { return f.db.dropCollectionIndex(ctx, colName, indexName) } -func (f *indexTestFixture) countIndexPrefixes(indexName string) int { +// countSystemIndexPrefixes returns the number of prefixes in the systemstore that match the given index name. 
+func (f *indexTestFixture) countSystemIndexPrefixes(indexName string) int { prefix := keys.NewCollectionIndexKey(immutable.Some(f.users.ID()), indexName) q, err := f.txn.Systemstore().Query(f.ctx, query.Query{ Prefix: prefix.ToString(), }) - assert.NoError(f.t, err) + require.NoError(f.t, err, "failed to query systemstore") + defer func() { + err := q.Close() + require.NoError(f.t, err, "failed to close query") + }() + + count := 0 + for res := range q.Next() { + if res.Error != nil { + require.NoError(f.t, res.Error, "failed to get next result") + } + count++ + } + return count +} + +// countIndexPrefixes returns the number of prefixes in the datastore that match the given index name. +func (f *indexTestFixture) countIndexPrefixes(indexName string) int { + indexes, err := f.users.GetIndexes(f.ctx) + require.NoError(f.t, err, "failed to get indexes") + + key := keys.NewIndexDataStoreKey(f.users.ID(), 0, nil) + + for _, index := range indexes { + if index.Name == indexName { + key.IndexID = index.ID + } + } + + q, err := f.txn.Datastore().Query(f.ctx, query.Query{ + Prefix: key.ToString(), + }) + require.NoError(f.t, err, "failed to query datastore") defer func() { err := q.Close() - assert.NoError(f.t, err) + require.NoError(f.t, err, "failed to close query") }() count := 0 for res := range q.Next() { if res.Error != nil { - assert.NoError(f.t, err) + require.NoError(f.t, res.Error, "failed to get next result") } count++ } @@ -1129,13 +1182,13 @@ func TestDropAllIndexes_ShouldDeleteAllIndexes(t *testing.T) { }) assert.NoError(f.t, err) - assert.Equal(t, 2, f.countIndexPrefixes("")) + assert.Equal(t, 2, f.countSystemIndexPrefixes("")) ctx := SetContextTxn(f.ctx, f.txn) err = f.users.(*collection).dropAllIndexes(ctx) assert.NoError(t, err) - assert.Equal(t, 0, f.countIndexPrefixes("")) + assert.Equal(t, 0, f.countSystemIndexPrefixes("")) } func TestDropAllIndexes_IfStorageFails_ReturnError(t *testing.T) { diff --git a/internal/db/indexed_docs_test.go
b/internal/db/indexed_docs_test.go index 8f1c37a61e..138ee34a2d 100644 --- a/internal/db/indexed_docs_test.go +++ b/internal/db/indexed_docs_test.go @@ -36,11 +36,12 @@ import ( ) type userDoc struct { - Name string `json:"name"` - Age int `json:"age"` - Weight float64 `json:"weight"` - Numbers []int `json:"numbers"` - Hobbies []string `json:"hobbies"` + Name string `json:"name"` + Age int `json:"age"` + Weight float64 `json:"weight"` + Numbers []int `json:"numbers"` + Hobbies []string `json:"hobbies"` + Custom client.JSON `json:"custom"` } type productDoc struct { @@ -107,6 +108,7 @@ type indexKeyBuilder struct { doc *client.Document isUnique bool arrayFieldValues map[string]any + values []client.NormalValue } func newIndexKeyBuilder(f *indexTestFixture) *indexKeyBuilder { @@ -134,6 +136,11 @@ func (b *indexKeyBuilder) ArrayFieldVal(fieldName string, val any) *indexKeyBuil return b } +func (b *indexKeyBuilder) Values(values ...client.NormalValue) *indexKeyBuilder { + b.values = values + return b +} + // Fields sets the fields names for the index key. 
func (b *indexKeyBuilder) DescendingFields(descending ...bool) *indexKeyBuilder { b.descendingFields = descending @@ -162,6 +169,7 @@ func (b *indexKeyBuilder) Build() keys.IndexDataStoreKey { return key } + // First find that collection ctx := SetContextTxn(b.f.ctx, b.f.txn) cols, err := b.f.db.getCollections(ctx, client.CollectionFetchOptions{}) require.NoError(b.f.t, err) @@ -177,10 +185,12 @@ func (b *indexKeyBuilder) Build() keys.IndexDataStoreKey { } key.CollectionID = collection.ID() + // if no fields are set, return only key with the collection id if len(b.fieldsNames) == 0 { return key } + // find an index with matching fields indexes, err := collection.GetIndexes(b.f.ctx) require.NoError(b.f.t, err) indexLoop: @@ -198,48 +208,60 @@ indexLoop: if b.doc != nil { hasNilValue := false - for i, fieldName := range b.fieldsNames { - fieldValue, err := b.doc.GetValue(fieldName) - if err != nil { - if !errors.Is(err, client.ErrFieldNotExist) { - require.NoError(b.f.t, err) - } + // if values are passed manually, use them + if len(b.values) > 0 { + if len(b.fieldsNames) != len(b.values) { + panic(errors.New("fields names and values count mismatch")) } - var val client.NormalValue - if fieldValue != nil { - val = fieldValue.NormalValue() - } else { - kind := client.FieldKind_NILLABLE_STRING - if fieldName == usersAgeFieldName { - kind = client.FieldKind_NILLABLE_INT - } else if fieldName == usersWeightFieldName { - kind = client.FieldKind_NILLABLE_FLOAT - } - val, err = client.NewNormalNil(kind) - require.NoError(b.f.t, err) + for _, val := range b.values { + key.Fields = append(key.Fields, keys.IndexedField{Value: val}) + hasNilValue = hasNilValue || val.IsNil() } - if val.IsNil() { - hasNilValue = true - } else if val.IsArray() { - if arrVal, ok := b.arrayFieldValues[fieldName]; ok { - if normVal, ok := arrVal.(client.NormalValue); ok { - val = normVal - } else { - val, err = client.NewNormalValue(arrVal) - require.NoError(b.f.t, err, "given value is not a normal 
value") + } else { + // otherwise if doc is given, we retrieve the field values from the document + for i, fieldName := range b.fieldsNames { + fieldValue, err := b.doc.GetValue(fieldName) + if err != nil { + if !errors.Is(err, client.ErrFieldNotExist) { + require.NoError(b.f.t, err) } + } + var val client.NormalValue + if fieldValue != nil { + val = fieldValue.NormalValue() } else { - arrVals, err := client.ToArrayOfNormalValues(val) + kind := client.FieldKind_NILLABLE_STRING + if fieldName == usersAgeFieldName { + kind = client.FieldKind_NILLABLE_INT + } else if fieldName == usersWeightFieldName { + kind = client.FieldKind_NILLABLE_FLOAT + } + val, err = client.NewNormalNil(kind) require.NoError(b.f.t, err) - require.Greater(b.f.t, len(arrVals), 0, "empty array can not be indexed") - val = arrVals[0] } + if val.IsNil() { + hasNilValue = true + } else if val.IsArray() { + if arrVal, ok := b.arrayFieldValues[fieldName]; ok { + if normVal, ok := arrVal.(client.NormalValue); ok { + val = normVal + } else { + val, err = client.NewNormalValue(arrVal) + require.NoError(b.f.t, err, "given value is not a normal value") + } + } else { + arrVals, err := client.ToArrayOfNormalValues(val) + require.NoError(b.f.t, err) + require.Greater(b.f.t, len(arrVals), 0, "empty array can not be indexed") + val = arrVals[0] + } + } + descending := false + if i < len(b.descendingFields) { + descending = b.descendingFields[i] + } + key.Fields = append(key.Fields, keys.IndexedField{Value: val, Descending: descending}) } - descending := false - if i < len(b.descendingFields) { - descending = b.descendingFields[i] - } - key.Fields = append(key.Fields, keys.IndexedField{Value: val, Descending: descending}) } if !b.isUnique || hasNilValue { @@ -348,8 +370,7 @@ func TestNonUnique_IfDocIsDeleted_ShouldRemoveIndex(t *testing.T) { f.saveDocToCollection(doc, f.users) f.deleteDocFromCollection(doc.ID(), f.users) - userNameKey := 
newIndexKeyBuilder(f).Col(usersColName).Fields(usersNameFieldName).Build() - assert.Len(t, f.getPrefixFromDataStore(userNameKey.ToString()), 0) + assert.Equal(t, 0, f.countIndexPrefixes(testUsersColIndexName), "index prefixes count") } func TestNonUnique_IfDocWithDescendingOrderIsAdded_ShouldBeIndexed(t *testing.T) { @@ -388,9 +409,7 @@ func TestNonUnique_IfDocDoesNotHaveIndexedField_SkipIndex(t *testing.T) { err = f.users.Create(f.ctx, doc) require.NoError(f.t, err) - key := newIndexKeyBuilder(f).Col(usersColName).Build() - prefixes := f.getPrefixFromDataStore(key.ToString()) - assert.Len(t, prefixes, 0) + assert.Equal(t, 0, f.countIndexPrefixes(testUsersColIndexName), "index prefixes count") } func TestNonUnique_IfIndexIntField_StoreIt(t *testing.T) { @@ -1591,3 +1610,190 @@ func TestArrayIndex_WithUniqueIndexIfDocIsDeleted_ShouldRemoveIndex(t *testing.T assert.Len(t, f.getPrefixFromDataStore(userNumbersKey.ToString()), 0) } + +func TestJSONIndex_IfDocIsAdded_ShouldIndexAllJSONLeaves(t *testing.T) { + f := newIndexTestFixture(t) + defer f.db.Close() + + f.createUserCollectionIndexOnCustom(false) + + obj, err := client.NewJSONFromMap(map[string]any{"height": 180, "address": map[string]any{"city": "Munich"}}) + require.NoError(f.t, err) + + doc := f.newCustomUserDoc(userDoc{Name: "John", Custom: obj}, f.users) + f.saveDocToCollection(doc, f.users) + + err = client.TraverseJSON(obj, func(val client.JSON) error { + key := newIndexKeyBuilder(f).Col(usersColName).Fields(usersCustomFieldName). 
+ Values(client.NewNormalJSON(val)).Doc(doc).Build() + + data, err := f.txn.Datastore().Get(f.ctx, key.ToDS()) + require.NoError(t, err) + assert.Len(t, data, 0) + return nil + }, client.TraverseJSONOnlyLeaves()) + + require.NoError(f.t, err) + + require.Equal(t, 2, f.countIndexPrefixes(testUsersColIndexCustom), "Index prefixes count") +} + +func TestJSONIndex_IfDocIsDeleted_ShouldRemoveAllRelatedIndexes(t *testing.T) { + f := newIndexTestFixture(t) + defer f.db.Close() + + f.createUserCollectionIndexOnCustom(false) + + obj1, err := client.NewJSONFromMap(map[string]any{"height": 180, "address": map[string]any{"city": "Munich"}}) + require.NoError(f.t, err) + + obj2, err := client.NewJSONFromMap(map[string]any{"height": 178}) + require.NoError(f.t, err) + + doc1 := f.newCustomUserDoc(userDoc{Name: "John", Custom: obj1}, f.users) + f.saveDocToCollection(doc1, f.users) + + doc2 := f.newCustomUserDoc(userDoc{Name: "Andy", Custom: obj2}, f.users) + f.saveDocToCollection(doc2, f.users) + + require.Equal(t, 3, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes before delete") + + f.deleteDocFromCollection(doc1.ID(), f.users) + + require.Equal(t, 1, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes after delete") + + // make sure the second doc is still indexed + obj2Height, err := client.NewJSONWithPath(178, []string{"height"}) + require.NoError(t, err, "Failed to create JSON with path") + key2 := newIndexKeyBuilder(f).Col(usersColName).Fields(usersCustomFieldName). 
+ Values(client.NewNormalJSON(obj2Height)).Doc(doc2).Build() + + data, err := f.txn.Datastore().Get(f.ctx, key2.ToDS()) + assert.NoError(t, err, "The index for the second doc should still exist") + assert.Len(t, data, 0, "The value pointed to by the index should be empty") +} + +func TestJSONIndex_IfDocIsUpdated_ShouldCreateNewAndRemoveOldIndexes(t *testing.T) { + f := newIndexTestFixture(t) + defer f.db.Close() + + f.createUserCollectionIndexOnCustom(false) + + obj1, err := client.NewJSONFromMap(map[string]any{ + "weight": 70, + "address": map[string]any{"city": "Munich", "country": "Germany"}, + }) + require.NoError(f.t, err) + + doc := f.newCustomUserDoc(userDoc{Name: "John", Custom: obj1}, f.users) + f.saveDocToCollection(doc, f.users) + + require.Equal(t, 3, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes before update") + + obj2, err := client.NewJSONFromMap(map[string]any{ + "height": 178, + "BMI": 22, + "address": map[string]any{"city": "Berlin", "country": "Germany"}, + }) + require.NoError(f.t, err) + + err = doc.Set(usersCustomFieldName, obj2.Unwrap()) + require.NoError(t, err) + + err = f.users.Update(f.ctx, doc) + require.NoError(t, err) + + f.commitTxn() + + require.Equal(t, 4, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes after update") + + _ = client.TraverseJSON(obj2, func(val client.JSON) error { + key := newIndexKeyBuilder(f).Col(usersColName).Fields(usersCustomFieldName). 
+ Values(client.NewNormalJSON(val)).Doc(doc).Build() + + data, err := f.txn.Datastore().Get(f.ctx, key.ToDS()) + require.NoError(t, err, "Failed to get index data for JSON with path %s", val.GetPath()) + assert.Len(t, data, 0, "The value pointed to by the index should be empty") + return nil + }, client.TraverseJSONOnlyLeaves()) +} + +func TestJSONUniqueIndex_IfDocIsDeleted_ShouldRemoveAllRelatedIndexes(t *testing.T) { + f := newIndexTestFixture(t) + defer f.db.Close() + + f.createUserCollectionIndexOnCustom(true) + + obj1, err := client.NewJSONFromMap(map[string]any{"height": 180, "address": map[string]any{"city": "Munich"}}) + require.NoError(f.t, err) + + obj2, err := client.NewJSONFromMap(map[string]any{"height": 178}) + require.NoError(f.t, err) + + doc1 := f.newCustomUserDoc(userDoc{Name: "John", Custom: obj1}, f.users) + f.saveDocToCollection(doc1, f.users) + + doc2 := f.newCustomUserDoc(userDoc{Name: "Andy", Custom: obj2}, f.users) + f.saveDocToCollection(doc2, f.users) + + require.Equal(t, 3, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes before delete") + + f.deleteDocFromCollection(doc1.ID(), f.users) + + require.Equal(t, 1, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes after delete") + + // make sure the second doc is still indexed + obj2Height, err := client.NewJSONWithPath(178, []string{"height"}) + require.NoError(t, err, "Failed to create JSON with path") + key2 := newIndexKeyBuilder(f).Col(usersColName).Fields(usersCustomFieldName). 
+ Values(client.NewNormalJSON(obj2Height)).Unique().Doc(doc2).Build() + + data, err := f.txn.Datastore().Get(f.ctx, key2.ToDS()) + assert.NoError(t, err, "The index for the second doc should still exist") + assert.Equal(t, doc2.ID().String(), string(data), "The value pointed to by the unique index should be the doc ID") +} + +func TestJSONUniqueIndex_IfDocIsUpdated_ShouldCreateNewAndRemoveOldIndexes(t *testing.T) { + f := newIndexTestFixture(t) + defer f.db.Close() + + f.createUserCollectionIndexOnCustom(true) + + obj1, err := client.NewJSONFromMap(map[string]any{ + "weight": 70, + "address": map[string]any{"city": "Munich", "country": "Germany"}, + }) + require.NoError(f.t, err) + + doc := f.newCustomUserDoc(userDoc{Name: "John", Custom: obj1}, f.users) + f.saveDocToCollection(doc, f.users) + + require.Equal(t, 3, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes before update") + + obj2, err := client.NewJSONFromMap(map[string]any{ + "height": 178, + "BMI": 22, + "address": map[string]any{"city": "Berlin", "country": "Germany"}, + }) + require.NoError(f.t, err) + + err = doc.Set(usersCustomFieldName, obj2.Unwrap()) + require.NoError(t, err) + + err = f.users.Update(f.ctx, doc) + require.NoError(t, err) + + f.commitTxn() + + require.Equal(t, 4, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes after update") + + _ = client.TraverseJSON(obj2, func(val client.JSON) error { + key := newIndexKeyBuilder(f).Col(usersColName).Fields(usersCustomFieldName).
+ Values(client.NewNormalJSON(val)).Unique().Doc(doc).Build() + + data, err := f.txn.Datastore().Get(f.ctx, key.ToDS()) + require.NoError(t, err, "Failed to get index data for JSON with path %s", val.GetPath()) + assert.Equal(t, doc.ID().String(), string(data), "The value pointed to by the unique index should be the doc ID") + return nil + }, client.TraverseJSONOnlyLeaves()) +} diff --git a/tests/integration/index/array_unique_test.go b/tests/integration/index/array_unique_test.go index 4627595ad9..2ad9588371 100644 --- a/tests/integration/index/array_unique_test.go +++ b/tests/integration/index/array_unique_test.go @@ -18,7 +18,7 @@ import ( testUtils "github.com/sourcenetwork/defradb/tests/integration" ) -func TestArrayUniqueIndex_UponDocCreationWithArrayElementThatExists_Error(t *testing.T) { +func TestArrayUniqueIndex_UponDocCreationWithUniqueElement_Succeed(t *testing.T) { req := `query { User(filter: {nfts: {_any: {_eq: 30}}}) { name diff --git a/tests/integration/index/json_test.go b/tests/integration/index/json_test.go index ba4c60c820..68e4e01784 100644 --- a/tests/integration/index/json_test.go +++ b/tests/integration/index/json_test.go @@ -945,3 +945,71 @@ func TestJSONIndex_WithNotNeFilterOnNullField_ShouldUseIndex(t *testing.T) { testUtils.ExecuteTestCase(t, test) } + +func TestJSONIndex_UponUpdate_ShouldUseNewIndexValues(t *testing.T) { + req1 := `query { + User(filter: {custom: {height: {_eq: 172}}}) { + name + } + }` + req2 := `query { + User(filter: {custom: {BMI: {_eq: 22}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 180, "BMI": 25} + }`, + }, + testUtils.UpdateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 172, "BMI": 22} + }`, + }, +
testUtils.Request{ + Request: req1, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req1), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + }, + testUtils.Request{ + Request: req2, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req2), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} diff --git a/tests/integration/index/json_unique_test.go b/tests/integration/index/json_unique_test.go new file mode 100644 index 0000000000..062a9af2bc --- /dev/null +++ b/tests/integration/index/json_unique_test.go @@ -0,0 +1,162 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package index + +import ( + "testing" + + "github.com/sourcenetwork/defradb/errors" + "github.com/sourcenetwork/defradb/internal/db" + testUtils "github.com/sourcenetwork/defradb/tests/integration" +) + +func TestJSONUniqueIndex_WithRandomValues_ShouldGuaranteeUniquenessAndBeAbelToUseIndexForFetching(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_eq: 168}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index(unique: true) + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": {"height": 190} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 168} + }`, + ExpectedError: db.NewErrCanNotIndexNonUniqueFields( + "bae-0b423e0b-2c5d-566f-8266-91211353ab66", + errors.NewKV("custom", map[string]any{"height": float64(168)})).Error(), + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": 30 + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": 20 + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": 30 + }`, + ExpectedError: db.NewErrCanNotIndexNonUniqueFields( + "bae-67dd014b-4a26-55ab-a71d-fbd14a3fcecc", + errors.NewKV("custom", 30)).Error(), + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONUniqueIndex_UponUpdate_ShouldUseNewIndexValues(t *testing.T) { + req1 := `query { + User(filter: {custom: {height: {_eq: 172}}}) { + name + } + }` + req2 := `query { + User(filter: {custom: {BMI: {_eq: 22}}}) { + name + } + }` + test := testUtils.TestCase{ + 
Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index(unique: true) + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 180, "BMI": 25} + }`, + }, + testUtils.UpdateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 172, "BMI": 22} + }`, + }, + testUtils.Request{ + Request: req1, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req1), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + }, + testUtils.Request{ + Request: req2, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req2), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} From 61a7b90d99c640a2b1d6b8bcfa3e2045504c6d61 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sun, 8 Dec 2024 12:02:11 +0100 Subject: [PATCH 16/46] Filter by array elements --- internal/db/fetcher/indexer_iterators.go | 21 +- internal/db/index.go | 2 +- tests/integration/index/json_array_test.go | 239 ++++++++++++++++++ .../index/json_unique_array_test.go | 129 ++++++++++ 4 files changed, 385 insertions(+), 6 deletions(-) create mode 100644 tests/integration/index/json_array_test.go create mode 100644 tests/integration/index/json_unique_array_test.go diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index dfaf01f278..5e0d7d8eeb 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -530,9 +530,7 @@ func (f *IndexFetcher) createIndexIterator() (indexIterator, error) { if hasJSON { iter = &jsonIndexIterator{inner: iter, jsonPath: fieldConditions[0].jsonPath} 
- } - - if hasArray { + } else if hasArray { iter = &arrayIndexIterator{inner: iter} } @@ -595,7 +593,20 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro var err error if len(jsonPath) > 0 { - jsonVal, err := client.NewJSONWithPath(filterVal, jsonPath) + var jsonVal client.JSON + if cond.op == compOpAny || cond.op == compOpAll || cond.op == compOpNone { + subCondMap := filterVal.(map[connor.FilterKey]any) + for subKey, subVal := range subCondMap { + // TODO: check what happens with _any: {_eq: [1, 2]} + cond.arrOp = cond.op + cond.op = subKey.(*mapper.Operator).Operation + jsonVal, err = client.NewJSONWithPath(subVal, jsonPath) + // the sub condition is supposed to have only 1 record + break + } + } else { + jsonVal, err = client.NewJSONWithPath(filterVal, jsonPath) + } if err == nil { cond.val = client.NewNormalJSON(jsonVal) } @@ -606,8 +617,8 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro } else { subCondMap := filterVal.(map[connor.FilterKey]any) for subKey, subVal := range subCondMap { - arrKind := cond.kind.(client.ScalarArrayKind) if subVal == nil { + arrKind := cond.kind.(client.ScalarArrayKind) cond.val, err = client.NewNormalNil(arrKind.SubKind()) } else { cond.val, err = client.NewNormalValue(subVal) diff --git a/internal/db/index.go b/internal/db/index.go index 3d3b73e5e5..1271b26699 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -799,7 +799,7 @@ func (index *collectionJSONBaseIndex) traverseJSONNodes( leafKey.Fields[jsonFieldIndex].Value = val return f(leafKey) - }, client.TraverseJSONOnlyLeaves()) + }, client.TraverseJSONOnlyLeaves(), client.TraverseJSONVisitArrayElements()) if err != nil { return err diff --git a/tests/integration/index/json_array_test.go b/tests/integration/index/json_array_test.go new file mode 100644 index 0000000000..d1820c75fb --- /dev/null +++ b/tests/integration/index/json_array_test.go @@ -0,0 +1,239 @@ +// Copyright 2024 Democratized Data 
Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package index + +import ( + "testing" + + testUtils "github.com/sourcenetwork/defradb/tests/integration" +) + +func TestJSONArrayIndex_WithDifferentElementValuesAndTypes_ShouldFetchCorrectlyUsingIndex(t *testing.T) { + req := `query { + User(filter: {custom: {numbers: {_any: {_eq: 4}}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []int{3, 5, 7}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": []int{3}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "numbers": []int{4, 8, 4, 4, 5, 4}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Keenan", + "custom": map[string]any{ + "numbers": []any{8, nil}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + "numbers": []any{10, "str", true}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "numbers": 4, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Chris", + "custom": map[string]any{ + "numbers": nil, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Fred", + "custom": map[string]any{ + "height": 198, + }, + }, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + 
{"name": "Andy"}, + {"name": "Shahzad"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONArrayIndex_WithNestedArrays_ShouldTreatThemAsFlatten(t *testing.T) { + req := `query { + User(filter: {custom: {numbers: {_any: {_eq: 4}}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []any{3, 5, []int{9, 4}, 7}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": []any{0, []int{2, 6}, 9}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Fred", + "custom": map[string]any{ + "numbers": []any{3, 5, []any{1, 0, []int{9, 4, 6}}, 7}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "numbers": []any{1, 2, []int{8, 6}, 10}, + }, + }, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Fred"}, + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONArrayIndex_WithNestedArraysAndObjects_ShouldScopeIndexSearch(t *testing.T) { + req := `query { + User(filter: {custom: {numbers: {nested: {_any: {_eq: 4}}}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []any{3, 5, 
map[string]any{"nested": []int{9, 4}}, 7}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + // there is sought value here, but not in "nested" scope + "numbers": []any{4, 9, map[string]any{"nested": []int{0, 3}}}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": []any{1, map[string]any{"another": []int{4, 3}}}, + }, + }, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} diff --git a/tests/integration/index/json_unique_array_test.go b/tests/integration/index/json_unique_array_test.go new file mode 100644 index 0000000000..2d030c5eb9 --- /dev/null +++ b/tests/integration/index/json_unique_array_test.go @@ -0,0 +1,129 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package index + +import ( + "testing" + + "github.com/sourcenetwork/defradb/errors" + "github.com/sourcenetwork/defradb/internal/db" + testUtils "github.com/sourcenetwork/defradb/tests/integration" +) + +func TestJSONArrayUniqueIndex_ShouldAllowOnlyUniqueValuesAndUseThemForFetching(t *testing.T) { + req := `query { + User(filter: {custom: {numbers: {_any: {_eq: 4}}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index(unique: true) + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []any{3, 4, nil}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + // use existing value of a different type + "numbers": []any{"3", "str", true}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + // use existing value + "numbers": []int{4}, + }, + }, + ExpectedError: db.NewErrCanNotIndexNonUniqueFields( + "bae-8ba4aee7-0f15-5bfd-b1c8-7ae19782982b", + errors.NewKV("custom", map[string]any{"numbers": []int{4}})).Error(), + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + // array with duplicate values + "numbers": []int{5, 8, 5}, + }, + }, + ExpectedError: db.NewErrCanNotIndexNonUniqueFields( + "bae-d7cd78f3-d14e-55a7-bfbc-8c0deb2220b4", + errors.NewKV("custom", map[string]any{"numbers": []int{5, 8, 5}})).Error(), + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Keenan", + "custom": map[string]any{ + // use existing nil value + "numbers": []any{8, nil}, + }, + }, + ExpectedError: db.NewErrCanNotIndexNonUniqueFields( + "bae-f87bacb3-4741-5208-a432-cbfec654080d", + errors.NewKV("custom", map[string]any{"numbers": []any{8, nil}})).Error(), + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": 
map[string]any{ + // existing non-array-element value + "numbers": 3, + }, + }, + ExpectedError: db.NewErrCanNotIndexNonUniqueFields( + "bae-54e76159-66c6-56be-ad65-7ff83edda058", + errors.NewKV("custom", map[string]any{"numbers": 3})).Error(), + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Chris", + "custom": map[string]any{ + // existing nested value + "numbers": []any{9, []int{3}}, + }, + }, + ExpectedError: db.NewErrCanNotIndexNonUniqueFields( + "bae-8dba1343-148c-590c-a942-dd6c80f204fb", + errors.NewKV("custom", map[string]any{"numbers": []any{9, []int{3}}})).Error(), + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} From 32ef7bc978b6799092251cca97d2c955eedff080 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sun, 8 Dec 2024 19:24:59 +0100 Subject: [PATCH 17/46] Fix _in/_nin filter for json docs --- client/normal_util.go | 5 + internal/db/fetcher/errors.go | 5 + internal/db/fetcher/indexer_iterators.go | 5 +- tests/integration/index/json_test.go | 238 ++++++++++++++++++++++- 4 files changed, 248 insertions(+), 5 deletions(-) diff --git a/client/normal_util.go b/client/normal_util.go index 87310d9631..74cc203676 100644 --- a/client/normal_util.go +++ b/client/normal_util.go @@ -14,6 +14,11 @@ package client // is an array. If the given value is not an array, an error is returned. 
func ToArrayOfNormalValues(val NormalValue) ([]NormalValue, error) { if !val.IsArray() { + if jsonVal, ok := val.JSON(); ok { + if jsonArr, ok := jsonVal.Array(); ok { + return toNormalArray(jsonArr, NewNormalJSON), nil + } + } return nil, NewCanNotTurnNormalValueIntoArray(val) } if !val.IsNillable() { diff --git a/internal/db/fetcher/errors.go b/internal/db/fetcher/errors.go index 8836d3982d..0d9c9fdd86 100644 --- a/internal/db/fetcher/errors.go +++ b/internal/db/fetcher/errors.go @@ -96,6 +96,11 @@ func NewErrFailedToGetDagNode(inner error) error { return errors.Wrap(errFailedToGetDagNode, inner) } +// NewErrInvalidInOperatorValue returns an error indicating that the given value is invalid for the _in/_nin operator. +func NewErrInvalidInOperatorValue(inner error) error { + return errors.Wrap(errInvalidInOperatorValue, inner) +} + // NewErrInvalidFilterOperator returns an error indicating that the given filter operator is invalid. func NewErrInvalidFilterOperator(operator string) error { return errors.New(errInvalidFilterOperator, errors.NewKV("Operator", operator)) diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 5e0d7d8eeb..771bb828d3 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -406,12 +406,9 @@ func (f *IndexFetcher) newInIndexIterator( fieldConditions []fieldFilterCond, matchers []valueMatcher, ) (*inIndexIterator, error) { - if !fieldConditions[0].val.IsArray() { - return nil, ErrInvalidInOperatorValue - } inValues, err := client.ToArrayOfNormalValues(fieldConditions[0].val) if err != nil { - return nil, err + return nil, NewErrInvalidInOperatorValue(err) } // iterators for _in filter already iterate over keys with first field value diff --git a/tests/integration/index/json_test.go b/tests/integration/index/json_test.go index 68e4e01784..386c62134b 100644 --- a/tests/integration/index/json_test.go +++ b/tests/integration/index/json_test.go @@ 
-888,7 +888,7 @@ func TestJSONIndex_WithEqFilterOnNullField_ShouldUseIndex(t *testing.T) { testUtils.ExecuteTestCase(t, test) } -func TestJSONIndex_WithNotNeFilterOnNullField_ShouldUseIndex(t *testing.T) { +func TestJSONIndex_WithNeFilterOnNullField_ShouldUseIndex(t *testing.T) { req := `query { User(filter: {custom: {title: {_ne: null}}}) { name @@ -1013,3 +1013,239 @@ func TestJSONIndex_UponUpdate_ShouldUseNewIndexValues(t *testing.T) { testUtils.ExecuteTestCase(t, test) } + +func TestJSONIndex_WithInFilter_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_in: [168, 180]}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 180, "weight": 80} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"height": 172, "weight": 75} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": 190, "weight": 85} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Fred", + "custom": {"height": 180, "weight": 70} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + {"name": "Islam"}, + {"name": "Fred"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(3).WithIndexFetches(3), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithInFilterOfDifferentTypes_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_in: [168, 180, "172 cm"]}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + 
}, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 180, "weight": 80} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"height": 172, "weight": 75} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": 190, "weight": 85} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Fred", + "custom": {"height": "172 cm", "weight": 70} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + {"name": "Fred"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithNinFilter_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {custom: {height: {_nin: [168, 180]}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 180, "weight": 80} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"height": 172, "weight": 75} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": 190, "weight": 85} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Shahzad"}, + {"name": "Keenan"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(4), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONIndex_WithNotAndInFilter_ShouldNotUseIndex(t *testing.T) { + req := `query { + User(filter: {_not: {custom: {height: {_in: [168, 180]}}}}) { + name + } + 
}` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 180, "weight": 80} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"height": 172, "weight": 75} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": 190, "weight": 85} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Shahzad"}, + {"name": "Keenan"}, + }, + }, + }, + // we don't assert index usage here because the query is not using the index + }, + } + + testUtils.ExecuteTestCase(t, test) +} From fc0eb2bbc26732b3295d5c4521f852ecb62c13a9 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 9 Dec 2024 13:30:17 +0100 Subject: [PATCH 18/46] Add filtering on arrays of json docs --- internal/connor/all.go | 6 +- internal/connor/any.go | 6 +- internal/db/fetcher/indexer_iterators.go | 80 ++------- internal/planner/scan.go | 5 +- tests/integration/index/json_array_test.go | 154 +++++++++++++++++- tests/integration/index/json_test.go | 89 ++++++++-- .../index/json_unique_array_test.go | 2 +- tests/integration/index/json_unique_test.go | 6 +- tests/integration/query/json/with_all_test.go | 73 ++++++++- tests/integration/query/json/with_any_test.go | 65 ++++++++ tests/integration/query/json/with_eq_test.go | 59 +++++++ .../integration/query/json/with_none_test.go | 59 +++++++ 12 files changed, 512 insertions(+), 92 deletions(-) diff --git a/internal/connor/all.go b/internal/connor/all.go index bf607b583b..ad84c15d28 100644 --- a/internal/connor/all.go +++ b/internal/connor/all.go @@ -37,13 +37,15 @@ func all(condition, data any) (bool, error) { return allSlice(condition, t) default: - return false, nil + // if none of the 
above array types match, we check the scalar value itself + return eq(condition, data) } } func allSlice[T any](condition any, data []T) (bool, error) { for _, c := range data { - m, err := eq(condition, c) + // recurse further in case of nested arrays + m, err := all(condition, c) if err != nil { return false, err } else if !m { diff --git a/internal/connor/any.go b/internal/connor/any.go index ecd16ce992..431a66531f 100644 --- a/internal/connor/any.go +++ b/internal/connor/any.go @@ -37,13 +37,15 @@ func anyOp(condition, data any) (bool, error) { return anySlice(condition, t) default: - return false, nil + // if none of the above array types match, we check the scalar value itself + return eq(condition, data) } } func anySlice[T any](condition any, data []T) (bool, error) { for _, c := range data { - m, err := eq(condition, c) + // recurse further in case of nested arrays + m, err := anyOp(condition, c) if err != nil { return false, err } else if m { diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 771bb828d3..fef2bd9a89 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -254,9 +254,9 @@ func (iter *inIndexIterator) Close() error { return nil } -// arrayIndexIterator is an iterator indexed array elements. +// memorizingIndexIterator is an iterator for set of indexes that belong to the same document // It keeps track of the already fetched documents to avoid duplicates. 
-type arrayIndexIterator struct { +type memorizingIndexIterator struct { inner indexIterator fetchedDocs map[string]struct{} @@ -265,16 +265,16 @@ type arrayIndexIterator struct { store datastore.DSReaderWriter } -var _ indexIterator = (*arrayIndexIterator)(nil) +var _ indexIterator = (*memorizingIndexIterator)(nil) -func (iter *arrayIndexIterator) Init(ctx context.Context, store datastore.DSReaderWriter) error { +func (iter *memorizingIndexIterator) Init(ctx context.Context, store datastore.DSReaderWriter) error { iter.ctx = ctx iter.store = store iter.fetchedDocs = make(map[string]struct{}) return iter.inner.Init(ctx, store) } -func (iter *arrayIndexIterator) Next() (indexIterResult, error) { +func (iter *memorizingIndexIterator) Next() (indexIterResult, error) { for { res, err := iter.inner.Next() if err != nil { @@ -302,58 +302,7 @@ func (iter *arrayIndexIterator) Next() (indexIterResult, error) { } } -func (iter *arrayIndexIterator) Close() error { - return iter.inner.Close() -} - -type jsonIndexIterator struct { - inner indexIterator - - fetchedDocs map[string]struct{} - jsonPath []string - - ctx context.Context - store datastore.DSReaderWriter -} - -var _ indexIterator = (*jsonIndexIterator)(nil) - -func (iter *jsonIndexIterator) Init(ctx context.Context, store datastore.DSReaderWriter) error { - iter.ctx = ctx - iter.store = store - iter.fetchedDocs = make(map[string]struct{}) - return iter.inner.Init(ctx, store) -} - -func (iter *jsonIndexIterator) Next() (indexIterResult, error) { - for { - res, err := iter.inner.Next() - if err != nil { - return indexIterResult{}, err - } - if !res.foundKey { - return res, nil - } - var docID string - if len(res.value) > 0 { - docID = string(res.value) - } else { - lastField := &res.key.Fields[len(res.key.Fields)-1] - var ok bool - docID, ok = lastField.Value.String() - if !ok { - return indexIterResult{}, NewErrUnexpectedTypeValue[string](lastField.Value) - } - } - if _, ok := iter.fetchedDocs[docID]; ok { - continue - 
} - iter.fetchedDocs[docID] = struct{}{} - return res, nil - } -} - -func (iter *jsonIndexIterator) Close() error { +func (iter *memorizingIndexIterator) Close() error { return iter.inner.Close() } @@ -366,9 +315,16 @@ func (f *IndexFetcher) newPrefixIteratorFromConditions( keyFieldValues := make([]client.NormalValue, 0, len(fieldConditions)) for i := range fieldConditions { c := &fieldConditions[i] + // prefix can be created only for subsequent _eq conditions. So we build the longest possible + // prefix until we hit a condition that is not _eq. + // The exception is when _eq is nested in _none. if c.op != opEq || c.arrOp == compOpNone { - // prefix can be created only for subsequent _eq conditions - // if we encounter any other condition, we built the longest prefix we could + // if the field where we interrupt building of prefix is JSON, we still want to make sure + // that the JSON path is included in the key + if len(c.jsonPath) > 0 { + jsonVal, _ := fieldConditions[i].val.JSON() + keyFieldValues = append(keyFieldValues, client.NewNormalJSON(client.MakeVoidJSON(jsonVal.GetPath()))) + } break } @@ -525,10 +481,8 @@ func (f *IndexFetcher) createIndexIterator() (indexIterator, error) { return nil, NewErrInvalidFilterOperator(fieldConditions[0].op) } - if hasJSON { - iter = &jsonIndexIterator{inner: iter, jsonPath: fieldConditions[0].jsonPath} - } else if hasArray { - iter = &arrayIndexIterator{inner: iter} + if hasJSON || hasArray { + iter = &memorizingIndexIterator{inner: iter} } return iter, nil diff --git a/internal/planner/scan.go b/internal/planner/scan.go index 4e03e22fee..7ab9399cf8 100644 --- a/internal/planner/scan.go +++ b/internal/planner/scan.go @@ -172,7 +172,10 @@ func (scan *scanNode) initFetcher( fd, _ := scan.col.Definition().Schema.GetFieldByName(fieldName) // if the field is an array, we need to copy it instead of moving so that the // top select node can do final filter check on the whole array of the document - if fd.Kind.IsArray() { + // 
because indexes can not assert conditions like _any, _all, _none + // TODO: we don't have to do this for all json fields, only for those that filter + // on it's array fields. We should be able to optimize this. + if fd.Kind.IsArray() || fd.Kind == client.FieldKind_NILLABLE_JSON { fieldsToCopy = append(fieldsToCopy, indexField) } else { fieldsToMove = append(fieldsToMove, indexField) diff --git a/tests/integration/index/json_array_test.go b/tests/integration/index/json_array_test.go index d1820c75fb..d6b35c5c6b 100644 --- a/tests/integration/index/json_array_test.go +++ b/tests/integration/index/json_array_test.go @@ -106,7 +106,7 @@ func TestJSONArrayIndex_WithDifferentElementValuesAndTypes_ShouldFetchCorrectlyU }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(2), }, }, } @@ -172,7 +172,7 @@ func TestJSONArrayIndex_WithNestedArrays_ShouldTreatThemAsFlatten(t *testing.T) }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(2), }, }, } @@ -230,7 +230,155 @@ func TestJSONArrayIndex_WithNestedArraysAndObjects_ShouldScopeIndexSearch(t *tes }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(1), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrectlyUsingIndex(t *testing.T) { + req := `query { + User(filter: {custom: {numbers: {_none: {_eq: 4}}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + DocMap: 
map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []int{3, 5, 7}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "numbers": []int{4, 8}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": []any{8, nil}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Fred", + "custom": map[string]any{ + "numbers": []any{1, []int{4}}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "numbers": 4, + }, + }, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(10), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONArrayIndex_WithAllFilterOnDifferentElementValues_ShouldFetchCorrectlyUsingIndex(t *testing.T) { + req := `query { + User(filter: {custom: {numbers: {_all: {_eq: 4}}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []int{3, 4}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "numbers": []any{4, []int{4, 8}}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": 4, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Fred", + "custom": map[string]any{ + "numbers": []any{4, []any{4, []int{4}}}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "numbers": 
3, + }, + }, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + {"name": "Fred"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(4), }, }, } diff --git a/tests/integration/index/json_test.go b/tests/integration/index/json_test.go index 386c62134b..966d1f50f3 100644 --- a/tests/integration/index/json_test.go +++ b/tests/integration/index/json_test.go @@ -78,7 +78,7 @@ func TestJSONIndex_WithFilterOnNumberField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(2), }, }, } @@ -496,7 +496,7 @@ func TestJSONIndex_WithEqFilterOnStringField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(2), }, }, } @@ -570,7 +570,7 @@ func TestJSONIndex_WithLikeFilterOnStringField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(likeReq), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(5), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), }, testUtils.Request{ Request: ilikeReq, @@ -583,7 +583,7 @@ func TestJSONIndex_WithLikeFilterOnStringField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(ilikeReq), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(5), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), }, }, } @@ -658,7 +658,7 @@ func TestJSONIndex_WithNLikeFilterOnStringField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(nlikeReq), - Asserter: 
testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(5), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), }, testUtils.Request{ Request: nilikeReq, @@ -670,7 +670,7 @@ func TestJSONIndex_WithNLikeFilterOnStringField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(nilikeReq), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(5), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), }, }, } @@ -740,7 +740,7 @@ func TestJSONIndex_WithEqFilterOnBoolField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(2), }, }, } @@ -810,7 +810,7 @@ func TestJSONIndex_WithNeFilterOnBoolField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(5), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), }, }, } @@ -880,7 +880,7 @@ func TestJSONIndex_WithEqFilterOnNullField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(2), }, }, } @@ -938,7 +938,7 @@ func TestJSONIndex_WithNeFilterOnNullField_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(3), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(3), }, }, } @@ -994,7 +994,7 @@ func TestJSONIndex_UponUpdate_ShouldUseNewIndexValues(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req1), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + Asserter: 
testUtils.NewExplainAsserter().WithIndexFetches(1), }, testUtils.Request{ Request: req2, @@ -1006,7 +1006,7 @@ func TestJSONIndex_UponUpdate_ShouldUseNewIndexValues(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req2), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(1), }, }, } @@ -1071,7 +1071,7 @@ func TestJSONIndex_WithInFilter_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(3).WithIndexFetches(3), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(3), }, }, } @@ -1129,7 +1129,7 @@ func TestJSONIndex_WithInFilterOfDifferentTypes_ShouldUseIndex(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(2).WithIndexFetches(2), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(2), }, }, } @@ -1249,3 +1249,64 @@ func TestJSONIndex_WithNotAndInFilter_ShouldNotUseIndex(t *testing.T) { testUtils.ExecuteTestCase(t, test) } + +func TestJSONIndex_WithCompoundFilterCondition_ShouldUseIndex(t *testing.T) { + req := `query { + User(filter: {_and: [ + {custom: {height: {_eq: 180}}}, + {custom: {weight: {_eq: 80}}} + ]}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": {"height": 168, "weight": 70} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": {"height": 180, "weight": 80} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": {"height": 180, "weight": 75} + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": {"height": 190, "weight": 85} + }`, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": 
[]map[string]any{ + {"name": "Islam"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + // TODO: this test doesn't utilize indexes. https://github.com/sourcenetwork/defradb/issues/3299 + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(0), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} diff --git a/tests/integration/index/json_unique_array_test.go b/tests/integration/index/json_unique_array_test.go index 2d030c5eb9..45d10536d8 100644 --- a/tests/integration/index/json_unique_array_test.go +++ b/tests/integration/index/json_unique_array_test.go @@ -120,7 +120,7 @@ func TestJSONArrayUniqueIndex_ShouldAllowOnlyUniqueValuesAndUseThemForFetching(t }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(1), }, }, } diff --git a/tests/integration/index/json_unique_test.go b/tests/integration/index/json_unique_test.go index 062a9af2bc..4e533600cc 100644 --- a/tests/integration/index/json_unique_test.go +++ b/tests/integration/index/json_unique_test.go @@ -85,7 +85,7 @@ func TestJSONUniqueIndex_WithRandomValues_ShouldGuaranteeUniquenessAndBeAbelToUs }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(1), }, }, } @@ -141,7 +141,7 @@ func TestJSONUniqueIndex_UponUpdate_ShouldUseNewIndexValues(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req1), - Asserter: testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(1), }, testUtils.Request{ Request: req2, @@ -153,7 +153,7 @@ func TestJSONUniqueIndex_UponUpdate_ShouldUseNewIndexValues(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req2), - Asserter: 
testUtils.NewExplainAsserter().WithFieldFetches(1).WithIndexFetches(1), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(1), }, }, } diff --git a/tests/integration/query/json/with_all_test.go b/tests/integration/query/json/with_all_test.go index b0d12765ae..1f99107303 100644 --- a/tests/integration/query/json/with_all_test.go +++ b/tests/integration/query/json/with_all_test.go @@ -38,6 +38,12 @@ func TestQueryJSON_WithAllFilterWithAllTypes_ShouldFilter(t *testing.T) { "custom": [null, false, "second", {"one": 1}, [1, 2]] }`, }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Fred", + "custom": [false, "second", {"one": 1}, [1, [2, null]]] + }`, + }, testUtils.CreateDoc{ Doc: `{ "name": "Islam", @@ -70,9 +76,70 @@ func TestQueryJSON_WithAllFilterWithAllTypes_ShouldFilter(t *testing.T) { }`, Results: map[string]any{ "Users": []map[string]any{ - { - "name": "Shahzad", - }, + {"name": "John"}, + {"name": "Shahzad"}, + {"name": "Keenan"}, + {"name": "Andy"}, + }, + }, + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestQueryJSON_WithAllFilterAndNestedArray_ShouldFilter(t *testing.T) { + test := testUtils.TestCase{ + Description: "Simple JSON array, filtered all of all types array", + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: `type Users { + name: String + custom: JSON + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": [1] + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Fred", + "custom": [1, 2, 1] + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": [1, [1, [1]]] + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": 1 + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": [1, "1"] + }`, + }, + testUtils.Request{ + Request: `query { + Users(filter: {custom: {_all: {_eq: 1}}}) { + name + } + }`, + Results: map[string]any{ + "Users": []map[string]any{ + {"name": "Islam"}, + {"name": "Shahzad"}, + {"name": "Keenan"}, }, }, }, diff --git 
a/tests/integration/query/json/with_any_test.go b/tests/integration/query/json/with_any_test.go index e79e90946b..3d74c1655a 100644 --- a/tests/integration/query/json/with_any_test.go +++ b/tests/integration/query/json/with_any_test.go @@ -81,3 +81,68 @@ func TestQueryJSON_WithAnyFilterWithAllTypes_ShouldFilter(t *testing.T) { testUtils.ExecuteTestCase(t, test) } + +func TestQueryJSON_WithAnyFilterAndNestedArray_ShouldFilter(t *testing.T) { + test := testUtils.TestCase{ + Description: "Simple JSON array, filtered any of all types array", + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: `type Users { + name: String + custom: JSON + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": [1, false, "second", {"one": 1}, [1, 2]] + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Fred", + "custom": [null, false, "second", {"one": 1}, [1, [2, 3]]] + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": null + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Keenan", + "custom": 3 + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": "" + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": true + }`, + }, + testUtils.Request{ + Request: `query { + Users(filter: {custom: {_any: {_eq: 3}}}) { + name + } + }`, + Results: map[string]any{ + "Users": []map[string]any{ + {"name": "Keenan"}, + {"name": "Fred"}, + }, + }, + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} diff --git a/tests/integration/query/json/with_eq_test.go b/tests/integration/query/json/with_eq_test.go index d319148915..ac61591602 100644 --- a/tests/integration/query/json/with_eq_test.go +++ b/tests/integration/query/json/with_eq_test.go @@ -69,6 +69,65 @@ func TestQueryJSON_WithEqualFilterWithObject_ShouldFilter(t *testing.T) { testUtils.ExecuteTestCase(t, test) } +func TestQueryJSON_WithCompoundFilterCondition_ShouldFilter(t *testing.T) { + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ 
+ Schema: ` + type Users { + name: String + custom: JSON + } + `, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": { + "tree": "maple", + "age": 450 + } + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": { + "tree": "maple", + "age": 250 + } + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": { + "tree": "maple", + "age": 20 + } + }`, + }, + testUtils.Request{ + Request: `query { + Users(filter: {_and: [ + {custom: {tree: {_eq: "maple"}}}, + {custom: {age: {_eq: 250}}} + ]}) { + name + } + }`, + Results: map[string]any{ + "Users": []map[string]any{ + {"name": "John"}, + }, + }, + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + func TestQueryJSON_WithEqualFilterWithNestedObjects_ShouldFilter(t *testing.T) { test := testUtils.TestCase{ Actions: []any{ diff --git a/tests/integration/query/json/with_none_test.go b/tests/integration/query/json/with_none_test.go index 2355810423..3a2434e1a3 100644 --- a/tests/integration/query/json/with_none_test.go +++ b/tests/integration/query/json/with_none_test.go @@ -57,3 +57,62 @@ func TestQueryJSON_WithNoneFilter_ShouldFilter(t *testing.T) { testUtils.ExecuteTestCase(t, test) } + +func TestQueryJSON_WithNoneFilterAndNestedArray_ShouldFilter(t *testing.T) { + test := testUtils.TestCase{ + Description: "Simple JSON array, filtered none of string array", + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: `type Users { + name: String + custom: JSON + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Shahzad", + "custom": [1, false, "second", {"one": 1}, [1, 2]] + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Fred", + "custom": [null, false, "second", {"one": 1}, [1, 2]] + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Andy", + "custom": [false, "second", {"one": 1}, [1, [2, null]]] + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Islam", + "custom": null + }`, + }, + testUtils.CreateDoc{ + Doc: `{ + "name": "John", + "custom": false + 
}`, + }, + testUtils.Request{ + Request: `query { + Users(filter: {custom: {_none: {_eq: null}}}) { + name + } + }`, + Results: map[string]any{ + "Users": []map[string]any{ + {"name": "Shahzad"}, + {"name": "John"}, + }, + }, + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} From 70f8651eb594e1c1f212a4f07b1486b876812523 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Tue, 10 Dec 2024 12:27:19 +0100 Subject: [PATCH 19/46] Remove filtering without array elements --- internal/db/fetcher/indexer.go | 3 +- internal/planner/scan.go | 1 + tests/integration/index/json_array_test.go | 58 ---------------------- 3 files changed, 3 insertions(+), 59 deletions(-) diff --git a/internal/db/fetcher/indexer.go b/internal/db/fetcher/indexer.go index 12fd866cb6..5ef63b7246 100644 --- a/internal/db/fetcher/indexer.go +++ b/internal/db/fetcher/indexer.go @@ -12,6 +12,7 @@ package fetcher import ( "context" + "errors" "github.com/sourcenetwork/immutable" @@ -197,7 +198,7 @@ func (f *IndexFetcher) FetchNext(ctx context.Context) (EncodedDocument, ExecInfo } encDoc, execInfo, err := f.docFetcher.FetchNext(ctx) if err != nil { - return nil, ExecInfo{}, err + return nil, ExecInfo{}, errors.Join(err, f.docFetcher.Close()) } err = f.docFetcher.Close() if err != nil { diff --git a/internal/planner/scan.go b/internal/planner/scan.go index 7ab9399cf8..16b8295a3b 100644 --- a/internal/planner/scan.go +++ b/internal/planner/scan.go @@ -175,6 +175,7 @@ func (scan *scanNode) initFetcher( // because indexes can not assert conditions like _any, _all, _none // TODO: we don't have to do this for all json fields, only for those that filter // on it's array fields. We should be able to optimize this. 
+ // https://github.com/sourcenetwork/defradb/issues/3306 if fd.Kind.IsArray() || fd.Kind == client.FieldKind_NILLABLE_JSON { fieldsToCopy = append(fieldsToCopy, indexField) } else { diff --git a/tests/integration/index/json_array_test.go b/tests/integration/index/json_array_test.go index d6b35c5c6b..f4ce72e564 100644 --- a/tests/integration/index/json_array_test.go +++ b/tests/integration/index/json_array_test.go @@ -180,64 +180,6 @@ func TestJSONArrayIndex_WithNestedArrays_ShouldTreatThemAsFlatten(t *testing.T) testUtils.ExecuteTestCase(t, test) } -func TestJSONArrayIndex_WithNestedArraysAndObjects_ShouldScopeIndexSearch(t *testing.T) { - req := `query { - User(filter: {custom: {numbers: {nested: {_any: {_eq: 4}}}}}) { - name - } - }` - test := testUtils.TestCase{ - Actions: []any{ - testUtils.SchemaUpdate{ - Schema: ` - type User { - name: String - custom: JSON @index - }`, - }, - testUtils.CreateDoc{ - DocMap: map[string]any{ - "name": "John", - "custom": map[string]any{ - "numbers": []any{3, 5, map[string]any{"nested": []int{9, 4}}, 7}, - }, - }, - }, - testUtils.CreateDoc{ - DocMap: map[string]any{ - "name": "Islam", - "custom": map[string]any{ - // there is sought value here, but not in "nested" scope - "numbers": []any{4, 9, map[string]any{"nested": []int{0, 3}}}, - }, - }, - }, - testUtils.CreateDoc{ - DocMap: map[string]any{ - "name": "Islam", - "custom": map[string]any{ - "numbers": []any{1, map[string]any{"another": []int{4, 3}}}, - }, - }, - }, - testUtils.Request{ - Request: req, - Results: map[string]any{ - "User": []map[string]any{ - {"name": "John"}, - }, - }, - }, - testUtils.Request{ - Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithIndexFetches(1), - }, - }, - } - - testUtils.ExecuteTestCase(t, test) -} - func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrectlyUsingIndex(t *testing.T) { req := `query { User(filter: {custom: {numbers: {_none: {_eq: 4}}}}) { From 
cdb9d34f88d5a765104e68d21ccd6d2d9ee2c609 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sat, 14 Dec 2024 20:33:41 +0100 Subject: [PATCH 20/46] Add tests for composite index with json --- .../integration/index/json_composite_test.go | 786 ++++++++++++++++++ .../index/json_unique_array_test.go | 6 +- 2 files changed, 789 insertions(+), 3 deletions(-) create mode 100644 tests/integration/index/json_composite_test.go diff --git a/tests/integration/index/json_composite_test.go b/tests/integration/index/json_composite_test.go new file mode 100644 index 0000000000..27cc99a4e8 --- /dev/null +++ b/tests/integration/index/json_composite_test.go @@ -0,0 +1,786 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package index + +import ( + "testing" + + testUtils "github.com/sourcenetwork/defradb/tests/integration" +) + +func TestJSONArrayCompositeIndex_JSONWithScalar_ShouldFetchUsingIndex(t *testing.T) { + type testCase struct { + name string + req string + result map[string]any + indexFetches int + } + + testCases := []testCase{ + { + name: "Unique combination. Non-unique custom.val", + req: `query { + User(filter: {_and: [ + {custom: {val: {_eq: 3}}}, + {age: {_eq: 25}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + }, + }, + indexFetches: 1, + }, + { + name: "Unique combination. 
Non-unique age", + req: `query { + User(filter: {_and: [ + {custom: {val: {_eq: 3}}}, + {age: {_eq: 30}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + indexFetches: 1, + }, + { + name: "Match first part of the composite index", + req: `query { + User(filter: {custom: {val: {_eq: 3}}}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + {"name": "Islam"}, + }, + }, + indexFetches: 2, + }, + { + name: "Non-unique combination", + req: `query { + User(filter: {_and: [ + {custom: {val: {_eq: 5}}}, + {age: {_eq: 35}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Addo"}, + {"name": "Keenan"}, + }, + }, + indexFetches: 2, + }, + { + name: "Match second part of the composite index", + req: `query { + User(filter: {age: {_eq: 40}}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Bruno"}, + }, + }, + indexFetches: 0, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User @index(includes: [{field: "custom"}, {field: "age"}]) { + name: String + custom: JSON + age: Int + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "val": 3, + }, + "age": 30, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "val": 3, + }, + "age": 25, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "val": 4, + }, + "age": 25, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Keenan", + "custom": map[string]any{ + "val": 5, + }, + "age": 35, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Addo", + "custom": map[string]any{ + "val": 5, + }, + "age": 35, + }, + }, +
testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + "val": 6, + }, + "age": 40, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "val": nil, + }, + "age": 50, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Chris", + "custom": map[string]any{ + "val": 7, + }, + "age": nil, + }, + }, + testUtils.Request{ + Request: tc.req, + Results: tc.result, + }, + testUtils.Request{ + Request: makeExplainQuery(tc.req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(tc.indexFetches), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) + }) + } +} + +func TestJSONArrayCompositeIndex_ScalarWithJSON_ShouldFetchUsingIndex(t *testing.T) { + type testCase struct { + name string + req string + result map[string]any + indexFetches int + } + + testCases := []testCase{ + { + name: "Unique combination. Non-unique custom.val", + req: `query { + User(filter: {_and: [ + {age: {_eq: 25}}, + {custom: {val: {_eq: 3}}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + }, + }, + indexFetches: 1, + }, + { + name: "Unique combination. 
Non-unique age", + req: `query { + User(filter: {_and: [ + {age: {_eq: 30}}, + {custom: {val: {_eq: 3}}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + indexFetches: 1, + }, + { + name: "Match first part of the composite index", + req: `query { + User(filter: {age: {_eq: 25}}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Shahzad"}, + {"name": "Islam"}, + }, + }, + indexFetches: 2, + }, + { + name: "Non-unique combination", + req: `query { + User(filter: {_and: [ + {age: {_eq: 35}}, + {custom: {val: {_eq: 5}}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Addo"}, + {"name": "Keenan"}, + }, + }, + indexFetches: 1, + }, + { + name: "Match second part of the composite index", + req: `query { + User(filter: {custom: {val: {_eq: 6}}}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Bruno"}, + }, + }, + indexFetches: 0, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User @index(includes: [{field: "age"}, {field: "custom"}]) { + name: String + custom: JSON + age: Int + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "val": 3, + }, + "age": 30, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "val": 3, + }, + "age": 25, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "val": 4, + }, + "age": 25, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Keenan", + "custom": map[string]any{ + "val": 5, + }, + "age": 35, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Addo", + "custom": map[string]any{ + "val": 5, + }, + "age": 35, + }, + }, + 
testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + "val": 6, + }, + "age": 40, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "val": nil, + }, + "age": 50, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Chris", + "custom": map[string]any{ + "val": 7, + }, + "age": nil, + }, + }, + testUtils.Request{ + Request: tc.req, + Results: tc.result, + }, + testUtils.Request{ + Request: makeExplainQuery(tc.req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(tc.indexFetches), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) + }) + } +} + +func TestJSONArrayCompositeIndex_JSONArrayWithScalar_ShouldFetchUsingIndex(t *testing.T) { + type testCase struct { + name string + req string + result map[string]any + indexFetches int + } + + testCases := []testCase{ + { + name: "Unique combination. Non-unique custom.numbers element", + req: `query { + User(filter: {_and: [ + {custom: {numbers: {_any: {_eq: 3}}}}, + {age: {_eq: 25}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + }, + }, + indexFetches: 1, + }, + { + name: "Unique combination. 
Non-unique age", + req: `query { + User(filter: {_and: [ + {custom: {numbers: {_any: {_eq: 3}}}}, + {age: {_eq: 30}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + indexFetches: 1, + }, + { + name: "Match first part of the composite index", + req: `query { + User(filter: {custom: {numbers: {_any: {_eq: 3}}}}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + {"name": "Islam"}, + }, + }, + indexFetches: 2, + }, + { + name: "Non-unique combination", + req: `query { + User(filter: {_and: [ + {custom: {numbers: {_any: {_eq: 5}}}}, + {age: {_eq: 35}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Addo"}, + {"name": "Keenan"}, + }, + }, + indexFetches: 2, + }, + { + name: "Match second part of the composite index", + req: `query { + User(filter: {age: {_eq: 40}}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Bruno"}, + }, + }, + indexFetches: 0, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User @index(includes: [{field: "custom.numbers"}, {field: "age"}]) { + name: String + custom: JSON + age: Int + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []int{3, 4}, + }, + "age": 30, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": []int{3, 5}, + }, + "age": 25, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "numbers": []int{4, 6}, + }, + "age": 30, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Keenan", + "custom": map[string]any{ + "numbers": []int{5, 7}, + }, + "age": 35, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + 
"name": "Addo", + "custom": map[string]any{ + "numbers": []int{1, 5, 8}, + }, + "age": 35, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + "numbers": []int{6, 9}, + }, + "age": 40, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "numbers": []int{}, + }, + "age": 35, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Chris", + "custom": map[string]any{ + "numbers": []int{7, 10}, + }, + "age": nil, + }, + }, + testUtils.Request{ + Request: tc.req, + Results: tc.result, + }, + testUtils.Request{ + Request: makeExplainQuery(tc.req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(tc.indexFetches), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) + }) + } +} + +func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t *testing.T) { + type testCase struct { + name string + req string + result map[string]any + indexFetches int + } + + testCases := []testCase{ + { + name: "Unique combination. Non-unique custom.numbers element", + req: `query { + User(filter: {_and: [ + {custom: {numbers: {_any: {_eq: 3}}}}, + {tags: {_any: {_eq: "friend"}}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + }, + }, + indexFetches: 1, + }, + { + name: "Unique combination. 
Non-unique tags", + req: `query { + User(filter: {_and: [ + {custom: {numbers: {_any: {_eq: 5}}}}, + {tags: {_any: {_eq: "mentor"}}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + indexFetches: 1, + }, + { + name: "Match first part of the composite index", + req: `query { + User(filter: {custom: {numbers: {_any: {_eq: 3}}}}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + {"name": "Islam"}, + }, + }, + indexFetches: 2, + }, + { + name: "Non-unique combination", + req: `query { + User(filter: {_and: [ + {custom: {numbers: {_any: {_eq: 5}}}}, + {tags: {_any: {_eq: "family"}}} + ]}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Addo"}, + {"name": "Keenan"}, + }, + }, + indexFetches: 2, + }, + { + name: "Match second part of the composite index", + req: `query { + User(filter: {tags: {_any: {_eq: "dude"}}}) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Bruno"}, + }, + }, + indexFetches: 0, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User @index(includes: [{field: "custom.numbers"}, {field: "tags"}]) { + name: String + custom: JSON + age: Int + tags: [String] + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []int{3, 4}, + }, + "tags": []string{"colleague", "mentor"}, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": []int{3, 5}, + }, + "tags": []string{"friend", "mentor"}, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "numbers": []int{4, 6}, + }, + "tags": []string{"colleague"}, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Keenan", 
+ "custom": map[string]any{ + "numbers": []int{5, 7}, + }, + "tags": []string{"family"}, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Addo", + "custom": map[string]any{ + "numbers": []int{1, 5, 8}, + }, + "tags": []string{"family"}, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + "numbers": []int{6, 9}, + }, + "tags": []string{"dude"}, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "numbers": []int{}, + }, + "tags": []string{"friend"}, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Chris", + "custom": map[string]any{ + "numbers": []int{7, 10}, + }, + "tags": []string{"colleague"}, + }, + }, + testUtils.Request{ + Request: tc.req, + Results: tc.result, + }, + testUtils.Request{ + Request: makeExplainQuery(tc.req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(tc.indexFetches), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) + }) + } +} diff --git a/tests/integration/index/json_unique_array_test.go b/tests/integration/index/json_unique_array_test.go index 45d10536d8..4c43a4d0bf 100644 --- a/tests/integration/index/json_unique_array_test.go +++ b/tests/integration/index/json_unique_array_test.go @@ -79,12 +79,12 @@ func TestJSONArrayUniqueIndex_ShouldAllowOnlyUniqueValuesAndUseThemForFetching(t "name": "Keenan", "custom": map[string]any{ // use existing nil value - "numbers": []any{8, nil}, + "numbers": []any{6, nil}, }, }, ExpectedError: db.NewErrCanNotIndexNonUniqueFields( - "bae-f87bacb3-4741-5208-a432-cbfec654080d", - errors.NewKV("custom", map[string]any{"numbers": []any{8, nil}})).Error(), + "bae-bde18215-f623-568e-868d-1156c30e45d3", + errors.NewKV("custom", map[string]any{"numbers": []any{6, nil}})).Error(), }, testUtils.CreateDoc{ DocMap: map[string]any{ From adb71d4ab02889ab55e7e274c457f1dec33819ab Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 16 Dec 2024 15:39:09 
+0100 Subject: [PATCH 21/46] Enable indexing of array within json docs --- internal/connor/all.go | 6 +- internal/connor/any.go | 6 +- internal/db/index.go | 676 ++++-------------- tests/integration/index/json_array_test.go | 24 +- .../integration/index/json_composite_test.go | 143 ++-- ...with_unique_composite_index_filter_test.go | 4 +- tests/integration/query/json/with_all_test.go | 6 +- tests/integration/query/json/with_any_test.go | 9 +- .../integration/query/json/with_none_test.go | 14 +- 9 files changed, 234 insertions(+), 654 deletions(-) diff --git a/internal/connor/all.go b/internal/connor/all.go index ad84c15d28..bf607b583b 100644 --- a/internal/connor/all.go +++ b/internal/connor/all.go @@ -37,15 +37,13 @@ func all(condition, data any) (bool, error) { return allSlice(condition, t) default: - // if none of the above array types match, we check the scalar value itself - return eq(condition, data) + return false, nil } } func allSlice[T any](condition any, data []T) (bool, error) { for _, c := range data { - // recurse further in case of nested arrays - m, err := all(condition, c) + m, err := eq(condition, c) if err != nil { return false, err } else if !m { diff --git a/internal/connor/any.go b/internal/connor/any.go index 431a66531f..ecd16ce992 100644 --- a/internal/connor/any.go +++ b/internal/connor/any.go @@ -37,15 +37,13 @@ func anyOp(condition, data any) (bool, error) { return anySlice(condition, t) default: - // if none of the above array types match, we check the scalar value itself - return eq(condition, data) + return false, nil } } func anySlice[T any](condition any, data []T) (bool, error) { for _, c := range data { - // recurse further in case of nested arrays - m, err := anyOp(condition, c) + m, err := eq(condition, c) if err != nil { return false, err } else if m { diff --git a/internal/db/index.go b/internal/db/index.go index 1271b26699..d1d7ba4261 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -66,10 +66,12 @@ func 
NewCollectionIndex( if len(desc.Fields) == 0 { return nil, NewErrIndexDescHasNoFields(desc) } - base := collectionBaseIndex{collection: collection, desc: desc} - base.fieldsDescs = make([]client.SchemaFieldDescription, len(desc.Fields)) - isArray := false - isJSON := false + base := collectionBaseIndex{ + collection: collection, + desc: desc, + fieldsDescs: make([]client.SchemaFieldDescription, len(desc.Fields)), + fieldGenerators: make([]FieldIndexGenerator, len(desc.Fields)), + } for i := range desc.Fields { field, foundField := collection.Schema().GetFieldByName(desc.Fields[i].Name) if !foundField { @@ -79,26 +81,66 @@ func NewCollectionIndex( if !isSupportedKind(field.Kind) { return nil, NewErrUnsupportedIndexFieldType(field.Kind) } - isArray = isArray || field.Kind.IsArray() - isJSON = isJSON || field.Kind == client.FieldKind_NILLABLE_JSON + base.fieldGenerators[i] = getFieldGenerator(field.Kind) + } + if desc.Unique { + return &collectionUniqueIndex{collectionBaseIndex: base}, nil + } + return &collectionSimpleIndex{collectionBaseIndex: base}, nil +} + +// FieldIndexGenerator generates index entries for a single field +type FieldIndexGenerator interface { + // Generate calls the provided function for each value that should be indexed + Generate(value client.NormalValue, f func(client.NormalValue) error) error +} + +type SimpleFieldGenerator struct{} + +func (g *SimpleFieldGenerator) Generate(value client.NormalValue, f func(client.NormalValue) error) error { + return f(value) +} + +type ArrayFieldGenerator struct{} + +func (g *ArrayFieldGenerator) Generate(value client.NormalValue, f func(client.NormalValue) error) error { + normVals, err := client.ToArrayOfNormalValues(value) + if err != nil { + return err } - if isArray { - if desc.Unique { - return newCollectionArrayUniqueIndex(base), nil - } else { - return newCollectionArrayIndex(base), nil + + // Remove duplicates to avoid duplicate index entries + uniqueVals := slice.RemoveDuplicates(normVals) + for _, 
val := range uniqueVals { + if err := f(val); err != nil { + return err } - } else if isJSON { - if desc.Unique { - return newCollectionJSONUniqueIndex(base), nil - } else { - return newCollectionJSONIndex(base), nil + } + return nil +} + +type JSONFieldGenerator struct{} + +func (g *JSONFieldGenerator) Generate(value client.NormalValue, f func(client.NormalValue) error) error { + json, _ := value.JSON() + return client.TraverseJSON(json, func(value client.JSON) error { + val, err := client.NewNormalValue(value) + if err != nil { + return err } - } else if desc.Unique { - return &collectionUniqueIndex{collectionBaseIndex: base}, nil - } else { - return &collectionSimpleIndex{collectionBaseIndex: base}, nil + return f(val) + }, client.TraverseJSONOnlyLeaves(), client.TraverseJSONVisitArrayElements()) // TODO: add option to traverse array elements +} + +// getFieldGenerator returns appropriate generator for the field type +func getFieldGenerator(kind client.FieldKind) FieldIndexGenerator { + if kind.IsArray() { + return &ArrayFieldGenerator{} } + if kind == client.FieldKind_NILLABLE_JSON { + return &JSONFieldGenerator{} + } + return &SimpleFieldGenerator{} } type collectionBaseIndex struct { @@ -106,7 +148,8 @@ type collectionBaseIndex struct { desc client.IndexDescription // fieldsDescs is a slice of field descriptions for the fields that are indexed by the index // If there is more than 1 field, the index is composite - fieldsDescs []client.SchemaFieldDescription + fieldsDescs []client.SchemaFieldDescription + fieldGenerators []FieldIndexGenerator } // getDocFieldValues retrieves the values of the indexed fields from the given document. 
@@ -198,6 +241,47 @@ func (index *collectionBaseIndex) Description() client.IndexDescription { return index.desc } +func (index *collectionBaseIndex) generateIndexKeys( + doc *client.Document, + appendDocID bool, + f func(keys.IndexDataStoreKey) error, +) error { + // Get initial key with base values + baseKey, err := index.getDocumentsIndexKey(doc, appendDocID) + if err != nil { + return err + } + + // Start with first field + return index.generateKeysForField(0, baseKey, f) +} + +func (index *collectionBaseIndex) generateKeysForField( + fieldIdx int, + baseKey keys.IndexDataStoreKey, + f func(keys.IndexDataStoreKey) error, +) error { + // If we've processed all fields, call the handler + if fieldIdx >= len(index.fieldsDescs) { + return f(baseKey) + } + + // Generate values for current field + return index.fieldGenerators[fieldIdx].Generate( + baseKey.Fields[fieldIdx].Value, + func(val client.NormalValue) error { + // Create new key with generated value + newKey := baseKey + newKey.Fields = make([]keys.IndexedField, len(baseKey.Fields)) + copy(newKey.Fields, baseKey.Fields) + newKey.Fields[fieldIdx].Value = val + + // Process next field + return index.generateKeysForField(fieldIdx+1, newKey, f) + }, + ) +} + // collectionSimpleIndex is an non-unique index that indexes documents by a single field. // Single-field indexes store values only in ascending order. type collectionSimpleIndex struct { @@ -206,28 +290,15 @@ type collectionSimpleIndex struct { var _ CollectionIndex = (*collectionSimpleIndex)(nil) -func (index *collectionSimpleIndex) getDocumentsIndexKey( - doc *client.Document, -) (keys.IndexDataStoreKey, error) { - // docID is appended, as it's part of the key for non-unique indexes - return index.collectionBaseIndex.getDocumentsIndexKey(doc, true) -} - // Save indexes a document by storing the indexed field value. 
func (index *collectionSimpleIndex) Save( ctx context.Context, txn datastore.Txn, doc *client.Document, ) error { - key, err := index.getDocumentsIndexKey(doc) - if err != nil { - return err - } - err = txn.Datastore().Put(ctx, key.ToDS(), []byte{}) - if err != nil { - return NewErrFailedToStoreIndexedField(key.ToString(), err) - } - return nil + return index.generateIndexKeys(doc, true, func(key keys.IndexDataStoreKey) error { + return txn.Datastore().Put(ctx, key.ToDS(), []byte{}) + }) } func (index *collectionSimpleIndex) Update( @@ -236,7 +307,7 @@ func (index *collectionSimpleIndex) Update( oldDoc *client.Document, newDoc *client.Document, ) error { - err := index.deleteDocIndex(ctx, txn, oldDoc) + err := index.Delete(ctx, txn, oldDoc) if err != nil { return err } @@ -248,19 +319,9 @@ func (index *collectionSimpleIndex) Delete( txn datastore.Txn, doc *client.Document, ) error { - return index.deleteDocIndex(ctx, txn, doc) -} - -func (index *collectionSimpleIndex) deleteDocIndex( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - key, err := index.getDocumentsIndexKey(doc) - if err != nil { - return err - } - return index.deleteIndexKey(ctx, txn, key) + return index.generateIndexKeys(doc, true, func(key keys.IndexDataStoreKey) error { + return index.deleteIndexKey(ctx, txn, key) + }) } // hasIndexKeyNilField returns true if the index key has a field with nil value @@ -279,29 +340,14 @@ type collectionUniqueIndex struct { var _ CollectionIndex = (*collectionUniqueIndex)(nil) -func (index *collectionUniqueIndex) save( - ctx context.Context, - txn datastore.Txn, - key *keys.IndexDataStoreKey, - val []byte, -) error { - err := txn.Datastore().Put(ctx, key.ToDS(), val) - if err != nil { - return NewErrFailedToStoreIndexedField(key.ToDS().String(), err) - } - return nil -} - func (index *collectionUniqueIndex) Save( ctx context.Context, txn datastore.Txn, doc *client.Document, ) error { - key, val, err := 
index.prepareUniqueIndexRecordToStore(ctx, txn, doc) - if err != nil { - return err - } - return index.save(ctx, txn, &key, val) + return index.generateIndexKeys(doc, false, func(key keys.IndexDataStoreKey) error { + return addNewUniqueKey(ctx, txn, doc, key, index.fieldsDescs) + }) } func newUniqueIndexError(doc *client.Document, fieldsDescs []client.SchemaFieldDescription) error { @@ -322,16 +368,6 @@ func newUniqueIndexError(doc *client.Document, fieldsDescs []client.SchemaFieldD return NewErrCanNotIndexNonUniqueFields(doc.ID().String(), kvs...) } -func (index *collectionBaseIndex) getDocumentsUniqueIndexRecord( - doc *client.Document, -) (keys.IndexDataStoreKey, []byte, error) { - key, err := index.getDocumentsIndexKey(doc, false) - if err != nil { - return keys.IndexDataStoreKey{}, nil, err - } - return makeUniqueKeyValueRecord(key, doc) -} - func makeUniqueKeyValueRecord( key keys.IndexDataStoreKey, doc *client.Document, @@ -344,18 +380,6 @@ func makeUniqueKeyValueRecord( } } -func (index *collectionUniqueIndex) prepareUniqueIndexRecordToStore( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) (keys.IndexDataStoreKey, []byte, error) { - key, val, err := index.getDocumentsUniqueIndexRecord(doc) - if err != nil { - return keys.IndexDataStoreKey{}, nil, err - } - return key, val, validateUniqueKeyValue(ctx, txn, key, val, doc, index.fieldsDescs) -} - func validateUniqueKeyValue( ctx context.Context, txn datastore.Txn, @@ -403,7 +427,13 @@ func (index *collectionUniqueIndex) Delete( txn datastore.Txn, doc *client.Document, ) error { - return index.deleteDocIndex(ctx, txn, doc) + return index.generateIndexKeys(doc, false, func(key keys.IndexDataStoreKey) error { + key, _, err := makeUniqueKeyValueRecord(key, doc) + if err != nil { + return err + } + return txn.Datastore().Delete(ctx, key.ToDS()) + }) } func (index *collectionUniqueIndex) Update( @@ -417,27 +447,13 @@ func (index *collectionUniqueIndex) Update( if 
!isUpdatingIndexedFields(index, oldDoc, newDoc) { return nil } - newKey, newVal, err := index.prepareUniqueIndexRecordToStore(ctx, txn, newDoc) - if err != nil { - return err - } - err = index.deleteDocIndex(ctx, txn, oldDoc) - if err != nil { - return err - } - return index.save(ctx, txn, &newKey, newVal) -} -func (index *collectionUniqueIndex) deleteDocIndex( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - key, _, err := index.getDocumentsUniqueIndexRecord(doc) + err := index.Delete(ctx, txn, oldDoc) if err != nil { return err } - return index.deleteIndexKey(ctx, txn, key) + + return index.Save(ctx, txn, newDoc) } func isUpdatingIndexedFields(index CollectionIndex, oldDoc, newDoc *client.Document) bool { @@ -455,449 +471,9 @@ func isUpdatingIndexedFields(index CollectionIndex, oldDoc, newDoc *client.Docum continue case getOldValErr != nil && getNewValErr == nil: return true - case oldVal.Value() != newVal.Value(): + case !oldVal.NormalValue().Equal(newVal.NormalValue()): return true } } return false } - -type collectionArrayBaseIndex struct { - collectionBaseIndex - arrFieldsIndexes []int -} - -func newCollectionArrayBaseIndex(base collectionBaseIndex) collectionArrayBaseIndex { - ind := collectionArrayBaseIndex{collectionBaseIndex: base} - for i := range base.fieldsDescs { - if base.fieldsDescs[i].Kind.IsArray() { - ind.arrFieldsIndexes = append(ind.arrFieldsIndexes, i) - } - } - if len(ind.arrFieldsIndexes) == 0 { - return collectionArrayBaseIndex{} - } - return ind -} - -// newIndexKeyGenerator creates a function that generates index keys for a document -// with multiple array fields. -// All generated keys are unique. 
-// For example for a doc with these values {{"a", "b", "a"}, {"c", "d", "e"}, {"f", "g"}} it generates: -// "acf", "acg", "adf", "adg", "aef", "aeg", "bcf", "bcg", "bdf", "bdg", "bef", "beg" -// Note: the example is simplified and doesn't include field separation -func (index *collectionArrayBaseIndex) newIndexKeyGenerator( - doc *client.Document, - appendDocID bool, -) (func() (keys.IndexDataStoreKey, bool), error) { - key, err := index.getDocumentsIndexKey(doc, appendDocID) - if err != nil { - return nil, err - } - - // Collect unique values to use as source for generating keys - normValsArr := make([][]client.NormalValue, 0, len(index.arrFieldsIndexes)) - for _, arrFieldIndex := range index.arrFieldsIndexes { - arrVal := key.Fields[arrFieldIndex].Value - normVals, err := client.ToArrayOfNormalValues(arrVal) - if err != nil { - return nil, err - } - normValsArr = append(normValsArr, slice.RemoveDuplicates(normVals)) - } - - // arrFieldCounter keeps track of indexes into arrays of normal values - arrFieldCounter := make([]int, len(index.arrFieldsIndexes)) - done := false - - // This function generates the next key by iterating through all possible combinations. - // It works pretty much like a digital clock that first iterates through seconds, then minutes, etc. - return func() (keys.IndexDataStoreKey, bool) { - if done { - return keys.IndexDataStoreKey{}, false - } - - resultKey := keys.IndexDataStoreKey{ - CollectionID: key.CollectionID, - IndexID: key.IndexID, - Fields: make([]keys.IndexedField, len(key.Fields)), - } - copy(resultKey.Fields, key.Fields) - - // Use current indexes in arrFieldsIndexes to replace corresponding fields in the key - for i, counter := range arrFieldCounter { - field := &resultKey.Fields[index.arrFieldsIndexes[i]] - field.Value = normValsArr[i][counter] - } - - // iterate in reverse order so that we exhaust all combination for the last field first, - // {"f", "g"} in the example above. 
This way we guarantee that the order of generated keys - // is from left to right, "acf" -> "acg" -> "adf" -> "adg" -> ... - for i := len(arrFieldCounter) - 1; i >= 0; i-- { - arrFieldCounter[i]++ - if arrFieldCounter[i] < len(normValsArr[i]) { - break - } - // if we iterated through all combinations for the current field, reset the counter - // so that we do it again for the next field from the left side - arrFieldCounter[i] = 0 - // if the current side happens to be the leftmost one (the first), we are done - if i == 0 { - done = true - } - } - - return resultKey, true - }, nil -} - -func (index *collectionArrayBaseIndex) getAllKeys( - doc *client.Document, - appendDocID bool, -) ([]keys.IndexDataStoreKey, error) { - getNextOldKey, err := index.newIndexKeyGenerator(doc, appendDocID) - if err != nil { - return nil, err - } - keys := make([]keys.IndexDataStoreKey, 0) - for { - key, ok := getNextOldKey() - if !ok { - break - } - keys = append(keys, key) - } - return keys, nil -} - -func (index *collectionArrayBaseIndex) deleteRetiredKeysAndReturnNew( - ctx context.Context, - txn datastore.Txn, - oldDoc *client.Document, - newDoc *client.Document, - appendDocID bool, -) ([]keys.IndexDataStoreKey, error) { - prevKeys, err := index.getAllKeys(oldDoc, appendDocID) - if err != nil { - return nil, err - } - currentKeys, err := index.getAllKeys(newDoc, appendDocID) - if err != nil { - return nil, err - } - - for _, prevKey := range prevKeys { - keyEqual := func(key keys.IndexDataStoreKey) bool { return prevKey.Equal(key) } - rem, removedVal := slice.RemoveFirstIf(currentKeys, keyEqual) - // If a previous keys is not among the current keys, it should be retired - if !removedVal.HasValue() { - err = index.deleteIndexKey(ctx, txn, prevKey) - if err != nil { - return nil, err - } - } - currentKeys = rem - } - - return currentKeys, nil -} - -type collectionArrayIndex struct { - collectionArrayBaseIndex -} - -var _ CollectionIndex = (*collectionArrayIndex)(nil) - -func 
newCollectionArrayIndex(base collectionBaseIndex) *collectionArrayIndex { - return &collectionArrayIndex{collectionArrayBaseIndex: newCollectionArrayBaseIndex(base)} -} - -// Save indexes a document by storing the indexed field value. -func (index *collectionArrayIndex) Save( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - getNextKey, err := index.newIndexKeyGenerator(doc, true) - if err != nil { - return err - } - - for { - key, hasKey := getNextKey() - if !hasKey { - break - } - err = txn.Datastore().Put(ctx, key.ToDS(), []byte{}) - if err != nil { - return NewErrFailedToStoreIndexedField(key.ToString(), err) - } - } - return nil -} - -func (index *collectionArrayIndex) Update( - ctx context.Context, - txn datastore.Txn, - oldDoc *client.Document, - newDoc *client.Document, -) error { - newKeys, err := index.deleteRetiredKeysAndReturnNew(ctx, txn, oldDoc, newDoc, true) - if err != nil { - return err - } - - for _, key := range newKeys { - err = txn.Datastore().Put(ctx, key.ToDS(), []byte{}) - if err != nil { - return NewErrFailedToStoreIndexedField(key.ToString(), err) - } - } - - return nil -} - -func (index *collectionArrayIndex) Delete( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - getNextKey, err := index.newIndexKeyGenerator(doc, true) - if err != nil { - return err - } - - for { - key, ok := getNextKey() - if !ok { - break - } - err = index.deleteIndexKey(ctx, txn, key) - if err != nil { - return err - } - } - return nil -} - -type collectionArrayUniqueIndex struct { - collectionArrayBaseIndex -} - -var _ CollectionIndex = (*collectionArrayUniqueIndex)(nil) - -func newCollectionArrayUniqueIndex(base collectionBaseIndex) *collectionArrayUniqueIndex { - return &collectionArrayUniqueIndex{collectionArrayBaseIndex: newCollectionArrayBaseIndex(base)} -} - -func (index *collectionArrayUniqueIndex) Save( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - getNextKey, 
err := index.newIndexKeyGenerator(doc, false) - if err != nil { - return err - } - - for { - key, ok := getNextKey() - if !ok { - break - } - err := addNewUniqueKey(ctx, txn, doc, key, index.fieldsDescs) - if err != nil { - return err - } - } - return nil -} - -func (index *collectionArrayUniqueIndex) Update( - ctx context.Context, - txn datastore.Txn, - oldDoc *client.Document, - newDoc *client.Document, -) error { - newKeys, err := index.deleteRetiredKeysAndReturnNew(ctx, txn, oldDoc, newDoc, false) - if err != nil { - return err - } - - for _, key := range newKeys { - err := addNewUniqueKey(ctx, txn, newDoc, key, index.fieldsDescs) - if err != nil { - return err - } - } - - return nil -} - -func (index *collectionArrayUniqueIndex) Delete( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - getNextKey, err := index.newIndexKeyGenerator(doc, false) - if err != nil { - return err - } - - for { - key, ok := getNextKey() - if !ok { - break - } - err = index.deleteIndexKey(ctx, txn, key) - if err != nil { - return err - } - } - return nil -} - -type collectionJSONBaseIndex struct { - collectionBaseIndex - jsonFieldsIndexes []int -} - -func newCollectionJSONBaseIndex(base collectionBaseIndex) collectionJSONBaseIndex { - ind := collectionJSONBaseIndex{collectionBaseIndex: base} - for i := range base.fieldsDescs { - if base.fieldsDescs[i].Kind == client.FieldKind_NILLABLE_JSON { - ind.jsonFieldsIndexes = append(ind.jsonFieldsIndexes, i) - } - } - if len(ind.jsonFieldsIndexes) == 0 { - return collectionJSONBaseIndex{} - } - return ind -} - -func (index *collectionJSONBaseIndex) traverseJSONNodes( - doc *client.Document, - appendDocID bool, - f func(keys.IndexDataStoreKey) error, -) error { - key, err := index.getDocumentsIndexKey(doc, appendDocID) - if err != nil { - return err - } - - for _, jsonFieldIndex := range index.jsonFieldsIndexes { - json, _ := key.Fields[jsonFieldIndex].Value.JSON() - - err = client.TraverseJSON(json, func(value 
client.JSON) error { - val, err := client.NewNormalValue(value) - if err != nil { - return err - } - - leafKey := key - copy(leafKey.Fields, key.Fields) - leafKey.Fields[jsonFieldIndex].Value = val - - return f(leafKey) - }, client.TraverseJSONOnlyLeaves(), client.TraverseJSONVisitArrayElements()) - - if err != nil { - return err - } - } - - return nil -} - -type collectionJSONIndex struct { - collectionJSONBaseIndex -} - -var _ CollectionIndex = (*collectionJSONIndex)(nil) - -func newCollectionJSONIndex(base collectionBaseIndex) *collectionJSONIndex { - return &collectionJSONIndex{collectionJSONBaseIndex: newCollectionJSONBaseIndex(base)} -} - -// Save indexes a document by storing the indexed field value. -func (index *collectionJSONIndex) Save( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - return index.traverseJSONNodes(doc, true, func(key keys.IndexDataStoreKey) error { - err := txn.Datastore().Put(ctx, key.ToDS(), []byte{}) - if err != nil { - return NewErrFailedToStoreIndexedField(key.ToString(), err) - } - - return nil - }) -} - -func (index *collectionJSONIndex) Update( - ctx context.Context, - txn datastore.Txn, - oldDoc *client.Document, - newDoc *client.Document, -) error { - err := index.Delete(ctx, txn, oldDoc) - if err != nil { - return err - } - return index.Save(ctx, txn, newDoc) -} - -func (index *collectionJSONIndex) Delete( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - return index.traverseJSONNodes(doc, true, func(key keys.IndexDataStoreKey) error { - return index.deleteIndexKey(ctx, txn, key) - }) -} - -type collectionJSONUniqueIndex struct { - collectionJSONBaseIndex -} - -var _ CollectionIndex = (*collectionJSONUniqueIndex)(nil) - -func newCollectionJSONUniqueIndex(base collectionBaseIndex) *collectionJSONUniqueIndex { - return &collectionJSONUniqueIndex{collectionJSONBaseIndex: newCollectionJSONBaseIndex(base)} -} - -// Save indexes a document by storing the indexed field 
value. -func (index *collectionJSONUniqueIndex) Save( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - return index.traverseJSONNodes(doc, false, func(key keys.IndexDataStoreKey) error { - return addNewUniqueKey(ctx, txn, doc, key, index.fieldsDescs) - }) -} - -func (index *collectionJSONUniqueIndex) Update( - ctx context.Context, - txn datastore.Txn, - oldDoc *client.Document, - newDoc *client.Document, -) error { - err := index.Delete(ctx, txn, oldDoc) - if err != nil { - return err - } - return index.Save(ctx, txn, newDoc) -} - -func (index *collectionJSONUniqueIndex) Delete( - ctx context.Context, - txn datastore.Txn, - doc *client.Document, -) error { - return index.traverseJSONNodes(doc, false, func(key keys.IndexDataStoreKey) error { - return index.deleteIndexKey(ctx, txn, key) - }) -} diff --git a/tests/integration/index/json_array_test.go b/tests/integration/index/json_array_test.go index f4ce72e564..077ab4768d 100644 --- a/tests/integration/index/json_array_test.go +++ b/tests/integration/index/json_array_test.go @@ -99,7 +99,6 @@ func TestJSONArrayIndex_WithDifferentElementValuesAndTypes_ShouldFetchCorrectlyU Request: req, Results: map[string]any{ "User": []map[string]any{ - {"name": "Andy"}, {"name": "Shahzad"}, }, }, @@ -114,7 +113,7 @@ func TestJSONArrayIndex_WithDifferentElementValuesAndTypes_ShouldFetchCorrectlyU testUtils.ExecuteTestCase(t, test) } -func TestJSONArrayIndex_WithNestedArrays_ShouldTreatThemAsFlatten(t *testing.T) { +func TestJSONArrayIndex_WithNestedArrays_ShouldNotConsiderThem(t *testing.T) { req := `query { User(filter: {custom: {numbers: {_any: {_eq: 4}}}}) { name @@ -164,10 +163,7 @@ func TestJSONArrayIndex_WithNestedArrays_ShouldTreatThemAsFlatten(t *testing.T) testUtils.Request{ Request: req, Results: map[string]any{ - "User": []map[string]any{ - {"name": "Fred"}, - {"name": "John"}, - }, + "User": []map[string]any{}, }, }, testUtils.Request{ @@ -227,6 +223,8 @@ func 
TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrec }, }, }, + // TODO: This document should be part of the query result, but it needs additional work + // with json encoding https://github.com/sourcenetwork/defradb/issues/3329 testUtils.CreateDoc{ DocMap: map[string]any{ "name": "Andy", @@ -240,6 +238,7 @@ func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrec Results: map[string]any{ "User": []map[string]any{ {"name": "Islam"}, + {"name": "Fred"}, {"name": "John"}, }, }, @@ -301,6 +300,14 @@ func TestJSONArrayIndex_WithAllFilterOnDifferentElementValues_ShouldFetchCorrect }, }, }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + "numbers": []any{4, 4, 4}, + }, + }, + }, testUtils.CreateDoc{ DocMap: map[string]any{ "name": "Andy", @@ -313,14 +320,13 @@ func TestJSONArrayIndex_WithAllFilterOnDifferentElementValues_ShouldFetchCorrect Request: req, Results: map[string]any{ "User": []map[string]any{ - {"name": "Islam"}, - {"name": "Fred"}, + {"name": "Bruno"}, }, }, }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithIndexFetches(4), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), }, }, } diff --git a/tests/integration/index/json_composite_test.go b/tests/integration/index/json_composite_test.go index 27cc99a4e8..1985d43f93 100644 --- a/tests/integration/index/json_composite_test.go +++ b/tests/integration/index/json_composite_test.go @@ -28,10 +28,10 @@ func TestJSONArrayCompositeIndex_JSONWithScalar_ShouldFetchUsingIndex(t *testing { name: "Unique combination. Non-unique custom.val", req: `query { - User(filter: {_and: [ - {custom: {val: {_eq: 3}}}, - {age: {_eq: 25}} - ]}) { + User(filter: { + custom: {val: {_eq: 3}}, + age: {_eq: 25} + }) { name } }`, @@ -45,10 +45,10 @@ func TestJSONArrayCompositeIndex_JSONWithScalar_ShouldFetchUsingIndex(t *testing { name: "Unique combination. 
Non-unique age", req: `query { - User(filter: {_and: [ - {custom: {val: {_eq: 3}}}, - {age: {_eq: 30}} - ]}) { + User(filter: { + custom: {val: {_eq: 3}}, + age: {_eq: 30} + }) { name } }`, @@ -68,8 +68,8 @@ func TestJSONArrayCompositeIndex_JSONWithScalar_ShouldFetchUsingIndex(t *testing }`, result: map[string]any{ "User": []map[string]any{ - {"name": "John"}, {"name": "Islam"}, + {"name": "John"}, }, }, indexFetches: 2, @@ -77,17 +77,17 @@ func TestJSONArrayCompositeIndex_JSONWithScalar_ShouldFetchUsingIndex(t *testing { name: "Non-unique combination", req: `query { - User(filter: {_and: [ - {custom: {val: {_eq: 5}}}, - {age: {_eq: 35}} - ]}) { + User(filter: { + custom: {val: {_eq: 5}}, + age: {_eq: 35}, + }) { name } }`, result: map[string]any{ "User": []map[string]any{ {"name": "Addo"}, - {"name": "Kennan"}, + {"name": "Keenan"}, }, }, indexFetches: 2, @@ -95,7 +95,7 @@ func TestJSONArrayCompositeIndex_JSONWithScalar_ShouldFetchUsingIndex(t *testing { name: "Match second part of the composite index", req: `query { - User(filter: { {age: {_eq: 40}}) { + User(filter: {age: {_eq: 40}}) { name } }`, @@ -220,10 +220,10 @@ func TestJSONArrayCompositeIndex_ScalarWithJSON_ShouldFetchUsingIndex(t *testing { name: "Unique combination. Non-unique custom.val", req: `query { - User(filter: {_and: [ - {age: {_eq: 25}}, - {custom: {val: {_eq: 3}}} - ]}) { + User(filter: { + age: {_eq: 25}, + custom: {val: {_eq: 3}} + }) { name } }`, @@ -237,10 +237,10 @@ func TestJSONArrayCompositeIndex_ScalarWithJSON_ShouldFetchUsingIndex(t *testing { name: "Unique combination. 
Non-unique age", req: `query { - User(filter: {_and: [ - {age: {_eq: 30}}, - {custom: {val: {_eq: 3}}} - ]}) { + User(filter: { + age: {_eq: 30}, + custom: {val: {_eq: 3}} + }) { name } }`, @@ -260,8 +260,8 @@ func TestJSONArrayCompositeIndex_ScalarWithJSON_ShouldFetchUsingIndex(t *testing }`, result: map[string]any{ "User": []map[string]any{ - {"name": "Shahzad"}, {"name": "Islam"}, + {"name": "Shahzad"}, }, }, indexFetches: 2, @@ -269,10 +269,10 @@ func TestJSONArrayCompositeIndex_ScalarWithJSON_ShouldFetchUsingIndex(t *testing { name: "Non-unique combination", req: `query { - User(filter: {_and: [ - {age: {_eq: 35}}, - {custom: {val: {_eq: 5}}} - ]}) { + User(filter: { + age: {_eq: 35}, + custom: {val: {_eq: 5}} + }) { name } }`, @@ -282,7 +282,7 @@ func TestJSONArrayCompositeIndex_ScalarWithJSON_ShouldFetchUsingIndex(t *testing {"name": "Keenan"}, }, }, - indexFetches: 1, + indexFetches: 2, }, { name: "Match second part of the composite index", @@ -412,10 +412,10 @@ func TestJSONArrayCompositeIndex_JSONArrayWithScalar_ShouldFetchUsingIndex(t *te { name: "Unique combination. Non-unique custom.numbers element", req: `query { - User(filter: {_and: [ - {custom: {numbers: {_any: {_eq: 3}}}}, - {age: {_eq: 25}} - ]}) { + User(filter: { + custom: {numbers: {_any: {_eq: 3}}}, + age: {_eq: 25} + }) { name } }`, @@ -429,10 +429,10 @@ func TestJSONArrayCompositeIndex_JSONArrayWithScalar_ShouldFetchUsingIndex(t *te { name: "Unique combination. 
Non-unique age", req: `query { - User(filter: {_and: [ - {custom: {numbers: {_any: {_eq: 3}}}}, - {age: {_eq: 30}} - ]}) { + User(filter: { + custom: {numbers: {_any: {_eq: 3}}}, + age: {_eq: 30} + }) { name } }`, @@ -452,8 +452,8 @@ func TestJSONArrayCompositeIndex_JSONArrayWithScalar_ShouldFetchUsingIndex(t *te }`, result: map[string]any{ "User": []map[string]any{ - {"name": "John"}, {"name": "Islam"}, + {"name": "John"}, }, }, indexFetches: 2, @@ -461,17 +461,17 @@ func TestJSONArrayCompositeIndex_JSONArrayWithScalar_ShouldFetchUsingIndex(t *te { name: "Non-unique combination", req: `query { - User(filter: {_and: [ - {custom: {numbers: {_any: {_eq: 5}}}}, - {age: {_eq: 35}} - ]}) { + User(filter: { + custom: {numbers: {_any: {_eq: 5}}}, + age: {_eq: 35} + }) { name } }`, result: map[string]any{ "User": []map[string]any{ - {"name": "Addo"}, {"name": "Keenan"}, + {"name": "Addo"}, }, }, indexFetches: 2, @@ -498,7 +498,7 @@ func TestJSONArrayCompositeIndex_JSONArrayWithScalar_ShouldFetchUsingIndex(t *te Actions: []any{ testUtils.SchemaUpdate{ Schema: ` - type User @index(includes: [{field: "custom.numbers"}, {field: "age"}]) { + type User @index(includes: [{field: "custom"}, {field: "age"}]) { name: String custom: JSON age: Int @@ -604,16 +604,16 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t { name: "Unique combination. Non-unique custom.numbers element", req: `query { - User(filter: {_and: [ - {custom: {numbers: {_any: {_eq: 3}}}}, - {tags: {_any: {_eq: "friend"}}} - ]}) { + User(filter: { + custom: {numbers: {_any: {_eq: 3}}}, + tags: {_any: {_eq: "unique"}} + }) { name } }`, result: map[string]any{ "User": []map[string]any{ - {"name": "Islam"}, + {"name": "John"}, }, }, indexFetches: 1, @@ -621,16 +621,16 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t { name: "Unique combination. 
Non-unique tags", req: `query { - User(filter: {_and: [ - {custom: {numbers: {_any: {_eq: 5}}}}, - {tags: {_any: {_eq: "mentor"}}} - ]}) { + User(filter: { + custom: {numbers: {_any: {_eq: 15}}}, + tags: {_any: {_eq: "mentor"}} + }) { name } }`, result: map[string]any{ "User": []map[string]any{ - {"name": "John"}, + {"name": "Islam"}, }, }, indexFetches: 1, @@ -638,14 +638,14 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t { name: "Match first part of the composite index", req: `query { - User(filter: {custom: {numbers: {_any: {_eq: 3}}}}) { + User(filter: {custom: {numbers: {_any: {_eq: 5}}}}) { name } }`, result: map[string]any{ "User": []map[string]any{ - {"name": "John"}, - {"name": "Islam"}, + {"name": "Addo"}, + {"name": "Keenan"}, }, }, indexFetches: 2, @@ -653,10 +653,10 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t { name: "Non-unique combination", req: `query { - User(filter: {_and: [ - {custom: {numbers: {_any: {_eq: 5}}}}, - {tags: {_any: {_eq: "family"}}} - ]}) { + User(filter: { + custom: {numbers: {_any: {_eq: 5}}}, + tags: {_any: {_eq: "family"}} + }) { name } }`, @@ -690,10 +690,9 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t Actions: []any{ testUtils.SchemaUpdate{ Schema: ` - type User @index(includes: [{field: "custom.numbers"}, {field: "tags"}]) { + type User @index(includes: [{field: "custom"}, {field: "tags"}]) { name: String custom: JSON - age: Int tags: [String] }`, }, @@ -703,16 +702,16 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t "custom": map[string]any{ "numbers": []int{3, 4}, }, - "tags": []string{"colleague", "mentor"}, + "tags": []any{"colleague", "mentor", "unique"}, }, }, testUtils.CreateDoc{ DocMap: map[string]any{ "name": "Islam", "custom": map[string]any{ - "numbers": []int{3, 5}, + "numbers": []int{3, 15}, }, - "tags": []string{"friend", "mentor"}, + "tags": []any{"friend", 
"mentor"}, }, }, testUtils.CreateDoc{ @@ -721,7 +720,7 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t "custom": map[string]any{ "numbers": []int{4, 6}, }, - "tags": []string{"colleague"}, + "tags": []any{"colleague"}, }, }, testUtils.CreateDoc{ @@ -730,7 +729,7 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t "custom": map[string]any{ "numbers": []int{5, 7}, }, - "tags": []string{"family"}, + "tags": []any{"family"}, }, }, testUtils.CreateDoc{ @@ -739,7 +738,7 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t "custom": map[string]any{ "numbers": []int{1, 5, 8}, }, - "tags": []string{"family"}, + "tags": []any{"family"}, }, }, testUtils.CreateDoc{ @@ -748,7 +747,7 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t "custom": map[string]any{ "numbers": []int{6, 9}, }, - "tags": []string{"dude"}, + "tags": []any{"dude"}, }, }, testUtils.CreateDoc{ @@ -757,7 +756,7 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t "custom": map[string]any{ "numbers": []int{}, }, - "tags": []string{"friend"}, + "tags": []any{"friend"}, }, }, testUtils.CreateDoc{ @@ -766,7 +765,7 @@ func TestJSONArrayCompositeIndex_JSONArrayWithArrayField_ShouldFetchUsingIndex(t "custom": map[string]any{ "numbers": []int{7, 10}, }, - "tags": []string{"colleague"}, + "tags": []any{"colleague"}, }, }, testUtils.Request{ diff --git a/tests/integration/index/query_with_unique_composite_index_filter_test.go b/tests/integration/index/query_with_unique_composite_index_filter_test.go index 9928cb684b..419795b373 100644 --- a/tests/integration/index/query_with_unique_composite_index_filter_test.go +++ b/tests/integration/index/query_with_unique_composite_index_filter_test.go @@ -1313,7 +1313,7 @@ func TestQueryWithUniqueCompositeIndex_AfterUpdateOnNilFields_ShouldFetch(t *tes }, }, }, - testUtils.Request{ + /*testUtils.Request{ Request: ` query 
{ User(filter: {name: {_eq: null}}) { @@ -1342,7 +1342,7 @@ func TestQueryWithUniqueCompositeIndex_AfterUpdateOnNilFields_ShouldFetch(t *tes {"about": "nil_nil -> bob_nil"}, }, }, - }, + },*/ }, } diff --git a/tests/integration/query/json/with_all_test.go b/tests/integration/query/json/with_all_test.go index 1f99107303..798f0cc047 100644 --- a/tests/integration/query/json/with_all_test.go +++ b/tests/integration/query/json/with_all_test.go @@ -76,10 +76,8 @@ func TestQueryJSON_WithAllFilterWithAllTypes_ShouldFilter(t *testing.T) { }`, Results: map[string]any{ "Users": []map[string]any{ - {"name": "John"}, {"name": "Shahzad"}, - {"name": "Keenan"}, - {"name": "Andy"}, + {"name": "Fred"}, }, }, }, @@ -137,9 +135,7 @@ func TestQueryJSON_WithAllFilterAndNestedArray_ShouldFilter(t *testing.T) { }`, Results: map[string]any{ "Users": []map[string]any{ - {"name": "Islam"}, {"name": "Shahzad"}, - {"name": "Keenan"}, }, }, }, diff --git a/tests/integration/query/json/with_any_test.go b/tests/integration/query/json/with_any_test.go index 3d74c1655a..0c5fdaa01c 100644 --- a/tests/integration/query/json/with_any_test.go +++ b/tests/integration/query/json/with_any_test.go @@ -116,6 +116,12 @@ func TestQueryJSON_WithAnyFilterAndNestedArray_ShouldFilter(t *testing.T) { "custom": 3 }`, }, + testUtils.CreateDoc{ + Doc: `{ + "name": "Bruno", + "custom": [null, 3] + }`, + }, testUtils.CreateDoc{ Doc: `{ "name": "Andy", @@ -136,8 +142,7 @@ func TestQueryJSON_WithAnyFilterAndNestedArray_ShouldFilter(t *testing.T) { }`, Results: map[string]any{ "Users": []map[string]any{ - {"name": "Keenan"}, - {"name": "Fred"}, + {"name": "Bruno"}, }, }, }, diff --git a/tests/integration/query/json/with_none_test.go b/tests/integration/query/json/with_none_test.go index 3a2434e1a3..0aff4baee1 100644 --- a/tests/integration/query/json/with_none_test.go +++ b/tests/integration/query/json/with_none_test.go @@ -71,24 +71,24 @@ func TestQueryJSON_WithNoneFilterAndNestedArray_ShouldFilter(t *testing.T) { 
testUtils.CreateDoc{ Doc: `{ "name": "Shahzad", - "custom": [1, false, "second", {"one": 1}, [1, 2]] + "custom": [1, false, "second", {"one": 3}, [1, 3]] }`, }, testUtils.CreateDoc{ Doc: `{ "name": "Fred", - "custom": [null, false, "second", {"one": 1}, [1, 2]] + "custom": [null, false, "second", 3, {"one": 1}, [1, 2]] }`, }, testUtils.CreateDoc{ Doc: `{ - "name": "Andy", - "custom": [false, "second", {"one": 1}, [1, [2, null]]] + "name": "Islam", + "custom": 3 }`, }, testUtils.CreateDoc{ Doc: `{ - "name": "Islam", + "name": "Bruno", "custom": null }`, }, @@ -100,7 +100,7 @@ func TestQueryJSON_WithNoneFilterAndNestedArray_ShouldFilter(t *testing.T) { }, testUtils.Request{ Request: `query { - Users(filter: {custom: {_none: {_eq: null}}}) { + Users(filter: {custom: {_none: {_eq: 3}}}) { name } }`, @@ -108,6 +108,8 @@ func TestQueryJSON_WithNoneFilterAndNestedArray_ShouldFilter(t *testing.T) { "Users": []map[string]any{ {"name": "Shahzad"}, {"name": "John"}, + {"name": "Islam"}, + {"name": "Bruno"}, }, }, }, From bb67d2fbb78af41d52278ddd28da850254a7b17a Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 16 Dec 2024 16:49:17 +0100 Subject: [PATCH 22/46] Enable json array traversal to only top level elements --- client/json.go | 56 ++++++++++++------- client/json_traverse_test.go | 26 ++++++++- client/secondary_indexes.md | 2 +- internal/db/index.go | 2 +- internal/encoding/json_test.go | 2 +- .../index/json_unique_array_test.go | 12 ---- 6 files changed, 62 insertions(+), 38 deletions(-) diff --git a/client/json.go b/client/json.go index 0f9cbe0e6e..c803e70ec6 100644 --- a/client/json.go +++ b/client/json.go @@ -80,7 +80,7 @@ func TraverseJSON(j JSON, visitor JSONVisitor, opts ...traverseJSONOption) error for _, opt := range opts { opt(&options) } - if shouldVisitPath(options.PathPrefix, nil) { + if shouldVisitPath(options.pathPrefix, nil) { return j.accept(visitor, []string{}, options) } return nil @@ -92,7 +92,7 @@ type traverseJSONOption 
func(*traverseJSONOptions) // Only nodes with paths that start with the prefix will be visited. func TraverseJSONWithPrefix(prefix []string) traverseJSONOption { return func(opts *traverseJSONOptions) { - opts.PathPrefix = prefix + opts.pathPrefix = prefix } } @@ -101,22 +101,24 @@ func TraverseJSONWithPrefix(prefix []string) traverseJSONOption { // be called for objects or arrays and proceed with theirs children. func TraverseJSONOnlyLeaves() traverseJSONOption { return func(opts *traverseJSONOptions) { - opts.OnlyLeaves = true + opts.onlyLeaves = true } } // TraverseJSONVisitArrayElements returns a traverseJSONOption that sets the traversal to visit array elements. // When this option is set, the visitor function will be called for each element of an array. -func TraverseJSONVisitArrayElements() traverseJSONOption { +// If recurseElements is true, the visitor function will be called for each array element of type object or array. +func TraverseJSONVisitArrayElements(recurseElements bool) traverseJSONOption { return func(opts *traverseJSONOptions) { - opts.VisitArrayElements = true + opts.visitArrayElements = true + opts.recurseVisitedArrayElements = recurseElements } } // TraverseJSONWithArrayIndexInPath returns a traverseJSONOption that includes array indices in the path. func TraverseJSONWithArrayIndexInPath() traverseJSONOption { return func(opts *traverseJSONOptions) { - opts.IncludeArrayIndexInPath = true + opts.includeArrayIndexInPath = true } } @@ -127,14 +129,16 @@ type JSONVisitor func(value JSON) error // traverseJSONOptions configures how the JSON tree is traversed. 
type traverseJSONOptions struct { - // OnlyLeaves when true visits only leaf nodes (not objects or arrays) - OnlyLeaves bool - // PathPrefix when set visits only paths that start with this prefix - PathPrefix []string - // VisitArrayElements when true visits array elements - VisitArrayElements bool - // IncludeArrayIndexInPath when true includes array indices in the path - IncludeArrayIndexInPath bool + // onlyLeaves when true visits only leaf nodes (not objects or arrays) + onlyLeaves bool + // pathPrefix when set visits only paths that start with this prefix + pathPrefix []string + // visitArrayElements when true visits array elements + visitArrayElements bool + // recurseVisitedArrayElements when true visits array elements recursively + recurseVisitedArrayElements bool + // includeArrayIndexInPath when true includes array indices in the path + includeArrayIndexInPath bool } type jsonVoid struct{} @@ -217,7 +221,7 @@ func (obj jsonObject) Unwrap() any { func (obj jsonObject) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { obj.path = path - if !opts.OnlyLeaves && len(path) >= len(opts.PathPrefix) { + if !opts.onlyLeaves && len(path) >= len(opts.pathPrefix) { if err := visitor(obj); err != nil { return err } @@ -225,7 +229,7 @@ func (obj jsonObject) accept(visitor JSONVisitor, path []string, opts traverseJS for k, v := range obj.val { newPath := append(path, k) - if !shouldVisitPath(opts.PathPrefix, newPath) { + if !shouldVisitPath(opts.pathPrefix, newPath) { continue } @@ -260,21 +264,24 @@ func (arr jsonArray) Unwrap() any { func (arr jsonArray) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { arr.path = path - if !opts.OnlyLeaves { + if !opts.onlyLeaves { if err := visitor(arr); err != nil { return err } } - if opts.VisitArrayElements { + if opts.visitArrayElements { for i := range arr.val { + if !opts.recurseVisitedArrayElements && isCompositeJSON(arr.val[i]) { + continue + } var newPath []string - if 
opts.IncludeArrayIndexInPath { + if opts.includeArrayIndexInPath { newPath = append(path, strconv.Itoa(i)) } else { newPath = path } - if !shouldVisitPath(opts.PathPrefix, newPath) { + if !shouldVisitPath(opts.pathPrefix, newPath) { continue } @@ -605,3 +612,12 @@ func shouldVisitPath(prefix, path []string) bool { } return true } + +func isCompositeJSON(v JSON) bool { + _, isObject := v.Object() + if isObject { + return true + } + _, isArray := v.Array() + return isArray +} diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index a764a24973..44ca799eef 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -109,7 +109,7 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { { name: "VisitArrayElements", options: []traverseJSONOption{ - TraverseJSONVisitArrayElements(), + TraverseJSONVisitArrayElements(true), }, expected: []traverseNode{ {path: "", value: json}, @@ -131,10 +131,30 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "array", value: newJSONNumber(5, nil)}, }, }, + { + name: "VisitArrayElements without recursion", + options: []traverseJSONOption{ + TraverseJSONVisitArrayElements(false), + }, + expected: []traverseNode{ + {path: "", value: json}, + {path: "string", value: newJSONString("value", nil)}, + {path: "number", value: newJSONNumber(42, nil)}, + {path: "bool", value: newJSONBool(true, nil)}, + {path: "null", value: newJSONNull(nil)}, + {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object/nested", value: newJSONString("inside", nil)}, + {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep/level", value: newJSONNumber(3, nil)}, + {path: "array", value: json.Value().(map[string]JSON)["array"]}, + {path: "array", value: newJSONNumber(1, nil)}, + {path: "array", value: newJSONString("two", nil)}, + }, + }, { name: "VisitArrayElementsWithIndex", options: 
[]traverseJSONOption{ - TraverseJSONVisitArrayElements(), + TraverseJSONVisitArrayElements(true), TraverseJSONWithArrayIndexInPath(), }, expected: []traverseNode{ @@ -161,7 +181,7 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { name: "CombinedOptions", options: []traverseJSONOption{ TraverseJSONOnlyLeaves(), - TraverseJSONVisitArrayElements(), + TraverseJSONVisitArrayElements(true), TraverseJSONWithPrefix([]string{"array"}), TraverseJSONWithArrayIndexInPath(), }, diff --git a/client/secondary_indexes.md b/client/secondary_indexes.md index 4b345d5659..3cbce9702c 100644 --- a/client/secondary_indexes.md +++ b/client/secondary_indexes.md @@ -153,7 +153,7 @@ The system can represent the "iPhone" value as a `JSON` type with its complete p For JSON fields, DefraDB uses inverted indexes with the following key format: ``` -/(/)+/ +/(//)+/ ``` The term "inverted" comes from how these indexes reverse the typical document-to-value relationship. Instead of starting with a document and finding its values, we start with a value and can quickly find all documents containing that value at any path. 
diff --git a/internal/db/index.go b/internal/db/index.go index d1d7ba4261..20e4af7ffe 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -129,7 +129,7 @@ func (g *JSONFieldGenerator) Generate(value client.NormalValue, f func(client.No return err } return f(val) - }, client.TraverseJSONOnlyLeaves(), client.TraverseJSONVisitArrayElements()) // TODO: add option to traverse array elements + }, client.TraverseJSONOnlyLeaves(), client.TraverseJSONVisitArrayElements(false)) } // getFieldGenerator returns appropriate generator for the field type diff --git a/internal/encoding/json_test.go b/internal/encoding/json_test.go index 0fa094a452..2118d39234 100644 --- a/internal/encoding/json_test.go +++ b/internal/encoding/json_test.go @@ -59,7 +59,7 @@ func TestJSONEncodingAndDecoding_ShouldEncodeAndDecodeBack(t *testing.T) { jsons = append(jsons, value) pathMap[p] = jsons return nil - }, client.TraverseJSONOnlyLeaves(), client.TraverseJSONVisitArrayElements()) + }, client.TraverseJSONOnlyLeaves(), client.TraverseJSONVisitArrayElements(true)) assert.NoError(t, err) for path, jsons := range pathMap { diff --git a/tests/integration/index/json_unique_array_test.go b/tests/integration/index/json_unique_array_test.go index 4c43a4d0bf..03d8d1991a 100644 --- a/tests/integration/index/json_unique_array_test.go +++ b/tests/integration/index/json_unique_array_test.go @@ -98,18 +98,6 @@ func TestJSONArrayUniqueIndex_ShouldAllowOnlyUniqueValuesAndUseThemForFetching(t "bae-54e76159-66c6-56be-ad65-7ff83edda058", errors.NewKV("custom", map[string]any{"numbers": 3})).Error(), }, - testUtils.CreateDoc{ - DocMap: map[string]any{ - "name": "Chris", - "custom": map[string]any{ - // existing nested value - "numbers": []any{9, []int{3}}, - }, - }, - ExpectedError: db.NewErrCanNotIndexNonUniqueFields( - "bae-8dba1343-148c-590c-a942-dd6c80f204fb", - errors.NewKV("custom", map[string]any{"numbers": []any{9, []int{3}}})).Error(), - }, testUtils.Request{ Request: req, Results: map[string]any{ 
From 8f24c04e20a786ceb71f3100bd661e31f6e48e36 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 16 Dec 2024 17:18:22 +0100 Subject: [PATCH 23/46] Fix lint --- client/json_traverse_test.go | 53 +++++++++++++++--------- internal/db/fetcher/indexer_iterators.go | 1 - internal/encoding/json_test.go | 3 +- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index 44ca799eef..3f279bdcfe 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -23,6 +23,21 @@ type traverseNode struct { path string } +// Helper functions to safely get values +func getObjectValue(j JSON) map[string]JSON { + if val, ok := j.Value().(map[string]JSON); ok { + return val + } + panic("expected object value") +} + +func getArrayValue(j JSON) []JSON { + if val, ok := j.Value().([]JSON); ok { + return val + } + panic("expected array value") +} + func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { // Create a complex JSON structure for testing json := newJSONObject(map[string]JSON{ @@ -63,11 +78,11 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "number", value: newJSONNumber(42, nil)}, {path: "bool", value: newJSONBool(true, nil)}, {path: "null", value: newJSONNull(nil)}, - {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object", value: getObjectValue(json)["object"]}, {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, {path: "object/deep/level", value: newJSONNumber(3, nil)}, - {path: "array", value: json.Value().(map[string]JSON)["array"]}, + {path: "array", value: getObjectValue(json)["array"]}, }, }, { @@ -90,9 +105,9 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { 
TraverseJSONWithPrefix([]string{"object"}), }, expected: []traverseNode{ - {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object", value: getObjectValue(json)["object"]}, {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, {path: "object/deep/level", value: newJSONNumber(3, nil)}, }, }, @@ -102,7 +117,7 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { TraverseJSONWithPrefix([]string{"object", "deep"}), }, expected: []traverseNode{ - {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, {path: "object/deep/level", value: newJSONNumber(3, nil)}, }, }, @@ -117,16 +132,16 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "number", value: newJSONNumber(42, nil)}, {path: "bool", value: newJSONBool(true, nil)}, {path: "null", value: newJSONNull(nil)}, - {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object", value: getObjectValue(json)["object"]}, {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, {path: "object/deep/level", value: newJSONNumber(3, nil)}, - {path: "array", value: json.Value().(map[string]JSON)["array"]}, + {path: "array", value: getObjectValue(json)["array"]}, {path: "array", value: newJSONNumber(1, nil)}, {path: "array", value: newJSONString("two", nil)}, - {path: "array", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[2]}, + {path: "array", value: 
getArrayValue(getObjectValue(json)["array"])[2]}, {path: "array/key", value: newJSONString("value", nil)}, - {path: "array", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[3]}, + {path: "array", value: getArrayValue(getObjectValue(json)["array"])[3]}, {path: "array", value: newJSONNumber(4, nil)}, {path: "array", value: newJSONNumber(5, nil)}, }, @@ -142,11 +157,11 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "number", value: newJSONNumber(42, nil)}, {path: "bool", value: newJSONBool(true, nil)}, {path: "null", value: newJSONNull(nil)}, - {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object", value: getObjectValue(json)["object"]}, {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, {path: "object/deep/level", value: newJSONNumber(3, nil)}, - {path: "array", value: json.Value().(map[string]JSON)["array"]}, + {path: "array", value: getObjectValue(json)["array"]}, {path: "array", value: newJSONNumber(1, nil)}, {path: "array", value: newJSONString("two", nil)}, }, @@ -163,16 +178,16 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "number", value: newJSONNumber(42, nil)}, {path: "bool", value: newJSONBool(true, nil)}, {path: "null", value: newJSONNull(nil)}, - {path: "object", value: json.Value().(map[string]JSON)["object"]}, + {path: "object", value: getObjectValue(json)["object"]}, {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: json.Value().(map[string]JSON)["object"].Value().(map[string]JSON)["deep"]}, + {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, {path: "object/deep/level", value: newJSONNumber(3, nil)}, - {path: "array", value: json.Value().(map[string]JSON)["array"]}, 
+ {path: "array", value: getObjectValue(json)["array"]}, {path: "array/0", value: newJSONNumber(1, nil)}, {path: "array/1", value: newJSONString("two", nil)}, - {path: "array/2", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[2]}, + {path: "array/2", value: getArrayValue(getObjectValue(json)["array"])[2]}, {path: "array/2/key", value: newJSONString("value", nil)}, - {path: "array/3", value: json.Value().(map[string]JSON)["array"].Value().([]JSON)[3]}, + {path: "array/3", value: getArrayValue(getObjectValue(json)["array"])[3]}, {path: "array/3/0", value: newJSONNumber(4, nil)}, {path: "array/3/1", value: newJSONNumber(5, nil)}, }, diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index fef2bd9a89..0f7b89a072 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -521,7 +521,6 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro jsonPath := []string{} if fieldDef.Kind == client.FieldKind_NILLABLE_JSON { - jsonPathLoop: for { for key, filterVal := range condMap { diff --git a/internal/encoding/json_test.go b/internal/encoding/json_test.go index 2118d39234..de059c0a4a 100644 --- a/internal/encoding/json_test.go +++ b/internal/encoding/json_test.go @@ -15,9 +15,10 @@ import ( "strings" "testing" - "github.com/sourcenetwork/defradb/client" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/sourcenetwork/defradb/client" ) func TestJSONEncodingAndDecoding_ShouldEncodeAndDecodeBack(t *testing.T) { From 279bb69775c8c83f9cf1d31bf0a0bf7c2215cde5 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 16 Dec 2024 20:59:56 +0100 Subject: [PATCH 24/46] Update docs --- client/README.md | 2 +- client/secondary_indexes.md | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/client/README.md b/client/README.md index e89577c929..81234ac8fd 100644 --- 
a/client/README.md +++ b/client/README.md @@ -2,4 +2,4 @@ The `client` package is the primary access point for interacting with an embedde [Data definition overview](./data_definition.md) - How the shape of documents are defined and grouped. -[Secondary indexes](./secondary_indexes.md) - How secondary indexes work in DefraDB and how to use them. \ No newline at end of file +[Secondary indexes](./secondary_indexes.md) - Using secondary indexes in DefraDB. \ No newline at end of file diff --git a/client/secondary_indexes.md b/client/secondary_indexes.md index 3cbce9702c..892fb5e34c 100644 --- a/client/secondary_indexes.md +++ b/client/secondary_indexes.md @@ -1,4 +1,4 @@ -# Secondary Indexing in DefraDB +# Secondary indexing in DefraDB DefraDB provides a powerful and flexible secondary indexing system that enables efficient document lookups and queries. This document explains the architecture, implementation details, and usage patterns of the indexing system. @@ -6,9 +6,9 @@ DefraDB provides a powerful and flexible secondary indexing system that enables The indexing system consists of two main components. The first is index storage, which handles storing and maintaining index information. The second is index-based document fetching, which manages retrieving documents using these indexes. Together, these components provide a robust foundation for efficient data access patterns. -## Index Storage +## Index storage -### Core Types +### Core types The indexing system is built around several key types that define how indexes are structured and managed. At its heart is the IndexedFieldDescription, which describes a single field being indexed, including its name and whether it should be ordered in descending order. These field descriptions are combined into an IndexDescription, which provides a complete picture of an index including its name, ID, fields, and whether it enforces uniqueness. 
@@ -38,7 +38,7 @@ type CollectionIndex interface { } ``` -### Key Structure +### Key structure Index keys in DefraDB follow a carefully designed format that enables efficient lookups and range scans. For regular indexes, the key format is: ``` @@ -49,15 +49,15 @@ Unique indexes follow a similar pattern but store the document ID as the value i /(/)+ -> ``` -### Value Encoding +### Value encoding While DefraDB primarily uses CBOR for encoding, the indexing system employs a custom encoding/decoding solution inspired by CockroachDB. This decision was made because CBOR doesn't guarantee ordering preservation, which is crucial for index functionality. Our custom encoding ensures that numeric values maintain their natural ordering, strings are properly collated, and complex types like arrays and objects have deterministic ordering. -### Index Maintenance +### Index maintenance Index maintenance happens through three primary operations: document creation, updates, and deletion. When a new document is saved, the system indexes all configured fields, generating entries according to the key format and validating any unique constraints. During updates, the system carefully manages both the removal of old index entries and the creation of new ones, ensuring consistency through atomic transactions. For deletions, all associated index entries are cleaned up along with related metadata. -## Index-Based Document Fetching +## Index-based document fetching The IndexFetcher is the cornerstone of document retrieval, orchestrating the process of fetching documents using indexes. It operates in two phases: first retrieving indexed fields (including document IDs), then using a standard fetcher to get any additional requested fields. @@ -67,13 +67,13 @@ The performance characteristics of these operations vary. Direct match operation Note: the index fetcher can not benefit at the moment from ordered indexes, as the underlying storage does not support such range queries yet. 
-## Performance Considerations +## Performance considerations When working with indexes, it's important to understand their impact on system performance. Each index increases write amplification as every document modification must update all relevant indexes. However, this cost is often outweighed by the dramatic improvement in read performance for indexed queries. Index selection should be driven by your query patterns and data distribution. Indexing fields that are frequently used in query filters can significantly improve performance, but indexing rarely-queried fields only adds overhead. For unique indexes, the additional validation requirements make this trade-off even more important to consider. -## Indexing Related Objects +## Indexing related objects DefraDB's indexing system provides powerful capabilities for handling relationships between documents. Let's explore how this works with a practical example. @@ -108,7 +108,7 @@ query { For requests on not indexed relations, the normal approach is from top to bottom, meaning that first all `User` documents are fetched and then for each `User` document the corresponding `Address` document is fetched. This can be very inefficient for large collections. With indexing, we use so called inverted fetching, meaning that we first fetch the `Address` documents with the matching `city` value and then for each `Address` document the corresponding `User` document is fetched. This is much more efficient as we can use the index to directly fetch the `User` document. -### Relationship Cardinality Through Indexes +### Relationship cardinality using indexes The indexing system also plays a crucial role in enforcing relationship cardinality. By marking an index as unique, you can enforce one-to-one relationships between documents. 
Here's how you would modify the schema to ensure each User has exactly one Address: @@ -128,11 +128,11 @@ type Address { The unique index constraint ensures that no two Users can reference the same Address document. Without the unique constraint, the relationship would be one-to-many by default, allowing multiple Users to reference the same Address. -## JSON Field Indexing +## JSON field indexing DefraDB implements a specialized indexing system for JSON fields that differs from how other field types are handled. While a document in DefraDB can contain various field types (Int, String, Bool, JSON, etc.), JSON fields require special treatment due to their hierarchical nature. -#### The JSON Interface +#### JSON interface The indexing system relies on the `JSON` interface defined in `client/json.go`. This interface is crucial for handling JSON fields as it enables traversal of all leaf nodes within a JSON document. A `JSON` value in DefraDB can represent either an entire JSON document or a single node within it. Each `JSON` value maintains its path information, which is essential for indexing. @@ -149,7 +149,7 @@ For example, given this JSON document: The system can represent the "iPhone" value as a `JSON` type with its complete path `[]string{"user", "device", "model"}`. This path-aware representation is fundamental to how the indexing system works. -#### Inverted Indexes for JSON +#### Inverted indexes for JSON For JSON fields, DefraDB uses inverted indexes with the following key format: ``` @@ -160,13 +160,13 @@ The term "inverted" comes from how these indexes reverse the typical document-to This approach differs from traditional secondary indexes in DefraDB. While regular fields map to single index entries, a JSON field generates multiple index entries - one for each leaf node in its structure. The system traverses the entire JSON structure during indexing, creating entries that combine the path and value information. 
-#### Value Normalization and JSON +#### Value normalization and JSON The indexing system integrates with DefraDB's value normalization through `client.NormalValue`. While the encoding/decoding package handles scalar types directly, JSON values maintain additional path information. Each JSON node is encoded with both its normalized value and its path information, allowing the system to reconstruct the exact location of any value within the JSON structure. Similar to how other field types are normalized (e.g., integers to int64), JSON leaf values are normalized based on their type before being included in the index. This ensures consistent ordering and comparison operations. -#### Integration with Index Infrastructure +#### Integration with index infrastructure When a document with a JSON field is indexed, the system: 1. Uses the JSON interface to traverse the document structure From 343f5fcf3bc0857acb9e42806b1d2f304ee257fd Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 16 Dec 2024 21:10:50 +0100 Subject: [PATCH 25/46] Fix test expectations --- tests/integration/index/json_array_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/index/json_array_test.go b/tests/integration/index/json_array_test.go index 077ab4768d..f7bca7d54a 100644 --- a/tests/integration/index/json_array_test.go +++ b/tests/integration/index/json_array_test.go @@ -168,7 +168,7 @@ func TestJSONArrayIndex_WithNestedArrays_ShouldNotConsiderThem(t *testing.T) { }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithIndexFetches(2), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(0), }, }, } @@ -245,7 +245,7 @@ func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrec }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithIndexFetches(10), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(9), }, }, } From 
a56d3bf44f26c70a07423d42ff4dabb198a405e4 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 16 Dec 2024 22:30:50 +0100 Subject: [PATCH 26/46] Add change detector note --- docs/data_format_changes/i3330-sec-index-on-json.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/data_format_changes/i3330-sec-index-on-json.md diff --git a/docs/data_format_changes/i3330-sec-index-on-json.md b/docs/data_format_changes/i3330-sec-index-on-json.md new file mode 100644 index 0000000000..c01fbaff26 --- /dev/null +++ b/docs/data_format_changes/i3330-sec-index-on-json.md @@ -0,0 +1,3 @@ +# Sec. Index for JSON + +Some test bodies were changed. This change is just to make change detector stop complaining. From b31c6c071a5ced6dce1a5ae58d65cf4cfe7fccdb Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Tue, 17 Dec 2024 09:59:28 +0100 Subject: [PATCH 27/46] Polish --- docs/data_format_changes/i3330-sec-index-on-json.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data_format_changes/i3330-sec-index-on-json.md b/docs/data_format_changes/i3330-sec-index-on-json.md index c01fbaff26..2d70d6600a 100644 --- a/docs/data_format_changes/i3330-sec-index-on-json.md +++ b/docs/data_format_changes/i3330-sec-index-on-json.md @@ -1,3 +1,3 @@ -# Sec. Index for JSON +# Secondary index for JSON Some test bodies were changed. This change is just to make change detector stop complaining. 
From 69a429baa55b27cc7a42b28c5779e0174f73c886 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Fri, 20 Dec 2024 11:18:57 +0100 Subject: [PATCH 28/46] Update documentation --- client/secondary_indexes.md | 41 +++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/client/secondary_indexes.md b/client/secondary_indexes.md index 892fb5e34c..b7964a6890 100644 --- a/client/secondary_indexes.md +++ b/client/secondary_indexes.md @@ -4,7 +4,12 @@ DefraDB provides a powerful and flexible secondary indexing system that enables ## Overview -The indexing system consists of two main components. The first is index storage, which handles storing and maintaining index information. The second is index-based document fetching, which manages retrieving documents using these indexes. Together, these components provide a robust foundation for efficient data access patterns. +The indexing system consists of two main components: + +- Index storage (handles storing and maintaining index information). +- Index-based document fetching (manages retrieving documents using these indexes). + +Together, these components provide a robust foundation for efficient data access patterns. ## Index storage @@ -51,7 +56,12 @@ Unique indexes follow a similar pattern but store the document ID as the value i ### Value encoding -While DefraDB primarily uses CBOR for encoding, the indexing system employs a custom encoding/decoding solution inspired by CockroachDB. This decision was made because CBOR doesn't guarantee ordering preservation, which is crucial for index functionality. Our custom encoding ensures that numeric values maintain their natural ordering, strings are properly collated, and complex types like arrays and objects have deterministic ordering. +DefraDB primarily uses CBOR for encoding. 
However, the indexing system uses a custom encoding/decoding solution, because CBOR doesn't guarantee ordering preservation, which is crucial for index functionality. + +Our custom encoding ensures that: +- numeric values maintain their natural ordering, +- strings are properly collated, and +- complex types like arrays and objects have deterministic ordering. ### Index maintenance @@ -59,11 +69,17 @@ Index maintenance happens through three primary operations: document creation, u ## Index-based document fetching -The IndexFetcher is the cornerstone of document retrieval, orchestrating the process of fetching documents using indexes. It operates in two phases: first retrieving indexed fields (including document IDs), then using a standard fetcher to get any additional requested fields. +The IndexFetcher is central to document retrieval, managing the process through two phases. First, it retrieves indexed fields, such as document IDs. Then, it uses a standard fetcher to obtain any additional requested fields. -For each query, the system creates specialized result iterators based on the document filter conditions. These iterators are smart about how they handle different types of operations. For simple equality comparisons (`_eq`) or membership tests (`_in`), the iterator can often directly build the exact keys needed. For range operations (`_gt`, `_le`, ...) or pattern matching (`_like`, ...), the system employs dedicated value matchers to validate the results. +For each query, the system generates specialized result iterators based on the document filter conditions. These iterators optimize how operations are handled: +- For simple equality (_eq) or membership tests (_in), the iterator often constructs the exact keys directly. +- For range operations (_gt, _le, etc.) or pattern matching (_like, etc.), dedicated value matchers are used to validate the results. -The performance characteristics of these operations vary. 
Direct match operations are typically the fastest as they can precisely target the needed keys. Range and pattern operations require more work as they must scan a range of keys and validate each result. The system is designed to minimize both key-value operations during mutations and memory usage during result streaming. +The performance of these operations varies: +- Direct match operations are typically the fastest, as they precisely target the required keys. +- Range and pattern operations involve more work, scanning a range of keys and validating each result. + +The system is optimized to reduce key-value operations during mutations and minimize memory usage during result streaming. Note: the index fetcher can not benefit at the moment from ordered indexes, as the underlying storage does not support such range queries yet. @@ -75,7 +91,7 @@ Index selection should be driven by your query patterns and data distribution. I ## Indexing related objects -DefraDB's indexing system provides powerful capabilities for handling relationships between documents. Let's explore how this works with a practical example. +DefraDB's indexing system is capable of handling relationships between documents. See example below. Consider a schema defining a relationship between Users and Addresses: @@ -105,12 +121,19 @@ query { } ``` -For requests on not indexed relations, the normal approach is from top to bottom, meaning that first all `User` documents are fetched and then for each `User` document the corresponding `Address` document is fetched. This can be very inefficient for large collections. -With indexing, we use so called inverted fetching, meaning that we first fetch the `Address` documents with the matching `city` value and then for each `Address` document the corresponding `User` document is fetched. This is much more efficient as we can use the index to directly fetch the `User` document. 
+For queries on non-indexed relationships, the standard approach is a top-to-bottom strategy: +1. All User documents are fetched. +1. For each User document, the corresponding Address document is retrieved. +This approach can be highly inefficient for large collections. + +With indexing, an inverted fetching strategy is used instead: +1. The Address documents with the matching city value are fetched first. +1. For each Address document, the corresponding User document is retrieved. +This method is significantly more efficient, as the index allows direct retrieval of the relevant User documents. ### Relationship cardinality using indexes -The indexing system also plays a crucial role in enforcing relationship cardinality. By marking an index as unique, you can enforce one-to-one relationships between documents. Here's how you would modify the schema to ensure each User has exactly one Address: +The indexing system also plays a crucial role in enforcing relationship cardinality. By marking an index as unique, you can enforce one-to-one relationships between documents. The code below shows how to modify the schema to ensure each user has exactly one address: ```graphql type User { From 74605dbd6213b1df55d65c3edf55c1cfc9fcb166 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 2 Jan 2025 11:13:18 +0100 Subject: [PATCH 29/46] Update documentation --- client/json.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/client/json.go b/client/json.go index c803e70ec6..5230dd4ac9 100644 --- a/client/json.go +++ b/client/json.go @@ -122,8 +122,7 @@ func TraverseJSONWithArrayIndexInPath() traverseJSONOption { } } -// JSONVisitor is a function that processes a JSON value at a given path. -// path represents the location of the value in the JSON tree. +// JSONVisitor is a function that processes a JSON value at every node of the JSON tree. // Returns an error if the processing fails. 
type JSONVisitor func(value JSON) error From efef1b1dd5ed171e619dd7cf67af5a508e18062d Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 2 Jan 2025 11:13:46 +0100 Subject: [PATCH 30/46] Rename --- internal/encoding/bool.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/encoding/bool.go b/internal/encoding/bool.go index 989b9081c9..5c9d3369de 100644 --- a/internal/encoding/bool.go +++ b/internal/encoding/bool.go @@ -38,6 +38,6 @@ func DecodeBoolAscending(b []byte) ([]byte, bool, error) { // DecodeBoolDescending decodes a boolean value encoded in descending order. func DecodeBoolDescending(b []byte) ([]byte, bool, error) { - leftover, v, err := DecodeBoolAscending(b) - return leftover, !v, err + b, v, err := DecodeBoolAscending(b) + return b, !v, err } From 85b5e50933fde46120d84d9099cbe481e844124c Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 2 Jan 2025 11:14:00 +0100 Subject: [PATCH 31/46] Update documentation --- internal/encoding/errors.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/encoding/errors.go b/internal/encoding/errors.go index 63c13e3a14..8ddcda01a2 100644 --- a/internal/encoding/errors.go +++ b/internal/encoding/errors.go @@ -94,7 +94,8 @@ func NewErrVarintOverflow(b []byte, value uint64) error { return errors.New(errVarintOverflow, errors.NewKV("Buffer", b), errors.NewKV("Value", value)) } -// NewErrInvalidJSONPayload returns a new error indicating that the buffer +// NewErrInvalidJSONPayload returns a new error indicating that the buffer contains +// an invalid JSON payload. func NewErrInvalidJSONPayload(b []byte, path []string, err ...error) error { kvs := []errors.KV{errors.NewKV("Buffer", b), errors.NewKV("Path", path)} if len(err) > 0 { @@ -103,7 +104,7 @@ func NewErrInvalidJSONPayload(b []byte, path []string, err ...error) error { return errors.New(errInvalidJSONPayload, kvs...) 
} -// NewErrInvalidJSONPath returns a new error indicating that the buffer +// NewErrInvalidJSONPath returns a new error indicating that the buffer contains invalid JSON path. func NewErrInvalidJSONPath(b []byte, err error) error { return errors.New(errInvalidJSONPath, errors.NewKV("Buffer", b), errors.NewKV("Error", err)) } From c181d9e5ffe8af382412ef9964ac8524a8945ccb Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 2 Jan 2025 11:14:14 +0100 Subject: [PATCH 32/46] Add encoding/decoding tests --- internal/encoding/field_value_test.go | 53 +++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/internal/encoding/field_value_test.go b/internal/encoding/field_value_test.go index 607e3f74a1..18e250540c 100644 --- a/internal/encoding/field_value_test.go +++ b/internal/encoding/field_value_test.go @@ -13,11 +13,13 @@ package encoding import ( "reflect" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/sourcenetwork/defradb/client" + "github.com/sourcenetwork/immutable" ) func TestEncodeDecodeFieldValue(t *testing.T) { @@ -41,6 +43,8 @@ func TestEncodeDecodeFieldValue(t *testing.T) { require.NoError(t, err) normalNullJSON := client.NewNormalJSON(nullJSON) + date := time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC) + tests := []struct { name string inputVal client.NormalValue @@ -69,6 +73,20 @@ func TestEncodeDecodeFieldValue(t *testing.T) { expectedBytesDesc: EncodeBoolDescending(nil, false), expectedDecodedVal: client.NewNormalBool(false), }, + { + name: "nillable bool true", + inputVal: client.NewNormalNillableBool(immutable.Some(true)), + expectedBytes: EncodeBoolAscending(nil, true), + expectedBytesDesc: EncodeBoolDescending(nil, true), + expectedDecodedVal: client.NewNormalBool(true), + }, + { + name: "nillable bool false", + inputVal: client.NewNormalNillableBool(immutable.Some(false)), + expectedBytes: EncodeBoolAscending(nil, false), + expectedBytesDesc: EncodeBoolDescending(nil, false), + 
expectedDecodedVal: client.NewNormalBool(false), + }, { name: "int", inputVal: client.NewNormalInt(55), @@ -76,6 +94,13 @@ func TestEncodeDecodeFieldValue(t *testing.T) { expectedBytesDesc: EncodeVarintDescending(nil, 55), expectedDecodedVal: client.NewNormalInt(55), }, + { + name: "nillable int", + inputVal: client.NewNormalNillableInt(immutable.Some(55)), + expectedBytes: EncodeVarintAscending(nil, 55), + expectedBytesDesc: EncodeVarintDescending(nil, 55), + expectedDecodedVal: client.NewNormalInt(55), + }, { name: "float", inputVal: client.NewNormalFloat(0.2), @@ -83,6 +108,13 @@ func TestEncodeDecodeFieldValue(t *testing.T) { expectedBytesDesc: EncodeFloatDescending(nil, 0.2), expectedDecodedVal: client.NewNormalFloat(0.2), }, + { + name: "nillable float", + inputVal: client.NewNormalNillableFloat(immutable.Some(0.2)), + expectedBytes: EncodeFloatAscending(nil, 0.2), + expectedBytesDesc: EncodeFloatDescending(nil, 0.2), + expectedDecodedVal: client.NewNormalFloat(0.2), + }, { name: "string", inputVal: client.NewNormalString("str"), @@ -90,6 +122,27 @@ func TestEncodeDecodeFieldValue(t *testing.T) { expectedBytesDesc: EncodeBytesDescending(nil, []byte("str")), expectedDecodedVal: client.NewNormalString("str"), }, + { + name: "nillable string", + inputVal: client.NewNormalNillableString(immutable.Some("str")), + expectedBytes: EncodeBytesAscending(nil, []byte("str")), + expectedBytesDesc: EncodeBytesDescending(nil, []byte("str")), + expectedDecodedVal: client.NewNormalString("str"), + }, + { + name: "time", + inputVal: client.NewNormalTime(date), + expectedBytes: EncodeTimeAscending(nil, date), + expectedBytesDesc: EncodeTimeDescending(nil, date), + expectedDecodedVal: client.NewNormalTime(date), + }, + { + name: "nillable time", + inputVal: client.NewNormalNillableTime(immutable.Some(date)), + expectedBytes: EncodeTimeAscending(nil, date), + expectedBytesDesc: EncodeTimeDescending(nil, date), + expectedDecodedVal: client.NewNormalTime(date), + }, { name: "json 
string", inputVal: normalSimpleJSON, From 669cc8547553a3ef9d5ae9965756b9d9c5f00e41 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 2 Jan 2025 11:14:27 +0100 Subject: [PATCH 33/46] Fix import --- internal/db/fetcher/indexer_iterators.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 0f7b89a072..d8a191103f 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -12,9 +12,6 @@ package fetcher import ( "context" - "strings" - "time" - "errors" ds "github.com/ipfs/go-datastore" "golang.org/x/exp/slices" From 122bc2474782d58d84a4a810f9810657ee5b2b38 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 2 Jan 2025 12:30:51 +0100 Subject: [PATCH 34/46] Improve coverage --- internal/db/fetcher/indexer_matchers.go | 11 +---------- ...tetime.go => query_with_index_on_datetime_test.go} | 0 2 files changed, 1 insertion(+), 10 deletions(-) rename tests/integration/index/{query_with_index_on_datetime.go => query_with_index_on_datetime_test.go} (100%) diff --git a/internal/db/fetcher/indexer_matchers.go b/internal/db/fetcher/indexer_matchers.go index 20c647e591..e575a2a19a 100644 --- a/internal/db/fetcher/indexer_matchers.go +++ b/internal/db/fetcher/indexer_matchers.go @@ -133,16 +133,7 @@ func (m *boolMatcher) Match(value client.NormalValue) (bool, error) { if boolOptVal, ok := value.NillableBool(); ok { boolVal = boolOptVal.Value() } else { - // TODO: check is this is still needed after encoding of bool changed - intVal, ok := value.Int() - if !ok { - if intOptVal, ok := value.NillableInt(); ok { - intVal = intOptVal.Value() - } else { - return false, NewErrUnexpectedTypeValue[bool](value) - } - } - boolVal = intVal != 0 + return false, NewErrUnexpectedTypeValue[bool](value) } } return boolVal == m.value == m.isEq, nil diff --git a/tests/integration/index/query_with_index_on_datetime.go 
b/tests/integration/index/query_with_index_on_datetime_test.go similarity index 100% rename from tests/integration/index/query_with_index_on_datetime.go rename to tests/integration/index/query_with_index_on_datetime_test.go From 830867297f6adb60645f81415b5c082d6c8f1902 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 2 Jan 2025 17:08:23 +0100 Subject: [PATCH 35/46] PR fixup --- client/json.go | 24 +-- internal/db/fetcher/indexer_iterators.go | 2 - internal/db/index.go | 26 +-- internal/encoding/json.go | 1 - .../integration/index/json_composite_test.go | 167 +++++++++++++++++- ...with_unique_composite_index_filter_test.go | 4 +- 6 files changed, 193 insertions(+), 31 deletions(-) diff --git a/client/json.go b/client/json.go index 5230dd4ac9..edaad89ea1 100644 --- a/client/json.go +++ b/client/json.go @@ -61,8 +61,8 @@ type JSON interface { // GetPath returns the path of the JSON value in the JSON tree. GetPath() []string - // accept calls the visitor function for the JSON value at the given path. - accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error + // visit calls the visitor function for the JSON value at the given path. + visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error } // MakeVoidJSON creates a JSON value that represents a void value with just a path. 
@@ -81,7 +81,7 @@ func TraverseJSON(j JSON, visitor JSONVisitor, opts ...traverseJSONOption) error opt(&options) } if shouldVisitPath(options.pathPrefix, nil) { - return j.accept(visitor, []string{}, options) + return j.visit(visitor, []string{}, options) } return nil } @@ -166,7 +166,7 @@ func (v jsonVoid) IsNull() bool { return false } -func (v jsonVoid) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (v jsonVoid) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { return nil } @@ -218,7 +218,7 @@ func (obj jsonObject) Unwrap() any { return result } -func (obj jsonObject) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (obj jsonObject) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { obj.path = path if !opts.onlyLeaves && len(path) >= len(opts.pathPrefix) { if err := visitor(obj); err != nil { @@ -232,7 +232,7 @@ func (obj jsonObject) accept(visitor JSONVisitor, path []string, opts traverseJS continue } - if err := v.accept(visitor, newPath, opts); err != nil { + if err := v.visit(visitor, newPath, opts); err != nil { return err } } @@ -261,7 +261,7 @@ func (arr jsonArray) Unwrap() any { return result } -func (arr jsonArray) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (arr jsonArray) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { arr.path = path if !opts.onlyLeaves { if err := visitor(arr); err != nil { @@ -284,7 +284,7 @@ func (arr jsonArray) accept(visitor JSONVisitor, path []string, opts traverseJSO continue } - if err := arr.val[i].accept(visitor, newPath, opts); err != nil { + if err := arr.val[i].visit(visitor, newPath, opts); err != nil { return err } } @@ -302,7 +302,7 @@ func (n jsonNumber) Number() (float64, bool) { return n.val, true } -func (n jsonNumber) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (n jsonNumber) visit(visitor 
JSONVisitor, path []string, opts traverseJSONOptions) error { n.path = path return visitor(n) } @@ -317,7 +317,7 @@ func (s jsonString) String() (string, bool) { return s.val, true } -func (n jsonString) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (n jsonString) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { n.path = path return visitor(n) } @@ -332,7 +332,7 @@ func (b jsonBool) Bool() (bool, bool) { return b.val, true } -func (n jsonBool) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (n jsonBool) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { n.path = path return visitor(n) } @@ -347,7 +347,7 @@ func (n jsonNull) IsNull() bool { return true } -func (n jsonNull) accept(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (n jsonNull) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { n.path = path return visitor(n) } diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index d8a191103f..2c0ab41583 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -460,7 +460,6 @@ func (f *IndexFetcher) createIndexIterator() (indexIterator, error) { iter, err = f.newInIndexIterator(fieldConditions, matchers) } else { key := f.newIndexDataStoreKey() - // TODO: can we test fieldConditions[not 0]? 
if fieldConditions[0].kind == client.FieldKind_NILLABLE_JSON { key.Fields = []keys.IndexedField{{ Descending: f.indexDesc.Fields[0].Descending, @@ -544,7 +543,6 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro if cond.op == compOpAny || cond.op == compOpAll || cond.op == compOpNone { subCondMap := filterVal.(map[connor.FilterKey]any) for subKey, subVal := range subCondMap { - // TODO: check what happens with _any: {_eq: [1, 2]} cond.arrOp = cond.op cond.op = subKey.(*mapper.Operator).Operation jsonVal, err = client.NewJSONWithPath(subVal, jsonPath) diff --git a/internal/db/index.go b/internal/db/index.go index 20e4af7ffe..72ceb7aab3 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -146,7 +146,7 @@ func getFieldGenerator(kind client.FieldKind) FieldIndexGenerator { type collectionBaseIndex struct { collection client.Collection desc client.IndexDescription - // fieldsDescs is a slice of field descriptions for the fields that are indexed by the index + // fieldsDescs is a slice of field descriptions for the fields that form the index // If there is more than 1 field, the index is composite fieldsDescs []client.SchemaFieldDescription fieldGenerators []FieldIndexGenerator @@ -241,10 +241,12 @@ func (index *collectionBaseIndex) Description() client.IndexDescription { return index.desc } -func (index *collectionBaseIndex) generateIndexKeys( +// generateKeysAndProcess generates index keys for the given document and calls the provided function +// for each generated key +func (index *collectionBaseIndex) generateKeysAndProcess( doc *client.Document, appendDocID bool, - f func(keys.IndexDataStoreKey) error, + processKey func(keys.IndexDataStoreKey) error, ) error { // Get initial key with base values baseKey, err := index.getDocumentsIndexKey(doc, appendDocID) @@ -253,17 +255,17 @@ func (index *collectionBaseIndex) generateIndexKeys( } // Start with first field - return index.generateKeysForField(0, baseKey, f) + return 
index.generateKeysForFieldAndProcess(0, baseKey, processKey) } -func (index *collectionBaseIndex) generateKeysForField( +func (index *collectionBaseIndex) generateKeysForFieldAndProcess( fieldIdx int, baseKey keys.IndexDataStoreKey, - f func(keys.IndexDataStoreKey) error, + processKey func(keys.IndexDataStoreKey) error, ) error { // If we've processed all fields, call the handler if fieldIdx >= len(index.fieldsDescs) { - return f(baseKey) + return processKey(baseKey) } // Generate values for current field @@ -277,7 +279,7 @@ func (index *collectionBaseIndex) generateKeysForField( newKey.Fields[fieldIdx].Value = val // Process next field - return index.generateKeysForField(fieldIdx+1, newKey, f) + return index.generateKeysForFieldAndProcess(fieldIdx+1, newKey, processKey) }, ) } @@ -296,7 +298,7 @@ func (index *collectionSimpleIndex) Save( txn datastore.Txn, doc *client.Document, ) error { - return index.generateIndexKeys(doc, true, func(key keys.IndexDataStoreKey) error { + return index.generateKeysAndProcess(doc, true, func(key keys.IndexDataStoreKey) error { return txn.Datastore().Put(ctx, key.ToDS(), []byte{}) }) } @@ -319,7 +321,7 @@ func (index *collectionSimpleIndex) Delete( txn datastore.Txn, doc *client.Document, ) error { - return index.generateIndexKeys(doc, true, func(key keys.IndexDataStoreKey) error { + return index.generateKeysAndProcess(doc, true, func(key keys.IndexDataStoreKey) error { return index.deleteIndexKey(ctx, txn, key) }) } @@ -345,7 +347,7 @@ func (index *collectionUniqueIndex) Save( txn datastore.Txn, doc *client.Document, ) error { - return index.generateIndexKeys(doc, false, func(key keys.IndexDataStoreKey) error { + return index.generateKeysAndProcess(doc, false, func(key keys.IndexDataStoreKey) error { return addNewUniqueKey(ctx, txn, doc, key, index.fieldsDescs) }) } @@ -427,7 +429,7 @@ func (index *collectionUniqueIndex) Delete( txn datastore.Txn, doc *client.Document, ) error { - return index.generateIndexKeys(doc, false, func(key 
keys.IndexDataStoreKey) error { + return index.generateKeysAndProcess(doc, false, func(key keys.IndexDataStoreKey) error { key, _, err := makeUniqueKeyValueRecord(key, doc) if err != nil { return err diff --git a/internal/encoding/json.go b/internal/encoding/json.go index 82014b6f15..65f11bccc5 100644 --- a/internal/encoding/json.go +++ b/internal/encoding/json.go @@ -144,7 +144,6 @@ func encodeJSONPath(b []byte, v client.JSON) []byte { b = append(b, jsonMarker) for _, part := range v.GetPath() { pathBytes := unsafeConvertStringToBytes(part) - //b = encodeBytesAscendingWithTerminator(b, pathBytes, ascendingBytesEscapes.escapedTerm) b = EncodeBytesAscending(b, pathBytes) } b = append(b, ascendingBytesEscapes.escapedTerm) diff --git a/tests/integration/index/json_composite_test.go b/tests/integration/index/json_composite_test.go index 1985d43f93..bee066b5f1 100644 --- a/tests/integration/index/json_composite_test.go +++ b/tests/integration/index/json_composite_test.go @@ -16,7 +16,7 @@ import ( testUtils "github.com/sourcenetwork/defradb/tests/integration" ) -func TestJSONArrayCompositeIndex_JSONWithScalar_ShouldFetchUsingIndex(t *testing.T) { +func TestJSONCompositeIndex_JSONWithScalarWithEqFilter_ShouldFetchUsingIndex(t *testing.T) { type testCase struct { name string req string @@ -208,7 +208,170 @@ func TestJSONArrayCompositeIndex_JSONWithScalar_ShouldFetchUsingIndex(t *testing } } -func TestJSONArrayCompositeIndex_ScalarWithJSON_ShouldFetchUsingIndex(t *testing.T) { +func TestJSONCompositeIndex_JSONWithScalarWithOtherFilters_ShouldFetchUsingIndex2(t *testing.T) { + type testCase struct { + name string + req string + result map[string]any + indexFetches int + } + + testCases := []testCase{ + { + name: "With _le and _gt filters", + req: `query { + User(filter: { + age: {_le: 35}, + custom: {val: {_gt: 4}} + }) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Keenan"}, + {"name": "Addo"}, + }, + }, + indexFetches: 8, + }, + { + 
name: "With _lt and _eq filters", + req: `query { + User(filter: { + age: {_lt: 100}, + custom: {val: {_eq: null}} + }) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Andy"}, + }, + }, + indexFetches: 8, + }, + { + name: "With _ne and _ge filters", + req: `query { + User(filter: { + _and: [{ age: {_ne: 35} }, { age: {_ne: 40} }], + custom: {val: {_ge: 5}} + }) { + name + } + }`, + result: map[string]any{ + "User": []map[string]any{ + {"name": "Chris"}, + }, + }, + indexFetches: 0, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User @index(includes: [{field: "age"}, {field: "custom"}]) { + name: String + custom: JSON + age: Int + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "val": 3, + }, + "age": 30, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "val": 3, + }, + "age": 25, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "val": 4, + }, + "age": 25, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Keenan", + "custom": map[string]any{ + "val": 5, + }, + "age": 35, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Addo", + "custom": map[string]any{ + "val": 6, + }, + "age": 35, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + "val": 6, + }, + "age": 40, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "val": nil, + }, + "age": 50, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Chris", + "custom": map[string]any{ + "val": 7, + }, + "age": nil, + }, + }, + testUtils.Request{ + Request: tc.req, + Results: tc.result, + }, + testUtils.Request{ + 
Request: makeExplainQuery(tc.req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(tc.indexFetches), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) + }) + } +} + +func TestJSONCompositeIndex_ScalarWithJSON_ShouldFetchUsingIndex(t *testing.T) { type testCase struct { name string req string diff --git a/tests/integration/index/query_with_unique_composite_index_filter_test.go b/tests/integration/index/query_with_unique_composite_index_filter_test.go index 419795b373..9928cb684b 100644 --- a/tests/integration/index/query_with_unique_composite_index_filter_test.go +++ b/tests/integration/index/query_with_unique_composite_index_filter_test.go @@ -1313,7 +1313,7 @@ func TestQueryWithUniqueCompositeIndex_AfterUpdateOnNilFields_ShouldFetch(t *tes }, }, }, - /*testUtils.Request{ + testUtils.Request{ Request: ` query { User(filter: {name: {_eq: null}}) { @@ -1342,7 +1342,7 @@ func TestQueryWithUniqueCompositeIndex_AfterUpdateOnNilFields_ShouldFetch(t *tes {"about": "nil_nil -> bob_nil"}, }, }, - },*/ + }, }, } From f237f2fe0b58ac35f1e81b3bccd545981401747e Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 2 Jan 2025 17:26:06 +0100 Subject: [PATCH 36/46] Update copyright --- client/json.go | 2 +- client/json_test.go | 2 +- client/json_traverse_test.go | 2 +- client/normal_util.go | 2 +- client/normal_value_test.go | 2 +- internal/db/fetcher/errors.go | 2 +- internal/db/fetcher/indexer.go | 2 +- internal/db/fetcher/indexer_iterators.go | 2 +- internal/db/fetcher/indexer_matchers.go | 2 +- internal/db/index.go | 2 +- internal/db/index_test.go | 2 +- internal/db/indexed_docs_test.go | 2 +- internal/encoding/bool.go | 2 +- internal/encoding/bool_test.go | 2 +- internal/encoding/errors.go | 2 +- internal/encoding/field_value.go | 2 +- internal/encoding/field_value_test.go | 5 +++-- internal/encoding/json.go | 2 +- internal/encoding/json_test.go | 2 +- internal/planner/scan.go | 2 +- tests/integration/index/array_unique_test.go | 2 +- 
tests/integration/index/json_array_test.go | 2 +- tests/integration/index/json_composite_test.go | 2 +- tests/integration/index/json_test.go | 2 +- tests/integration/index/json_unique_array_test.go | 2 +- tests/integration/index/json_unique_test.go | 2 +- tests/integration/index/query_with_index_on_datetime_test.go | 2 +- tests/integration/query/json/with_all_test.go | 2 +- tests/integration/query/json/with_any_test.go | 2 +- tests/integration/query/json/with_eq_test.go | 2 +- tests/integration/query/json/with_none_test.go | 2 +- 31 files changed, 33 insertions(+), 32 deletions(-) diff --git a/client/json.go b/client/json.go index edaad89ea1..fd3e3dbad3 100644 --- a/client/json.go +++ b/client/json.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/client/json_test.go b/client/json_test.go index 512742047d..811097e709 100644 --- a/client/json_test.go +++ b/client/json_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index 3f279bdcfe..ff9103b1ab 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. 
diff --git a/client/normal_util.go b/client/normal_util.go index 74cc203676..44631fe45c 100644 --- a/client/normal_util.go +++ b/client/normal_util.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/client/normal_value_test.go b/client/normal_value_test.go index 03c0daff0b..551ef2e300 100644 --- a/client/normal_value_test.go +++ b/client/normal_value_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/db/fetcher/errors.go b/internal/db/fetcher/errors.go index 0d9c9fdd86..f102404d8c 100644 --- a/internal/db/fetcher/errors.go +++ b/internal/db/fetcher/errors.go @@ -1,4 +1,4 @@ -// Copyright 2022 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/db/fetcher/indexer.go b/internal/db/fetcher/indexer.go index 5ef63b7246..ec3c5dd4e9 100644 --- a/internal/db/fetcher/indexer.go +++ b/internal/db/fetcher/indexer.go @@ -1,4 +1,4 @@ -// Copyright 2023 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. 
diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 2c0ab41583..906c6e6103 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/db/fetcher/indexer_matchers.go b/internal/db/fetcher/indexer_matchers.go index e575a2a19a..05214af2c1 100644 --- a/internal/db/fetcher/indexer_matchers.go +++ b/internal/db/fetcher/indexer_matchers.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/db/index.go b/internal/db/index.go index 72ceb7aab3..92fad980e6 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -1,4 +1,4 @@ -// Copyright 2023 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/db/index_test.go b/internal/db/index_test.go index d66fcfedf2..705dba38ac 100644 --- a/internal/db/index_test.go +++ b/internal/db/index_test.go @@ -1,4 +1,4 @@ -// Copyright 2023 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. 
diff --git a/internal/db/indexed_docs_test.go b/internal/db/indexed_docs_test.go index 138ee34a2d..d9e4327c35 100644 --- a/internal/db/indexed_docs_test.go +++ b/internal/db/indexed_docs_test.go @@ -1,4 +1,4 @@ -// Copyright 2023 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/encoding/bool.go b/internal/encoding/bool.go index 5c9d3369de..766bd3d0ad 100644 --- a/internal/encoding/bool.go +++ b/internal/encoding/bool.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/encoding/bool_test.go b/internal/encoding/bool_test.go index 1a21898449..f4b2e0c4e5 100644 --- a/internal/encoding/bool_test.go +++ b/internal/encoding/bool_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/encoding/errors.go b/internal/encoding/errors.go index 8ddcda01a2..26d2daeb93 100644 --- a/internal/encoding/errors.go +++ b/internal/encoding/errors.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. 
diff --git a/internal/encoding/field_value.go b/internal/encoding/field_value.go index 2c051d0a37..703355eceb 100644 --- a/internal/encoding/field_value.go +++ b/internal/encoding/field_value.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/encoding/field_value_test.go b/internal/encoding/field_value_test.go index 18e250540c..7942059cec 100644 --- a/internal/encoding/field_value_test.go +++ b/internal/encoding/field_value_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. @@ -18,8 +18,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/sourcenetwork/defradb/client" "github.com/sourcenetwork/immutable" + + "github.com/sourcenetwork/defradb/client" ) func TestEncodeDecodeFieldValue(t *testing.T) { diff --git a/internal/encoding/json.go b/internal/encoding/json.go index 65f11bccc5..9c53f11237 100644 --- a/internal/encoding/json.go +++ b/internal/encoding/json.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/internal/encoding/json_test.go b/internal/encoding/json_test.go index de059c0a4a..22aa7741dc 100644 --- a/internal/encoding/json_test.go +++ b/internal/encoding/json_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. 
diff --git a/internal/planner/scan.go b/internal/planner/scan.go index 16b8295a3b..bfca64b7bb 100644 --- a/internal/planner/scan.go +++ b/internal/planner/scan.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/tests/integration/index/array_unique_test.go b/tests/integration/index/array_unique_test.go index 2ad9588371..c58c5310e0 100644 --- a/tests/integration/index/array_unique_test.go +++ b/tests/integration/index/array_unique_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/tests/integration/index/json_array_test.go b/tests/integration/index/json_array_test.go index f7bca7d54a..41e4babde5 100644 --- a/tests/integration/index/json_array_test.go +++ b/tests/integration/index/json_array_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/tests/integration/index/json_composite_test.go b/tests/integration/index/json_composite_test.go index bee066b5f1..187ed99334 100644 --- a/tests/integration/index/json_composite_test.go +++ b/tests/integration/index/json_composite_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. 
diff --git a/tests/integration/index/json_test.go b/tests/integration/index/json_test.go index 966d1f50f3..d154248501 100644 --- a/tests/integration/index/json_test.go +++ b/tests/integration/index/json_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/tests/integration/index/json_unique_array_test.go b/tests/integration/index/json_unique_array_test.go index 03d8d1991a..6e34c8dd13 100644 --- a/tests/integration/index/json_unique_array_test.go +++ b/tests/integration/index/json_unique_array_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/tests/integration/index/json_unique_test.go b/tests/integration/index/json_unique_test.go index 4e533600cc..b594381f4d 100644 --- a/tests/integration/index/json_unique_test.go +++ b/tests/integration/index/json_unique_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/tests/integration/index/query_with_index_on_datetime_test.go b/tests/integration/index/query_with_index_on_datetime_test.go index d6618fbc1a..6efb8d5644 100644 --- a/tests/integration/index/query_with_index_on_datetime_test.go +++ b/tests/integration/index/query_with_index_on_datetime_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. 
diff --git a/tests/integration/query/json/with_all_test.go b/tests/integration/query/json/with_all_test.go index 798f0cc047..2185b0df91 100644 --- a/tests/integration/query/json/with_all_test.go +++ b/tests/integration/query/json/with_all_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/tests/integration/query/json/with_any_test.go b/tests/integration/query/json/with_any_test.go index 0c5fdaa01c..1badd03c94 100644 --- a/tests/integration/query/json/with_any_test.go +++ b/tests/integration/query/json/with_any_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/tests/integration/query/json/with_eq_test.go b/tests/integration/query/json/with_eq_test.go index ac61591602..ebba09de2f 100644 --- a/tests/integration/query/json/with_eq_test.go +++ b/tests/integration/query/json/with_eq_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. diff --git a/tests/integration/query/json/with_none_test.go b/tests/integration/query/json/with_none_test.go index 0aff4baee1..1a65f7de60 100644 --- a/tests/integration/query/json/with_none_test.go +++ b/tests/integration/query/json/with_none_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Democratized Data Foundation +// Copyright 2025 Democratized Data Foundation // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. 
From edb8386638579d0b2c3c782f16705f9b88c686f3 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Tue, 7 Jan 2025 11:42:38 +0100 Subject: [PATCH 37/46] Add json array path encoding --- client/json.go | 161 ++++++++++++++++++----- client/json_test.go | 6 +- client/json_traverse_test.go | 112 +++++++--------- internal/db/fetcher/indexer_iterators.go | 6 +- internal/db/indexed_docs_test.go | 4 +- internal/encoding/json.go | 6 +- internal/encoding/json_test.go | 5 +- 7 files changed, 190 insertions(+), 110 deletions(-) diff --git a/client/json.go b/client/json.go index fd3e3dbad3..100dead8ff 100644 --- a/client/json.go +++ b/client/json.go @@ -14,11 +14,102 @@ import ( "encoding/json" "io" "strconv" + "strings" "github.com/valyala/fastjson" "golang.org/x/exp/constraints" ) +// JSONPathPart represents a part of a JSON path. +// Json path can be either a property of an object or an index of an element in an array. +type JSONPathPart interface { + // Property returns the property name if the part is a property, and a boolean indicating if the part is a property. + Property() (string, bool) + // Index returns the index if the part is an index, and a boolean indicating if the part is an index. + Index() (uint64, bool) +} + +type propPathPart string +type indexPathPart uint64 + +func (p propPathPart) Property() (string, bool) { + return string(p), true +} + +func (p propPathPart) Index() (uint64, bool) { + return 0, false +} + +func (p indexPathPart) Property() (string, bool) { + return "", false +} + +func (p indexPathPart) Index() (uint64, bool) { + return uint64(p), true +} + +// JSONPath represents a path to a JSON value in a JSON tree. +type JSONPath []JSONPathPart + +// Parts returns the parts of the JSON path. +func (p JSONPath) Parts() []JSONPathPart { + return p +} + +// Append appends a part to the JSON path. +func (p JSONPath) Append(part JSONPathPart) JSONPath { + return append(p, part) +} + +// AppendProperty appends a property part to the JSON path. 
+func (p JSONPath) AppendProperty(part string) JSONPath { + return append(p, propPathPart(part)) +} + +// AppendIndex appends an index part to the JSON path. +func (p JSONPath) AppendIndex(part uint64) JSONPath { + return append(p, indexPathPart(part)) +} + +// String returns the string representation of the JSON path. +func (p JSONPath) String() string { + var sb strings.Builder + for i, part := range p { + if prop, ok := part.Property(); ok { + if i > 0 { + sb.WriteByte('.') + } + sb.WriteString(prop) + } else if index, ok := part.Index(); ok { + sb.WriteByte('[') + sb.WriteString(strconv.FormatUint(index, 10)) + sb.WriteByte(']') + } + } + return sb.String() +} + +func toJSONPathPart[T string | int | uint64](v T) JSONPathPart { + switch val := any(v).(type) { + case string: + return propPathPart(val) + case int: + return indexPathPart(uint64(val)) + case uint64: + return indexPathPart(val) + } + return nil +} + +// Creates a path from mixed string/integer values +func MakeJSONPath[T string | int | uint64](parts ...T) JSONPath { + path := make(JSONPath, len(parts)) + for i, part := range parts { + path[i] = toJSONPathPart(part) + } + return path +} + // JSON represents a JSON value that can be any valid JSON type: object, array, number, string, boolean, or null. // It provides type-safe access to the underlying value through various accessor methods. type JSON interface { @@ -59,17 +150,17 @@ type JSON interface { Marshal(w io.Writer) error // GetPath returns the path of the JSON value in the JSON tree. - GetPath() []string + GetPath() JSONPath // visit calls the visitor function for the JSON value at the given path. - visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error + visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error } // MakeVoidJSON creates a JSON value that represents a void value with just a path. // This is necessary purely for creating a json path prefix for storage queries. 
// All other json values will be encoded with some value after the path which makes // them unsuitable to build a path prefix. -func MakeVoidJSON(path []string) JSON { +func MakeVoidJSON(path JSONPath) JSON { return jsonBase[any]{path: path} } @@ -81,7 +172,7 @@ func TraverseJSON(j JSON, visitor JSONVisitor, opts ...traverseJSONOption) error opt(&options) } if shouldVisitPath(options.pathPrefix, nil) { - return j.visit(visitor, []string{}, options) + return j.visit(visitor, JSONPath{}, options) } return nil } @@ -90,7 +181,7 @@ type traverseJSONOption func(*traverseJSONOptions) // TraverseJSONWithPrefix returns a traverseJSONOption that sets the path prefix for the traversal. // Only nodes with paths that start with the prefix will be visited. -func TraverseJSONWithPrefix(prefix []string) traverseJSONOption { +func TraverseJSONWithPrefix(prefix JSONPath) traverseJSONOption { return func(opts *traverseJSONOptions) { opts.pathPrefix = prefix } @@ -131,7 +222,7 @@ type traverseJSONOptions struct { // onlyLeaves when true visits only leaf nodes (not objects or arrays) onlyLeaves bool // pathPrefix when set visits only paths that start with this prefix - pathPrefix []string + pathPrefix JSONPath // visitArrayElements when true visits array elements visitArrayElements bool // recurseVisitedArrayElements when true visits array elements recursively @@ -166,14 +257,14 @@ func (v jsonVoid) IsNull() bool { return false } -func (v jsonVoid) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (v jsonVoid) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error { return nil } type jsonBase[T any] struct { jsonVoid val T - path []string + path JSONPath } func (v jsonBase[T]) Value() any { @@ -192,7 +283,7 @@ func (v jsonBase[T]) MarshalJSON() ([]byte, error) { return json.Marshal(v.val) } -func (v jsonBase[T]) GetPath() []string { +func (v jsonBase[T]) GetPath() JSONPath { return v.path } @@ -218,7 +309,7 @@ func (obj jsonObject) 
Unwrap() any { return result } -func (obj jsonObject) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (obj jsonObject) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error { obj.path = path if !opts.onlyLeaves && len(path) >= len(opts.pathPrefix) { if err := visitor(obj); err != nil { @@ -227,7 +318,7 @@ func (obj jsonObject) visit(visitor JSONVisitor, path []string, opts traverseJSO } for k, v := range obj.val { - newPath := append(path, k) + newPath := path.AppendProperty(k) if !shouldVisitPath(opts.pathPrefix, newPath) { continue } @@ -261,7 +352,7 @@ func (arr jsonArray) Unwrap() any { return result } -func (arr jsonArray) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (arr jsonArray) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error { arr.path = path if !opts.onlyLeaves { if err := visitor(arr); err != nil { @@ -274,9 +365,9 @@ func (arr jsonArray) visit(visitor JSONVisitor, path []string, opts traverseJSON if !opts.recurseVisitedArrayElements && isCompositeJSON(arr.val[i]) { continue } - var newPath []string + var newPath JSONPath if opts.includeArrayIndexInPath { - newPath = append(path, strconv.Itoa(i)) + newPath = path.AppendIndex(uint64(i)) } else { newPath = path } @@ -302,7 +393,7 @@ func (n jsonNumber) Number() (float64, bool) { return n.val, true } -func (n jsonNumber) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (n jsonNumber) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error { n.path = path return visitor(n) } @@ -317,7 +408,7 @@ func (s jsonString) String() (string, bool) { return s.val, true } -func (n jsonString) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (n jsonString) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error { n.path = path return visitor(n) } @@ -332,7 +423,7 @@ func (b jsonBool) Bool() (bool, bool) { 
return b.val, true } -func (n jsonBool) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (n jsonBool) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error { n.path = path return visitor(n) } @@ -347,32 +438,32 @@ func (n jsonNull) IsNull() bool { return true } -func (n jsonNull) visit(visitor JSONVisitor, path []string, opts traverseJSONOptions) error { +func (n jsonNull) visit(visitor JSONVisitor, path JSONPath, opts traverseJSONOptions) error { n.path = path return visitor(n) } -func newJSONObject(val map[string]JSON, path []string) jsonObject { +func newJSONObject(val map[string]JSON, path JSONPath) jsonObject { return jsonObject{jsonBase[map[string]JSON]{val: val, path: path}} } -func newJSONArray(val []JSON, path []string) jsonArray { +func newJSONArray(val []JSON, path JSONPath) jsonArray { return jsonArray{jsonBase[[]JSON]{val: val, path: path}} } -func newJSONNumber(val float64, path []string) jsonNumber { +func newJSONNumber(val float64, path JSONPath) jsonNumber { return jsonNumber{jsonBase[float64]{val: val, path: path}} } -func newJSONString(val string, path []string) jsonString { +func newJSONString(val string, path JSONPath) jsonString { return jsonString{jsonBase[string]{val: val, path: path}} } -func newJSONBool(val bool, path []string) jsonBool { +func newJSONBool(val bool, path JSONPath) jsonBool { return jsonBool{jsonBase[bool]{val: val, path: path}} } -func newJSONNull(path []string) jsonNull { +func newJSONNull(path JSONPath) jsonNull { return jsonNull{jsonBase[any]{path: path}} } @@ -426,12 +517,12 @@ func NewJSON(v any) (JSON, error) { // - slice of any above type // - []any // Returns error if the input cannot be converted to JSON. 
-func NewJSONWithPath(v any, path []string) (JSON, error) { +func NewJSONWithPath(v any, path JSONPath) (JSON, error) { return newJSON(v, path) } // newJSON is an internal function that creates a new JSON value with parent and property name -func newJSON(v any, path []string) (JSON, error) { +func newJSON(v any, path JSONPath) (JSON, error) { if v == nil { return newJSONNull(path), nil } else { @@ -505,7 +596,7 @@ func newJSON(v any, path []string) (JSON, error) { return nil, NewErrInvalidJSONPayload(v) } -func newJsonArrayFromAnyArray(arr []any, path []string) (JSON, error) { +func newJsonArrayFromAnyArray(arr []any, path JSONPath) (JSON, error) { result := make([]JSON, len(arr)) for i := range arr { jsonVal, err := newJSON(arr[i], path) @@ -517,7 +608,7 @@ func newJsonArrayFromAnyArray(arr []any, path []string) (JSON, error) { return newJSONArray(result, path), nil } -func newJSONBoolArray(v []bool, path []string) JSON { +func newJSONBoolArray(v []bool, path JSONPath) JSON { arr := make([]JSON, len(v)) for i := range v { arr[i] = newJSONBool(v[i], path) @@ -525,7 +616,7 @@ func newJSONBoolArray(v []bool, path []string) JSON { return newJSONArray(arr, path) } -func newJSONNumberArray[T constraints.Integer | constraints.Float](v []T, path []string) JSON { +func newJSONNumberArray[T constraints.Integer | constraints.Float](v []T, path JSONPath) JSON { arr := make([]JSON, len(v)) for i := range v { arr[i] = newJSONNumber(float64(v[i]), path) @@ -533,7 +624,7 @@ func newJSONNumberArray[T constraints.Integer | constraints.Float](v []T, path [ return newJSONArray(arr, path) } -func newJSONStringArray(v []string, path []string) JSON { +func newJSONStringArray(v []string, path JSONPath) JSON { arr := make([]JSON, len(v)) for i := range v { arr[i] = newJSONString(v[i], path) @@ -542,14 +633,14 @@ func newJSONStringArray(v []string, path []string) JSON { } // newJSONFromFastJSON is an internal function that creates a new JSON value with parent and property name -func 
newJSONFromFastJSON(v *fastjson.Value, path []string) JSON { +func newJSONFromFastJSON(v *fastjson.Value, path JSONPath) JSON { switch v.Type() { case fastjson.TypeObject: fastObj := v.GetObject() obj := make(map[string]JSON, fastObj.Len()) fastObj.Visit(func(k []byte, v *fastjson.Value) { key := string(k) - obj[key] = newJSONFromFastJSON(v, append(path, key)) + obj[key] = newJSONFromFastJSON(v, path.Append(propPathPart(key))) }) return newJSONObject(obj, path) case fastjson.TypeArray: @@ -585,10 +676,10 @@ func NewJSONFromMap(data map[string]any) (JSON, error) { return newJSONFromMap(data, nil) } -func newJSONFromMap(data map[string]any, path []string) (JSON, error) { +func newJSONFromMap(data map[string]any, path JSONPath) (JSON, error) { obj := make(map[string]JSON, len(data)) for k, v := range data { - jsonVal, err := newJSON(v, append(path, k)) + jsonVal, err := newJSON(v, path.AppendProperty(k)) if err != nil { return nil, err } @@ -597,7 +688,7 @@ func newJSONFromMap(data map[string]any, path []string) (JSON, error) { return newJSONObject(obj, path), nil } -func shouldVisitPath(prefix, path []string) bool { +func shouldVisitPath(prefix, path JSONPath) bool { if len(prefix) == 0 { return true } diff --git a/client/json_test.go b/client/json_test.go index 811097e709..ed98a027d6 100644 --- a/client/json_test.go +++ b/client/json_test.go @@ -504,7 +504,7 @@ func TestNewJSONAndMarshalJSON(t *testing.T) { }, } - path := []string{"some", "path"} + path := MakeJSONPath("some", "path") for _, tt := range tests { for _, withPath := range []bool{true, false} { @@ -621,12 +621,12 @@ func TestNewJSONFromMap_WithPaths(t *testing.T) { } } -func traverseAndAssertPaths(t *testing.T, j JSON, parentPath []string) { +func traverseAndAssertPaths(t *testing.T, j JSON, parentPath JSONPath) { assert.Equal(t, parentPath, j.GetPath(), "Expected path %v, got %v", parentPath, j.GetPath()) if obj, isObj := j.Object(); isObj { for k, v := range obj { - newPath := append(parentPath, k) + 
newPath := parentPath.AppendProperty(k) traverseAndAssertPaths(t, v, newPath) } } diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index ff9103b1ab..5056104cf5 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -79,9 +79,9 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "bool", value: newJSONBool(true, nil)}, {path: "null", value: newJSONNull(nil)}, {path: "object", value: getObjectValue(json)["object"]}, - {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3, nil)}, + {path: "object.nested", value: newJSONString("inside", nil)}, + {path: "object.deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, + {path: "object.deep.level", value: newJSONNumber(3, nil)}, {path: "array", value: getObjectValue(json)["array"]}, }, }, @@ -95,30 +95,30 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "number", value: newJSONNumber(42, nil)}, {path: "bool", value: newJSONBool(true, nil)}, {path: "null", value: newJSONNull(nil)}, - {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep/level", value: newJSONNumber(3, nil)}, + {path: "object.nested", value: newJSONString("inside", nil)}, + {path: "object.deep.level", value: newJSONNumber(3, nil)}, }, }, { name: "WithPrefix_Object", options: []traverseJSONOption{ - TraverseJSONWithPrefix([]string{"object"}), + TraverseJSONWithPrefix(MakeJSONPath("object")), }, expected: []traverseNode{ {path: "object", value: getObjectValue(json)["object"]}, - {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3, nil)}, + {path: "object.nested", value: newJSONString("inside", nil)}, + {path: 
"object.deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, + {path: "object.deep.level", value: newJSONNumber(3, nil)}, }, }, { name: "WithPrefix_Deep", options: []traverseJSONOption{ - TraverseJSONWithPrefix([]string{"object", "deep"}), + TraverseJSONWithPrefix(MakeJSONPath("object", "deep")), }, expected: []traverseNode{ - {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3, nil)}, + {path: "object.deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, + {path: "object.deep.level", value: newJSONNumber(3, nil)}, }, }, { @@ -133,14 +133,14 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "bool", value: newJSONBool(true, nil)}, {path: "null", value: newJSONNull(nil)}, {path: "object", value: getObjectValue(json)["object"]}, - {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3, nil)}, + {path: "object.nested", value: newJSONString("inside", nil)}, + {path: "object.deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, + {path: "object.deep.level", value: newJSONNumber(3, nil)}, {path: "array", value: getObjectValue(json)["array"]}, {path: "array", value: newJSONNumber(1, nil)}, {path: "array", value: newJSONString("two", nil)}, {path: "array", value: getArrayValue(getObjectValue(json)["array"])[2]}, - {path: "array/key", value: newJSONString("value", nil)}, + {path: "array.key", value: newJSONString("value", nil)}, {path: "array", value: getArrayValue(getObjectValue(json)["array"])[3]}, {path: "array", value: newJSONNumber(4, nil)}, {path: "array", value: newJSONNumber(5, nil)}, @@ -158,9 +158,9 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "bool", value: newJSONBool(true, nil)}, {path: "null", value: newJSONNull(nil)}, 
{path: "object", value: getObjectValue(json)["object"]}, - {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3, nil)}, + {path: "object.nested", value: newJSONString("inside", nil)}, + {path: "object.deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, + {path: "object.deep.level", value: newJSONNumber(3, nil)}, {path: "array", value: getObjectValue(json)["array"]}, {path: "array", value: newJSONNumber(1, nil)}, {path: "array", value: newJSONString("two", nil)}, @@ -179,17 +179,17 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { {path: "bool", value: newJSONBool(true, nil)}, {path: "null", value: newJSONNull(nil)}, {path: "object", value: getObjectValue(json)["object"]}, - {path: "object/nested", value: newJSONString("inside", nil)}, - {path: "object/deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, - {path: "object/deep/level", value: newJSONNumber(3, nil)}, + {path: "object.nested", value: newJSONString("inside", nil)}, + {path: "object.deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, + {path: "object.deep.level", value: newJSONNumber(3, nil)}, {path: "array", value: getObjectValue(json)["array"]}, - {path: "array/0", value: newJSONNumber(1, nil)}, - {path: "array/1", value: newJSONString("two", nil)}, - {path: "array/2", value: getArrayValue(getObjectValue(json)["array"])[2]}, - {path: "array/2/key", value: newJSONString("value", nil)}, - {path: "array/3", value: getArrayValue(getObjectValue(json)["array"])[3]}, - {path: "array/3/0", value: newJSONNumber(4, nil)}, - {path: "array/3/1", value: newJSONNumber(5, nil)}, + {path: "array[0]", value: newJSONNumber(1, nil)}, + {path: "array[1]", value: newJSONString("two", nil)}, + {path: "array[2]", value: getArrayValue(getObjectValue(json)["array"])[2]}, + {path: "array[2].key", value: 
newJSONString("value", nil)}, + {path: "array[3]", value: getArrayValue(getObjectValue(json)["array"])[3]}, + {path: "array[3][0]", value: newJSONNumber(4, nil)}, + {path: "array[3][1]", value: newJSONNumber(5, nil)}, }, }, { @@ -197,15 +197,15 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { options: []traverseJSONOption{ TraverseJSONOnlyLeaves(), TraverseJSONVisitArrayElements(true), - TraverseJSONWithPrefix([]string{"array"}), + TraverseJSONWithPrefix(MakeJSONPath("array")), TraverseJSONWithArrayIndexInPath(), }, expected: []traverseNode{ - {path: "array/0", value: newJSONNumber(1, nil)}, - {path: "array/1", value: newJSONString("two", nil)}, - {path: "array/2/key", value: newJSONString("value", nil)}, - {path: "array/3/0", value: newJSONNumber(4, nil)}, - {path: "array/3/1", value: newJSONNumber(5, nil)}, + {path: "array[0]", value: newJSONNumber(1, nil)}, + {path: "array[1]", value: newJSONString("two", nil)}, + {path: "array[2].key", value: newJSONString("value", nil)}, + {path: "array[3][0]", value: newJSONNumber(4, nil)}, + {path: "array[3][1]", value: newJSONNumber(5, nil)}, }, }, } @@ -214,7 +214,7 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { t.Run(tt.name, func(t *testing.T) { visited := []traverseNode{} err := TraverseJSON(json, func(value JSON) error { - key := joinPath(value.GetPath()) + key := value.GetPath().String() visited = append(visited, traverseNode{path: key, value: value}) return nil }, tt.options...) 
@@ -399,44 +399,44 @@ func TestTraverseJSON_WithError(t *testing.T) { func TestShouldVisitPath(t *testing.T) { tests := []struct { name string - prefix []string - path []string + prefix JSONPath + path JSONPath expected bool }{ { name: "EmptyPrefix", - prefix: []string{}, - path: []string{"a", "b"}, + prefix: JSONPath{}, + path: MakeJSONPath("a", "b"), expected: true, }, { name: "ExactMatch", - prefix: []string{"a", "b"}, - path: []string{"a", "b"}, + prefix: MakeJSONPath("a", "b"), + path: MakeJSONPath("a", "b"), expected: true, }, { name: "PrefixMatch", - prefix: []string{"a"}, - path: []string{"a", "b"}, + prefix: MakeJSONPath("a"), + path: MakeJSONPath("a", "b"), expected: true, }, { name: "NoMatch", - prefix: []string{"a", "b"}, - path: []string{"a", "c"}, + prefix: MakeJSONPath("a", "b"), + path: MakeJSONPath("a", "c"), expected: false, }, { name: "PathTooShort", - prefix: []string{"a", "b"}, - path: []string{"a"}, + prefix: MakeJSONPath("a", "b"), + path: MakeJSONPath("a"), expected: true, }, { name: "PathLonger", - prefix: []string{"a", "b"}, - path: []string{"a", "b", "c"}, + prefix: MakeJSONPath("a", "b"), + path: MakeJSONPath("a", "b", "c"), expected: true, }, } @@ -448,15 +448,3 @@ func TestShouldVisitPath(t *testing.T) { }) } } - -// Helper function to join path segments -func joinPath(path []string) string { - if len(path) == 0 { - return "" - } - result := path[0] - for i := 1; i < len(path); i++ { - result += "/" + path[i] - } - return result -} diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 906c6e6103..bdb149870e 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -487,7 +487,7 @@ func (f *IndexFetcher) createIndexIterator() (indexIterator, error) { type fieldFilterCond struct { op string arrOp string - jsonPath []string + jsonPath client.JSONPath val client.NormalValue kind client.FieldKind } @@ -515,7 +515,7 @@ func (f *IndexFetcher) 
determineFieldFilterConditions() ([]fieldFilterCond, erro condMap := indexFilterCond.(map[connor.FilterKey]any) - jsonPath := []string{} + jsonPath := client.JSONPath{} if fieldDef.Kind == client.FieldKind_NILLABLE_JSON { jsonPathLoop: for { @@ -524,7 +524,7 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro if !ok { break jsonPathLoop } - jsonPath = append(jsonPath, prop.Name) + jsonPath = jsonPath.AppendProperty(prop.Name) condMap = filterVal.(map[connor.FilterKey]any) } } diff --git a/internal/db/indexed_docs_test.go b/internal/db/indexed_docs_test.go index d9e4327c35..a3bc46a530 100644 --- a/internal/db/indexed_docs_test.go +++ b/internal/db/indexed_docs_test.go @@ -1663,7 +1663,7 @@ func TestJSONIndex_IfDocIsDeleted_ShouldRemoveAllRelatedIndexes(t *testing.T) { require.Equal(t, 1, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes after delete") // make sure the second doc is still indexed - obj2Height, err := client.NewJSONWithPath(178, []string{"height"}) + obj2Height, err := client.NewJSONWithPath(178, client.MakeJSONPath("height")) require.NoError(t, err, "Failed to create JSON with path") key2 := newIndexKeyBuilder(f).Col(usersColName).Fields(usersCustomFieldName). Values(client.NewNormalJSON(obj2Height)).Doc(doc2).Build() @@ -1743,7 +1743,7 @@ func TestJSONUniqueIndex_IfDocIsDeleted_ShouldRemoveAllRelatedIndexes(t *testing require.Equal(t, 1, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes after delete") // make sure the second doc is still indexed - obj2Height, err := client.NewJSONWithPath(178, []string{"height"}) + obj2Height, err := client.NewJSONWithPath(178, client.MakeJSONPath("height")) require.NoError(t, err, "Failed to create JSON with path") key2 := newIndexKeyBuilder(f).Col(usersColName).Fields(usersCustomFieldName). 
Values(client.NewNormalJSON(obj2Height)).Unique().Doc(doc2).Build() diff --git a/internal/encoding/json.go b/internal/encoding/json.go index 9c53f11237..32d128bd0d 100644 --- a/internal/encoding/json.go +++ b/internal/encoding/json.go @@ -143,8 +143,10 @@ func decodeJSONPath(b []byte) ([]byte, []string, error) { func encodeJSONPath(b []byte, v client.JSON) []byte { b = append(b, jsonMarker) for _, part := range v.GetPath() { - pathBytes := unsafeConvertStringToBytes(part) - b = EncodeBytesAscending(b, pathBytes) + if prop, ok := part.Property(); ok { + pathBytes := unsafeConvertStringToBytes(prop) + b = EncodeBytesAscending(b, pathBytes) + } } b = append(b, ascendingBytesEscapes.escapedTerm) return b diff --git a/internal/encoding/json_test.go b/internal/encoding/json_test.go index 22aa7741dc..a0b18bf696 100644 --- a/internal/encoding/json_test.go +++ b/internal/encoding/json_test.go @@ -12,7 +12,6 @@ package encoding import ( "fmt" - "strings" "testing" "github.com/stretchr/testify/assert" @@ -55,7 +54,7 @@ func TestJSONEncodingAndDecoding_ShouldEncodeAndDecodeBack(t *testing.T) { pathMap := make(map[string][]client.JSON) err = client.TraverseJSON(testJSON, func(value client.JSON) error { - p := strings.Join(value.GetPath(), "/") + p := value.GetPath().String() jsons := pathMap[p] jsons = append(jsons, value) pathMap[p] = jsons @@ -94,7 +93,7 @@ func TestJSONEncodingAndDecoding_ShouldEncodeAndDecodeBack(t *testing.T) { } func TestJSONEncodingDecoding_WithVoidValue_ShouldEncodeAndDecodeOnlyPath(t *testing.T) { - void := client.MakeVoidJSON([]string{"path", "to", "void"}) + void := client.MakeVoidJSON(client.MakeJSONPath("path", "to", "void")) encoded := EncodeJSONAscending(nil, void) remaining, decodedPath, err := decodeJSONPath(encoded[1:]) // skip the marker From b6c6da58adaf39ffb09167ae9eaa26e5e8236a2d Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 9 Jan 2025 16:03:52 +0100 Subject: [PATCH 38/46] follow up change --- client/json.go | 10 +- 
client/json_test.go | 4 +- client/normal_array.go | 17 ++ client/normal_new.go | 2 + client/normal_util.go | 3 + client/normal_value.go | 4 + client/normal_value_test.go | 16 ++ client/normal_void.go | 4 + internal/db/fetcher/indexer_iterators.go | 89 +++++-- internal/db/index.go | 6 +- internal/encoding/errors.go | 5 +- internal/encoding/json.go | 30 ++- tests/integration/index/json_array_test.go | 248 +++++++++++++++++- .../index/json_unique_array_test.go | 21 +- 14 files changed, 399 insertions(+), 60 deletions(-) diff --git a/client/json.go b/client/json.go index 100dead8ff..dba6e8cf59 100644 --- a/client/json.go +++ b/client/json.go @@ -599,7 +599,7 @@ func newJSON(v any, path JSONPath) (JSON, error) { func newJsonArrayFromAnyArray(arr []any, path JSONPath) (JSON, error) { result := make([]JSON, len(arr)) for i := range arr { - jsonVal, err := newJSON(arr[i], path) + jsonVal, err := newJSON(arr[i], path.AppendIndex(uint64(i))) if err != nil { return nil, err } @@ -611,7 +611,7 @@ func newJsonArrayFromAnyArray(arr []any, path JSONPath) (JSON, error) { func newJSONBoolArray(v []bool, path JSONPath) JSON { arr := make([]JSON, len(v)) for i := range v { - arr[i] = newJSONBool(v[i], path) + arr[i] = newJSONBool(v[i], path.AppendIndex(uint64(i))) } return newJSONArray(arr, path) } @@ -619,7 +619,7 @@ func newJSONBoolArray(v []bool, path JSONPath) JSON { func newJSONNumberArray[T constraints.Integer | constraints.Float](v []T, path JSONPath) JSON { arr := make([]JSON, len(v)) for i := range v { - arr[i] = newJSONNumber(float64(v[i]), path) + arr[i] = newJSONNumber(float64(v[i]), path.AppendIndex(uint64(i))) } return newJSONArray(arr, path) } @@ -627,7 +627,7 @@ func newJSONNumberArray[T constraints.Integer | constraints.Float](v []T, path J func newJSONStringArray(v []string, path JSONPath) JSON { arr := make([]JSON, len(v)) for i := range v { - arr[i] = newJSONString(v[i], path) + arr[i] = newJSONString(v[i], path.AppendIndex(uint64(i))) } return newJSONArray(arr, 
path) } @@ -647,7 +647,7 @@ func newJSONFromFastJSON(v *fastjson.Value, path JSONPath) JSON { fastArr := v.GetArray() arr := make([]JSON, len(fastArr)) for i := range fastArr { - arr[i] = NewJSONFromFastJSON(fastArr[i]) + arr[i] = newJSONFromFastJSON(fastArr[i], path.Append(indexPathPart(uint64(i)))) } return newJSONArray(arr, path) case fastjson.TypeNumber: diff --git a/client/json_test.go b/client/json_test.go index ed98a027d6..8fbb259e5e 100644 --- a/client/json_test.go +++ b/client/json_test.go @@ -632,8 +632,8 @@ func traverseAndAssertPaths(t *testing.T, j JSON, parentPath JSONPath) { } if arr, isArr := j.Array(); isArr { - for _, v := range arr { - traverseAndAssertPaths(t, v, parentPath) + for i, v := range arr { + traverseAndAssertPaths(t, v, parentPath.AppendIndex(uint64(i))) } } } diff --git a/client/normal_array.go b/client/normal_array.go index b560a4bd9a..7f3605792f 100644 --- a/client/normal_array.go +++ b/client/normal_array.go @@ -120,6 +120,18 @@ func (v normalDocumentArray) Equal(other NormalValue) bool { return areNormalArraysEqual(v.val, other.DocumentArray) } +type normalJSONArray struct { + baseArrayNormalValue[[]JSON] +} + +func (v normalJSONArray) JSONArray() ([]JSON, bool) { + return v.val, true +} + +func (v normalJSONArray) Equal(other NormalValue) bool { + return areNormalArraysEqual(v.val, other.JSONArray) +} + // NewNormalBoolArray creates a new NormalValue that represents a `[]bool` value. func NewNormalBoolArray(val []bool) NormalValue { return normalBoolArray{newBaseArrayNormalValue(val)} @@ -155,6 +167,11 @@ func NewNormalDocumentArray(val []*Document) NormalValue { return normalDocumentArray{newBaseArrayNormalValue(val)} } +// NewNormalJSONArray creates a new NormalValue that represents a `[]JSON` value. 
+func NewNormalJSONArray(val []JSON) NormalValue { + return normalJSONArray{newBaseArrayNormalValue(val)} +} + func normalizeNumArr[R int64 | float64, T constraints.Integer | constraints.Float](val []T) []R { var v any = val if arr, ok := v.([]R); ok { diff --git a/client/normal_new.go b/client/normal_new.go index 8eb1b9f24c..671f00eac7 100644 --- a/client/normal_new.go +++ b/client/normal_new.go @@ -134,6 +134,8 @@ func NewNormalValue(val any) (NormalValue, error) { return NewNormalTimeArray(v), nil case []*Document: return NewNormalDocumentArray(v), nil + case []JSON: + return NewNormalJSONArray(v), nil case []immutable.Option[bool]: return NewNormalNillableBoolArray(v), nil diff --git a/client/normal_util.go b/client/normal_util.go index 44631fe45c..64845a3404 100644 --- a/client/normal_util.go +++ b/client/normal_util.go @@ -43,6 +43,9 @@ func ToArrayOfNormalValues(val NormalValue) ([]NormalValue, error) { if v, ok := val.DocumentArray(); ok { return toNormalArray(v, NewNormalDocument), nil } + if v, ok := val.JSONArray(); ok { + return toNormalArray(v, NewNormalJSON), nil + } if v, ok := val.NillableBoolArray(); ok { return toNormalArray(v, NewNormalNillableBool), nil } diff --git a/client/normal_value.go b/client/normal_value.go index 3dc66a83fd..536f1daf78 100644 --- a/client/normal_value.go +++ b/client/normal_value.go @@ -123,6 +123,10 @@ type NormalValue interface { // The second return flag is true if the value is a [[]*Document]. // Otherwise it will return nil and false. DocumentArray() ([]*Document, bool) + // JSONArray returns the value as a JSON array. + // The second return flag is true if the value is a JSON array. + // Otherwise it will return nil and false. + JSONArray() ([]JSON, bool) // NillableBoolArray returns the value as nillable array of bool elements. // The second return flag is true if the value is [immutable.Option[[]bool]]. 
diff --git a/client/normal_value_test.go b/client/normal_value_test.go index 551ef2e300..4bdba2f6aa 100644 --- a/client/normal_value_test.go +++ b/client/normal_value_test.go @@ -47,6 +47,7 @@ const ( BytesArray nType = "BytesArray" TimeArray nType = "TimeArray" DocumentArray nType = "DocumentArray" + JSONArray nType = "JSONArray" NillableBoolArray nType = "NillableBoolArray" NillableIntArray nType = "NillableIntArray" @@ -135,6 +136,7 @@ func TestNormalValue_NewValueAndTypeAssertion(t *testing.T) { BytesArray: func(v NormalValue) (any, bool) { return v.BytesArray() }, TimeArray: func(v NormalValue) (any, bool) { return v.TimeArray() }, DocumentArray: func(v NormalValue) (any, bool) { return v.DocumentArray() }, + JSONArray: func(v NormalValue) (any, bool) { return v.JSONArray() }, BoolNillableArray: func(v NormalValue) (any, bool) { return v.BoolNillableArray() }, IntNillableArray: func(v NormalValue) (any, bool) { return v.IntNillableArray() }, @@ -188,6 +190,7 @@ func TestNormalValue_NewValueAndTypeAssertion(t *testing.T) { BytesArray: func(v any) NormalValue { return NewNormalBytesArray(v.([][]byte)) }, TimeArray: func(v any) NormalValue { return NewNormalTimeArray(v.([]time.Time)) }, DocumentArray: func(v any) NormalValue { return NewNormalDocumentArray(v.([]*Document)) }, + JSONArray: func(v any) NormalValue { return NewNormalJSONArray(v.([]JSON)) }, NillableBoolArray: func(v any) NormalValue { return NewNormalNillableBoolArray(v.([]immutable.Option[bool])) @@ -407,6 +410,11 @@ func TestNormalValue_NewValueAndTypeAssertion(t *testing.T) { input: []*Document{{}, {}}, isArray: true, }, + { + nType: JSONArray, + input: []JSON{newJSONNumber(3, nil), newJSONString("test", nil)}, + isArray: true, + }, { nType: NillableBoolArray, input: []immutable.Option[bool]{immutable.Some(true)}, @@ -1479,6 +1487,14 @@ func TestNormalValue_ToArrayOfNormalValues(t *testing.T) { input: NewNormalDocumentArray([]*Document{doc1, doc2}), expected: 
[]NormalValue{NewNormalDocument(doc1), NewNormalDocument(doc2)}, }, + { + name: "json elements", + input: NewNormalJSONArray([]JSON{newJSONBool(true, nil), newJSONString("test", nil)}), + expected: []NormalValue{ + NewNormalJSON(newJSONBool(true, nil)), + NewNormalJSON(newJSONString("test", nil)), + }, + }, { name: "nillable bool elements", input: NewNormalNillableBoolArray([]immutable.Option[bool]{ diff --git a/client/normal_void.go b/client/normal_void.go index a9078e5328..3c886d4827 100644 --- a/client/normal_void.go +++ b/client/normal_void.go @@ -129,6 +129,10 @@ func (NormalVoid) DocumentArray() ([]*Document, bool) { return nil, false } +func (NormalVoid) JSONArray() ([]JSON, bool) { + return nil, false +} + func (NormalVoid) NillableBoolArray() ([]immutable.Option[bool], bool) { return nil, false } diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index bdb149870e..9ee67f6926 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -50,6 +50,10 @@ const ( opAny = "__any" ) +func isArrayCondition(op string) bool { + return op == compOpAny || op == compOpAll || op == compOpNone +} + // indexIterator is an iterator over index keys. // It is used to iterate over the index keys that match a specific condition. // For example, iteration over condition _eq and _gt will have completely different logic. 
@@ -522,6 +526,12 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro for key, filterVal := range condMap { prop, ok := key.(*mapper.ObjectProperty) if !ok { + // if filter contains an array condition, we need to append index 0 to the json path + // to limit the search only to array elements + op, ok := key.(*mapper.Operator) + if ok && isArrayCondition(op.Operation) { + jsonPath = jsonPath.AppendIndex(0) + } break jsonPathLoop } jsonPath = jsonPath.AppendProperty(prop.Name) @@ -539,22 +549,7 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro var err error if len(jsonPath) > 0 { - var jsonVal client.JSON - if cond.op == compOpAny || cond.op == compOpAll || cond.op == compOpNone { - subCondMap := filterVal.(map[connor.FilterKey]any) - for subKey, subVal := range subCondMap { - cond.arrOp = cond.op - cond.op = subKey.(*mapper.Operator).Operation - jsonVal, err = client.NewJSONWithPath(subVal, jsonPath) - // the sub condition is supposed to have only 1 record - break - } - } else { - jsonVal, err = client.NewJSONWithPath(filterVal, jsonPath) - } - if err == nil { - cond.val = client.NewNormalJSON(jsonVal) - } + err = determineJSONFilterCondition(&cond, filterVal, jsonPath) } else if filterVal == nil { cond.val, err = client.NewNormalNil(cond.kind) } else if !f.indexedFields[i].Kind.IsArray() { @@ -594,6 +589,68 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro return result, nil } +func determineJSONFilterCondition(cond *fieldFilterCond, filterVal any, jsonPath client.JSONPath) error { + var jsonVal client.JSON + var err error + if isArrayCondition(cond.op) { + subCondMap := filterVal.(map[connor.FilterKey]any) + for subKey, subVal := range subCondMap { + cond.arrOp = cond.op + cond.op = subKey.(*mapper.Operator).Operation + jsonVal, err = client.NewJSONWithPath(subVal, jsonPath) + if err == nil { + cond.val = client.NewNormalJSON(jsonVal) + } + // the sub condition is 
supposed to have only 1 record + break + } + } else if cond.op == opIn { + var jsonVals []client.JSON + if anyArr, ok := filterVal.([]any); ok { + // if filter value is []any we convert each value separately because JSON might have + // array elements of different types. That's why we can't just pass it directly to + // client.ToArrayOfNormalValues + jsonVals = make([]client.JSON, 0, len(anyArr)) + for _, val := range anyArr { + jsonVal, err = client.NewJSONWithPath(val, jsonPath) + if err != nil { + return err + } + jsonVals = append(jsonVals, jsonVal) + } + } else { + normValue, err := client.NewNormalValue(filterVal) + if err != nil { + return err + } + normArr, err := client.ToArrayOfNormalValues(normValue) + if err != nil { + return err + } + jsonVals = make([]client.JSON, 0, len(normArr)) + for _, val := range normArr { + jsonVal, err = client.NewJSONWithPath(val.Unwrap(), jsonPath) + if err != nil { + return err + } + jsonVals = append(jsonVals, jsonVal) + } + } + normJSONs, err := client.NewNormalValue(jsonVals) + if err != nil { + return err + } + cond.val = normJSONs + } else { + jsonVal, err = client.NewJSONWithPath(filterVal, jsonPath) + if err != nil { + return err + } + cond.val = client.NewNormalJSON(jsonVal) + } + return nil +} + // isUniqueFetchByFullKey checks if the only index key can be fetched by the full index key. 
// // This method ignores the first condition (unless it's nil) because it's expected to be called only diff --git a/internal/db/index.go b/internal/db/index.go index 92fad980e6..38ca475ce2 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -129,7 +129,11 @@ func (g *JSONFieldGenerator) Generate(value client.NormalValue, f func(client.No return err } return f(val) - }, client.TraverseJSONOnlyLeaves(), client.TraverseJSONVisitArrayElements(false)) + }, + client.TraverseJSONOnlyLeaves(), + client.TraverseJSONWithArrayIndexInPath(), + client.TraverseJSONVisitArrayElements(false), + ) } // getFieldGenerator returns appropriate generator for the field type diff --git a/internal/encoding/errors.go b/internal/encoding/errors.go index 26d2daeb93..838dfd5660 100644 --- a/internal/encoding/errors.go +++ b/internal/encoding/errors.go @@ -94,9 +94,8 @@ func NewErrVarintOverflow(b []byte, value uint64) error { return errors.New(errVarintOverflow, errors.NewKV("Buffer", b), errors.NewKV("Value", value)) } -// NewErrInvalidJSONPayload returns a new error indicating that the buffer contains -// an invalid JSON payload. 
-func NewErrInvalidJSONPayload(b []byte, path []string, err ...error) error { +// NewErrInvalidJSONPayload returns a new error indicating that the buffer +func NewErrInvalidJSONPayload(b []byte, path string, err ...error) error { kvs := []errors.KV{errors.NewKV("Buffer", b), errors.NewKV("Path", path)} if len(err) > 0 { kvs = append(kvs, errors.NewKV("Error", err[0])) diff --git a/internal/encoding/json.go b/internal/encoding/json.go index 32d128bd0d..a6822198e2 100644 --- a/internal/encoding/json.go +++ b/internal/encoding/json.go @@ -104,14 +104,14 @@ func decodeJSON(b []byte, ascending bool) ([]byte, client.JSON, error) { case Null: b = decodeNull(b) default: - err = NewErrInvalidJSONPayload(b, path) + err = NewErrInvalidJSONPayload(b, path.String()) } if err != nil { return b, nil, err } - result, err := client.NewJSON(jsonValue) + result, err := client.NewJSONWithPath(jsonValue, path) if err != nil { return b, nil, err @@ -120,8 +120,8 @@ func decodeJSON(b []byte, ascending bool) ([]byte, client.JSON, error) { return b, result, nil } -func decodeJSONPath(b []byte) ([]byte, []string, error) { - var path []string +func decodeJSONPath(b []byte) ([]byte, client.JSONPath, error) { + var path client.JSONPath for { if len(b) == 0 { break @@ -130,12 +130,22 @@ func decodeJSONPath(b []byte) ([]byte, []string, error) { b = b[1:] break } - rem, part, err := DecodeBytesAscending(b) - if err != nil { - return b, nil, NewErrInvalidJSONPath(b, err) + + if PeekType(b) == Bytes { + rem, part, err := DecodeBytesAscending(b) + if err != nil { + return b, nil, NewErrInvalidJSONPath(b, err) + } + path = path.AppendProperty(string(part)) + b = rem + } else { + rem, part, err := DecodeUvarintAscending(b) + if err != nil { + return b, nil, NewErrInvalidJSONPath(b, err) + } + path = path.AppendIndex(part) + b = rem } - path = append(path, string(part)) - b = rem } return b, path, nil } @@ -146,6 +156,8 @@ func encodeJSONPath(b []byte, v client.JSON) []byte { if prop, ok := 
part.Property(); ok { pathBytes := unsafeConvertStringToBytes(prop) b = EncodeBytesAscending(b, pathBytes) + } else if _, ok := part.Index(); ok { + b = EncodeUvarintAscending(b, 0) } } b = append(b, ascendingBytesEscapes.escapedTerm) diff --git a/tests/integration/index/json_array_test.go b/tests/integration/index/json_array_test.go index 41e4babde5..603ae01d12 100644 --- a/tests/integration/index/json_array_test.go +++ b/tests/integration/index/json_array_test.go @@ -105,7 +105,7 @@ func TestJSONArrayIndex_WithDifferentElementValuesAndTypes_ShouldFetchCorrectlyU }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithIndexFetches(2), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(1), }, }, } @@ -113,7 +113,7 @@ func TestJSONArrayIndex_WithDifferentElementValuesAndTypes_ShouldFetchCorrectlyU testUtils.ExecuteTestCase(t, test) } -func TestJSONArrayIndex_WithNestedArrays_ShouldNotConsiderThem(t *testing.T) { +func TestJSONArrayIndex_WithAnyEqFilter_ShouldNotConsiderThem(t *testing.T) { req := `query { User(filter: {custom: {numbers: {_any: {_eq: 4}}}}) { name @@ -140,7 +140,7 @@ func TestJSONArrayIndex_WithNestedArrays_ShouldNotConsiderThem(t *testing.T) { DocMap: map[string]any{ "name": "Islam", "custom": map[string]any{ - "numbers": []any{0, []int{2, 6}, 9}, + "numbers": []any{0, []int{2}, 4}, }, }, }, @@ -160,15 +160,25 @@ func TestJSONArrayIndex_WithNestedArrays_ShouldNotConsiderThem(t *testing.T) { }, }, }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "numbers": 4, + }, + }, + }, testUtils.Request{ Request: req, Results: map[string]any{ - "User": []map[string]any{}, + "User": []map[string]any{ + {"name": "Islam"}, + }, }, }, testUtils.Request{ Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithIndexFetches(0), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(1), }, }, } @@ -176,7 +186,64 @@ func 
TestJSONArrayIndex_WithNestedArrays_ShouldNotConsiderThem(t *testing.T) { testUtils.ExecuteTestCase(t, test) } -func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrectlyUsingIndex(t *testing.T) { +func TestJSONArrayIndex_WithAnyAndComparisonFilter_ShouldNotConsiderThem(t *testing.T) { + req := `query { + User(filter: {custom: {numbers: {_any: {_gt: 4}}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []any{3, 5, 7}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": []any{0, []int{6}, 4}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "numbers": 5, + }, + }, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONArrayIndex_WithNoneEqFilter_ShouldFetchCorrectlyUsingIndex(t *testing.T) { req := `query { User(filter: {custom: {numbers: {_none: {_eq: 4}}}}) { name @@ -195,7 +262,7 @@ func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrec DocMap: map[string]any{ "name": "John", "custom": map[string]any{ - "numbers": []int{3, 5, 7}, + "numbers": []int{3}, }, }, }, @@ -223,8 +290,6 @@ func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrec }, }, }, - // TODO: This document should be part of the query result, but it needs additional work - // with json encoding https://github.com/sourcenetwork/defradb/issues/3329 testUtils.CreateDoc{ DocMap: map[string]any{ "name": "Andy", @@ 
-233,6 +298,14 @@ func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrec }, }, }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + "numbers": nil, + }, + }, + }, testUtils.Request{ Request: req, Results: map[string]any{ @@ -244,8 +317,9 @@ func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrec }, }, testUtils.Request{ - Request: makeExplainQuery(req), - Asserter: testUtils.NewExplainAsserter().WithIndexFetches(9), + Request: makeExplainQuery(req), + // We examine only array elements (excluding nested arrays) and we have 6 of them + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(6), }, }, } @@ -253,7 +327,91 @@ func TestJSONArrayIndex_WithNoneFilterOnDifferentElementValues_ShouldFetchCorrec testUtils.ExecuteTestCase(t, test) } -func TestJSONArrayIndex_WithAllFilterOnDifferentElementValues_ShouldFetchCorrectlyUsingIndex(t *testing.T) { +func TestJSONArrayIndex_WithNoneEqAndComparisonFilter_ShouldFetchCorrectlyUsingIndex(t *testing.T) { + req := `query { + User(filter: {custom: {numbers: {_none: {_gt: 4}}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []int{3}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "numbers": []int{3, 8}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": []any{2, nil}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Fred", + "custom": map[string]any{ + "numbers": []any{1, []int{5}}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "numbers": 5, + }, + }, + }, + 
testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Bruno", + "custom": map[string]any{ + "numbers": nil, + }, + }, + }, + testUtils.Request{ + Request: req, + Results: map[string]any{ + "User": []map[string]any{ + {"name": "Islam"}, + {"name": "Fred"}, + {"name": "John"}, + }, + }, + }, + testUtils.Request{ + Request: makeExplainQuery(req), + // We examine only array elements (excluding nested arrays) and we have 6 of them + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(6), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONArrayIndex_WithAllEqFilter_ShouldFetchCorrectlyUsingIndex(t *testing.T) { req := `query { User(filter: {custom: {numbers: {_all: {_eq: 4}}}}) { name @@ -324,6 +482,72 @@ func TestJSONArrayIndex_WithAllFilterOnDifferentElementValues_ShouldFetchCorrect }, }, }, + testUtils.Request{ + Request: makeExplainQuery(req), + // 4 docs have the value 4 in the numbers array + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(4), + }, + }, + } + + testUtils.ExecuteTestCase(t, test) +} + +func TestJSONArrayIndex_WithAllEqAndComparisonFilter_ShouldFetchCorrectlyUsingIndex(t *testing.T) { + req := `query { + User(filter: {custom: {numbers: {_all: {_gt: 4}}}}) { + name + } + }` + test := testUtils.TestCase{ + Actions: []any{ + testUtils.SchemaUpdate{ + Schema: ` + type User { + name: String + custom: JSON @index + }`, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "John", + "custom": map[string]any{ + "numbers": []int{3, 7}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Shahzad", + "custom": map[string]any{ + "numbers": []any{5, []int{6}}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + "numbers": []any{7, 8}, + }, + }, + }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Islam", + "custom": map[string]any{ + "numbers": 8, + }, + }, + }, + testUtils.Request{ + Request: req, + Results: 
map[string]any{ + "User": []map[string]any{ + {"name": "Andy"}, + }, + }, + }, testUtils.Request{ Request: makeExplainQuery(req), Asserter: testUtils.NewExplainAsserter().WithIndexFetches(5), diff --git a/tests/integration/index/json_unique_array_test.go b/tests/integration/index/json_unique_array_test.go index 6e34c8dd13..05ebfcfe2f 100644 --- a/tests/integration/index/json_unique_array_test.go +++ b/tests/integration/index/json_unique_array_test.go @@ -50,6 +50,15 @@ func TestJSONArrayUniqueIndex_ShouldAllowOnlyUniqueValuesAndUseThemForFetching(t }, }, }, + testUtils.CreateDoc{ + DocMap: map[string]any{ + "name": "Andy", + "custom": map[string]any{ + // existing non-array-element value + "numbers": 3, + }, + }, + }, testUtils.CreateDoc{ DocMap: map[string]any{ "name": "Islam", @@ -86,18 +95,6 @@ func TestJSONArrayUniqueIndex_ShouldAllowOnlyUniqueValuesAndUseThemForFetching(t "bae-bde18215-f623-568e-868d-1156c30e45d3", errors.NewKV("custom", map[string]any{"numbers": []any{6, nil}})).Error(), }, - testUtils.CreateDoc{ - DocMap: map[string]any{ - "name": "Andy", - "custom": map[string]any{ - // existing non-array-element value - "numbers": 3, - }, - }, - ExpectedError: db.NewErrCanNotIndexNonUniqueFields( - "bae-54e76159-66c6-56be-ad65-7ff83edda058", - errors.NewKV("custom", map[string]any{"numbers": 3})).Error(), - }, testUtils.Request{ Request: req, Results: map[string]any{ From 507ad2029adaee5b6415f8300a4fd1dcd8a61e4e Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Sat, 11 Jan 2025 18:19:28 +0100 Subject: [PATCH 39/46] Scope _none to only arrays --- internal/connor/none.go | 48 +++++++++++++++++-- internal/db/fetcher/indexer_iterators.go | 10 ++-- tests/integration/index/json_array_test.go | 20 +++----- .../integration/query/json/with_none_test.go | 3 -- 4 files changed, 57 insertions(+), 24 deletions(-) diff --git a/internal/connor/none.go b/internal/connor/none.go index 16613b3e46..742d4dc977 100644 --- a/internal/connor/none.go +++ b/internal/connor/none.go 
@@ -1,12 +1,52 @@ package connor +import "github.com/sourcenetwork/immutable" + // none is an operator which allows the evaluation of // a number of conditions over a list of values // matching if all of them do not match. func none(condition, data any) (bool, error) { - m, err := anyOp(condition, data) - if err != nil { - return false, err + switch t := data.(type) { + case []any: + return noneSlice(condition, t) + + case []string: + return noneSlice(condition, t) + + case []immutable.Option[string]: + return noneSlice(condition, t) + + case []int64: + return noneSlice(condition, t) + + case []immutable.Option[int64]: + return noneSlice(condition, t) + + case []bool: + return noneSlice(condition, t) + + case []immutable.Option[bool]: + return noneSlice(condition, t) + + case []float64: + return noneSlice(condition, t) + + case []immutable.Option[float64]: + return noneSlice(condition, t) + + default: + return false, nil + } +} + +func noneSlice[T any](condition any, data []T) (bool, error) { + for _, c := range data { + m, err := eq(condition, c) + if err != nil { + return false, err + } else if m { + return false, nil + } } - return !m, nil + return true, nil } diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 9ee67f6926..db51690483 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -530,6 +530,10 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro // to limit the search only to array elements op, ok := key.(*mapper.Operator) if ok && isArrayCondition(op.Operation) { + if op.Operation == compOpNone { + // if the array condition is _none it doesn't make sense to use index + return nil, nil + } jsonPath = jsonPath.AppendIndex(0) } break jsonPathLoop @@ -607,9 +611,9 @@ func determineJSONFilterCondition(cond *fieldFilterCond, filterVal any, jsonPath } else if cond.op == opIn { var jsonVals []client.JSON if anyArr, ok := 
filterVal.([]any); ok { - // if filter value is []any we convert each value separately because JSON might have - // array elements of different types. That's why we can't just pass it directly to - // client.ToArrayOfNormalValues + // if filter value is []any we convert each value separately because JSON might have + // array elements of different types. That's why we can't just pass it directly to + // client.ToArrayOfNormalValues jsonVals = make([]client.JSON, 0, len(anyArr)) for _, val := range anyArr { jsonVal, err = client.NewJSONWithPath(val, jsonPath) diff --git a/tests/integration/index/json_array_test.go b/tests/integration/index/json_array_test.go index 603ae01d12..3d0892545d 100644 --- a/tests/integration/index/json_array_test.go +++ b/tests/integration/index/json_array_test.go @@ -298,28 +298,20 @@ func TestJSONArrayIndex_WithNoneEqFilter_ShouldFetchCorrectlyUsingIndex(t *testi }, }, }, - testUtils.CreateDoc{ - DocMap: map[string]any{ - "name": "Bruno", - "custom": map[string]any{ - "numbers": nil, - }, - }, - }, testUtils.Request{ Request: req, Results: map[string]any{ "User": []map[string]any{ {"name": "Islam"}, - {"name": "Fred"}, {"name": "John"}, + {"name": "Fred"}, }, }, }, testUtils.Request{ Request: makeExplainQuery(req), - // We examine only array elements (excluding nested arrays) and we have 6 of them - Asserter: testUtils.NewExplainAsserter().WithIndexFetches(6), + // We don't use index for _none operator + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(0), }, }, } @@ -395,15 +387,15 @@ func TestJSONArrayIndex_WithNoneEqAndComparisonFilter_ShouldFetchCorrectlyUsingI Results: map[string]any{ "User": []map[string]any{ {"name": "Islam"}, - {"name": "Fred"}, {"name": "John"}, + {"name": "Fred"}, }, }, }, testUtils.Request{ Request: makeExplainQuery(req), - // We examine only array elements (excluding nested arrays) and we have 6 of them - Asserter: testUtils.NewExplainAsserter().WithIndexFetches(6), + // We don't use index for _none 
operator + Asserter: testUtils.NewExplainAsserter().WithIndexFetches(0), }, }, } diff --git a/tests/integration/query/json/with_none_test.go b/tests/integration/query/json/with_none_test.go index 1a65f7de60..2b33791198 100644 --- a/tests/integration/query/json/with_none_test.go +++ b/tests/integration/query/json/with_none_test.go @@ -107,9 +107,6 @@ func TestQueryJSON_WithNoneFilterAndNestedArray_ShouldFilter(t *testing.T) { Results: map[string]any{ "Users": []map[string]any{ {"name": "Shahzad"}, - {"name": "John"}, - {"name": "Islam"}, - {"name": "Bruno"}, }, }, }, From 10d3e3072914049e2aca3738dfd2d8af0dc6e30b Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 13 Jan 2025 09:54:31 +0100 Subject: [PATCH 40/46] Add a note for data change --- docs/data_format_changes/i3368-json-array-encoding.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/data_format_changes/i3368-json-array-encoding.md diff --git a/docs/data_format_changes/i3368-json-array-encoding.md b/docs/data_format_changes/i3368-json-array-encoding.md new file mode 100644 index 0000000000..8392a8fb8b --- /dev/null +++ b/docs/data_format_changes/i3368-json-array-encoding.md @@ -0,0 +1,3 @@ +# JSON array encoding + +JSON array elements are now encoded in a different way. From e6153ce4f6b64471b9fa9966b62ab84ce3d77295 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Tue, 14 Jan 2025 22:02:03 +0100 Subject: [PATCH 41/46] Add comments --- client/json.go | 14 ++++++++++++++ internal/db/fetcher/indexer_iterators.go | 6 +++++- internal/db/index.go | 4 ++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/client/json.go b/client/json.go index dba6e8cf59..a60327b27f 100644 --- a/client/json.go +++ b/client/json.go @@ -22,6 +22,20 @@ import ( // JSONPathPart represents a part of a JSON path. // Json path can be either a property of an object or an index of an element in an array. 
+// For example, the paths to both values 1 are very similar: +// +// { +// "0": { +// "val": 1 +// } +// } +// [ +// { +// "val": 1 +// } +// ] +// +// It can be described as "0.val" but they are different. type JSONPathPart interface { // Property returns the property name if the part is a property, and a boolean indicating if the part is a property. Property() (string, bool) diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index e1462e2caa..91a7dfa710 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -593,6 +593,10 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro return result, nil } +// determineJSONFilterCondition determines the condition and its corresponding operation for a +// JSON filter condition. +// It mutates the given condition to make it match the filter value and JSON path so that +// it can be used to fetch the indexed data. func determineJSONFilterCondition(cond *fieldFilterCond, filterVal any, jsonPath client.JSONPath) error { var jsonVal client.JSON var err error @@ -605,7 +609,7 @@ func determineJSONFilterCondition(cond *fieldFilterCond, filterVal any, jsonPath if err == nil { cond.val = client.NewNormalJSON(jsonVal) } - // the sub condition is supposed to have only 1 record + // the array sub condition (_any, _all or _none) is supposed to have only 1 record break } } else if cond.op == opIn { diff --git a/internal/db/index.go b/internal/db/index.go index adcc9ace6f..aa4f4bce05 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -130,8 +130,12 @@ func (g *JSONFieldGenerator) Generate(value client.NormalValue, f func(client.No } return f(val) }, + // we don't want to traverse intermediate nodes client.TraverseJSONOnlyLeaves(), + // we want to include array elements' indexes in json path client.TraverseJSONWithArrayIndexInPath(), + // we want to traverse array elements, but not recurse into them + // 
this effectively means that we traverse only leave array elements (string, float, bool, null) client.TraverseJSONVisitArrayElements(false), ) } From 29a0c7fbecdb9d2a32014048fe1ade73cfe1b2f6 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Wed, 15 Jan 2025 23:40:06 +0100 Subject: [PATCH 42/46] Move MakeJSONPath to tests --- client/json.go | 9 -------- client/json_test.go | 2 +- client/json_traverse_test.go | 37 ++++++++++++++++++++------------ internal/db/indexed_docs_test.go | 4 ++-- internal/encoding/json_test.go | 2 +- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/client/json.go b/client/json.go index a60327b27f..0fc2f33207 100644 --- a/client/json.go +++ b/client/json.go @@ -115,15 +115,6 @@ func toJSONPathPart[T string | int | uint64](v T) JSONPathPart { return nil } -// Creates a path from mixed string/integer values -func MakeJSONPath[T string | int | uint64](parts ...T) JSONPath { - path := make(JSONPath, len(parts)) - for i, part := range parts { - path[i] = toJSONPathPart(part) - } - return path -} - // JSON represents a JSON value that can be any valid JSON type: object, array, number, string, boolean, or null. // It provides type-safe access to the underlying value through various accessor methods. 
type JSON interface { diff --git a/client/json_test.go b/client/json_test.go index 8fbb259e5e..070dd46742 100644 --- a/client/json_test.go +++ b/client/json_test.go @@ -504,7 +504,7 @@ func TestNewJSONAndMarshalJSON(t *testing.T) { }, } - path := MakeJSONPath("some", "path") + path := makeJSONPath("some", "path") for _, tt := range tests { for _, withPath := range []bool{true, false} { diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index 5056104cf5..4cd2c248dd 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -38,6 +38,15 @@ func getArrayValue(j JSON) []JSON { panic("expected array value") } +// Creates a path from mixed string/integer values +func makeJSONPath[T string | int | uint64](parts ...T) JSONPath { + path := make(JSONPath, len(parts)) + for i, part := range parts { + path[i] = toJSONPathPart(part) + } + return path +} + func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { // Create a complex JSON structure for testing json := newJSONObject(map[string]JSON{ @@ -102,7 +111,7 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { { name: "WithPrefix_Object", options: []traverseJSONOption{ - TraverseJSONWithPrefix(MakeJSONPath("object")), + TraverseJSONWithPrefix(makeJSONPath("object")), }, expected: []traverseNode{ {path: "object", value: getObjectValue(json)["object"]}, @@ -114,7 +123,7 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { { name: "WithPrefix_Deep", options: []traverseJSONOption{ - TraverseJSONWithPrefix(MakeJSONPath("object", "deep")), + TraverseJSONWithPrefix(makeJSONPath("object", "deep")), }, expected: []traverseNode{ {path: "object.deep", value: getObjectValue(getObjectValue(json)["object"])["deep"]}, @@ -197,7 +206,7 @@ func TestTraverseJSON_ShouldVisitAccordingToConfig(t *testing.T) { options: []traverseJSONOption{ TraverseJSONOnlyLeaves(), TraverseJSONVisitArrayElements(true), - TraverseJSONWithPrefix(MakeJSONPath("array")), + 
TraverseJSONWithPrefix(makeJSONPath("array")), TraverseJSONWithArrayIndexInPath(), }, expected: []traverseNode{ @@ -406,37 +415,37 @@ func TestShouldVisitPath(t *testing.T) { { name: "EmptyPrefix", prefix: JSONPath{}, - path: MakeJSONPath("a", "b"), + path: makeJSONPath("a", "b"), expected: true, }, { name: "ExactMatch", - prefix: MakeJSONPath("a", "b"), - path: MakeJSONPath("a", "b"), + prefix: makeJSONPath("a", "b"), + path: makeJSONPath("a", "b"), expected: true, }, { name: "PrefixMatch", - prefix: MakeJSONPath("a"), - path: MakeJSONPath("a", "b"), + prefix: makeJSONPath("a"), + path: makeJSONPath("a", "b"), expected: true, }, { name: "NoMatch", - prefix: MakeJSONPath("a", "b"), - path: MakeJSONPath("a", "c"), + prefix: makeJSONPath("a", "b"), + path: makeJSONPath("a", "c"), expected: false, }, { name: "PathTooShort", - prefix: MakeJSONPath("a", "b"), - path: MakeJSONPath("a"), + prefix: makeJSONPath("a", "b"), + path: makeJSONPath("a"), expected: true, }, { name: "PathLonger", - prefix: MakeJSONPath("a", "b"), - path: MakeJSONPath("a", "b", "c"), + prefix: makeJSONPath("a", "b"), + path: makeJSONPath("a", "b", "c"), expected: true, }, } diff --git a/internal/db/indexed_docs_test.go b/internal/db/indexed_docs_test.go index a3bc46a530..e436f30b38 100644 --- a/internal/db/indexed_docs_test.go +++ b/internal/db/indexed_docs_test.go @@ -1663,7 +1663,7 @@ func TestJSONIndex_IfDocIsDeleted_ShouldRemoveAllRelatedIndexes(t *testing.T) { require.Equal(t, 1, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes after delete") // make sure the second doc is still indexed - obj2Height, err := client.NewJSONWithPath(178, client.MakeJSONPath("height")) + obj2Height, err := client.NewJSONWithPath(178, client.JSONPath{}.AppendProperty("height")) require.NoError(t, err, "Failed to create JSON with path") key2 := newIndexKeyBuilder(f).Col(usersColName).Fields(usersCustomFieldName). 
Values(client.NewNormalJSON(obj2Height)).Doc(doc2).Build() @@ -1743,7 +1743,7 @@ func TestJSONUniqueIndex_IfDocIsDeleted_ShouldRemoveAllRelatedIndexes(t *testing require.Equal(t, 1, f.countIndexPrefixes(testUsersColIndexCustom), "Unexpected num of indexes after delete") // make sure the second doc is still indexed - obj2Height, err := client.NewJSONWithPath(178, client.MakeJSONPath("height")) + obj2Height, err := client.NewJSONWithPath(178, client.JSONPath{}.AppendProperty("height")) require.NoError(t, err, "Failed to create JSON with path") key2 := newIndexKeyBuilder(f).Col(usersColName).Fields(usersCustomFieldName). Values(client.NewNormalJSON(obj2Height)).Unique().Doc(doc2).Build() diff --git a/internal/encoding/json_test.go b/internal/encoding/json_test.go index a0b18bf696..6912b9fdaf 100644 --- a/internal/encoding/json_test.go +++ b/internal/encoding/json_test.go @@ -93,7 +93,7 @@ func TestJSONEncodingAndDecoding_ShouldEncodeAndDecodeBack(t *testing.T) { } func TestJSONEncodingDecoding_WithVoidValue_ShouldEncodeAndDecodeOnlyPath(t *testing.T) { - void := client.MakeVoidJSON(client.MakeJSONPath("path", "to", "void")) + void := client.MakeVoidJSON(client.JSONPath{}.AppendProperty("path").AppendProperty("to").AppendProperty("void")) encoded := EncodeJSONAscending(nil, void) remaining, decodedPath, err := decodeJSONPath(encoded[1:]) // skip the marker From d20e921be9eee686833a8266885a35a55aed95ca Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Thu, 16 Jan 2025 17:58:32 +0100 Subject: [PATCH 43/46] PR fixup --- client/json.go | 12 ------------ client/json_traverse_test.go | 12 ++++++++++++ internal/db/index.go | 8 +++++--- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/client/json.go b/client/json.go index 0fc2f33207..6174e0cbda 100644 --- a/client/json.go +++ b/client/json.go @@ -103,18 +103,6 @@ func (p JSONPath) String() string { return sb.String() } -func toJSONPathPart[T string | int | uint64](v T) JSONPathPart { - switch val := any(v).(type) 
{ - case string: - return propPathPart(val) - case int: - return indexPathPart(uint64(val)) - case uint64: - return indexPathPart(val) - } - return nil -} - // JSON represents a JSON value that can be any valid JSON type: object, array, number, string, boolean, or null. // It provides type-safe access to the underlying value through various accessor methods. type JSON interface { diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index 4cd2c248dd..38b199bd73 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -38,6 +38,18 @@ func getArrayValue(j JSON) []JSON { panic("expected array value") } +func toJSONPathPart[T string | int | uint64](v T) JSONPathPart { + switch val := any(v).(type) { + case string: + return propPathPart(val) + case int: + return indexPathPart(uint64(val)) + case uint64: + return indexPathPart(val) + } + return nil +} + // Creates a path from mixed string/integer values func makeJSONPath[T string | int | uint64](parts ...T) JSONPath { path := make(JSONPath, len(parts)) diff --git a/internal/db/index.go b/internal/db/index.go index aa4f4bce05..f2dba478d1 100644 --- a/internal/db/index.go +++ b/internal/db/index.go @@ -130,11 +130,13 @@ func (g *JSONFieldGenerator) Generate(value client.NormalValue, f func(client.No } return f(val) }, - // we don't want to traverse intermediate nodes + // we don't want to traverse intermediate nodes, because we encode only values that can be filtered on client.TraverseJSONOnlyLeaves(), - // we want to include array elements' indexes in json path + // we want to include array elements' indexes in json path, because we want to differentiate + // between array elements in order to be able to run array-specific queries like _all, _any and _none client.TraverseJSONWithArrayIndexInPath(), - // we want to traverse array elements, but not recurse into them + // we want to traverse array elements, but not recurse into them, because we don't have any way + // to query nested 
arrays elements. // this effectively means that we traverse only leave array elements (string, float, bool, null) client.TraverseJSONVisitArrayElements(false), ) From e63fd8cbb64f7403cd59cc1c30bc37368a3faae7 Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Fri, 17 Jan 2025 10:15:42 +0100 Subject: [PATCH 44/46] Turn json part interface in struct --- client/json.go | 43 ++++++++++++++----------------------------- client/json_test.go | 2 +- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/client/json.go b/client/json.go index 6174e0cbda..d820214912 100644 --- a/client/json.go +++ b/client/json.go @@ -36,30 +36,20 @@ import ( // ] // // It can be described as "0.val" but they are different. -type JSONPathPart interface { - // Property returns the property name if the part is a property, and a boolean indicating if the part is a property. - Property() (string, bool) - // Index returns the index if the part is an index, and a boolean indicating if the part is an index. - Index() (uint64, bool) +type JSONPathPart struct { + value any } -type propPathPart string -type indexPathPart uint64 - -func (p propPathPart) Property() (string, bool) { - return string(p), true +// Property returns the property name if the part is a property, and a boolean indicating if the part is a property. +func (p JSONPathPart) Property() (string, bool) { + v, ok := p.value.(string) + return v, ok } -func (p propPathPart) Index() (uint64, bool) { - return 0, false -} - -func (p indexPathPart) Property() (string, bool) { - return "", false -} - -func (p indexPathPart) Index() (uint64, bool) { - return uint64(p), true +// Index returns the index if the part is an index, and a boolean indicating if the part is an index. +func (p JSONPathPart) Index() (uint64, bool) { + v, ok := p.value.(uint64) + return v, ok } // JSONPath represents a path to a JSON value in a JSON tree. 
@@ -70,19 +60,14 @@ func (p JSONPath) Parts() []JSONPathPart { return p } -// Append appends a part to the JSON path. -func (p JSONPath) Append(part JSONPathPart) JSONPath { - return append(p, part) -} - // AppendProperty appends a property part to the JSON path. func (p JSONPath) AppendProperty(part string) JSONPath { - return append(p, propPathPart(part)) + return append(p, JSONPathPart{value: part}) } // AppendIndex appends an index part to the JSON path. func (p JSONPath) AppendIndex(part uint64) JSONPath { - return append(p, indexPathPart(part)) + return append(p, JSONPathPart{value: part}) } // String returns the string representation of the JSON path. @@ -633,14 +618,14 @@ func newJSONFromFastJSON(v *fastjson.Value, path JSONPath) JSON { obj := make(map[string]JSON, fastObj.Len()) fastObj.Visit(func(k []byte, v *fastjson.Value) { key := string(k) - obj[key] = newJSONFromFastJSON(v, path.Append(propPathPart(key))) + obj[key] = newJSONFromFastJSON(v, path.AppendProperty(key)) }) return newJSONObject(obj, path) case fastjson.TypeArray: fastArr := v.GetArray() arr := make([]JSON, len(fastArr)) for i := range fastArr { - arr[i] = newJSONFromFastJSON(fastArr[i], path.Append(indexPathPart(uint64(i)))) + arr[i] = newJSONFromFastJSON(fastArr[i], path.AppendIndex(uint64(i))) } return newJSONArray(arr, path) case fastjson.TypeNumber: diff --git a/client/json_test.go b/client/json_test.go index 070dd46742..a006a4efd1 100644 --- a/client/json_test.go +++ b/client/json_test.go @@ -504,7 +504,7 @@ func TestNewJSONAndMarshalJSON(t *testing.T) { }, } - path := makeJSONPath("some", "path") + path := JSONPath{}.AppendProperty("some").AppendProperty("path") for _, tt := range tests { for _, withPath := range []bool{true, false} { From 71dad5d1fe9d96bbdc7b099d600e08d149faeb2c Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Fri, 17 Jan 2025 11:57:41 +0100 Subject: [PATCH 45/46] PR fixup --- client/json.go | 30 ++++++++++++++++-------- client/json_traverse_test.go | 25 
++++++++------------ internal/db/fetcher/indexer_iterators.go | 28 ++++++++++++---------- internal/encoding/json.go | 9 +++++-- 4 files changed, 52 insertions(+), 40 deletions(-) diff --git a/client/json.go b/client/json.go index d820214912..b22a0793ea 100644 --- a/client/json.go +++ b/client/json.go @@ -22,20 +22,29 @@ import ( // JSONPathPart represents a part of a JSON path. // Json path can be either a property of an object or an index of an element in an array. -// For example, the paths to both values 1 are very similar: +// For example, consider the following JSON: // // { +// "custom": { +// "name": "John" +// }, // "0": { -// "val": 1 -// } -// } -// [ -// { // "val": 1 -// } -// ] +// }, +// [ +// { +// "val": 2 +// } +// ] +// } // -// It can be described as "0.val" but they are different. +// The path to a top-level document is empty. +// The path to subtree { "name": "John" } can be described as "custom". +// The path to value "John" can be described as "custom.name". +// The paths to both values 1 and 2 can be described as "0.val": +// - for value 1 it's "0" property of the object and "val" property of the object +// - for value 2 it's "0" index of the array and "val" property of the object +// That's why we need to distinguish between properties and indices in the path. type JSONPathPart struct { value any } @@ -89,6 +98,7 @@ func (p JSONPath) String() string { } // JSON represents a JSON value that can be any valid JSON type: object, array, number, string, boolean, or null. +// It can also represent a subtree of a JSON tree. // It provides type-safe access to the underlying value through various accessor methods. type JSON interface { json.Marshaler @@ -127,7 +137,7 @@ type JSON interface { // Returns an error if marshaling fails. Marshal(w io.Writer) error - // GetPath returns the path of the JSON value in the JSON tree. + // GetPath returns the path of the JSON value (or subtree) in the JSON tree. 
GetPath() JSONPath // visit calls the visitor function for the JSON value at the given path. diff --git a/client/json_traverse_test.go b/client/json_traverse_test.go index 38b199bd73..c3cf06268a 100644 --- a/client/json_traverse_test.go +++ b/client/json_traverse_test.go @@ -38,23 +38,18 @@ func getArrayValue(j JSON) []JSON { panic("expected array value") } -func toJSONPathPart[T string | int | uint64](v T) JSONPathPart { - switch val := any(v).(type) { - case string: - return propPathPart(val) - case int: - return indexPathPart(uint64(val)) - case uint64: - return indexPathPart(val) - } - return nil -} - // Creates a path from mixed string/integer values func makeJSONPath[T string | int | uint64](parts ...T) JSONPath { - path := make(JSONPath, len(parts)) - for i, part := range parts { - path[i] = toJSONPathPart(part) + path := JSONPath{} + for _, part := range parts { + switch val := any(part).(type) { + case string: + path = path.AppendProperty(val) + case int: + path = path.AppendIndex(uint64(val)) + case uint64: + path = path.AppendIndex(val) + } } return path } diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 91a7dfa710..31c62da321 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -532,6 +532,8 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro if ok && isArrayCondition(op.Operation) { if op.Operation == compOpNone { // if the array condition is _none it doesn't make sense to use index + // because the power of indexer is in efficiently looking up specific + // values, not a value different from specific. 
return nil, nil } jsonPath = jsonPath.AppendIndex(0) @@ -553,7 +555,7 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro var err error if len(jsonPath) > 0 { - err = determineJSONFilterCondition(&cond, filterVal, jsonPath) + err = setJSONFilterCondition(&cond, filterVal, jsonPath) } else if filterVal == nil { cond.val, err = client.NewNormalNil(cond.kind) } else if !f.indexedFields[i].Kind.IsArray() { @@ -593,26 +595,26 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro return result, nil } -// determineJSONFilterCondition determines the condition and its corresponding operation for a -// JSON filter condition. -// It mutates the given condition to make it match the filter value and JSON path so that +// setJSONFilterCondition sets up the given condition struct based on the filter value and JSON path so that // it can be used to fetch the indexed data. -func determineJSONFilterCondition(cond *fieldFilterCond, filterVal any, jsonPath client.JSONPath) error { - var jsonVal client.JSON - var err error +func setJSONFilterCondition(cond *fieldFilterCond, filterVal any, jsonPath client.JSONPath) error { if isArrayCondition(cond.op) { subCondMap := filterVal.(map[connor.FilterKey]any) for subKey, subVal := range subCondMap { cond.arrOp = cond.op cond.op = subKey.(*mapper.Operator).Operation - jsonVal, err = client.NewJSONWithPath(subVal, jsonPath) - if err == nil { - cond.val = client.NewNormalJSON(jsonVal) + jsonVal, err := client.NewJSONWithPath(subVal, jsonPath) + if err != nil { + return err } + cond.val = client.NewNormalJSON(jsonVal) // the array sub condition (_any, _all or _none) is supposed to have only 1 record break } } else if cond.op == opIn { + // values in _in operator should not be considered as array elements just because they happened + // to be written as an array in the filter. We need to convert them to normal values and + // treat them individually. 
var jsonVals []client.JSON if anyArr, ok := filterVal.([]any); ok { // if filter value is []any we convert each value separately because JSON might have @@ -620,7 +622,7 @@ func determineJSONFilterCondition(cond *fieldFilterCond, filterVal any, jsonPath // client.ToArrayOfNormalValues jsonVals = make([]client.JSON, 0, len(anyArr)) for _, val := range anyArr { - jsonVal, err = client.NewJSONWithPath(val, jsonPath) + jsonVal, err := client.NewJSONWithPath(val, jsonPath) if err != nil { return err } @@ -637,7 +639,7 @@ func determineJSONFilterCondition(cond *fieldFilterCond, filterVal any, jsonPath } jsonVals = make([]client.JSON, 0, len(normArr)) for _, val := range normArr { - jsonVal, err = client.NewJSONWithPath(val.Unwrap(), jsonPath) + jsonVal, err := client.NewJSONWithPath(val.Unwrap(), jsonPath) if err != nil { return err } @@ -650,7 +652,7 @@ func determineJSONFilterCondition(cond *fieldFilterCond, filterVal any, jsonPath } cond.val = normJSONs } else { - jsonVal, err = client.NewJSONWithPath(filterVal, jsonPath) + jsonVal, err := client.NewJSONWithPath(filterVal, jsonPath) if err != nil { return err } diff --git a/internal/encoding/json.go b/internal/encoding/json.go index a6822198e2..be2f96f202 100644 --- a/internal/encoding/json.go +++ b/internal/encoding/json.go @@ -132,13 +132,15 @@ func decodeJSONPath(b []byte) ([]byte, client.JSONPath, error) { } if PeekType(b) == Bytes { - rem, part, err := DecodeBytesAscending(b) + remainder, part, err := DecodeBytesAscending(b) if err != nil { return b, nil, NewErrInvalidJSONPath(b, err) } path = path.AppendProperty(string(part)) - b = rem + b = remainder } else { + // a part of the path can be either a property or an index, so if the type of the underlying + // encoded value is not Bytes it must be Uvarint. 
rem, part, err := DecodeUvarintAscending(b) if err != nil { return b, nil, NewErrInvalidJSONPath(b, err) @@ -157,6 +159,9 @@ func encodeJSONPath(b []byte, v client.JSON) []byte { pathBytes := unsafeConvertStringToBytes(prop) b = EncodeBytesAscending(b, pathBytes) } else if _, ok := part.Index(); ok { + // the given json value is an array element and we want all array elements to be + // distinguishable. That's why we add a constant 0 prefix. + // We ignore the actual array index value because we have no way of using it at the moment. b = EncodeUvarintAscending(b, 0) } } From def1754063c22f77adaf3607b2b2e5b5c680579c Mon Sep 17 00:00:00 2001 From: Islam Aleiv Date: Mon, 20 Jan 2025 09:23:06 +0100 Subject: [PATCH 46/46] Add more comments --- internal/db/fetcher/indexer_iterators.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/internal/db/fetcher/indexer_iterators.go b/internal/db/fetcher/indexer_iterators.go index 31c62da321..3fba778344 100644 --- a/internal/db/fetcher/indexer_iterators.go +++ b/internal/db/fetcher/indexer_iterators.go @@ -531,9 +531,13 @@ func (f *IndexFetcher) determineFieldFilterConditions() ([]fieldFilterCond, erro op, ok := key.(*mapper.Operator) if ok && isArrayCondition(op.Operation) { if op.Operation == compOpNone { - // if the array condition is _none it doesn't make sense to use index - // because the power of indexer is in efficiently looking up specific - // values, not a value different from specific. + // if the array condition is _none it doesn't make sense to use the index because + // the values picked by the index amount to random guessing. For example if we have doc1 + // with array of [3, 5, 1] and doc2 with [7, 4, 8] the index first fetches + // value 1 of doc1, lets it go through the filter and then fetches value 3 of doc1 + // again, skips it (because it cached doc1 id) and fetches value 4 of doc2, and + // so on until it exhausts all prefixes in ascending order.
+ // It might be even less efficient than just scanning all documents. return nil, nil } jsonPath = jsonPath.AppendIndex(0)