Skip to content

Commit

Permalink
A/B testing - fixes (#963)
Browse files Browse the repository at this point in the history
Some fixes.
  • Loading branch information
nablaone authored Nov 8, 2024
1 parent 86050ac commit 5a41028
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 36 deletions.
65 changes: 46 additions & 19 deletions quesma/ab_testing/collector/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,33 +37,60 @@ func (t *diffTransformer) mostCommonMismatchType(mismatches []jsondiff.JSONMisma

func (t *diffTransformer) process(in EnrichedResults) (out EnrichedResults, drop bool, err error) {

mismatches := jsondiff.Mismatches{}

d, err := jsondiff.NewElasticResponseJSONDiff()
if err != nil {
return in, false, err
}

jsonA, err := types.ParseJSON(in.A.Body)
if err != nil {
in.Mismatch.IsOK = false
in.Mismatch.Message = fmt.Sprintf("failed to parse A response: %v", err)
err = fmt.Errorf("failed to parse A response: %w", err)
in.Errors = append(in.Errors, err.Error())
return in, false, nil
}
if in.A.Error != "" || in.B.Error != "" {

jsonB, err := types.ParseJSON(in.B.Body)
if err != nil {
in.Mismatch.IsOK = false
in.Mismatch.Message = fmt.Sprintf("failed to parse B response: %v", err)
err = fmt.Errorf("failed to parse B response: %w", err)
in.Errors = append(in.Errors, err.Error())
return in, false, nil
}
if in.A.Error != "" {
mismatches = append(mismatches, jsondiff.JSONMismatch{
Type: "error",
Message: fmt.Sprintf("\nA response has an error: %s", in.A.Error),
Path: "n/a",
Expected: "n/a",
Actual: "n/a",
})
}

if in.B.Error != "" {
mismatches = append(mismatches, jsondiff.JSONMismatch{
Type: "error",
Message: fmt.Sprintf("\nB response has an error: %s", in.B.Error),
Path: "n/a",
Expected: "n/a",
Actual: "n/a",
})
}

mismatches, err := d.Diff(jsonA, jsonB)
} else {

jsonA, err := types.ParseJSON(in.A.Body)
if err != nil {
in.Mismatch.IsOK = false
in.Mismatch.Message = fmt.Sprintf("failed to parse A response: %v", err)
err = fmt.Errorf("failed to parse A response: %w", err)
in.Errors = append(in.Errors, err.Error())
return in, false, nil
}

jsonB, err := types.ParseJSON(in.B.Body)
if err != nil {
in.Mismatch.IsOK = false
in.Mismatch.Message = fmt.Sprintf("failed to parse B response: %v", err)
err = fmt.Errorf("failed to parse B response: %w", err)
in.Errors = append(in.Errors, err.Error())
return in, false, nil
}

mismatches, err = d.Diff(jsonA, jsonB)
if err != nil {
return in, false, err
}

if err != nil {
return in, false, err
}

if len(mismatches) > 0 {
Expand Down
17 changes: 16 additions & 1 deletion quesma/jsondiff/elastic_response_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,22 @@ import "fmt"

// NewElasticResponseJSONDiff creates a JSONDiff instance that is tailored to compare Elasticsearch response JSONs.
func NewElasticResponseJSONDiff() (*JSONDiff, error) {
d, err := NewJSONDiff("^id$", ".*Quesma_key_.*", "^took$", ".*__quesma_total_count", ".*\\._id", "^_shards.*", ".*\\._score", ".*\\._source", ".*\\.__quesma_originalKey")

var ignorePaths []string

// quesma specific fields that we want to ignore
ignorePaths = append(ignorePaths, ".*Quesma_key_.*", ".*__quesma_total_count", ".*\\.__quesma_originalKey")

// well known fields that we want to ignore
ignorePaths = append(ignorePaths, "^id$", "^took$", ".*\\._id", "^_shards.*", ".*\\._score", ".*\\._source", ".*\\._version$")

// elastic has some fields that are suffixed with ".keyword" that we want to ignore
ignorePaths = append(ignorePaths, ".*\\.keyword$")

// ignore some fields that are related to location, just for now (we want to compare them in the future)
ignorePaths = append(ignorePaths, ".*Location$", ".*\\.lat$", ".*\\.lon$")

d, err := NewJSONDiff(ignorePaths...)

if err != nil {
return nil, fmt.Errorf("could not create JSONDiff: %v", err)
Expand Down
12 changes: 6 additions & 6 deletions quesma/jsondiff/jsondiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,10 @@ func (d *JSONDiff) compareArray(expected []any, actual []any) {
}

if lenDiff > 1 {
d.addMismatch(invalidArrayLength, fmt.Sprintf("%d", len(actual)), fmt.Sprintf("%d", len(expected)))
d.addMismatch(invalidArrayLength, fmt.Sprintf("%d", len(expected)), fmt.Sprintf("%d", len(actual)))
return
} else if lenDiff == 1 {
d.addMismatch(invalidArrayLengthOffByOne, fmt.Sprintf("%d", len(actual)), fmt.Sprintf("%d", len(expected)))
d.addMismatch(invalidArrayLengthOffByOne, fmt.Sprintf("%d", len(expected)), fmt.Sprintf("%d", len(actual)))
return
}

Expand All @@ -345,7 +345,7 @@ func (d *JSONDiff) compareArray(expected []any, actual []any) {

for i := range len(actual) {
d.pushPath(fmt.Sprintf("[%d]", i))
d.compare(actual[i], expected[i])
d.compare(expected[i], actual[i])
d.popPath()
}
}
Expand All @@ -361,10 +361,10 @@ func (d *JSONDiff) asType(a any) string {
var dateRx = regexp.MustCompile(`\d{4}-\d{2}-\d{2}.\d{2}:\d{2}:`)

func (d *JSONDiff) uniformTimeFormat(date string) string {
returnFormat := "2006-01-02T15:04:05.000Z"
returnFormat := time.RFC3339Nano

inputFormats := []string{
"2006-01-02T15:04:05.000+02:00",
"2006-01-02T15:04:05.000-07:00",
"2006-01-02T15:04:05.000Z",
"2006-01-02T15:04:05.000",
"2006-01-02 15:04:05",
Expand All @@ -375,7 +375,7 @@ func (d *JSONDiff) uniformTimeFormat(date string) string {
for _, format := range inputFormats {
parsedDate, err = time.Parse(format, date)
if err == nil {
return parsedDate.Format(returnFormat)
return parsedDate.UTC().Format(returnFormat)
}
}
return date
Expand Down
7 changes: 6 additions & 1 deletion quesma/jsondiff/jsondiff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,15 @@ func TestJSONDiff(t *testing.T) {
problems: []JSONMismatch{},
},
{
name: "SKIP dates 3", // TODO fix this, not sure how we handle TZ
name: "dates 3",
expected: `{"a": "2024-10-24T10:00:00.000"}`,
actual: `{"a": "2024-10-24T12:00:00.000+02:00"}`,
},
{
name: "dates 4",
expected: `{"a": "2024-10-31T11:00:00.000"}`,
actual: `{"a": "2024-10-31T12:00:00.000+01:00"}`,
},
}

for _, tt := range tests {
Expand Down
25 changes: 23 additions & 2 deletions quesma/quesma/schema_transformer.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,15 @@ func (s *SchemaCheckPass) applyWildcardExpansion(indexSchema schema.Schema, quer
cols = append(cols, col.InternalPropertyName.AsString())
}
}

if query.RuntimeMappings != nil {
// add columns that are not in the schema but are in the runtime mappings
// these columns will be transformed later
for name := range query.RuntimeMappings {
cols = append(cols, name)
}
}

sort.Strings(cols)

for _, col := range cols {
Expand Down Expand Up @@ -683,10 +692,22 @@ func (s *SchemaCheckPass) applyRuntimeMappings(indexSchema schema.Schema, query
return query, nil
}

visitor := model.NewBaseVisitor()
cols := query.SelectCommand.Columns

visitor.OverrideVisitColumnRef = func(b *model.BaseExprVisitor, e model.ColumnRef) interface{} {
// replace column refs with runtime mappings with proper name
for i, col := range cols {
switch c := col.(type) {
case model.ColumnRef:
if mapping, ok := query.RuntimeMappings[c.ColumnName]; ok {
cols[i] = model.NewAliasedExpr(mapping.Expr, c.ColumnName)
}
}
}
query.SelectCommand.Columns = cols

// replace other places where column refs are used
visitor := model.NewBaseVisitor()
visitor.OverrideVisitColumnRef = func(b *model.BaseExprVisitor, e model.ColumnRef) interface{} {
if mapping, ok := query.RuntimeMappings[e.ColumnName]; ok {
return mapping.Expr
}
Expand Down
6 changes: 4 additions & 2 deletions quesma/quesma/search_ab_testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"quesma/elasticsearch"
"quesma/logger"
"quesma/model"
"quesma/queryparser"
"quesma/quesma/async_search_storage"
"quesma/quesma/config"
"quesma/quesma/recovery"
Expand Down Expand Up @@ -330,7 +329,10 @@ func (q *QueryRunner) storeAsyncSearchWithRaw(qmc *ui.QuesmaManagementConsole, i
asyncResponse := WrapElasticResponseAsAsync(resultJSON, asyncId, false, &okStatus)
responseBody, err = json.MarshalIndent(asyncResponse, "", " ")
} else {
responseBody, _ = queryparser.EmptyAsyncSearchResponse(asyncId, false, 503)
responseBody, err = resultJSON.Bytes()
if err == nil {
logger.Warn().Msgf("error while marshalling async search response: %v: ", err)
}
err = resultError
}

Expand Down
10 changes: 6 additions & 4 deletions quesma/quesma/ui/ab_testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -497,19 +497,21 @@ func (qmc *QuesmaManagementConsole) renderABTestingMismatch(buffer *builder.Html
buffer.Html(`</code>`)
{ // poor man's HTML indent
buffer.Html(`<ul>`)

buffer.Html(`<li>`)
buffer.Html(`<code>`)
buffer.Text("Actual: ")
buffer.Text(mismatch.Actual)
buffer.Text("Expected: ")
buffer.Text(mismatch.Expected)
buffer.Html(`</code>`)
buffer.Html(`</li>`)

buffer.Html(`<li>`)
buffer.Html(`<code>`)
buffer.Text("Expected: ")
buffer.Text(mismatch.Expected)
buffer.Text("Actual: ")
buffer.Text(mismatch.Actual)
buffer.Html(`</code>`)
buffer.Html(`</li>`)

buffer.Html(`</ul>`)
}
}
Expand Down
2 changes: 1 addition & 1 deletion quesma/testdata/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -2354,7 +2354,7 @@ var TestSearchRuntimeMappings = []SearchTestCase{
model.ListAllFields,
////[]model.Query{newSimplestQuery()},
[]string{
`SELECT toHour("@timestamp") FROM ` + TableName + ` LIMIT 10`,
`SELECT toHour("@timestamp") AS "hour_of_day" FROM ` + TableName + ` LIMIT 10`,
},
[]string{},
},
Expand Down

0 comments on commit 5a41028

Please sign in to comment.