Skip to content

Commit

Permalink
Ingest performance improvements (#1028)
Browse files Browse the repository at this point in the history
This PR can be reviewed commit by commit. It includes 4 improvements:

- **Use go-json instead of the standard JSON library**
- **Add regexp cache**
- **Remove remarshalling**
- **Remove remarshalling - continued**


Maybe I should split them into separate PRs.

---------

Signed-off-by: Rafał Strzaliński <[email protected]>
  • Loading branch information
nablaone authored Nov 27, 2024
1 parent 73218ff commit 3d4a9ef
Show file tree
Hide file tree
Showing 51 changed files with 132 additions and 113 deletions.
2 changes: 1 addition & 1 deletion quesma/ab_testing/collector/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package collector

import (
"crypto/sha1"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"quesma/jsondiff"
"quesma/quesma/types"
)
Expand Down
2 changes: 1 addition & 1 deletion quesma/ab_testing/collector/fanout.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ package collector
import (
"bytes"
"context"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"net/http"
"quesma/ingest"
"quesma/logger"
Expand Down
2 changes: 1 addition & 1 deletion quesma/ab_testing/collector/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
package collector

import (
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"quesma/quesma/types"
"regexp"
)
Expand Down
2 changes: 1 addition & 1 deletion quesma/ast/query_traversal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
package ast

import (
"encoding/json"
"github.com/elastic/go-elasticsearch/v8/typedapi/types"
"github.com/goccy/go-json"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"testing"
Expand Down
2 changes: 1 addition & 1 deletion quesma/buildinfo/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
package buildinfo

import (
"encoding/json"
"fmt"
"github.com/coreos/go-semver/semver"
"github.com/goccy/go-json"
"net/http"
"time"
)
Expand Down
2 changes: 1 addition & 1 deletion quesma/clickhouse/table_discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ package clickhouse
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"github.com/goccy/go-json"
"quesma/common_table"
"quesma/end_user_errors"
"quesma/logger"
Expand Down
2 changes: 1 addition & 1 deletion quesma/clickhouse/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package clickhouse

import (
"bytes"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"quesma/logger"
"quesma/model"
"strings"
Expand Down
2 changes: 1 addition & 1 deletion quesma/elasticsearch/index_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ package elasticsearch

import (
"context"
"encoding/json"
"github.com/goccy/go-json"
"io"
"net/http"
"quesma/quesma/config"
Expand Down
2 changes: 1 addition & 1 deletion quesma/eql/e2e/ingest_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ package e2e

import (
"bytes"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"log"
"net/http"
"time"
Expand Down
2 changes: 1 addition & 1 deletion quesma/eql/e2e/query_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ package e2e

import (
"bytes"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"io"
"net/http"
"quesma/jsonprocessor"
Expand Down
1 change: 1 addition & 0 deletions quesma/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ require (

require (
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
github.com/goccy/go-json v0.10.3 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88 // indirect
github.com/knadh/koanf/maps v0.1.1 // indirect
Expand Down
2 changes: 2 additions & 0 deletions quesma/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss=
github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
Expand Down
2 changes: 1 addition & 1 deletion quesma/health/elastic.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package health

import (
"context"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"io"
"net/http"
"quesma/elasticsearch"
Expand Down
2 changes: 1 addition & 1 deletion quesma/ingest/common_table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package ingest

import (
"context"
"encoding/json"
"github.com/DATA-DOG/go-sqlmock"
"github.com/goccy/go-json"
"github.com/stretchr/testify/assert"
"quesma/clickhouse"
"quesma/common_table"
Expand Down
30 changes: 6 additions & 24 deletions quesma/ingest/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ package ingest
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"github.com/ClickHouse/clickhouse-go/v2"
"github.com/goccy/go-json"
chLib "quesma/clickhouse"
"quesma/comment_metadata"
"quesma/common_table"
Expand Down Expand Up @@ -490,32 +490,14 @@ func (ip *IngestProcessor) GenerateIngestContent(table *chLib.Table,
config *chLib.ChTableConfig,
encodings map[schema.FieldEncodingKey]schema.EncodedFieldName) ([]string, types.JSON, []NonSchemaField, error) {

jsonAsBytesSlice, err := json.Marshal(data)

if err != nil {
return nil, nil, nil, err
}

// we find all non-schema fields
jsonMap, err := types.ParseJSON(string(jsonAsBytesSlice))
if err != nil {
return nil, nil, nil, err
}

if len(config.Attributes) == 0 {
return nil, jsonMap, nil, nil
}

schemaFieldsJson, err := json.Marshal(jsonMap)

if err != nil {
return nil, jsonMap, nil, err
return nil, data, nil, nil
}

mDiff := DifferenceMap(jsonMap, table) // TODO change to DifferenceMap(m, t)
mDiff := DifferenceMap(data, table) // TODO change to DifferenceMap(m, t)

if len(mDiff) == 0 && string(schemaFieldsJson) == string(jsonAsBytesSlice) && len(inValidJson) == 0 { // no need to modify, just insert 'js'
return nil, jsonMap, nil, nil
if len(mDiff) == 0 && len(inValidJson) == 0 { // no need to modify, just insert 'js'
return nil, data, nil, nil
}

// check attributes precondition
Expand Down Expand Up @@ -546,7 +528,7 @@ func (ip *IngestProcessor) GenerateIngestContent(table *chLib.Table,
return nil, nil, nil, err
}

onlySchemaFields := RemoveNonSchemaFields(jsonMap, table)
onlySchemaFields := RemoveNonSchemaFields(data, table)

return alterCmd, onlySchemaFields, nonSchemaFields, nil
}
Expand Down
2 changes: 1 addition & 1 deletion quesma/ingest/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
package ingest

import (
"encoding/json"
"github.com/goccy/go-json"
"quesma/clickhouse"
"quesma/persistence"
"quesma/quesma/config"
Expand Down
2 changes: 1 addition & 1 deletion quesma/ingest/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package ingest

import (
"bytes"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"quesma/clickhouse"
"strings"
)
Expand Down
2 changes: 1 addition & 1 deletion quesma/jsonprocessor/json_processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
package jsonprocessor

import (
"encoding/json"
"github.com/goccy/go-json"
"github.com/stretchr/testify/assert"
"quesma/quesma/types"
"reflect"
Expand Down
2 changes: 1 addition & 1 deletion quesma/licensing/license_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package licensing

import (
"bytes"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"io"
"net/http"
"time"
Expand Down
2 changes: 1 addition & 1 deletion quesma/model/query_result.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package model

import (
"context"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"quesma/common_table"
"quesma/logger"
"quesma/schema"
Expand Down
2 changes: 1 addition & 1 deletion quesma/model/search_response.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: Elastic-2.0
package model

import "encoding/json"
import "github.com/goccy/go-json"

type JsonMap = map[string]interface{}

Expand Down
2 changes: 1 addition & 1 deletion quesma/persistence/elastic.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package persistence

import (
"context"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"io"
"log"
"net/http"
Expand Down
2 changes: 1 addition & 1 deletion quesma/proxy/l4_proxy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package proxy

import (
"bytes"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"github.com/stretchr/testify/assert"
"log"
"net"
Expand Down
2 changes: 1 addition & 1 deletion quesma/queryparser/dashboard_error.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: Elastic-2.0
package queryparser

import "encoding/json"
import "github.com/goccy/go-json"

func BadRequestParseError(err error) []byte {
serialized, _ := json.Marshal(DashboardErrorResponse{
Expand Down
2 changes: 1 addition & 1 deletion quesma/queryparser/query_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ package queryparser
import (
"context"
"encoding/hex"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"github.com/k0kubun/pp"
"quesma/clickhouse"
"quesma/logger"
Expand Down
2 changes: 1 addition & 1 deletion quesma/queryparser/query_translator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ package queryparser

import (
"context"
"encoding/json"
"github.com/goccy/go-json"
"github.com/k0kubun/pp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down
2 changes: 1 addition & 1 deletion quesma/quesma/config/env2json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
package config

import (
"encoding/json"
"github.com/goccy/go-json"
"github.com/stretchr/testify/assert"
"os"
"testing"
Expand Down
2 changes: 1 addition & 1 deletion quesma/quesma/functionality/bulk/bulk.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package bulk

import (
"context"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"io"
"net/http"
"quesma/clickhouse"
Expand Down
2 changes: 1 addition & 1 deletion quesma/quesma/functionality/bulk/bulk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
package bulk

import (
"encoding/json"
"github.com/goccy/go-json"
"github.com/stretchr/testify/require"
"testing"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package field_capabilities

import (
"context"
"encoding/json"
"fmt"
"github.com/goccy/go-json"
"quesma/clickhouse"
"quesma/elasticsearch"
"quesma/elasticsearch/elasticsearch_field_types"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
package field_capabilities

import (
"encoding/json"
"github.com/goccy/go-json"
"github.com/stretchr/testify/assert"
"quesma/clickhouse"
"quesma/model"
Expand Down
2 changes: 1 addition & 1 deletion quesma/quesma/functionality/terms_enum/terms_enum.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ package terms_enum

import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/goccy/go-json"
"quesma/clickhouse"
"quesma/end_user_errors"
"quesma/logger"
Expand Down
2 changes: 1 addition & 1 deletion quesma/quesma/functionality/terms_enum/terms_enum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ package terms_enum
import (
"bytes"
"context"
"encoding/json"
"fmt"
"github.com/DATA-DOG/go-sqlmock"
"github.com/goccy/go-json"
"github.com/stretchr/testify/assert"
"quesma/clickhouse"
"quesma/logger"
Expand Down
2 changes: 1 addition & 1 deletion quesma/quesma/highlight_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ package quesma

import (
"context"
"encoding/json"
"github.com/goccy/go-json"
"github.com/stretchr/testify/assert"
"quesma/clickhouse"
"quesma/model"
Expand Down
2 changes: 1 addition & 1 deletion quesma/quesma/mappings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
package quesma

import (
"encoding/json"
"github.com/goccy/go-json"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"quesma/elasticsearch"
Expand Down
2 changes: 1 addition & 1 deletion quesma/quesma/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ package quesma

import (
"context"
"encoding/json"
"errors"
"github.com/goccy/go-json"
"net/http"
"quesma/clickhouse"
"quesma/elasticsearch"
Expand Down
Loading

0 comments on commit 3d4a9ef

Please sign in to comment.