Table resolver - apply decision (#870)
This is the second part of the table resolver introduction.

In this PR:
1. Decisions made by the resolver are now applied.
2. Some rules were simplified.
3. Tests were added.
4. Added an endpoint `/:index/_quesma_table_resolver` to get a decision (it will be used in e2e tests):
```
curl 'http://localhost:8080/logs*/_quesma_table_resolver'
```
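The decision can then be fetched programmatically, e.g. from an e2e test. A minimal Go sketch, assuming the endpoint returns the resolver decision as JSON (the payload shape is not shown in this diff):

```go
// Minimal e2e-style check against the new endpoint. Assumption: the
// response body is the resolver decision serialized as JSON.
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:8080/logs*/_quesma_table_resolver")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Assumed shape: a JSON object mirroring table_resolver.Decision.
	var decision map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&decision); err != nil {
		panic(err)
	}
	fmt.Printf("decision for logs*: %v\n", decision)
}
```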


Limitations:
- Table name overrides are handled as before.
- Rules may be incorrect. Tests are passing, but this was a complex rewrite.

---------

Co-authored-by: Piotr Grabowski <[email protected]>
nablaone and avelanarius authored Oct 16, 2024
1 parent 7e78e1b commit 90400ea
Showing 31 changed files with 1,036 additions and 593 deletions.
6 changes: 6 additions & 0 deletions http_requests/disabled.http
@@ -0,0 +1,6 @@
POST localhost:8080/logs_disabled/_doc
Content-Type: application/json

{
  "message": "Hello World!"
}
1 change: 1 addition & 0 deletions http_requests/table_resolver.http
@@ -0,0 +1 @@
GET http://localhost:8080/logs*/_quesma_table_resolver
38 changes: 38 additions & 0 deletions quesma/elasticsearch/index_management.go
@@ -7,6 +7,7 @@ import (
	"quesma/logger"
	"quesma/quesma/config"
	"quesma/quesma/recovery"
	"quesma/util"
	"strings"
	"sync/atomic"
	"time"
@@ -115,3 +116,40 @@ func (im *indexManagement) Start() {
func (im *indexManagement) Stop() {
	im.cancel()
}

func NewFixedIndexManagement(indexes ...string) IndexManagement {
	return stubIndexManagement{indexes: indexes}
}

type stubIndexManagement struct {
	indexes []string
}

func (s stubIndexManagement) Start()         {}
func (s stubIndexManagement) Stop()          {}
func (s stubIndexManagement) ReloadIndices() {}
func (s stubIndexManagement) GetSources() Sources {
	var dataStreams = []DataStream{}
	for _, index := range s.indexes {
		dataStreams = append(dataStreams, DataStream{Name: index})
	}
	return Sources{DataStreams: dataStreams}
}

func (s stubIndexManagement) GetSourceNames() map[string]bool {
	var result = make(map[string]bool)
	for _, index := range s.indexes {
		result[index] = true
	}
	return result
}

func (s stubIndexManagement) GetSourceNamesMatching(indexPattern string) map[string]bool {
	var result = make(map[string]bool)
	for _, index := range s.indexes {
		if util.IndexPatternMatches(indexPattern, index) {
			result[index] = true
		}
	}
	return result
}
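A minimal sketch of how the new stub might be exercised in a test; it assumes `util.IndexPatternMatches` treats `logs*` as a wildcard pattern (the test name and assertions are illustrative, not from this PR):

```go
package elasticsearch_test

import (
	"testing"

	"quesma/elasticsearch"
)

// Illustrative test: the stub returns a fixed set of sources, so pattern
// matching can be exercised without a live Elasticsearch.
func TestFixedIndexManagementSketch(t *testing.T) {
	im := elasticsearch.NewFixedIndexManagement("logs-1", "metrics-1")

	matched := im.GetSourceNamesMatching("logs*")
	if !matched["logs-1"] {
		t.Fatalf("expected logs-1 to match logs*, got %v", matched)
	}
	if matched["metrics-1"] {
		t.Fatalf("metrics-1 should not match logs*, got %v", matched)
	}
}
```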
1 change: 1 addition & 0 deletions quesma/end_user_errors/end_user.go
@@ -95,6 +95,7 @@ var ErrSearchCondition = errorType(2001, "Not supported search condition.")
var ErrNoSuchTable = errorType(2002, "Missing table.")
var ErrNoSuchSchema = errorType(2003, "Missing schema.")
var ErrNoIngest = errorType(2004, "Ingest is not enabled.")
var ErrNoConnector = errorType(2005, "No connector found.")

var ErrDatabaseTableNotFound = errorType(3001, "Table not found in database.")
var ErrDatabaseFieldNotFound = errorType(3002, "Field not found in database.")
12 changes: 12 additions & 0 deletions quesma/ingest/common_table_test.go
@@ -191,6 +191,18 @@ func TestIngestToCommonTable(t *testing.T) {

	resolver := table_resolver.NewEmptyTableResolver()

	decision := &table_resolver.Decision{
		UseConnectors: []table_resolver.ConnectorDecision{
			&table_resolver.ConnectorDecisionClickhouse{
				ClickhouseTableName: common_table.TableName,
				ClickhouseTables:    []string{indexName},
				IsCommonTable:       true,
			},
		},
	}

	resolver.Decisions[indexName] = decision

	ingest := newIngestProcessorWithEmptyTableMap(tables, quesmaConfig)
	ingest.chDb = db
	ingest.virtualTableStorage = virtualTableStorage
11 changes: 10 additions & 1 deletion quesma/ingest/ingest_validator_test.go
@@ -170,7 +170,16 @@ func TestIngestValidation(t *testing.T) {
	ip := newIngestProcessorEmpty()
	ip.chDb = db
	ip.tableDiscovery = clickhouse.NewTableDiscoveryWith(&config.QuesmaConfiguration{}, nil, *tableMap)
	ip.tableResolver = table_resolver.NewEmptyTableResolver()

	resolver := table_resolver.NewEmptyTableResolver()
	decision := &table_resolver.Decision{
		UseConnectors: []table_resolver.ConnectorDecision{&table_resolver.ConnectorDecisionClickhouse{
			ClickhouseTableName: "test_table",
		}}}
	resolver.Decisions["test_table"] = decision

	ip.tableResolver = resolver

	defer db.Close()

	mock.ExpectExec(EscapeBrackets(expectedInsertJsons[i])).WithoutArgs().WillReturnResult(sqlmock.NewResult(0, 0))
16 changes: 14 additions & 2 deletions quesma/ingest/insert_test.go
@@ -242,6 +242,12 @@ func TestProcessInsertQuery(t *testing.T) {
	db, mock := util.InitSqlMockWithPrettyPrint(t, true)
	ip.ip.chDb = db
	resolver := table_resolver.NewEmptyTableResolver()
	decision := &table_resolver.Decision{
		UseConnectors: []table_resolver.ConnectorDecision{&table_resolver.ConnectorDecisionClickhouse{
			ClickhouseTableName: "test_table",
		}}}
	resolver.Decisions["test_table"] = decision

	ip.ip.tableResolver = resolver
	defer db.Close()

@@ -420,15 +426,21 @@ func TestCreateTableIfSomeFieldsExistsInSchemaAlready(t *testing.T) {
	}
	schemaRegistry.Tables[schema.TableName(indexName)] = indexSchema

	indexRegistry := table_resolver.NewEmptyTableResolver()
	resolver := table_resolver.NewEmptyTableResolver()
	decision := &table_resolver.Decision{
		UseConnectors: []table_resolver.ConnectorDecision{&table_resolver.ConnectorDecisionClickhouse{
			ClickhouseTableName: "test_index",
		}}}
	resolver.Decisions["test_index"] = decision

	schemaRegistry.FieldEncodings = make(map[schema.FieldEncodingKey]schema.EncodedFieldName)
	schemaRegistry.FieldEncodings[schema.FieldEncodingKey{TableName: indexName, FieldName: "schema_field"}] = "schema_field"

	ingest := newIngestProcessorWithEmptyTableMap(tables, quesmaConfig)
	ingest.chDb = db
	ingest.virtualTableStorage = virtualTableStorage
	ingest.schemaRegistry = schemaRegistry
	ingest.tableResolver = indexRegistry
	ingest.tableResolver = resolver

	ctx := context.Background()
	formatter := clickhouse.DefaultColumnNameFormatter()
68 changes: 45 additions & 23 deletions quesma/ingest/processor.go
@@ -706,38 +706,60 @@ func (lm *IngestProcessor) ProcessInsertQuery(ctx context.Context, tableName str
tableFormatter TableColumNameFormatter) error {

decision := lm.tableResolver.Resolve(table_resolver.IngestPipeline, tableName)
table_resolver.TODO("processInsertQuery", decision)

indexConf, ok := lm.cfg.IndexConfig[tableName]
if ok && indexConf.UseCommonTable {
if decision.Err != nil {
return decision.Err
}

// we have to clone the data, because we want to process it twice
var clonedJsonData []types.JSON
for _, jsonValue := range jsonData {
clonedJsonData = append(clonedJsonData, jsonValue.Clone())
}
if decision.IsEmpty { // TODO
return fmt.Errorf("table %s not found", tableName)
}

err := lm.processInsertQueryInternal(ctx, tableName, clonedJsonData, transformer, tableFormatter, true)
if err != nil {
// we ignore the error here, because we want to process the data and not lose it
logger.ErrorWithCtx(ctx).Msgf("error processing insert query - virtual table schema update: %v", err)
}
if decision.IsClosed { // TODO
return fmt.Errorf("table %s is closed", tableName)
}

pipeline := jsonprocessor.IngestTransformerPipeline{}
pipeline = append(pipeline, &common_table.IngestAddIndexNameTransformer{IndexName: tableName})
pipeline = append(pipeline, transformer)
tableName = common_table.TableName
for _, connectorDecision := range decision.UseConnectors {

err = lm.processInsertQueryInternal(ctx, common_table.TableName, jsonData, pipeline, tableFormatter, false)
if err != nil {
return fmt.Errorf("error processing insert query to a common table: %w", err)
var clickhouseDecision *table_resolver.ConnectorDecisionClickhouse

var ok bool
if clickhouseDecision, ok = connectorDecision.(*table_resolver.ConnectorDecisionClickhouse); !ok {
continue
}

return nil
}
if clickhouseDecision.IsCommonTable {

// we have to clone the data, because we want to process it twice
var clonedJsonData []types.JSON
for _, jsonValue := range jsonData {
clonedJsonData = append(clonedJsonData, jsonValue.Clone())
}

return lm.processInsertQueryInternal(ctx, tableName, jsonData, transformer, tableFormatter, false)
err := lm.processInsertQueryInternal(ctx, tableName, clonedJsonData, transformer, tableFormatter, true)
if err != nil {
// we ignore the error here, because we want to process the data and not lose it
logger.ErrorWithCtx(ctx).Msgf("error processing insert query - virtual table schema update: %v", err)
}

pipeline := jsonprocessor.IngestTransformerPipeline{}
pipeline = append(pipeline, &common_table.IngestAddIndexNameTransformer{IndexName: tableName})
pipeline = append(pipeline, transformer)

err = lm.processInsertQueryInternal(ctx, common_table.TableName, jsonData, pipeline, tableFormatter, false)
if err != nil {
return fmt.Errorf("error processing insert query to a common table: %w", err)
}

} else {
err := lm.processInsertQueryInternal(ctx, clickhouseDecision.ClickhouseTableName, jsonData, transformer, tableFormatter, false)
if err != nil {
return fmt.Errorf("error processing insert query: %w", err)
}
}

}
return nil
}

func (ip *IngestProcessor) processInsertQueryInternal(ctx context.Context, tableName string,
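For readers skimming the interleaved hunk above, here is a condensed sketch of the new decision-driven flow in `ProcessInsertQuery`; `insertFn` is a hypothetical stand-in for `processInsertQueryInternal`, and cloning plus pipeline setup are elided:

```go
package ingest

import (
	"quesma/common_table"
	"quesma/table_resolver"
)

// Sketch only: routes one ingest request according to the resolver's
// decision. insertFn is a placeholder, not a function in this diff.
func routeInsert(decision *table_resolver.Decision, tableName string, insertFn func(table string) error) error {
	if decision.Err != nil {
		return decision.Err
	}
	for _, cd := range decision.UseConnectors {
		ch, ok := cd.(*table_resolver.ConnectorDecisionClickhouse)
		if !ok {
			continue // only ClickHouse decisions are handled on this path
		}
		if ch.IsCommonTable {
			// Two writes: the per-index virtual table (errors are only
			// logged in the real code), then the shared physical table.
			_ = insertFn(tableName)
			if err := insertFn(common_table.TableName); err != nil {
				return err
			}
		} else if err := insertFn(ch.ClickhouseTableName); err != nil {
			return err
		}
	}
	return nil
}
```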
8 changes: 3 additions & 5 deletions quesma/main.go
@@ -90,13 +90,13 @@ func main() {
	tableDisco := clickhouse.NewTableDiscovery(&cfg, connectionPool, virtualTableStorage)
	schemaRegistry := schema.NewSchemaRegistry(clickhouse.TableDiscoveryTableProviderAdapter{TableDiscovery: tableDisco}, &cfg, clickhouse.SchemaTypeAdapter{})

	im := elasticsearch.NewIndexManagement(cfg.Elasticsearch.Url.String())

	connManager := connectors.NewConnectorManager(&cfg, connectionPool, phoneHomeAgent, tableDisco)
	lm := connManager.GetConnector()

	elasticIndexResolver := elasticsearch.NewIndexResolver(cfg.Elasticsearch.Url.String())

	// TODO index configuration for ingest and query is the same for now
	tableResolver := table_resolver.NewTableResolver(cfg, tableDisco, elasticIndexResolver)
	tableResolver := table_resolver.NewTableResolver(cfg, tableDisco, im)
	tableResolver.Start()

	var ingestProcessor *ingest.IngestProcessor
@@ -112,8 +112,6 @@
		logger.Info().Msg("Ingest processor is disabled.")
	}

	im := elasticsearch.NewIndexManagement(cfg.Elasticsearch.Url.String())

	logger.Info().Msgf("loaded config: %s", cfg.String())

	quesmaManagementConsole := ui.NewQuesmaManagementConsole(&cfg, lm, im, qmcLogChannel, phoneHomeAgent, schemaRegistry, tableResolver) //FIXME no ingest processor here just for now
74 changes: 44 additions & 30 deletions quesma/quesma/functionality/bulk/bulk.go
@@ -122,9 +122,6 @@ func splitBulk(ctx context.Context, defaultIndex *string, bulk types.NDJSON, bul
	index := op.GetIndex()
	operation := op.GetOperation()

	decision := tableResolver.Resolve(table_resolver.IngestPipeline, index)
	table_resolver.TODO("splitBulk", decision)

	entryWithResponse := BulkRequestEntry{
		operation: operation,
		index:     index,
@@ -142,36 +139,13 @@
}
}

indexConfig, found := cfg.IndexConfig[index]
if !found || indexConfig.IsElasticIngestEnabled() {
// Bulk entry for Elastic - forward the request as-is
opBytes, err := rawOp.Bytes()
if err != nil {
return err
}
elasticRequestBody = append(elasticRequestBody, opBytes...)
elasticRequestBody = append(elasticRequestBody, '\n')

documentBytes, err := document.Bytes()
if err != nil {
return err
}
elasticRequestBody = append(elasticRequestBody, documentBytes...)
elasticRequestBody = append(elasticRequestBody, '\n')
decision := tableResolver.Resolve(table_resolver.IngestPipeline, index)

elasticBulkEntries = append(elasticBulkEntries, entryWithResponse)
if decision.Err != nil {
return decision.Err
}
if found && indexConfig.IsClickhouseIngestEnabled() {
// Bulk entry for Clickhouse
if operation != "create" && operation != "index" {
// Elastic also fails the entire bulk in such case
logger.ErrorWithCtxAndReason(ctx, "unsupported bulk operation type").Msgf("unsupported bulk operation type: %s", operation)
return fmt.Errorf("unsupported bulk operation type: %s. Operation: %v, Document: %v", operation, rawOp, document)
}

clickhouseDocumentsToInsert[index] = append(clickhouseDocumentsToInsert[index], entryWithResponse)
}
if indexConfig.IsIngestDisabled() {
if decision.IsClosed || len(decision.UseConnectors) == 0 {
bulkSingleResponse := BulkSingleResponse{
Shards: BulkShardsResponse{
Failed: 1,
Expand Down Expand Up @@ -202,6 +176,46 @@ func splitBulk(ctx context.Context, defaultIndex *string, bulk types.NDJSON, bul
return fmt.Errorf("unsupported bulk operation type: %s. Document: %v", operation, document)
}
}

for _, connector := range decision.UseConnectors {

switch connector.(type) {

case *table_resolver.ConnectorDecisionElastic:
// Bulk entry for Elastic - forward the request as-is
opBytes, err := rawOp.Bytes()
if err != nil {
return err
}
elasticRequestBody = append(elasticRequestBody, opBytes...)
elasticRequestBody = append(elasticRequestBody, '\n')

documentBytes, err := document.Bytes()
if err != nil {
return err
}
elasticRequestBody = append(elasticRequestBody, documentBytes...)
elasticRequestBody = append(elasticRequestBody, '\n')

elasticBulkEntries = append(elasticBulkEntries, entryWithResponse)

case *table_resolver.ConnectorDecisionClickhouse:

// Bulk entry for Clickhouse
if operation != "create" && operation != "index" {
// Elastic also fails the entire bulk in such case
logger.ErrorWithCtxAndReason(ctx, "unsupported bulk operation type").Msgf("unsupported bulk operation type: %s", operation)
return fmt.Errorf("unsupported bulk operation type: %s. Operation: %v, Document: %v", operation, rawOp, document)
}

clickhouseDocumentsToInsert[index] = append(clickhouseDocumentsToInsert[index], entryWithResponse)

default:
return fmt.Errorf("unsupported connector type: %T", connector)
}

}

return nil
})

2 changes: 2 additions & 0 deletions quesma/quesma/http_headers.go
@@ -21,6 +21,8 @@
	quesmaSourceHeader     = "X-Quesma-Source"
	quesmaSourceElastic    = "Elasticsearch"
	quesmaSourceClickhouse = "Clickhouse"

	quesmaTableResolverHeader = "X-Quesma-Table-Resolver"
)

// Certain Elasticsearch SaaS providers might add custom headers to the response,
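The new constant suggests responses can carry the resolver's verdict for inspection. A hypothetical helper (not part of this diff) showing how the header could be attached:

```go
package quesma

import "net/http"

// Hypothetical helper: attach the resolver's decision to a response so
// clients and e2e tests can assert on it. The actual wiring in Quesma's
// router is not shown in this diff.
func setTableResolverHeader(w http.ResponseWriter, decision string) {
	w.Header().Set(quesmaTableResolverHeader, decision)
}
```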
