Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create Cassandra db schema on session initialization #5922

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
5041f31
Embeded template file in binary and added query construction and exec…
akstron Oct 28, 2024
30db170
Removed unnecessary SchemaConfig struct
akstron Oct 28, 2024
ce0c375
Added new schema configs in default config generator
akstron Oct 28, 2024
810ab1c
Revert Keyspace removal
akstron Oct 28, 2024
0d6383f
Bug fix while creating queries
akstron Oct 29, 2024
207945f
Improving test
akstron Oct 29, 2024
985f65b
Created new struct for derived params
akstron Oct 29, 2024
1c30503
Remove fields from yaml file
akstron Oct 29, 2024
e4ab709
Added integration test
akstron Nov 19, 2024
c329bba
Rebase fixes
akstron Nov 19, 2024
e3c6045
Minor changes in integration script
akstron Nov 19, 2024
492e15e
removed test
akstron Nov 19, 2024
44c39dc
Updated fields with time.Duration type and added validators and tests
akstron Nov 20, 2024
dfc0c43
minor changes in script
akstron Nov 20, 2024
cb8ae19
Addressed comments
akstron Nov 20, 2024
c3d0fbd
Addressed comments
akstron Nov 21, 2024
728a139
Update pkg/cassandra/config/schema.go
akstron Nov 21, 2024
1b6683d
Update pkg/cassandra/config/config.go
akstron Nov 21, 2024
ce11cc1
Addressed comments
akstron Nov 21, 2024
de1c563
Removed unused CasVersion
akstron Nov 21, 2024
edabe22
Addressed validation comments
akstron Nov 22, 2024
d0e1976
Created helper function for session created and updated tests
akstron Nov 26, 2024
d8479b5
Added schema unit tests
akstron Nov 26, 2024
02b6159
Update pkg/cassandra/config/config.go
akstron Nov 26, 2024
73d276a
Update pkg/cassandra/config/config.go
akstron Nov 26, 2024
57349a8
Update pkg/cassandra/config/config.go
akstron Nov 26, 2024
84b52e1
Fixed build
akstron Nov 26, 2024
9c2f05b
formatting fixes
akstron Nov 26, 2024
2c8de88
test fix
akstron Nov 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 93 additions & 4 deletions pkg/cassandra/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import (
"context"
"errors"
"fmt"
"time"

Expand Down Expand Up @@ -58,6 +59,18 @@
// while connecting to the Cassandra Cluster. This is useful for connecting to clusters, like Azure Cosmos DB,
// that do not support SnappyCompression.
DisableCompression bool `mapstructure:"disable_compression"`
// CreateSchema tells if the schema should be created during session initialization based on the configs provided
CreateSchema bool `mapstructure:"create" valid:"optional"`
// Datacenter is the name for network topology
yurishkuro marked this conversation as resolved.
Show resolved Hide resolved
Datacenter string `mapstructure:"datacenter" valid:"optional"`
// TraceTTL is Time To Live (TTL) for the trace data. Should at least be 1 second
TraceTTL time.Duration `mapstructure:"trace_ttl" valid:"optional"`
// DependenciesTTL is Time To Live (TTL) for dependencies data. Should at least be 1 second
DependenciesTTL time.Duration `mapstructure:"dependencies_ttl" valid:"optional"`
// Replication factor for the db
ReplicationFactor int `mapstructure:"replication_factor" valid:"optional"`
// CompactionWindow is the compaction window of the db. Should be at least 1 minute
yurishkuro marked this conversation as resolved.
Show resolved Hide resolved
CompactionWindow time.Duration `mapstructure:"compaction_window" valid:"optional"`
}

type Query struct {
Expand Down Expand Up @@ -86,7 +99,13 @@
func DefaultConfiguration() Configuration {
return Configuration{
Schema: Schema{
Keyspace: "jaeger_v1_test",
CreateSchema: false,
Keyspace: "jaeger_dc1",
Datacenter: "dc1",
TraceTTL: 2 * 24 * time.Hour,
DependenciesTTL: 2 * 24 * time.Hour,
ReplicationFactor: 1,
CompactionWindow: time.Minute,
},
Connection: Connection{
Servers: []string{"127.0.0.1"},
Expand All @@ -106,6 +125,27 @@
if c.Schema.Keyspace == "" {
c.Schema.Keyspace = source.Schema.Keyspace
}

if c.Schema.Datacenter == "" {
c.Schema.Datacenter = source.Schema.Datacenter
}

if c.Schema.TraceTTL == 0 {
c.Schema.TraceTTL = source.Schema.TraceTTL
}

if c.Schema.DependenciesTTL == 0 {
c.Schema.DependenciesTTL = source.Schema.DependenciesTTL
}

if c.Schema.ReplicationFactor == 0 {
c.Schema.ReplicationFactor = source.Schema.ReplicationFactor
}

if c.Schema.CompactionWindow == 0 {
c.Schema.CompactionWindow = source.Schema.CompactionWindow
}

if c.Connection.ConnectionsPerHost == 0 {
c.Connection.ConnectionsPerHost = source.Connection.ConnectionsPerHost
}
Expand Down Expand Up @@ -134,19 +174,48 @@
NewSession() (cassandra.Session, error)
}

// NewSession creates a new Cassandra session
func (c *Configuration) NewSession() (cassandra.Session, error) {
// createSession creates session from a configuration
func createSession(c *Configuration) (cassandra.Session, error) {
cluster, err := c.NewCluster()
akstron marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}

session, err := cluster.CreateSession()
if err != nil {
return nil, err
}

return gocqlw.WrapCQLSession(session), nil
}

// newSessionPrerequisites creates tables and types before creating a session
func (c *Configuration) newSessionPrerequisites() error {
cfg := *c // clone because we need to connect without specifying a keyspace
cfg.Schema.Keyspace = ""
if !c.Schema.CreateSchema {
return nil
}

c.Schema.Keyspace = ""

session, err := createSession(c)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
session, err := createSession(c)
session, err := createSession(cfg)

if err != nil {
return err
}

Check warning on line 205 in pkg/cassandra/config/config.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/config.go#L200-L205

Added lines #L200 - L205 were not covered by tests

return generateSchemaIfNotPresent(session, &c.Schema)

Check warning on line 207 in pkg/cassandra/config/config.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/config.go#L207

Added line #L207 was not covered by tests
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return generateSchemaIfNotPresent(session, &c.Schema)
return generateSchemaIfNotPresent(session, &cfg.Schema)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure about this one

}

// NewSession creates a new Cassandra session
func (c *Configuration) NewSession() (cassandra.Session, error) {
if err := c.newSessionPrerequisites(); err != nil {
return nil, err
}

Check warning on line 214 in pkg/cassandra/config/config.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/config.go#L213-L214

Added lines #L213 - L214 were not covered by tests

return createSession(c)
}

// NewCluster creates a new gocql cluster from the configuration
func (c *Configuration) NewCluster() (*gocql.ClusterConfig, error) {
cluster := gocql.NewCluster(c.Connection.Servers...)
Expand Down Expand Up @@ -210,7 +279,27 @@
return fmt.Sprintf("%+v", *c)
}

// isValidTTL reports whether duration is acceptable as a TTL: either exactly
// zero or at least one second.
func isValidTTL(duration time.Duration) bool {
	if duration == 0 {
		return true
	}
	return duration >= time.Second
}

func (c *Configuration) Validate() error {
_, err := govalidator.ValidateStruct(c)
return err
if err != nil {
return err
}

if !isValidTTL(c.Schema.TraceTTL) {
return errors.New("trace_ttl can either be 0 or greater than or equal to 1 second")
}

if !isValidTTL(c.Schema.DependenciesTTL) {
return errors.New("dependencies_ttl can either be 0 or greater than or equal to 1 second")
}

Check warning on line 298 in pkg/cassandra/config/config.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/config.go#L297-L298

Added lines #L297 - L298 were not covered by tests

if c.Schema.CompactionWindow < time.Minute {
return errors.New("compaction_window should at least be 1 minute")
}

return nil
}
19 changes: 19 additions & 0 deletions pkg/cassandra/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package config

import (
"testing"
"time"

"github.com/gocql/gocql"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -43,6 +44,9 @@ func TestValidate_DoesNotReturnErrorWhenRequiredFieldsSet(t *testing.T) {
Connection: Connection{
Servers: []string{"localhost:9200"},
},
Schema: Schema{
CompactionWindow: time.Minute,
},
}

err := cfg.Validate()
Expand Down Expand Up @@ -94,3 +98,18 @@ func TestToString(t *testing.T) {
s := cfg.String()
assert.Contains(t, s, "Keyspace:test")
}

// TestConfigSchemaValidation verifies that Validate accepts the default
// configuration and rejects out-of-range schema durations.
func TestConfigSchemaValidation(t *testing.T) {
// The default configuration must pass validation.
cfg := DefaultConfiguration()
err := cfg.Validate()
require.NoError(t, err)

// A non-zero trace TTL below one second must be rejected.
cfg.Schema.TraceTTL = time.Millisecond
err = cfg.Validate()
require.Error(t, err)

// With a valid TTL restored, a compaction window just under one minute
// must be rejected.
cfg.Schema.TraceTTL = time.Second
cfg.Schema.CompactionWindow = time.Minute - 1
err = cfg.Validate()
require.Error(t, err)
}
138 changes: 138 additions & 0 deletions pkg/cassandra/config/schema.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Copyright (c) 2024 The Jaeger Authors.
yurishkuro marked this conversation as resolved.
Show resolved Hide resolved
// SPDX-License-Identifier: Apache-2.0

package config

import (
"bytes"
"embed"
"errors"
"fmt"
"text/template"
"time"

"github.com/jaegertracing/jaeger/pkg/cassandra"
)

// schemaFile is the embedded file system that holds the
// v004-go-tmpl.cql.tmpl schema template.
//
//go:embed v004-go-tmpl.cql.tmpl
var schemaFile embed.FS

// TemplateParams holds the values passed to the schema template when it is
// executed. The duration fields are whole-unit integers derived from the
// corresponding time.Duration settings in Schema.
type TemplateParams struct {
// Keyspace in which tables and types will be created for storage
Keyspace string
// Replication is the replication strategy used. Ex: "{'class': 'NetworkTopologyStrategy', 'replication_factor': '1' }"
Replication string
// CompactionWindowInMinutes is constructed from CompactionWindow for using in template
CompactionWindowInMinutes int64
// TraceTTLInSeconds is constructed from TraceTTL for using in template
TraceTTLInSeconds int64
// DependenciesTTLInSeconds is constructed from DependenciesTTL for using in template
DependenciesTTLInSeconds int64
}

func constructTemplateParams(cfg Schema) TemplateParams {
return TemplateParams{
Keyspace: cfg.Keyspace,
Replication: fmt.Sprintf("{'class': 'NetworkTopologyStrategy', 'replication_factor': '%v' }", cfg.ReplicationFactor),
CompactionWindowInMinutes: int64(cfg.CompactionWindow / time.Minute),
TraceTTLInSeconds: int64(cfg.TraceTTL / time.Second),
DependenciesTTLInSeconds: int64(cfg.DependenciesTTL / time.Second),
}

Check warning on line 40 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L33-L40

Added lines #L33 - L40 were not covered by tests
}

func getQueryFileAsBytes(fileName string, params TemplateParams) ([]byte, error) {
tmpl, err := template.ParseFS(schemaFile, fileName)
if err != nil {
return nil, err
}

Check warning on line 47 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L43-L47

Added lines #L43 - L47 were not covered by tests

var result bytes.Buffer
err = tmpl.Execute(&result, params)
if err != nil {
return nil, err
}

Check warning on line 53 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L49-L53

Added lines #L49 - L53 were not covered by tests

return result.Bytes(), nil

Check warning on line 55 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L55

Added line #L55 was not covered by tests
}

// getQueriesFromBytes splits the contents of a query file into individual
// query strings.
//
// The input is processed line by line:
//   - everything from the first `--` on a line is treated as a comment and
//     dropped;
//   - surrounding whitespace is trimmed, and lines that are empty after that
//     (blank, whitespace-only, or comment-only) are skipped;
//   - the remaining lines are accumulated, each followed by "\n", until a line
//     ending in ";" completes the query.
//
// It returns the queries in file order. If content remains after the last
// ";"-terminated line, it returns an "invalid template" error.
func getQueriesFromBytes(queryFile []byte) ([]string, error) {
	var (
		queries []string
		current bytes.Buffer
	)

	for _, line := range bytes.Split(queryFile, []byte("\n")) {
		// Keep only the part before a trailing comment, if there is one.
		line, _, _ = bytes.Cut(line, []byte(`--`))

		// Trim before the emptiness check so whitespace-only lines do not
		// leak blank lines into a query or leave a dangling remainder.
		line = bytes.TrimSpace(line)
		if len(line) == 0 {
			continue
		}

		current.Write(line)
		current.WriteByte('\n')

		if bytes.HasSuffix(line, []byte(";")) {
			queries = append(queries, current.String())
			current.Reset()
		}
	}

	// Anything left over is a statement that never reached its ";".
	if current.Len() > 0 {
		return nil, errors.New(`invalid template`)
	}

	return queries, nil
}

func getCassandraQueriesFromQueryStrings(session cassandra.Session, queries []string) []cassandra.Query {
var casQueries []cassandra.Query

for _, query := range queries {
casQueries = append(casQueries, session.Query(query))
}

Check warning on line 102 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L97-L102

Added lines #L97 - L102 were not covered by tests

return casQueries

Check warning on line 104 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L104

Added line #L104 was not covered by tests
}

func contructSchemaQueries(session cassandra.Session, cfg *Schema) ([]cassandra.Query, error) {
params := constructTemplateParams(*cfg)

queryFile, err := getQueryFileAsBytes(`v004-go-tmpl.cql.tmpl`, params)
if err != nil {
return nil, err
}

Check warning on line 113 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L107-L113

Added lines #L107 - L113 were not covered by tests

queryStrings, err := getQueriesFromBytes(queryFile)
if err != nil {
return nil, err
}

Check warning on line 118 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L115-L118

Added lines #L115 - L118 were not covered by tests

casQueries := getCassandraQueriesFromQueryStrings(session, queryStrings)

return casQueries, nil

Check warning on line 122 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L120-L122

Added lines #L120 - L122 were not covered by tests
}

func generateSchemaIfNotPresent(session cassandra.Session, cfg *Schema) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are too many functions in this file that are polluting the overall package namespace. I would prefer to introduce a helper struct

type schemaCreator struct {
  session cassandra.Session
  cfg *Schema
}

and define those functions on that struct (and minimize parameter passing)

casQueries, err := contructSchemaQueries(session, cfg)
if err != nil {
return err
}

Check warning on line 129 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L125-L129

Added lines #L125 - L129 were not covered by tests

for _, query := range casQueries {
if err := query.Exec(); err != nil {
return err
}

Check warning on line 134 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L131-L134

Added lines #L131 - L134 were not covered by tests
}

return nil

Check warning on line 137 in pkg/cassandra/config/schema.go

View check run for this annotation

Codecov / codecov/patch

pkg/cassandra/config/schema.go#L137

Added line #L137 was not covered by tests
}
55 changes: 55 additions & 0 deletions pkg/cassandra/config/schema_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package config

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestQueryGenerationFromBytes checks that getQueriesFromBytes strips `--`
// comments, skips empty lines, and groups the remaining lines into one query
// per ";"-terminated statement.
func TestQueryGenerationFromBytes(t *testing.T) {
// Input mixes a multi-line query with comments, blank lines, and two
// single-line queries.
queriesAsString := `
query1 -- comment (this should be removed)
query1-continue
query1-finished; --


query2;
query-3 query-3-continue query-3-finished;
`
// Each expected query keeps its line breaks and ends with a newline.
expGeneratedQueries := []string{
`query1
query1-continue
query1-finished;
`,
`query2;
`,
`query-3 query-3-continue query-3-finished;
`,
}

queriesAsBytes := []byte(queriesAsString)
queries, err := getQueriesFromBytes(queriesAsBytes)
require.NoError(t, err)

require.Equal(t, len(expGeneratedQueries), len(queries))

// Compare query by query so a mismatch points at the offending statement.
for i := range len(expGeneratedQueries) {
require.Equal(t, expGeneratedQueries[i], queries[i])
}
}

// TestInvalidQueryTemplate checks that getQueriesFromBytes reports an
// "invalid template" error when the last statement has no terminating ";".
func TestInvalidQueryTemplate(t *testing.T) {
// Same shape as the valid input, except the final query lacks its semicolon.
queriesAsString := `
query1 -- comment (this should be removed)
query1-continue
query1-finished; --


query2;
query-3 query-3-continue query-3-finished -- missing semicolon
`

queriesAsBytes := []byte(queriesAsString)
_, err := getQueriesFromBytes(queriesAsBytes)
require.ErrorContains(t, err, "invalid template")
}
Loading
Loading