diff --git a/go.mod b/go.mod index 5bff735842..20248cb53e 100644 --- a/go.mod +++ b/go.mod @@ -42,6 +42,7 @@ require ( github.com/databricks/databricks-sql-go v1.6.1 github.com/denisenkom/go-mssqldb v0.12.3 github.com/dgraph-io/badger/v4 v4.5.0 + github.com/dlclark/regexp2 v1.11.4 github.com/docker/docker v27.5.0+incompatible github.com/go-chi/chi/v5 v5.2.0 github.com/go-redis/redis v6.15.9+incompatible @@ -192,7 +193,6 @@ require ( github.com/danieljoos/wincred v1.2.2 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect - github.com/dlclark/regexp2 v1.11.4 // indirect github.com/dnephin/pflag v1.0.7 // indirect github.com/docker/cli v27.2.1+incompatible // indirect github.com/docker/cli-docs-tool v0.8.0 // indirect diff --git a/processor/processor.go b/processor/processor.go index cbf40de16b..cb65627fb8 100644 --- a/processor/processor.go +++ b/processor/processor.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "reflect" "runtime/trace" "slices" "strconv" @@ -12,9 +13,12 @@ import ( "sync" "time" + obskit "github.com/rudderlabs/rudder-observability-kit/go/labels" + "github.com/google/uuid" "github.com/rudderlabs/rudder-server/enterprise/trackedusers" + warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" "golang.org/x/sync/errgroup" @@ -57,6 +61,7 @@ import ( . 
"github.com/rudderlabs/rudder-server/utils/tx" //nolint:staticcheck "github.com/rudderlabs/rudder-server/utils/types" "github.com/rudderlabs/rudder-server/utils/workerpool" + wtrans "github.com/rudderlabs/rudder-server/warehouse/transformer" ) const ( @@ -86,10 +91,12 @@ type trackedUsersReporter interface { // Handle is a handle to the processor module type Handle struct { - conf *config.Config - tracer stats.Tracer - backendConfig backendconfig.BackendConfig - transformer transformer.Transformer + conf *config.Config + tracer stats.Tracer + backendConfig backendconfig.BackendConfig + transformer transformer.Transformer + warehouseTransformer transformer.DestinationTransformer + warehouseDebugLogger *wtrans.DebugLogger gatewayDB jobsdb.JobsDB routerDB jobsdb.JobsDB @@ -159,6 +166,7 @@ type Handle struct { eventAuditEnabled map[string]bool credentialsMap map[string][]transformer.Credential nonEventStreamSources map[string]bool + enableWarehouseTransformations config.ValueLoader[bool] } drainConfig struct { @@ -618,6 +626,9 @@ func (proc *Handle) Setup( "partition": partition, }) } + proc.warehouseTransformer = wtrans.New(proc.conf, proc.logger, proc.statsFactory) + proc.warehouseDebugLogger = wtrans.NewDebugLogger(proc.conf, proc.logger) + if proc.config.enableDedup { var err error proc.dedup, err = dedup.New(proc.conf, proc.statsFactory) @@ -819,6 +830,7 @@ func (proc *Handle) loadReloadableConfig(defaultPayloadLimit int64, defaultMaxEv proc.config.archivalEnabled = config.GetReloadableBoolVar(true, "archival.Enabled") // Capture event name as a tag in event level stats proc.config.captureEventNameStats = config.GetReloadableBoolVar(false, "Processor.Stats.captureEventName") + proc.config.enableWarehouseTransformations = config.GetReloadableBoolVar(false, "Processor.enableWarehouseTransformations") } type connection struct { @@ -3215,6 +3227,7 @@ func (proc *Handle) transformSrcDest( proc.logger.Debug("Dest Transform input size", len(eventsToTransform)) s := 
time.Now() response = proc.transformer.Transform(ctx, eventsToTransform, proc.config.transformBatchSize.Load()) + proc.handleResponseForWarehouseTransformation(ctx, eventsToTransform, response, commonMetaData, eventsByMessageID) destTransformationStat := proc.newDestinationTransformationStat(sourceID, workspaceID, transformAt, destination) destTransformationStat.transformTime.Since(s) @@ -3373,6 +3386,65 @@ func (proc *Handle) transformSrcDest( } } +func (proc *Handle) handleResponseForWarehouseTransformation( + ctx context.Context, + eventsToTransform []transformer.TransformerEvent, + pResponse transformer.Response, + commonMetaData *transformer.Metadata, + eventsByMessageID map[string]types.SingularEventWithReceivedAt, +) { + if _, ok := warehouseutils.WarehouseDestinationMap[commonMetaData.DestinationType]; !ok { + return + } + if len(eventsToTransform) == 0 || !proc.config.enableWarehouseTransformations.Load() { + return + } + defer proc.statsFactory.NewStat("proc_warehouse_transformations_time", stats.TimerType).RecordDuration()() + + wResponse := proc.warehouseTransformer.Transform(ctx, eventsToTransform, proc.config.transformBatchSize.Load()) + differingEvents := proc.responsesDiffer(eventsToTransform, pResponse, wResponse, eventsByMessageID) + if err := proc.warehouseDebugLogger.LogEvents(differingEvents, commonMetaData); err != nil { + proc.logger.Warnn("Failed to log events for warehouse transformation debugging", obskit.Error(err)) + } +} + +func (proc *Handle) responsesDiffer( + eventsToTransform []transformer.TransformerEvent, + pResponse, wResponse transformer.Response, + eventsByMessageID map[string]types.SingularEventWithReceivedAt, +) []types.SingularEventT { + // If the event counts differ, return all events in the transformation + if len(pResponse.Events) != len(wResponse.Events) || len(pResponse.FailedEvents) != len(wResponse.FailedEvents) { + events := lo.Map(eventsToTransform, func(e transformer.TransformerEvent, _ int) types.SingularEventT { 
+ return eventsByMessageID[e.Metadata.MessageID].SingularEvent + }) + proc.statsFactory.NewStat("proc_warehouse_transformations_mismatches", stats.CountType).Count(len(events)) + return events + } + + var ( + differedSampleEvents []types.SingularEventT + differedEventsCount int + collectedSampleEvent bool + ) + + for i := range pResponse.Events { + if !reflect.DeepEqual(pResponse.Events[i], wResponse.Events[i]) { + differedEventsCount++ + if !collectedSampleEvent { + // Collect the mismatched messages and break (sample only) + differedSampleEvents = append(differedSampleEvents, lo.Map(pResponse.Events[i].Metadata.GetMessagesIDs(), func(msgID string, _ int) types.SingularEventT { + return eventsByMessageID[msgID].SingularEvent + })...) + collectedSampleEvent = true + } + } + } + proc.statsFactory.NewStat("proc_warehouse_transformations_mismatches", stats.CountType).Count(differedEventsCount) + + return differedSampleEvents +} + func (proc *Handle) saveDroppedJobs(ctx context.Context, droppedJobs []*jobsdb.JobT, tx *Tx) error { if len(droppedJobs) > 0 { for i := range droppedJobs { // each dropped job should have a unique jobID in the scope of the batch diff --git a/processor/transformer/transformer.go b/processor/transformer/transformer.go index 57d51d977f..897d57879b 100644 --- a/processor/transformer/transformer.go +++ b/processor/transformer/transformer.go @@ -146,13 +146,25 @@ func WithClient(client HTTPDoer) Opt { } } -// Transformer provides methods to transform events -type Transformer interface { - Transform(ctx context.Context, clientEvents []TransformerEvent, batchSize int) Response +type UserTransformer interface { UserTransform(ctx context.Context, clientEvents []TransformerEvent, batchSize int) Response +} + +type DestinationTransformer interface { + Transform(ctx context.Context, clientEvents []TransformerEvent, batchSize int) Response +} + +type TrackingPlanValidator interface { Validate(ctx context.Context, clientEvents []TransformerEvent, batchSize 
int) Response } +// Transformer provides methods to transform events +type Transformer interface { + UserTransformer + DestinationTransformer + TrackingPlanValidator +} + type HTTPDoer interface { Do(req *http.Request) (*http.Response, error) } @@ -568,7 +580,7 @@ func (trans *handle) destTransformURL(destType string) string { destinationEndPoint := fmt.Sprintf("%s/v0/destinations/%s", trans.config.destTransformationURL, strings.ToLower(destType)) if _, ok := warehouseutils.WarehouseDestinationMap[destType]; ok { - whSchemaVersionQueryParam := fmt.Sprintf("whSchemaVersion=%s&whIDResolve=%v", trans.conf.GetString("Warehouse.schemaVersion", "v1"), warehouseutils.IDResolutionEnabled()) + whSchemaVersionQueryParam := fmt.Sprintf("whIDResolve=%t", trans.conf.GetBool("Warehouse.enableIDResolution", false)) switch destType { case warehouseutils.RS: return destinationEndPoint + "?" + whSchemaVersionQueryParam diff --git a/warehouse/internal/model/schema.go b/warehouse/internal/model/schema.go index d25ab0890e..49d85405ba 100644 --- a/warehouse/internal/model/schema.go +++ b/warehouse/internal/model/schema.go @@ -17,7 +17,8 @@ const ( JSONDataType SchemaType = "json" TextDataType SchemaType = "text" DateTimeDataType SchemaType = "datetime" - ArrayOfBooleanDatatype SchemaType = "array(boolean)" + ArrayDataType SchemaType = "array" + ArrayOfBooleanDataType SchemaType = "array(boolean)" ) type WHSchema struct { diff --git a/warehouse/slave/worker.go b/warehouse/slave/worker.go index 8be3aca228..bbf96b3765 100644 --- a/warehouse/slave/worker.go +++ b/warehouse/slave/worker.go @@ -319,7 +319,7 @@ func (w *worker) processStagingFile(ctx context.Context, job payload) ([]uploadR } columnVal = newColumnVal - case model.ArrayOfBooleanDatatype: + case model.ArrayOfBooleanDataType: if boolValue, ok := columnVal.([]interface{}); ok { newColumnVal := make([]interface{}, len(boolValue)) diff --git a/warehouse/transformer/debuglogger.go b/warehouse/transformer/debuglogger.go new file mode 
100644 index 0000000000..963512cd01 --- /dev/null +++ b/warehouse/transformer/debuglogger.go @@ -0,0 +1,81 @@ +package transformer + +import ( + "fmt" + "sync" + + "github.com/google/uuid" + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stringify" + + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/utils/types" +) + +type DebugLogger struct { + logger logger.Logger + maxLoggedEvents config.ValueLoader[int] + eventLogMutex sync.Mutex + currentLogFileName string + loggedEvents int64 +} + +func NewDebugLogger(conf *config.Config, logger logger.Logger) *DebugLogger { + logFileName := generateLogFileName() + + return &DebugLogger{ + logger: logger.Child("debugLogger").With("currentLogFileName", logFileName), + maxLoggedEvents: conf.GetReloadableIntVar(10000, 1, "Processor.maxLoggedEvents"), + currentLogFileName: logFileName, + } +} + +func generateLogFileName() string { + return fmt.Sprintf("warehouse_transformations_debug_%s.log", uuid.NewString()) +} + +func (d *DebugLogger) LogEvents(events []types.SingularEventT, commonMedata *ptrans.Metadata) error { + if len(events) == 0 { + return nil + } + d.eventLogMutex.Lock() + defer d.eventLogMutex.Unlock() + + if d.loggedEvents >= int64(d.maxLoggedEvents.Load()) { + return nil + } + + logEntries := lo.Map(events, func(item types.SingularEventT, index int) string { + return stringify.Any(ptrans.TransformerEvent{ + Message: item, + Metadata: *commonMedata, + }) + }) + + if err := d.writeLogEntries(logEntries); err != nil { + return fmt.Errorf("logging events: %w", err) + } + + d.logger.Infon("Successfully logged events", logger.NewIntField("event_count", int64(len(logEntries)))) + d.loggedEvents += int64(len(logEntries)) + return nil +} + +func (d *DebugLogger) writeLogEntries(entries []string) error { + 
writer, err := misc.CreateBufferedWriter(d.currentLogFileName) + if err != nil { + return fmt.Errorf("creating buffered writer: %w", err) + } + defer func() { _ = writer.Close() }() + + for _, entry := range entries { + if _, err := writer.Write([]byte(entry + "\n")); err != nil { + return fmt.Errorf("writing log entry: %w", err) + } + } + return nil +} diff --git a/warehouse/transformer/internal/reservedkeywords/reservedkeywords.go b/warehouse/transformer/internal/reservedkeywords/reservedkeywords.go new file mode 100644 index 0000000000..0f903796c7 --- /dev/null +++ b/warehouse/transformer/internal/reservedkeywords/reservedkeywords.go @@ -0,0 +1,60 @@ +package reservedkeywords + +import ( + "embed" + "log" + "strings" + + jsoniter "github.com/json-iterator/go" + "github.com/samber/lo" +) + +var ( + //go:embed reservedtablescolumns.json + tablesColumnsFile embed.FS + + //go:embed reservednamespaces.json + namespacesFile embed.FS + + reservedTablesColumns, reservedNamespaces map[string]map[string]struct{} + + json = jsoniter.ConfigCompatibleWithStandardLibrary +) + +func init() { + reservedTablesColumns = load(tablesColumnsFile, "reservedtablescolumns.json") + reservedNamespaces = load(namespacesFile, "reservednamespaces.json") +} + +func load(file embed.FS, fileName string) map[string]map[string]struct{} { + data, err := file.ReadFile(fileName) + if err != nil { + log.Fatalf("failed to load reserved keywords from %s: %v", fileName, err) + } + + var tempKeywords map[string][]string + if err := json.Unmarshal(data, &tempKeywords); err != nil { + log.Fatalf("failed to parse reserved keywords from %s: %v", fileName, err) + } + + return lo.MapValues(tempKeywords, func(keywords []string, _ string) map[string]struct{} { + return lo.SliceToMap(keywords, func(k string) (string, struct{}) { + return strings.ToUpper(k), struct{}{} + }) + }) +} + +// IsTableOrColumn checks if the given keyword is a reserved table/column keyword for the destination type. 
+func IsTableOrColumn(destType, keyword string) bool { + return isKeywordReserved(reservedTablesColumns, destType, keyword) +} + +// IsNamespace checks if the given keyword is a reserved namespace keyword for the destination type. +func IsNamespace(destType, keyword string) bool { + return isKeywordReserved(reservedNamespaces, destType, keyword) +} + +func isKeywordReserved(keywords map[string]map[string]struct{}, destType, keyword string) bool { + _, exists := keywords[destType][strings.ToUpper(keyword)] + return exists +} diff --git a/warehouse/transformer/internal/reservedkeywords/reservedkeywords_test.go b/warehouse/transformer/internal/reservedkeywords/reservedkeywords_test.go new file mode 100644 index 0000000000..c40efea6f2 --- /dev/null +++ b/warehouse/transformer/internal/reservedkeywords/reservedkeywords_test.go @@ -0,0 +1,41 @@ +package reservedkeywords_test + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/reservedkeywords" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestReservedKeywords(t *testing.T) { + t.Run("IsTableOrColumn", func(t *testing.T) { + testCases := []struct { + keyword string + isReserved bool + }{ + {"SELECT", true}, + {"select", true}, + {"Select", true}, + {"not_reserved", false}, + } + for _, tc := range testCases { + require.Equal(t, tc.isReserved, reservedkeywords.IsTableOrColumn(whutils.POSTGRES, tc.keyword)) + } + }) + t.Run("IsNamespace", func(t *testing.T) { + testCases := []struct { + keyword string + isReserved bool + }{ + {"SELECT", true}, + {"select", true}, + {"Select", true}, + {"not_reserved", false}, + } + for _, tc := range testCases { + require.Equal(t, tc.isReserved, reservedkeywords.IsNamespace(whutils.POSTGRES, tc.keyword)) + } + }) +} diff --git a/warehouse/transformer/internal/reservedkeywords/reservednamespaces.json b/warehouse/transformer/internal/reservedkeywords/reservednamespaces.json new 
file mode 100644 index 0000000000..c26c1bdd95 --- /dev/null +++ b/warehouse/transformer/internal/reservedkeywords/reservednamespaces.json @@ -0,0 +1,2333 @@ +{ + "AZURE_DATALAKE": [ + "INTO", + "CONSTRAINT", + "CURRENT", + "READ", + "WITHIN", + "NO", + "ROWGUIDCOL", + "WITH", + "NCLOB", + "REGR_SYY", + "SETUSER", + "CONDITION", + "MAP", + "MINUTE", + "PARTITION", + "SYSTEM_USER", + "NULL", + "OBJECT", + "SEMANTICSIMILARITYDETAILSTABLE", + "SESSION", + "ELEMENT", + "HOUR", + "MOD", + "PUBLIC", + "SEMANTICSIMILARITYTABLE", + "EXIT", + "LINENO", + "SECURITYAUDIT", + "TRIM", + "VAR_POP", + "CORRESPONDING", + "CREATE", + "IMMEDIATE", + "XMLATTRIBUTES", + "CLUSTERED", + "DENY", + "LAST", + "OCCURRENCES_REGEX", + "REF", + "RELATIVE", + "SELECT", + "XMLEXISTS", + "BETWEEN", + "FREETEXTTABLE", + "PAD", + "WITHOUT", + "SYSTEM", + "WHENEVER", + "DEC", + "DIAGNOSTICS", + "GENERAL", + "LARGE", + "METHOD", + "ATOMIC", + "DETERMINISTIC", + "OLD", + "UNPIVOT", + "USING", + "DESCRIPTOR", + "BY", + "CALL", + "CONNECT", + "CYCLE", + "MIN", + "ABSOLUTE", + "DICTIONARY", + "MODIFY", + "POSITION_REGEX", + "SECOND", + "UNDER", + "CUME_DIST", + "EXTRACT", + "LEFT", + "NORMALIZE", + "REPLICATION", + "BOTH", + "BROWSE", + "EXEC", + "FORTRAN", + "LANGUAGE", + "RESTORE", + "NOCHECK", + "XMLTEXT", + "FOR", + "NEXT", + "SHUTDOWN", + "STRUCTURE", + "BINARY", + "CURRENT_TIMESTAMP", + "DATE", + "RETURN", + "SAVE", + "OVER", + "BEGIN", + "FULLTEXTTABLE", + "IGNORE", + "INPUT", + "ISOLATION", + "OUTPUT", + "DEFERRABLE", + "NONE", + "COMMIT", + "REGR_AVGX", + "XMLCAST", + "VAR_SAMP", + "AT", + "GLOBAL", + "IS", + "LN", + "OR", + "SEQUENCE", + "TABLESAMPLE", + "DOMAIN", + "FREE", + "IN", + "LOCAL", + "MEMBER", + "OPENROWSET", + "VARYING", + "WRITE", + "ARRAY", + "COLUMN", + "DYNAMIC", + "OUTER", + "DAY", + "DEALLOCATE", + "XMLPARSE", + "CHARACTER_LENGTH", + "PERCENT", + "TRAN", + "WRITETEXT", + "PROCEDURE", + "RECURSIVE", + "TRUE", + "CONTAINS", + "OUT", + "PLAN", + "CURRENT_SCHEMA", + "FALSE",
+ "PRIOR", + "RELEASE", + "TRUNCATE", + "XMLBINARY", + "ANY", + "DOUBLE", + "IDENTITY_INSERT", + "UPDATE", + "CHAR", + "LOCATOR", + "ORDINALITY", + "TSEQUAL", + "XMLDOCUMENT", + "CURRENT_ROLE", + "KILL", + "SMALLINT", + "EACH", + "MODULE", + "RECONFIGURE", + "SUBMULTISET", + "THEN", + "PASCAL", + "CONNECTION", + "LATERAL", + "STATEMENT", + "VALUE", + "BIT_LENGTH", + "XMLTABLE", + "POSITION", + "SIMILAR", + "SOME", + "UNIQUE", + "BEFORE", + "LOCALTIMESTAMP", + "OPENQUERY", + "ORDER", + "ROLE", + "SESSION_USER", + "LEVEL", + "PREORDER", + "FUNCTION", + "GROUP", + "IDENTITY", + "XMLAGG", + "XMLQUERY", + "CASCADED", + "CAST", + "CHECK", + "RETURNS", + "CASE", + "CONTAINSTABLE", + "LOCALTIME", + "CALLED", + "EQUALS", + "READTEXT", + "REGR_SLOPE", + "WITHINGROUP", + "AGGREGATE", + "COLLATE", + "INTERSECTION", + "OVERLAPS", + "STDDEV_POP", + "UNKNOWN", + "LOWER", + "READS", + "MATCH", + "MERGE", + "TRY_CONVERT", + "CORR", + "END", + "HAVING", + "SYMMETRIC", + "UNION", + "ADD", + "DISCONNECT", + "RULE", + "STATISTICS", + "WAITFOR", + "FIRST", + "INTERSECT", + "PERCENT_RANK", + "AFTER", + "CATALOG", + "DESTROY", + "INSENSITIVE", + "SIZE", + "ARE", + "OFFSETS", + "REGR_COUNT", + "ALTER", + "AS", + "COMPLETION", + "COVAR_POP", + "FILE", + "SQLSTATE", + "LESS", + "MONTH", + "ROUTINE", + "CHARACTER", + "CROSS", + "REGR_SXX", + "VARIABLE", + "CONVERT", + "FREETEXT", + "REAL", + "TIMEZONE_MINUTE", + "TRANSLATE", + "REVOKE", + "VALUES", + "BREAK", + "COLLECT", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "DATA", + "INOUT", + "STATE", + "TRANSLATION", + "MAX", + "SUM", + "XMLCOMMENT", + "COLLATION", + "INITIALIZE", + "LIMIT", + "REGR_R2", + "SETS", + "SPECIFIC", + "ASYMMETRIC", + "COALESCE", + "SEMANTICKEYPHRASETABLE", + "SUBSTRING", + "TERMINATE", + "OPENXML", + "PARAMETERS", + "RESULT", + "CURRENT_CATALOG", + "GROUPING", + "DESCRIBE", + "DESTRUCTOR", + "DISTRIBUTED", + "BACKUP", + "CLASS", + "DELETE", + "FOREIGN", + "DBCC", + "END-EXEC", + "EXCEPT", + "IF", + "PRIVILEGES", + "TIME", +
"WIDTH_BUCKET", + "ALIAS", + "CARDINALITY", + "DEFERRED", + "EXTERNAL", + "IDENTITYCOL", + "INDEX", + "NAMES", + "XMLSERIALIZE", + "AVG", + "CURRENT_USER", + "HOST", + "NULLIF", + "OPTION", + "REVERT", + "USER", + "NUMERIC", + "PRINT", + "REGR_INTERCEPT", + "ALL", + "CASCADE", + "ELSE", + "PRESERVE", + "SENSITIVE", + "HOLD", + "REFERENCES", + "SQLCA", + "ACTION", + "DECLARE", + "LIKE", + "PIVOT", + "TRANSLATE_REGEX", + "UNNEST", + "AUTHORIZATION", + "COMPUTE", + "DEPTH", + "JOIN", + "OVERLAY", + "XMLNAMESPACES", + "LEADING", + "PERCENTILE_DISC", + "USE", + "SQL", + "VARCHAR", + "ADMIN", + "EXECUTE", + "REFERENCING", + "SQLCODE", + "DEREF", + "ERRLVL", + "INDICATOR", + "LOAD", + "UPPER", + "CLOSE", + "CONSTRAINTS", + "DECIMAL", + "INITIALLY", + "NATIONAL", + "NCHAR", + "SCHEMA", + "CHAR_LENGTH", + "DATABASE", + "DUMP", + "LIKE_REGEX", + "TO", + "PROC", + "BLOB", + "FUSION", + "INNER", + "RIGHT", + "BOOLEAN", + "CURRENT_DATE", + "FILLFACTOR", + "ROWCOUNT", + "XMLVALIDATE", + "CHECKPOINT", + "EXCEPTION", + "NATURAL", + "ROLLBACK", + "STDDEV_SAMP", + "WINDOW", + "CUBE", + "CURRENT_PATH", + "PREFIX", + "TEXTSIZE", + "WHERE", + "DISK", + "EVERY", + "INSERT", + "OCTET_LENGTH", + "WHILE", + "ASENSITIVE", + "DROP", + "HOLDLOCK", + "BULK", + "ONLY", + "CONTINUE", + "COUNT", + "KEY", + "BIT", + "OF", + "PRECISION", + "TEMPORARY", + "MULTISET", + "PARAMETER", + "RESTRICT", + "SPACE", + "TRAILING", + "CLOB", + "DISTINCT", + "ESCAPE", + "FETCH", + "GRANT", + "TRANSACTION", + "ASSERTION", + "FROM", + "MODIFIES", + "SQLWARNING", + "XMLELEMENT", + "THAN", + "ASC", + "EXISTS", + "NEW", + "OFF", + "PERCENTILE_CONT", + "PRIMARY", + "ROW", + "TREAT", + "CURRENT_TIME", + "ITERATE", + "SET", + "TOP", + "XMLPI", + "OPEN", + "OPENDATASOURCE", + "PARTIAL", + "PREPARE", + "REGR_AVGY", + "SQLERROR", + "STATIC", + "ADA", + "RANGE", + "SCOPE", + "GO", + "NONCLUSTERED", + "UPDATETEXT", + "VIEW", + "INT", + "AND", + "COVAR_SAMP", + "DESC", + "NOT", + "ON", + "TABLE", + "INCLUDE", + "SEARCH", +
"SPECIFICTYPE", + "SQLEXCEPTION", + "TRIGGER", + "GET", + "START", + "DEFAULT", + "FLOAT", + "FOUND", + "FULL", + "USAGE", + "GOTO", + "RAISERROR", + "SUBSTRING_REGEX", + "ZONE", + "BREADTH", + "CURSOR", + "OPERATION", + "TIMEZONE_HOUR", + "UESCAPE", + "XMLCONCAT", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "ROLLUP", + "INTEGER", + "ROWS", + "SAVEPOINT", + "XMLITERATE", + "YEAR", + "ALLOCATE", + "REGR_SXY", + "POSTFIX", + "SECTION", + "WORK", + "INTERVAL", + "SCROLL", + "WHEN", + "FILTER", + "XMLFOREST" + ], + "AZURE_SYNAPSE": [ + "CAST", + "GROUPING", + "PIVOT", + "ARE", + "ATOMIC", + "ORDINALITY", + "OVERLAY", + "SIZE", + "AFTER", + "EVERY", + "GROUP", + "REGR_SXX", + "RESTRICT", + "SEARCH", + "STDDEV_POP", + "COLLECT", + "KEY", + "LOCAL", + "FROM", + "TEMPORARY", + "TRUE", + "TSEQUAL", + "WAITFOR", + "BEFORE", + "DEFERRED", + "STATE", + "XMLDOCUMENT", + "ALLOCATE", + "INOUT", + "SYSTEM_USER", + "CALL", + "CALLED", + "LIMIT", + "INNER", + "NONE", + "REGR_SXY", + "WHILE", + "XMLQUERY", + "AGGREGATE", + "DICTIONARY", + "INITIALIZE", + "REVERT", + "SIMILAR", + "FOUND", + "MODIFIES", + "REFERENCES", + "CONVERT", + "PRESERVE", + "DISTRIBUTED", + "RULE", + "READ", + "SEQUENCE", + "USE", + "BREAK", + "CURRENT_ROLE", + "INTERVAL", + "POSITION", + "REFERENCING", + "TRY_CONVERT", + "WHERE", + "CURRENT_DATE", + "CURRENT_USER", + "INSERT", + "MIN", + "PLAN", + "SUBMULTISET", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "DATABASE", + "FILE", + "MERGE", + "OPTION", + "OFFSETS", + "UPPER", + "AND", + "CHARACTER", + "IDENTITY", + "PRIMARY", + "SELECT", + "CASCADED", + "DEALLOCATE", + "SECOND", + "SQL", + "TABLE", + "TIMEZONE_MINUTE", + "NATIONAL", + "NATURAL", + "ROLE", + "ERRLVL", + "HAVING", + "ONLY", + "OFF", + "SEMANTICSIMILARITYTABLE", + "STDDEV_SAMP", + "TRANSLATION", + "CLASS", + "CURRENT_TIMESTAMP", + "ESCAPE", + "WINDOW", + "WITHOUT", + "ASSERTION", + "COVAR_POP", + "OLD", + "BETWEEN", + "FREETEXT", + "NORMALIZE", + "PREORDER", + "CONTAINSTABLE", + "MAX", + "PARAMETERS", +
"ANY", + "BIT", + "INTO", + "FILLFACTOR", + "IF", + "NCLOB", + "GENERAL", + "MAP", + "BOOLEAN", + "COMMIT", + "CROSS", + "SUBSTRING_REGEX", + "THAN", + "CONNECT", + "EXEC", + "RIGHT", + "SAVEPOINT", + "DEC", + "EXTERNAL", + "PARTIAL", + "DESCRIPTOR", + "ORDER", + "REGR_R2", + "PREFIX", + "SESSION", + "COLLATE", + "DEFAULT", + "XMLITERATE", + "CHARACTER_LENGTH", + "LANGUAGE", + "REPLICATION", + "SET", + "SPACE", + "LEADING", + "LOCATOR", + "SECTION", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "PAD", + "OVERLAPS", + "SCROLL", + "INPUT", + "ROUTINE", + "DESCRIBE", + "FREE", + "IN", + "IDENTITY_INSERT", + "OUTER", + "SCOPE", + "AT", + "COLLATION", + "FUNCTION", + "RESTORE", + "SEMANTICSIMILARITYDETAILSTABLE", + "VARCHAR", + "COALESCE", + "MODULE", + "REGR_AVGY", + "DOUBLE", + "SEMANTICKEYPHRASETABLE", + "ISOLATION", + "OVER", + "INITIALLY", + "LIKE_REGEX", + "SETS", + "VALUE", + "CARDINALITY", + "CURSOR", + "INCLUDE", + "PARTITION", + "CURRENT_SCHEMA", + "EXISTS", + "INTEGER", + "PASCAL", + "SQLSTATE", + "SQLWARNING", + "BACKUP", + "CHAR", + "NULLIF", + "HOLDLOCK", + "LEVEL", + "SPECIFIC", + "TRANSLATE", + "CURRENT_TIME", + "END", + "PROC", + "XMLEXISTS", + "OR", + "READS", + "TRANSLATE_REGEX", + "EXCEPTION", + "FILTER", + "OPENQUERY", + "PRECISION", + "TRUNCATE", + "ADA", + "CLOSE", + "EACH", + "INT", + "OPENXML", + "DUMP", + "FUSION", + "GET", + "XMLPARSE", + "DBCC", + "FIRST", + "REF", + "BIT_LENGTH", + "PERCENT", + "REGR_COUNT", + "XMLTABLE", + "PRINT", + "ROWS", + "WIDTH_BUCKET", + "XMLCAST", + "RELEASE", + "ROW", + "SCHEMA", + "EQUALS", + "OUT", + "DEFERRABLE", + "DISTINCT", + "DROP", + "PREPARE", + "SAVE", + "ASC", + "LOCALTIME", + "NEW", + "LARGE", + "SYSTEM", + "WRITE", + "PUBLIC", + "RECURSIVE", + "HOLD", + "MOD", + "NAMES", + "SHUTDOWN", + "CHECKPOINT", + "CLOB", + "FOREIGN", + "CURRENT", + "RANGE", + "ABSOLUTE", + "MODIFY", + "XMLSERIALIZE", + "TRANSACTION", + "UNION", + "DELETE", + "EXIT", + "IS", + "LAST", + "ADMIN", + "DECIMAL", + "KILL", + "XMLCOMMENT", + 
"COVAR_SAMP", + "IDENTITYCOL", + "SQLCA", + "OPENROWSET", + "REGR_SLOPE", + "XMLELEMENT", + "BREADTH", + "DEPTH", + "INDEX", + "MATCH", + "PERCENT_RANK", + "XMLATTRIBUTES", + "CUBE", + "EXTRACT", + "PRIOR", + "UNPIVOT", + "LEFT", + "LOCALTIMESTAMP", + "OPERATION", + "LN", + "POSITION_REGEX", + "SMALLINT", + "DATA", + "SPECIFICTYPE", + "XMLAGG", + "TOP", + "TRIGGER", + "ASENSITIVE", + "NONCLUSTERED", + "SENSITIVE", + "BY", + "INTERSECT", + "LINENO", + "ROLLBACK", + "CONTAINS", + "EXECUTE", + "LIKE", + "ROWGUIDCOL", + "SECURITYAUDIT", + "XMLBINARY", + "ARRAY", + "BLOB", + "ELEMENT", + "ALTER", + "MONTH", + "XMLVALIDATE", + "ASYMMETRIC", + "DESC", + "SETUSER", + "OCCURRENCES_REGEX", + "RETURN", + "TO", + "DIAGNOSTICS", + "DISK", + "METHOD", + "BROWSE", + "MULTISET", + "FOR", + "VARIABLE", + "POSTFIX", + "PROCEDURE", + "ROWCOUNT", + "VARYING", + "DECLARE", + "HOUR", + "INTERSECTION", + "INDICATOR", + "LOWER", + "ZONE", + "NEXT", + "SUBSTRING", + "XMLNAMESPACES", + "COMPLETION", + "JOIN", + "BEGIN", + "CUME_DIST", + "DYNAMIC", + "VAR_POP", + "WITHINGROUP", + "WORK", + "XMLFOREST", + "ACTION", + "OBJECT", + "PERCENTILE_CONT", + "ALL", + "CHAR_LENGTH", + "TIMEZONE_HOUR", + "COLUMN", + "FULLTEXTTABLE", + "NULL", + "TREAT", + "UNIQUE", + "FORTRAN", + "OCTET_LENGTH", + "RAISERROR", + "CASE", + "SQLCODE", + "WRITETEXT", + "OUTPUT", + "AS", + "CORR", + "LATERAL", + "REGR_SYY", + "SUM", + "TRAILING", + "CURRENT_CATALOG", + "DESTROY", + "ELSE", + "UNNEST", + "BINARY", + "CORRESPONDING", + "DETERMINISTIC", + "CASCADE", + "INSENSITIVE", + "VALUES", + "END-EXEC", + "GLOBAL", + "UNDER", + "RECONFIGURE", + "BOTH", + "ITERATE", + "NO", + "DESTRUCTOR", + "REVOKE", + "WITH", + "CYCLE", + "EXCEPT", + "USAGE", + "CHECK", + "LESS", + "ADD", + "AVG", + "REGR_INTERCEPT", + "TABLESAMPLE", + "IGNORE", + "NOT", + "SOME", + "FULL", + "GOTO", + "OF", + "STATISTICS", + "TIME", + "CLUSTERED", + "DATE", + "DAY", + "VAR_SAMP", + "DISCONNECT", + "OPENDATASOURCE", + "START", + "SQLERROR", + "UNKNOWN", 
+ "XMLTEXT", + "COMPUTE", + "HOST", + "ON", + "SYMMETRIC", + "DOMAIN", + "GRANT", + "NCHAR", + "FREETEXTTABLE", + "CURRENT_PATH", + "RETURNS", + "UPDATETEXT", + "YEAR", + "CONSTRAINT", + "READTEXT", + "THEN", + "CONTINUE", + "MEMBER", + "SESSION_USER", + "ROLLUP", + "TERMINATE", + "UPDATE", + "XMLCONCAT", + "DEREF", + "IMMEDIATE", + "REGR_AVGX", + "CONSTRAINTS", + "PRIVILEGES", + "FALSE", + "LOAD", + "RELATIVE", + "CONDITION", + "OPEN", + "TRAN", + "ALIAS", + "CONNECTION", + "WHENEVER", + "SQLEXCEPTION", + "COUNT", + "NOCHECK", + "PERCENTILE_DISC", + "UESCAPE", + "DENY", + "FETCH", + "TRIM", + "CATALOG", + "GO", + "STATIC", + "STATEMENT", + "USER", + "VIEW", + "WITHIN", + "AUTHORIZATION", + "FLOAT", + "RESULT", + "TEXTSIZE", + "USING", + "WHEN", + "MINUTE", + "REAL", + "STRUCTURE", + "PARAMETER", + "XMLPI", + "BULK", + "CREATE", + "NUMERIC" + ], + "BQ": [ + "ALL", + "FROM", + "NATURAL", + "THEN", + "UNBOUNDED", + "USING", + "ARRAY", + "CURRENT", + "IS", + "NULLS", + "TO", + "TREAT", + "RANGE", + "BY", + "CONTAINS", + "END", + "GROUP", + "GROUPING", + "HAVING", + "CAST", + "FETCH", + "FULL", + "INTERSECT", + "PARTITION", + "ENUM", + "FOR", + "LATERAL", + "RECURSIVE", + "CASE", + "NULL", + "UNION", + "INNER", + "NO", + "CREATE", + "CROSS", + "CUBE", + "EXCLUDE", + "GROUPS", + "IN", + "WINDOW", + "ANY", + "DESC", + "ESCAPE", + "JOIN", + "SOME", + "IF", + "LOOKUP", + "DEFINE", + "DISTINCT", + "EXCEPT", + "EXISTS", + "FOLLOWING", + "HASH", + "ROLLUP", + "ROWS", + "WHERE", + "ELSE", + "BETWEEN", + "NEW", + "OUTER", + "OVER", + "RIGHT", + "WITH", + "FALSE", + "IGNORE", + "LEFT", + "RESPECT", + "SET", + "EXTRACT", + "OF", + "OR", + "PRECEDING", + "TABLESAMPLE", + "UNNEST", + "AND", + "ASSERT_ROWS_MODIFIED", + "NOT", + "TRUE", + "ORDER", + "PROTO", + "AT", + "COLLATE", + "INTERVAL", + "INTO", + "LIKE", + "ON", + "STRUCT", + "WITHIN", + "WHEN", + "AS", + "ASC", + "DEFAULT", + "LIMIT", + "MERGE", + "SELECT" + ], + "CLICKHOUSE": [], + "DELTALAKE": [ + "INTERVAL", + "NO",
+ "TO", + "USER", + "CREATE", + "OUTER", + "FALSE", + "FETCH", + "WHEN", + "WITH", + "START", + "CUBE", + "CURRENT", + "GROUP", + "LATERAL", + "PRIMARY", + "ROW", + "DESCRIBE", + "DISTINCT", + "EVENT_DATE", + "NATURAL", + "OR", + "THEN", + "AT", + "CURRENT_DATE", + "INTERSECT", + "ROLLBACK", + "UNION", + "CASE", + "FROM", + "INNER", + "ORDER", + "ROWS", + "OF", + "REVOKE", + "SET", + "AS", + "END", + "BOTH", + "CURRENT_TIME", + "UPDATE", + "USING", + "GLOBAL", + "VALUES", + "EXCEPT", + "OVERLAPS", + "TRUNCATE", + "BETWEEN", + "FOREIGN", + "JOIN", + "ONLY", + "ANY", + "COLUMN", + "ELSE", + "FULL", + "GROUPING", + "POSITION", + "SELECT", + "ANTI", + "UNIQUE", + "EXTRACT", + "FUNCTION", + "UNKNOWN", + "ALL", + "AUTHORIZATION", + "LOCAL", + "OUT", + "ROLLUP", + "TABLESAMPLE", + "EXISTS", + "IS", + "SESSION_USER", + "TRUE", + "ESCAPE", + "EXTERNAL", + "MINUS", + "NOT", + "REFERENCES", + "CONSTRAINT", + "FILTER", + "INTO", + "ALTER", + "DELETE", + "LEADING", + "NULL", + "RANGE", + "TIME", + "AND", + "DROP", + "HAVING", + "IN", + "CURRENT_USER", + "CROSS", + "ON", + "WHERE", + "COMMIT", + "LEFT", + "PARTITION", + "TRAILING", + "SOME", + "TABLE", + "BY", + "CAST", + "GRANT", + "INSERT", + "LIKE", + "RIGHT", + "ARRAY", + "CHECK", + "COLLATE", + "FOR", + "SEMI", + "WINDOW", + "CURRENT_TIMESTAMP" + ], + "GCS_DATALAKE": [ + "WHEN", + "DEFINE", + "DISTINCT", + "FROM", + "STRUCT", + "UNION", + "TRUE", + "USING", + "DEFAULT", + "HASH", + "NEW", + "ORDER", + "PRECEDING", + "ROWS", + "CURRENT", + "EXISTS", + "JOIN", + "LATERAL", + "RECURSIVE", + "INTO", + "IS", + "OR", + "PROTO", + "NATURAL", + "OUTER", + "UNBOUNDED", + "ASSERT_ROWS_MODIFIED", + "CASE", + "COLLATE", + "ENUM", + "EXTRACT", + "WHERE", + "AND", + "CAST", + "MERGE", + "GROUPS", + "INNER", + "NOT", + "OF", + "ROLLUP", + "FOLLOWING", + "FOR", + "LOOKUP", + "ARRAY", + "BY", + "CREATE", + "CROSS", + "END", + "NULL", + "ASC", + "IGNORE", + "UNNEST", + "WITH", + "AT", + "GROUP", + "INTERSECT", + "RIGHT", +
"THEN", + "PARTITION", + "TABLESAMPLE", + "WINDOW", + "CUBE", + "FETCH", + "INTERVAL", + "LIKE", + "OVER", + "SOME", + "TO", + "TREAT", + "CONTAINS", + "DESC", + "LEFT", + "RESPECT", + "SELECT", + "WITHIN", + "ALL", + "IF", + "LIMIT", + "AS", + "ELSE", + "RANGE", + "ON", + "SET", + "ESCAPE", + "EXCLUDE", + "FALSE", + "HAVING", + "IN", + "NO", + "NULLS", + "ANY", + "BETWEEN", + "EXCEPT", + "FULL", + "GROUPING" + ], + "MSSQL": [ + "END-EXEC", + "LOCATOR", + "XMLSERIALIZE", + "CONTAINSTABLE", + "CURRENT_ROLE", + "LEADING", + "UESCAPE", + "LAST", + "CONNECT", + "REGR_AVGX", + "TRANSLATE_REGEX", + "WITHIN", + "XMLEXISTS", + "ASC", + "BIT_LENGTH", + "XMLCAST", + "XMLCOMMENT", + "XMLFOREST", + "DESCRIPTOR", + "FREETEXT", + "LARGE", + "CONSTRAINT", + "FOREIGN", + "INITIALLY", + "LINENO", + "SMALLINT", + "VALUE", + "DATA", + "SECTION", + "TRANSLATE", + "UNKNOWN", + "BIT", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "GENERAL", + "LOCALTIME", + "NATIONAL", + "UNDER", + "ADMIN", + "GRANT", + "LIMIT", + "LOCAL", + "MERGE", + "NEW", + "WORK", + "NCHAR", + "NULLIF", + "ROWGUIDCOL", + "SEMANTICSIMILARITYDETAILSTABLE", + "SUBSTRING", + "WRITETEXT", + "FILLFACTOR", + "IF", + "NOCHECK", + "PERCENTILE_DISC", + "RECURSIVE", + "TREAT", + "READTEXT", + "WRITE", + "NCLOB", + "RETURNS", + "XMLBINARY", + "DECLARE", + "FETCH", + "NULL", + "REVOKE", + "STATE", + "SYMMETRIC", + "CLUSTERED", + "COVAR_POP", + "CURSOR", + "IN", + "NOT", + "SIZE", + "POSITION_REGEX", + "ARRAY", + "DISTRIBUTED", + "DROP", + "OPENROWSET", + "PREPARE", + "SECOND", + "UNNEST", + "DISCONNECT", + "THEN", + "EXCEPTION", + "FREE", + "OVERLAY", + "SCROLL", + "ROW", + "CURRENT", + "IGNORE", + "LESS", + "MODIFIES", + "OPENQUERY", + "RANGE", + "DEC", + "OFFSETS", + "SQL", + "AFTER", + "CLASS", + "CHAR_LENGTH", + "MONTH", + "REFERENCES", + "REGR_COUNT", + "ROLLBACK", + "ANY", + "CURRENT_SCHEMA", + "RESTRICT", + "TIME", + "XMLQUERY", + "XMLVALIDATE", + "WHERE", + "ASSERTION", + "ELSE", + "NO", + "PARAMETERS", + "PREORDER", +
"USAGE", + "CURRENT_TIMESTAMP", + "DISK", + "SIMILAR", + "SYSTEM_USER", + "CHAR", + "CONNECTION", + "DELETE", + "KILL", + "XMLELEMENT", + "XMLTABLE", + "DISTINCT", + "EXCEPT", + "HOLDLOCK", + "OLD", + "PRESERVE", + "STATEMENT", + "CAST", + "COLLATION", + "COUNT", + "OCCURRENCES_REGEX", + "START", + "SUBMULTISET", + "SUM", + "CONDITION", + "DESC", + "LOWER", + "REGR_R2", + "SOME", + "SQLCODE", + "WINDOW", + "ACTION", + "INTERSECT", + "REGR_SXX", + "DAY", + "DESCRIBE", + "TERMINATE", + "WITHOUT", + "DEALLOCATE", + "DENY", + "REGR_SYY", + "USER", + "XMLPI", + "PUBLIC", + "TRY_CONVERT", + "ALTER", + "AS", + "OCTET_LENGTH", + "RESTORE", + "RULE", + "SPECIFICTYPE", + "CASE", + "EXTERNAL", + "KEY", + "XMLCONCAT", + "EQUALS", + "INPUT", + "LEVEL", + "ONLY", + "READ", + "SQLERROR", + "SEARCH", + "SENSITIVE", + "DEFERRED", + "INDICATOR", + "PAD", + "STATISTICS", + "XMLITERATE", + "EACH", + "LOCALTIMESTAMP", + "PREFIX", + "REFERENCING", + "ADA", + "COLLECT", + "DBCC", + "MAP", + "METHOD", + "OPENDATASOURCE", + "REF", + "ABSOLUTE", + "CUBE", + "DIAGNOSTICS", + "MULTISET", + "RETURN", + "WHEN", + "GOTO", + "BEFORE", + "CHARACTER_L ENGTH", + "OR", + "PLAN", + "SPECIFIC", + "TO", + "LOAD", + "MAX", + "OPTION", + "SCOPE", + "STDDEV_SAMP", + "XMLDOCUMENT", + "CARDINALITY", + "TIMEZONE_MINUTE", + "TRIGGER", + "VIEW", + "IDENTITY", + "STDDEV_POP", + "ADD", + "ALLOCATE", + "DOMAIN", + "RESULT", + "ROWCOUNT", + "TSEQUAL", + "TABLE", + "XMLTEXT", + "ALIAS", + "ASENSITIVE", + "CURRENT_TIME", + "FILE", + "PRINT", + "ROLLUP", + "THAN", + "TRIM", + "IDENTITY_INSERT", + "ON", + "SESSION", + "UNIQUE", + "YEAR", + "VARCHAR", + "CHECKPOINT", + "DETERMINISTIC", + "EVERY", + "MATCH", + "NUMERIC", + "PARAMETER", + "DYNAMIC", + "HAVING", + "INITIALIZE", + "REGR_AVGY", + "CURRENT_DATE", + "EXISTS", + "PERCENTILE_CONT", + "ARE", + "END", + "GROUPING", + "SETUSER", + "SQLWARNING", + "BREAK", + "DECIMAL", + "LANGUAGE", + "PARTITION", + "SEQUENCE", + "SETS", + "TRANSACTION", + "TRUE", + "XMLAGG", +
"BOOLEAN", + "BY", + "CLOSE", + "CONTAINS", + "INNER", + "VALUES", + "OPENXML", + "PRIOR", + "TABLESAMPLE", + "TRANSLATION", + "BETWEEN", + "CUME_DIST", + "ELEMENT", + "EXEC", + "HOST", + "LATERAL", + "USING", + "CONTINUE", + "INDEX", + "NONE", + "SELECT", + "REGR_SLOPE", + "WHILE", + "AND", + "DICTIONARY", + "MIN", + "PERCENT_RANK", + "UPDATE", + "UPDATETEXT", + "TRUNCATE", + "CASCADE", + "CONVERT", + "DEFERRABLE", + "FILTER", + "FLOAT", + "NAMES", + "VAR_POP", + "DESTRUCTOR", + "ISOLATION", + "OFF", + "OUTPUT", + "VAR_SAMP", + "CONSTRAINTS", + "SQLSTATE", + "CHARACTER", + "OUTER", + "TIMEZONE_HOUR", + "CALLED", + "CYCLE", + "INSENSITIVE", + "TEXTSIZE", + "ATOMIC", + "COMPLETION", + "PROCEDURE", + "REGR_INTERCEPT", + "BREADTH", + "FULLTEXTTABLE", + "DATABASE", + "DEREF", + "WITH", + "SPACE", + "VARIABLE", + "COALESCE", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "ESCAPE", + "FORTRAN", + "OVERLAPS", + "BINARY", + "CATALOG", + "CREATE", + "UNION", + "WAITFOR", + "LN", + "SCHEMA", + "SYSTEM", + "INTERSECTION", + "ROWS", + "CALL", + "UPPER", + "UNPIVOT", + "BLOB", + "FALSE", + "GET", + "INSERT", + "LEFT", + "PIVOT", + "TEMPORARY", + "USE", + "PRIMARY", + "SEMANTICKEYPHRASETABLE", + "MEMBER", + "COVAR_SAMP", + "CURRENT_PATH", + "SECURITYAUDIT", + "BOTH", + "OF", + "WIDTH_BUCKET", + "BROWSE", + "CORR", + "PRIVILEGES", + "READS", + "CURRENT_CATALOG", + "SHUTDOWN", + "FOR", + "FUSION", + "IDENTITYCOL", + "MODIFY", + "ALL", + "XMLPARSE", + "COMMIT", + "RIGHT", + "XMLATTRIBUTES", + "XMLNAMESPACES", + "TRAILING", + "TRAN", + "BULK", + "INTEGER", + "RAISERROR", + "FOUND", + "INCLUDE", + "VARYING", + "DATE", + "FROM", + "JOIN", + "STRUCTURE", + "AVG", + "DEFAULT", + "GO", + "NATURAL", + "ORDINALITY", + "SAVEPOINT", + "EXIT", + "GROUP", + "WHENEVER", + "AGGREGATE", + "DOUBLE", + "EXECUTE", + "POSTFIX", + "CURRENT_USER", + "DESTROY", + "FIRST", + "ROUTINE", + "TOP", + "REAL", + "MODULE", + "ASYMMETRIC", + "CROSS", + "FREETEXTTABLE", + "PERCENT", + "INT", + "OVER", + "PROC", + 
"SUBSTRING_REGEX", + "WITHINGROUP", + "FULL", + "FUNCTION", + "NORMALIZE", + "RELEASE", + "REPLICATION", + "SAVE", + "HOUR", + "LIKE_REGEX", + "NONCLUSTERED", + "SQLEXCEPTION", + "ZONE", + "AT", + "COLUMN", + "EXTRACT", + "MINUTE", + "MOD", + "CASCADED", + "COMPUTE", + "GLOBAL", + "OPEN", + "ORDER", + "PARTIAL", + "PRECISION", + "SQLCA", + "AUTHORIZATION", + "DEPTH", + "INOUT", + "INTO", + "OBJECT", + "OUT", + "STATIC", + "ITERATE", + "REGR_SXY", + "SET", + "CLOB", + "LIKE", + "DUMP", + "ERRLVL", + "POSITION", + "RECONFIGURE", + "BEGIN", + "CHECK", + "INTERVAL", + "NEXT", + "RELATIVE", + "BACKUP", + "CORRESPONDING", + "HOLD", + "IMMEDIATE", + "SEMANTICSIMILARITYTABLE", + "PASCAL", + "SESSION_USER", + "COLLATE", + "IS", + "OPERATION", + "REVERT", + "ROLE" + ], + "POSTGRES": [ + "USING", + "BINARY", + "DESC", + "EXCEPT", + "FOREIGN", + "NULL", + "REFERENCES", + "SYMMETRIC", + "struct{}{}", + "ANY", + "CROSS", + "DEFAULT", + "DISTINCT", + "FALSE", + "OLD", + "SESSION_USER", + "TABLE", + "CHECK", + "CURRENT_TIME", + "FOR", + "FREEZE", + "LIMIT", + "LOCALTIME", + "NOT", + "ARRAY", + "COLLATE", + "OFF", + "OFFSET", + "BETWEEN", + "CONSTRAINT", + "CURRENT_TIMESTAMP", + "SIMILAR", + "UNION", + "LEADING", + "LIKE", + "ON", + "UNIQUE", + "AUTHORIZATION", + "CURRENT_DATE", + "ILIKE", + "INNER", + "VERBOSE", + "CURRENT_USER", + "IN", + "NOTNULL", + "ORDER", + "THEN", + "TRAILING", + "WHEN", + "LOCALTIMESTAMP", + "NEW", + "OUTER", + "JOIN", + "ASC", + "DO", + "FROM", + "HAVING", + "INTERSECT", + "INTO", + "ELSE", + "ALL", + "ANALYSE", + "AS", + "ASYMMETRIC", + "CAST", + "CREATE", + "DEFERRABLE", + "FULL", + "ISNULL", + "OR", + "PRIMARY", + "RIGHT", + "END", + "GRANT", + "OVERLAPS", + "PLACING", + "USER", + "WHERE", + "ANALYZE", + "BOTH", + "GROUP", + "INITIALLY", + "IS", + "NATURAL", + "SOME", + "AND", + "CASE", + "COLUMN", + "CURRENT_ROLE", + "LEFT", + "ONLY", + "SELECT" + ], + "RS": [ + "TAG", + "AES128", + "OFFSET", + "OLD", + "PLACING", + "THEN", + "struct{}{}", + "WALLET", 
+ "CREATE", + "DEFAULT", + "DEFRAG", + "LUNS", + "TEXT255", + "LOCALTIME", + "LZO", + "RESPECT", + "WHERE", + "ASC", + "DEFERRABLE", + "DELTA32K", + "AND", + "CURRENT_TIMESTAMP", + "OPEN", + "WITH", + "ALL", + "ENABLE", + "EXCEPT", + "NATURAL", + "REJECTLOG", + "CONSTRAINT", + "CREDENTIALS", + "MOSTLY8", + "PERCENT", + "REFERENCES", + "TO", + "USING", + "BLANKSASNULL", + "INTO", + "LANGUAGE", + "ANY", + "READRATIO", + "BACKUP", + "GZIP", + "UNION", + "CURRENT_USER", + "BETWEEN", + "BOTH", + "BZIP2", + "JOIN", + "TOP", + "TABLE", + "USER", + "COLLATE", + "NOT", + "ONLY", + "NEW", + "ORDER", + "SIMILAR", + "COLUMN", + "CROSS", + "IGNORE", + "WHEN", + "GLOBALDICT64K", + "GRANT", + "RIGHT", + "ON", + "SYSTEM", + "ELSE", + "ISNULL", + "MINUS", + "ILIKE", + "MOSTLY13", + "RESTORE", + "DO", + "FOR", + "FREEZE", + "INTERSECT", + "IS", + "UNIQUE", + "END", + "LEADING", + "OFF", + "RECOVER", + "AUTHORIZATION", + "BYTEDICT", + "CURRENT_TIME", + "ARRAY", + "DELTA", + "IN", + "PARALLEL", + "RAW", + "SELECT", + "TDES", + "CAST", + "EXPLICIT", + "OR", + "NULL", + "OVERLAPS", + "CHECK", + "EMPTYASNULL", + "GLOBALDICT256", + "INNER", + "RESORT", + "CASE", + "ENCODE", + "FALSE", + "SESSION_USER", + "TEXT32K", + "ALLOWOVERWRITE", + "AZ64", + "PERMISSIONS", + "LEFT", + "DESC", + "HAVING", + "INITIALLY", + "AES256", + "ENCRYPT ", + "LUN", + "SYSDATE", + "FROM", + "GROUP", + "PRIMARY", + "DEFLATE", + "FOREIGN", + "LOCALTIMESTAMP", + "NOTNULL", + "OUTER", + "AS", + "BINARY", + "CURRENT_DATE", + "TRUNCATECOLUMNS", + "VERBOSE", + "NULLS", + "SOME", + "ANALYSE", + "ANALYZE", + "FULL", + "OFFLINE", + "SNAPSHOT ", + "CURRENT_USER_ID", + "DISABLE", + "LIKE", + "LZOP", + "TRAILING", + "WITHOUT", + "ENCRYPTION", + "IDENTITY", + "LIMIT", + "PARTITION", + "TIMESTAMP", + "DISTINCT", + "MOSTLY32", + "OID" + ], + "S3_DATALAKE": [ + "INTERSECT", + "LOCAL", + "SET", + "TIMESTAMP", + "struct{}{}", + "CONSTRAINT", + "CREATE", + "NUMERIC", + "EXCHANGE", + "LEFT", + "COMMIT", + "CURSOR", + "DESCRIBE", + 
"LATERAL", + "RANGE", + "ORDER", + "PERCENT", + "BOTH", + "EXTRACT", + "FETCH", + "FOLLOWING", + "GROUP", + "NONE", + "TRIGGER", + "TRUNCATE", + "WHEN", + "BY", + "COLUMN", + "EXISTS", + "MACRO", + "THEN", + "CASE", + "CHAR", + "DECIMAL", + "PRIMARY", + "RLIKE", + "REFERENCES", + "RIGHT", + "TABLE", + "VIEWS", + "EXTENDED", + "OVER", + "TO", + "PARTITION", + "UNBOUNDED", + "USING", + "INT", + "NOT", + "SMALLINT", + "ROW", + "ROWS", + "CURRENT_TIMESTAMP", + "DOUBLE", + "FOR", + "IMPORT", + "LESS", + "OR", + "ARRAY", + "DISTINCT", + "ONLY", + "CONF", + "HAVING", + "JOIN", + "OF", + "OUT", + "SELECT", + "CAST", + "TABLESAMPLE", + "GROUPING", + "INNER", + "END", + "PRECEDING", + "OUTER", + "INTERVAL", + "REDUCE", + "ROLLBACK", + "UNION", + "WINDOW", + "FUNCTION", + "ON", + "START", + "BETWEEN", + "DATABASE", + "DAYOFWEEK", + "FOREIGN", + "GRANT", + "MORE", + "CURRENT_DATE", + "READS", + "USER", + "WHERE", + "ALL", + "FALSE", + "TRANSFORM", + "VARCHAR", + "WITH", + "ELSE", + "INSERT", + "INTEGER", + "INTO", + "IS", + "DATE", + "NULL", + "REGEXP", + "REVOKE", + "AUTHORIZATION", + "BIGINT", + "BOOLEAN", + "FLOOR", + "MAP", + "CUBE", + "FLOAT", + "FULL", + "ALTER", + "CURRENT", + "PARTIALSCAN", + "ROLLUP", + "UNIQUEJOIN", + "UTC_TIMESTAMP", + "CASHE", + "DELETE", + "EXTERNAL", + "LIKE", + "BINARY", + "CROSS", + "FROM", + "PRECISION", + "PRESERVE", + "TIME", + "UPDATE", + "AND", + "AS", + "DROP", + "IF", + "IN", + "PROCEDURE", + "VALUES" + ], + "SNOWFLAKE": [ + "CHECK", + "DATABASE", + "ORGANIZATION", + "SELECT", + "UPDATE", + "WHERE", + "AS", + "CONSTRAINT", + "CROSS", + "ELSE", + "IN", + "LOCALTIMESTAMP", + "OF", + "ON", + "ALTER", + "ROWS", + "WHEN", + "RIGHT", + "LEFT", + "NATURAL", + "GSCLUSTER", + "COLUMN", + "CURRENT", + "ROW", + "TRIGGER", + "TRY_CAST", + "WHENEVER", + "ACCOUNT", + "LOCALTIME", + "ORDER", + "GRANT", + "HAVING", + "INCREMENT", + "MINUS", + "OR", + "RLIKE", + "SAMPLE", + "VALUES", + "FULL", + "VIEW", + "DELETE", + "DROP", + "ISSUE", + "ANY", + 
"DISTINCT", + "EXISTS", + "INTO", + "NOT", + "NULL", + "TO", + "CURRENT_DATE", + "FOR", + "SCHEMA", + "TABLESAMPLE", + "struct{}{}", + "ALL", + "CURRENT_TIMESTAMP", + "FROM", + "LIKE", + "QUALIFY", + "CONNECTION", + "CASE", + "CURRENT_USER", + "INNER", + "LATERAL", + "THEN", + "UNION", + "AND", + "FALSE", + "USING", + "CREATE", + "BY", + "CONNECT", + "CURRENT_TIME", + "FOLLOWING", + "ILIKE", + "INSERT", + "JOIN", + "BETWEEN", + "START", + "TABLE", + "SOME", + "GROUP", + "INTERSECT", + "REGEXP", + "CAST", + "REVOKE", + "SET", + "UNIQUE", + "WITH", + "IS" + ], + "SNOWPIPE_STREAMING": [ + "CHECK", + "DATABASE", + "ORGANIZATION", + "SELECT", + "UPDATE", + "WHERE", + "AS", + "CONSTRAINT", + "CROSS", + "ELSE", + "IN", + "LOCALTIMESTAMP", + "OF", + "ON", + "ALTER", + "ROWS", + "WHEN", + "RIGHT", + "LEFT", + "NATURAL", + "GSCLUSTER", + "COLUMN", + "CURRENT", + "ROW", + "TRIGGER", + "TRY_CAST", + "WHENEVER", + "ACCOUNT", + "LOCALTIME", + "ORDER", + "GRANT", + "HAVING", + "INCREMENT", + "MINUS", + "OR", + "RLIKE", + "SAMPLE", + "VALUES", + "FULL", + "VIEW", + "DELETE", + "DROP", + "ISSUE", + "ANY", + "DISTINCT", + "EXISTS", + "INTO", + "NOT", + "NULL", + "TO", + "CURRENT_DATE", + "FOR", + "SCHEMA", + "TABLESAMPLE", + "struct{}{}", + "ALL", + "CURRENT_TIMESTAMP", + "FROM", + "LIKE", + "QUALIFY", + "CONNECTION", + "CASE", + "CURRENT_USER", + "INNER", + "LATERAL", + "THEN", + "UNION", + "AND", + "FALSE", + "USING", + "CREATE", + "BY", + "CONNECT", + "CURRENT_TIME", + "FOLLOWING", + "ILIKE", + "INSERT", + "JOIN", + "BETWEEN", + "START", + "TABLE", + "SOME", + "GROUP", + "INTERSECT", + "REGEXP", + "CAST", + "REVOKE", + "SET", + "UNIQUE", + "WITH", + "IS" + ] +} diff --git a/warehouse/transformer/internal/reservedkeywords/reservedtablescolumns.json b/warehouse/transformer/internal/reservedkeywords/reservedtablescolumns.json new file mode 100644 index 0000000000..cd5b65cd83 --- /dev/null +++ b/warehouse/transformer/internal/reservedkeywords/reservedtablescolumns.json @@ -0,0 
+1,2334 @@ +{ + "AZURE_DATALAKE": [ + "NULL", + "PREORDER", + "BOTH", + "OVERLAY", + "REGR_SXX", + "ROW", + "START", + "ASSERTION", + "OLD", + "ON", + "PROC", + "RULE", + "ALTER", + "COMMIT", + "CONTINUE", + "NCLOB", + "EXCEPTION", + "HOST", + "READS", + "USE", + "VALUE", + "AFTER", + "EXTERNAL", + "FULL", + "PARAMETER", + "MATCH", + "SQLSTATE", + "TREAT", + "SQLCODE", + "YEAR", + "DEALLOCATE", + "ROLLBACK", + "STATISTICS", + "TABLE", + "DISTINCT", + "INNER", + "SENSITIVE", + "SEQUENCE", + "ABSOLUTE", + "CURRENT_TIMESTAMP", + "FOUND", + "ROLE", + "SMALLINT", + "VARCHAR", + "FROM", + "MONTH", + "OUTER", + "TIMEZONE_MINUTE", + "FILTER", + "GROUPING", + "NULLIF", + "OUT", + "PERCENTILE_CONT", + "WITHINGROUP", + "SELECT", + "XMLCONCAT", + "CONTAINS", + "EXECUTE", + "LATERAL", + "RETURNS", + "SAVE", + "SECURITYAUDIT", + "COVAR_POP", + "KILL", + "XMLITERATE", + "BROWSE", + "CORRESPONDING", + "TRY_CONVERT", + "CARDINALITY", + "DUMP", + "EXEC", + "DISCONNECT", + "NOCHECK", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "DOMAIN", + "ERRLVL", + "NEXT", + "DATE", + "LARGE", + "NORMALIZE", + "REGR_SYY", + "REVERT", + "TEMPORARY", + "PUBLIC", + "RELEASE", + "DROP", + "INITIALLY", + "CALL", + "CASCADE", + "TRAILING", + "CASCADED", + "HOUR", + "NATIONAL", + "SAVEPOINT", + "USER", + "INITIALIZE", + "NUMERIC", + "OPENROWSET", + "OPERATION", + "REGR_AVGY", + "ROWS", + "CONNECT", + "DEFERRED", + "IDENTITYCOL", + "VARYING", + "XMLCOMMENT", + "BULK", + "COMPUTE", + "FREE", + "FUSION", + "NONE", + "OBJECT", + "COLLECT", + "COLUMN", + "FORTRAN", + "CATALOG", + "DECIMAL", + "FLOAT", + "CONNECTION", + "CURSOR", + "MEMBER", + "TEXTSIZE", + "TRUE", + "CUME_DIST", + "PERCENTILE_DISC", + "CONDITION", + "DESC", + "FOREIGN", + "LEADING", + "LOCATOR", + "NO", + "DAY", + "MOD", + "PRECISION", + "REFERENCING", + "VAR_POP", + "ZONE", + "CHAR", + "INPUT", + "INTERSECTION", + "LOCALTIMESTAMP", + "NEW", + "SIZE", + "BY", + "INSERT", + "PREFIX", + "XMLCAST", + "DIAGNOSTICS", + "SYSTEM_USER", + "BIT", + 
"CROSS", + "ONLY", + "SYMMETRIC", + "DEREF", + "TRANSACTION", + "END-EXEC", + "MIN", + "UPPER", + "CURRENT_ROLE", + "SETS", + "TRANSLATE_REGEX", + "ELEMENT", + "LESS", + "PRINT", + "REGR_COUNT", + "ALLOCATE", + "CHAR_LENGTH", + "CYCLE", + "EXCEPT", + "NATURAL", + "OPTION", + "STRUCTURE", + "TRIGGER", + "DELETE", + "BINARY", + "DISK", + "SQLWARNING", + "CONSTRAINTS", + "FIRST", + "NONCLUSTERED", + "WHENEVER", + "CLUSTERED", + "INTEGER", + "SIMILAR", + "NAMES", + "TIME", + "TRIM", + "CLASS", + "EXIT", + "INDICATOR", + "MAP", + "ROLLUP", + "FREETEXTTABLE", + "MODIFY", + "ASC", + "CASE", + "GLOBAL", + "OPEN", + "REGR_SXY", + "ACTION", + "ASENSITIVE", + "LOWER", + "REGR_R2", + "WRITE", + "FALSE", + "REGR_AVGX", + "SCROLL", + "WHEN", + "POSTFIX", + "TRUNCATE", + "BREADTH", + "CURRENT_DATE", + "PRIOR", + "SEMANTICSIMILARITYDETAILSTABLE", + "IDENTITY_INSERT", + "CORR", + "FOR", + "OF", + "ALL", + "CLOB", + "MODIFIES", + "TRANSLATION", + "EACH", + "ESCAPE", + "PAD", + "REFERENCES", + "SPECIFIC", + "SUM", + "WRITETEXT", + "XMLPI", + "DISTRIBUTED", + "GROUP", + "IMMEDIATE", + "INCLUDE", + "POSITION", + "SUBSTRING", + "ASYMMETRIC", + "BEGIN", + "OCCURRENCES_REGEX", + "RESTRICT", + "SPECIFICTYPE", + "DBCC", + "DICTIONARY", + "KEY", + "NCHAR", + "RESULT", + "AS", + "COLLATE", + "CURRENT", + "OR", + "REGR_SLOPE", + "EXTRACT", + "ORDER", + "SEMANTICSIMILARITYTABLE", + "TSEQUAL", + "XMLDOCUMENT", + "LIMIT", + "TERMINATE", + "ADA", + "CLOSE", + "EQUALS", + "OFF", + "VARIABLE", + "END", + "PASCAL", + "TO", + "VAR_SAMP", + "ADD", + "COALESCE", + "RESTORE", + "USING", + "VIEW", + "ANY", + "CHARACTER", + "IDENTITY", + "ISOLATION", + "STATE", + "FILE", + "GOTO", + "MULTISET", + "WITH", + "AVG", + "STATIC", + "XMLQUERY", + "ARRAY", + "CURRENT_PATH", + "PARTIAL", + "VALUES", + "XMLAGG", + "WHILE", + "ARE", + "BETWEEN", + "CUBE", + "CURRENT_TIME", + "PREPARE", + "READ", + "EXISTS", + "GENERAL", + "JOIN", + "MAX", + "OPENXML", + "UNDER", + "SCHEMA", + "AT", + "FREETEXT", + "LOCAL", + 
"XMLVALIDATE", + "DYNAMIC", + "DEPTH", + "OPENDATASOURCE", + "POSITION_REGEX", + "SQLEXCEPTION", + "AND", + "OVER", + "OVERLAPS", + "PLAN", + "THEN", + "XMLEXISTS", + "CURRENT_SCHEMA", + "HAVING", + "RELATIVE", + "RIGHT", + "SQLCA", + "SYSTEM", + "CONVERT", + "PRIMARY", + "REF", + "SCOPE", + "UPDATE", + "BOOLEAN", + "DATABASE", + "XMLFOREST", + "EVERY", + "LEVEL", + "NOT", + "ROUTINE", + "GO", + "LOAD", + "SECTION", + "USAGE", + "CHECKPOINT", + "DECLARE", + "PRIVILEGES", + "INOUT", + "METHOD", + "XMLNAMESPACES", + "TOP", + "COVAR_SAMP", + "IF", + "LINENO", + "PRESERVE", + "PROCEDURE", + "STATEMENT", + "ATOMIC", + "BIT_LENGTH", + "HOLD", + "AUTHORIZATION", + "COLLATION", + "DETERMINISTIC", + "READTEXT", + "CONSTRAINT", + "LANGUAGE", + "THAN", + "WITHIN", + "CONTAINSTABLE", + "XMLSERIALIZE", + "FULLTEXTTABLE", + "INDEX", + "MINUTE", + "DENY", + "FUNCTION", + "RECONFIGURE", + "XMLATTRIBUTES", + "INTO", + "RETURN", + "SQLERROR", + "SUBMULTISET", + "XMLBINARY", + "CURRENT_CATALOG", + "LOCALTIME", + "STDDEV_SAMP", + "AGGREGATE", + "DESTRUCTOR", + "OCTET_LENGTH", + "PERCENT_RANK", + "SESSION", + "UPDATETEXT", + "DESCRIBE", + "RAISERROR", + "RANGE", + "ROWGUIDCOL", + "TRAN", + "CAST", + "DEC", + "DEFERRABLE", + "FETCH", + "OUTPUT", + "WIDTH_BUCKET", + "WITHOUT", + "BEFORE", + "ORDINALITY", + "SESSION_USER", + "WORK", + "CURRENT_USER", + "SQL", + "SUBSTRING_REGEX", + "XMLPARSE", + "DATA", + "INTERVAL", + "MODULE", + "SHUTDOWN", + "TIMEZONE_HOUR", + "UNIQUE", + "ADMIN", + "ALIAS", + "CALLED", + "CREATE", + "LAST", + "RECURSIVE", + "UNPIVOT", + "COUNT", + "INSENSITIVE", + "SECOND", + "SOME", + "STDDEV_POP", + "UNION", + "FILLFACTOR", + "PERCENT", + "REVOKE", + "BLOB", + "BREAK", + "DEFAULT", + "IN", + "LEFT", + "REPLICATION", + "DESCRIPTOR", + "ELSE", + "OPENQUERY", + "SET", + "GRANT", + "PARAMETERS", + "UESCAPE", + "WAITFOR", + "UNNEST", + "HOLDLOCK", + "LN", + "WINDOW", + "DOUBLE", + "LIKE", + "REAL", + "SEMANTICKEYPHRASETABLE", + "XMLELEMENT", + "IGNORE", + "ROWCOUNT", + 
"TRANSLATE", + "UNKNOWN", + "XMLTABLE", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "OFFSETS", + "SETUSER", + "COMPLETION", + "XMLTEXT", + "CHARACTER_LENGTH", + "GET", + "INT", + "MERGE", + "REGR_INTERCEPT", + "DESTROY", + "PARTITION", + "TABLESAMPLE", + "WHERE", + "INTERSECT", + "ITERATE", + "PIVOT", + "SEARCH", + "SPACE", + "BACKUP", + "CHECK", + "IS", + "LIKE_REGEX" + ], + "AZURE_SYNAPSE": [ + "BEGIN", + "LEVEL", + "LOCAL", + "PRINT", + "TRUNCATE", + "LINENO", + "NEW", + "OLD", + "RELEASE", + "ROW", + "CASCADE", + "FOR", + "INTERVAL", + "WRITETEXT", + "FILTER", + "SAVEPOINT", + "UNION", + "INTEGER", + "MIN", + "XMLBINARY", + "DEREF", + "RELATIVE", + "SQLWARNING", + "TEXTSIZE", + "CURRENT_TIMESTAMP", + "PROC", + "SPACE", + "START", + "REGR_R2", + "COLLECT", + "CROSS", + "FULLTEXTTABLE", + "OCTET_LENGTH", + "PRECISION", + "REPLICATION", + "ROWCOUNT", + "THEN", + "CURRENT_ROLE", + "INITIALIZE", + "NCLOB", + "PIVOT", + "CREATE", + "NULL", + "SIZE", + "SMALLINT", + "SPECIFICTYPE", + "SQLERROR", + "XMLTABLE", + "CORRESPONDING", + "DICTIONARY", + "MEMBER", + "ON", + "WITHINGROUP", + "AT", + "LEFT", + "PREFIX", + "VAR_SAMP", + "XMLCOMMENT", + "DENY", + "EXTERNAL", + "INTO", + "PREPARE", + "SQL", + "DESCRIPTOR", + "WHERE", + "XMLFOREST", + "XMLPI", + "COMMIT", + "FILE", + "LOAD", + "CONTAINSTABLE", + "CUBE", + "REF", + "VARCHAR", + "XMLELEMENT", + "DATE", + "LOWER", + "SYMMETRIC", + "VAR_POP", + "XMLAGG", + "INPUT", + "DECIMAL", + "DEPTH", + "GLOBAL", + "STRUCTURE", + "FIRST", + "STDDEV_SAMP", + "ADMIN", + "BOOLEAN", + "ELEMENT", + "HOUR", + "AFTER", + "BIT_LENGTH", + "PARTIAL", + "PERCENTILE_CONT", + "REGR_AVGX", + "ALLOCATE", + "HOLDLOCK", + "REGR_AVGY", + "CUME_DIST", + "INCLUDE", + "CONNECTION", + "OVERLAPS", + "DYNAMIC", + "RULE", + "TRAILING", + "COLUMN", + "LIMIT", + "PARTITION", + "SETS", + "VARYING", + "BEFORE", + "BY", + "IDENTITYCOL", + "NCHAR", + "OBJECT", + "ADD", + "RETURNS", + "SELECT", + "EQUALS", + "XMLDOCUMENT", + "CALL", + "EXECUTE", + "ORDER", + "VALUES", 
+ "ADA", + "ROLLBACK", + "DISK", + "DISTRIBUTED", + "PERCENTILE_DISC", + "STATIC", + "UNNEST", + "XMLCAST", + "CONVERT", + "FOREIGN", + "HAVING", + "MERGE", + "STATISTICS", + "XMLEXISTS", + "ALIAS", + "MODIFY", + "RESTRICT", + "TRUE", + "OPENXML", + "BETWEEN", + "CHARACTER", + "CONNECT", + "LARGE", + "LATERAL", + "KILL", + "POSTFIX", + "READS", + "SECTION", + "CURSOR", + "IS", + "TRY_CONVERT", + "WHEN", + "COLLATE", + "LANGUAGE", + "SEMANTICSIMILARITYTABLE", + "UNIQUE", + "CONDITION", + "IN", + "REVERT", + "XMLCONCAT", + "AND", + "AS", + "CALLED", + "DOUBLE", + "HOLD", + "XMLSERIALIZE", + "AUTHORIZATION", + "CHAR", + "ELSE", + "END-EXEC", + "LOCALTIMESTAMP", + "BREAK", + "DIAGNOSTICS", + "SAVE", + "SESSION", + "DATA", + "PERCENT_RANK", + "USING", + "ROLE", + "ABSOLUTE", + "ANY", + "INSENSITIVE", + "ISOLATION", + "MATCH", + "ACTION", + "LOCATOR", + "REGR_COUNT", + "SEARCH", + "BOTH", + "NULLIF", + "REFERENCING", + "TABLESAMPLE", + "FILLFACTOR", + "OVER", + "COMPUTE", + "DROP", + "LEADING", + "NUMERIC", + "REGR_SXY", + "BIT", + "CLUSTERED", + "TOP", + "WITH", + "NATIONAL", + "NONCLUSTERED", + "SQLEXCEPTION", + "COLLATION", + "DOMAIN", + "ESCAPE", + "GROUPING", + "LIKE", + "TABLE", + "CLASS", + "INOUT", + "PARAMETER", + "SUM", + "WIDTH_BUCKET", + "WITHOUT", + "XMLITERATE", + "COMPLETION", + "GO", + "GRANT", + "NORMALIZE", + "REVOKE", + "DISCONNECT", + "NO", + "REAL", + "SQLSTATE", + "THAN", + "UNPIVOT", + "CARDINALITY", + "CYCLE", + "OPENQUERY", + "OUTER", + "SETUSER", + "UPDATETEXT", + "AGGREGATE", + "CONTINUE", + "POSITION", + "RANGE", + "SET", + "GROUP", + "LAST", + "PERCENT", + "ASC", + "EACH", + "FALSE", + "INSERT", + "OPTION", + "INDEX", + "MINUTE", + "OFFSETS", + "OUTPUT", + "WINDOW", + "OCCURRENCES_REGEX", + "TRIGGER", + "USE", + "EXISTS", + "RESTORE", + "FUSION", + "PARAMETERS", + "PLAN", + "FORTRAN", + "FUNCTION", + "CASCADED", + "COUNT", + "PASCAL", + "EXEC", + "XMLVALIDATE", + "CATALOG", + "RETURN", + "ROLLUP", + "SIMILAR", + "WORK", + "DECLARE", + "PAD", 
+ "PRIMARY", + "REGR_SXX", + "REGR_SYY", + "SENSITIVE", + "TIMEZONE_MINUTE", + "TRANSACTION", + "VIEW", + "XMLPARSE", + "DETERMINISTIC", + "CHAR_LENGTH", + "IGNORE", + "JOIN", + "REGR_SLOPE", + "DEALLOCATE", + "FETCH", + "OR", + "RECURSIVE", + "CURRENT_CATALOG", + "READTEXT", + "UNDER", + "WHILE", + "INDICATOR", + "INT", + "MONTH", + "XMLQUERY", + "XMLTEXT", + "CAST", + "COVAR_SAMP", + "RIGHT", + "SECURITYAUDIT", + "UPPER", + "ASYMMETRIC", + "CURRENT_PATH", + "LOCALTIME", + "NEXT", + "REGR_INTERCEPT", + "TRIM", + "USER", + "CONSTRAINTS", + "DEFERRED", + "DESTROY", + "OPENROWSET", + "SCHEMA", + "CONSTRAINT", + "DESC", + "ONLY", + "SYSTEM_USER", + "TIME", + "DEC", + "DESTRUCTOR", + "RESULT", + "DAY", + "IMMEDIATE", + "OPERATION", + "PROCEDURE", + "SQLCA", + "UNKNOWN", + "WRITE", + "GENERAL", + "GET", + "METHOD", + "ORDINALITY", + "PRESERVE", + "CLOB", + "OPENDATASOURCE", + "WAITFOR", + "OUT", + "TEMPORARY", + "TRANSLATE", + "COVAR_POP", + "BLOB", + "OPEN", + "POSITION_REGEX", + "TRAN", + "YEAR", + "BULK", + "CASE", + "HOST", + "SECOND", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "NAMES", + "NOCHECK", + "OFF", + "ROWS", + "WHENEVER", + "BACKUP", + "IDENTITY", + "SEMANTICKEYPHRASETABLE", + "SESSION_USER", + "STATEMENT", + "STDDEV_POP", + "ALL", + "CHECK", + "CURRENT", + "DBCC", + "FROM", + "SYSTEM", + "ASSERTION", + "DUMP", + "LN", + "CHARACTER_LENGTH", + "EXCEPT", + "INITIALLY", + "NOT", + "ROWGUIDCOL", + "OF", + "ZONE", + "SUBSTRING_REGEX", + "DEFAULT", + "DEFERRABLE", + "EVERY", + "MODIFIES", + "SEQUENCE", + "ATOMIC", + "ROUTINE", + "ASENSITIVE", + "BROWSE", + "IDENTITY_INSERT", + "TREAT", + "ARRAY", + "FOUND", + "GOTO", + "MULTISET", + "SHUTDOWN", + "TO", + "CLOSE", + "ERRLVL", + "IF", + "XMLNAMESPACES", + "CHECKPOINT", + "CURRENT_USER", + "RECONFIGURE", + "TSEQUAL", + "SOME", + "AVG", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "FREE", + "ITERATE", + "MOD", + "CORR", + "CURRENT_DATE", + "INNER", + "SCOPE", + "TIMEZONE_HOUR", + "EXIT", + "NATURAL", + "KEY", + "UESCAPE", + 
"VARIABLE", + "XMLATTRIBUTES", + "BINARY", + "INTERSECT", + "PRIVILEGES", + "RAISERROR", + "TRANSLATION", + "END", + "FLOAT", + "FREETEXT", + "SQLCODE", + "DATABASE", + "TRANSLATE_REGEX", + "VALUE", + "SCROLL", + "SEMANTICSIMILARITYDETAILSTABLE", + "SPECIFIC", + "TERMINATE", + "ALTER", + "DISTINCT", + "MODULE", + "PRIOR", + "PUBLIC", + "CURRENT_SCHEMA", + "CURRENT_TIME", + "DESCRIBE", + "FREETEXTTABLE", + "LIKE_REGEX", + "COALESCE", + "MAP", + "READ", + "UPDATE", + "WITHIN", + "DELETE", + "REFERENCES", + "PREORDER", + "STATE", + "USAGE", + "CONTAINS", + "FULL", + "INTERSECTION", + "NONE", + "SUBSTRING", + "ARE", + "BREADTH", + "EXCEPTION", + "EXTRACT", + "LESS", + "MAX", + "OVERLAY", + "SUBMULTISET" + ], + "BQ": [ + "CUBE", + "RANGE", + "WHEN", + "RIGHT", + "SET", + "TREAT", + "ASC", + "CONTAINS", + "CREATE", + "ELSE", + "FETCH", + "FROM", + "HASH", + "LIKE", + "WINDOW", + "ROWS", + "TABLESAMPLE", + "TO", + "CURRENT", + "DEFINE", + "LATERAL", + "OF", + "ORDER", + "ESCAPE", + "TRUE", + "USING", + "ARRAY", + "BY", + "CROSS", + "RECURSIVE", + "SELECT", + "DEFAULT", + "OR", + "OVER", + "PARTITION", + "WHERE", + "UNION", + "FOR", + "INTO", + "JOIN", + "ROLLUP", + "SOME", + "LOOKUP", + "NULLS", + "ON", + "AND", + "ANY", + "EXCEPT", + "EXISTS", + "HAVING", + "WITHIN", + "COLLATE", + "DISTINCT", + "ENUM", + "INTERSECT", + "IS", + "NATURAL", + "PRECEDING", + "AT", + "EXTRACT", + "FALSE", + "IN", + "MERGE", + "STRUCT", + "CAST", + "IGNORE", + "INNER", + "NEW", + "OUTER", + "INTERVAL", + "LIMIT", + "RESPECT", + "UNNEST", + "WITH", + "DESC", + "EXCLUDE", + "GROUPS", + "PROTO", + "THEN", + "ASSERT_ROWS_MODIFIED", + "END", + "FULL", + "GROUPING", + "NOT", + "GROUP", + "IF", + "LEFT", + "ALL", + "AS", + "BETWEEN", + "CASE", + "FOLLOWING", + "NO", + "NULL", + "UNBOUNDED" + ], + "DELTALAKE": [ + "EXTERNAL", + "EXTRACT", + "PRIMARY", + "SEMI", + "UNIQUE", + "ALL", + "AT", + "JOIN", + "PARTITION", + "USING", + "ELSE", + "REFERENCES", + "SOME", + "BETWEEN", + "END", + "FOREIGN", + 
"GRANT", + "GROUPING", + "INNER", + "WINDOW", + "CURRENT_TIME", + "INSERT", + "LOCAL", + "ORDER", + "SELECT", + "COMMIT", + "EXISTS", + "OR", + "WHEN", + "AS", + "FROM", + "OF", + "ROLLUP", + "GLOBAL", + "NO", + "SET", + "START", + "TO", + "CAST", + "FALSE", + "OVERLAPS", + "ROW", + "ROWS", + "THEN", + "UPDATE", + "ANY", + "CREATE", + "DESCRIBE", + "REVOKE", + "TRAILING", + "TRUE", + "AUTHORIZATION", + "CASE", + "EVENT_DATE", + "HAVING", + "ALTER", + "FUNCTION", + "MINUS", + "OUT", + "RANGE", + "USER", + "WHERE", + "ANTI", + "LEADING", + "NATURAL", + "VALUES", + "CURRENT_USER", + "GROUP", + "OUTER", + "ROLLBACK", + "SESSION_USER", + "CHECK", + "LIKE", + "TABLE", + "DELETE", + "ESCAPE", + "COLLATE", + "COLUMN", + "POSITION", + "CONSTRAINT", + "EXCEPT", + "NOT", + "RIGHT", + "FULL", + "ONLY", + "TRUNCATE", + "CROSS", + "CURRENT", + "DISTINCT", + "FOR", + "INTERVAL", + "LATERAL", + "WITH", + "FILTER", + "IN", + "ON", + "UNKNOWN", + "FETCH", + "TIME", + "CUBE", + "UNION", + "CURRENT_DATE", + "INTERSECT", + "BOTH", + "NULL", + "AND", + "ARRAY", + "BY", + "CURRENT_TIMESTAMP", + "IS", + "TABLESAMPLE", + "DROP", + "INTO", + "LEFT" + ], + "GCS_DATALAKE": [ + "STRUCT", + "COLLATE", + "FOLLOWING", + "INTERSECT", + "NO", + "OF", + "RECURSIVE", + "TREAT", + "CONTAINS", + "ELSE", + "END", + "GROUPS", + "INNER", + "IS", + "ARRAY", + "AS", + "HAVING", + "NATURAL", + "ORDER", + "PRECEDING", + "PROTO", + "AND", + "BY", + "ESCAPE", + "IN", + "LEFT", + "OR", + "TABLESAMPLE", + "THEN", + "CREATE", + "FOR", + "LIKE", + "UNION", + "DEFINE", + "FROM", + "UNBOUNDED", + "DISTINCT", + "EXCLUDE", + "GROUPING", + "LOOKUP", + "OUTER", + "RIGHT", + "CASE", + "CURRENT", + "JOIN", + "NULLS", + "USING", + "WITHIN", + "AT", + "FALSE", + "INTERVAL", + "INTO", + "ALL", + "CROSS", + "ENUM", + "EXCEPT", + "EXTRACT", + "FETCH", + "UNNEST", + "WHERE", + "CUBE", + "LATERAL", + "PARTITION", + "ROLLUP", + "SELECT", + "SOME", + "EXISTS", + "NULL", + "OVER", + "WHEN", + "WINDOW", + "ASC", + "BETWEEN", + 
"DESC", + "HASH", + "ANY", + "CAST", + "IF", + "MERGE", + "SET", + "TRUE", + "ASSERT_ROWS_MODIFIED", + "FULL", + "LIMIT", + "NEW", + "ON", + "RANGE", + "TO", + "WITH", + "DEFAULT", + "GROUP", + "IGNORE", + "NOT", + "RESPECT", + "ROWS" + ], + "MSSQL": [ + "WHERE", + "ASYMMETRIC", + "DICTIONARY", + "ELSE", + "FREETEXT", + "NEW", + "PARAMETERS", + "USE", + "WHILE", + "REVOKE", + "COALESCE", + "CONSTRAINTS", + "CONTAINSTABLE", + "DUMP", + "LOCATOR", + "NAMES", + "PIVOT", + "UNPIVOT", + "CASE", + "CURSOR", + "DETERMINISTIC", + "EVERY", + "EXEC", + "HOUR", + "INCLUDE", + "DECIMAL", + "IDENTITYCOL", + "SYMMETRIC", + "AFTER", + "DEFERRABLE", + "FETCH", + "IMMEDIATE", + "OVERLAY", + "ASSERTION", + "CONVERT", + "LIMIT", + "MERGE", + "PRIVILEGES", + "BIT_LENGTH", + "BOTH", + "IF", + "LOWER", + "RIGHT", + "MEMBER", + "MODULE", + "RECURSIVE", + "ACTION", + "PROCEDURE", + "REVERT", + "UNNEST", + "AUTHORIZATION", + "CHAR_LENGTH", + "REFERENCES", + "SPECIFICTYPE", + "UNION", + "VARCHAR", + "CARDINALITY", + "CLOB", + "RANGE", + "SYSTEM_USER", + "THEN", + "CURRENT_TIMESTAMP", + "FUSION", + "INITIALIZE", + "LARGE", + "NULLIF", + "PREPARE", + "CONSTRAINT", + "CONTINUE", + "SIZE", + "TO", + "UPPER", + "ANY", + "ASC", + "DATE", + "INDEX", + "THAN", + "PROC", + "BROWSE", + "DEALLOCATE", + "EXECUTE", + "EXTERNAL", + "INDICATOR", + "INOUT", + "OVER", + "ESCAPE", + "MIN", + "NUMERIC", + "TRUNCATE", + "DATA", + "FILE", + "FOREIGN", + "CALLED", + "OPEN", + "XMLQUERY", + "CHECKPOINT", + "DBCC", + "DECLARE", + "FREETEXTTABLE", + "POSITION_REGEX", + "XMLCONCAT", + "CONNECT", + "IS", + "OPENDATASOURCE", + "ARE", + "NONE", + "ZONE", + "COMPUTE", + "SQLWARNING", + "VALUE", + "DELETE", + "TEMPORARY", + "TSEQUAL", + "WRITE", + "CALL", + "SUBMULTISET", + "BREAK", + "EACH", + "FUNCTION", + "RESTRICT", + "START", + "ERRLVL", + "IN", + "PRESERVE", + "DISTRIBUTED", + "MULTISET", + "OUT", + "ELEMENT", + "INTERVAL", + "OFF", + "POSTFIX", + "ROLLUP", + "STATIC", + "CHARACTER_LENGTH", + "MINUTE", + 
"XMLVALIDATE", + "DESCRIBE", + "OPENROWSET", + "OPENXML", + "REGR_SLOPE", + "REPLICATION", + "COVAR_SAMP", + "CORR", + "END-EXEC", + "TRAN", + "DESTROY", + "OFFSETS", + "WAITFOR", + "AND", + "CURRENT_ROLE", + "INTERSECTION", + "SCROLL", + "TIMEZONE_MINUTE", + "BULK", + "FALSE", + "FULLTEXTTABLE", + "INSENSITIVE", + "UPDATE", + "SHUTDOWN", + "AS", + "DESTRUCTOR", + "GENERAL", + "GO", + "ON", + "OPENQUERY", + "SCHEMA", + "STATE", + "GRANT", + "SEARCH", + "SELECT", + "XMLCOMMENT", + "DESCRIPTOR", + "SQL", + "COUNT", + "OCTET_LENGTH", + "SIMILAR", + "COLLATION", + "CREATE", + "INT", + "INTO", + "MAX", + "SETUSER", + "WINDOW", + "ATOMIC", + "EXCEPT", + "LOCALTIMESTAMP", + "SECURITYAUDIT", + "SEMANTICSIMILARITYDETAILSTABLE", + "CURRENT_DATE", + "SPACE", + "VAR_SAMP", + "CONDITION", + "DEREF", + "NCHAR", + "PUBLIC", + "ALLOCATE", + "SENSITIVE", + "RELATIVE", + "SECOND", + "XMLELEMENT", + "SCOPE", + "TABLESAMPLE", + "TRAILING", + "PERCENTILE_CONT", + "ROWGUIDCOL", + "TEXTSIZE", + "VALUES", + "BEGIN", + "ITERATE", + "OBJECT", + "ONLY", + "PARTITION", + "RELEASE", + "EXTRACT", + "NEXT", + "OLD", + "RECONFIGURE", + "TRIGGER", + "BETWEEN", + "INSERT", + "LEVEL", + "SYSTEM", + "DISCONNECT", + "NATURAL", + "SEQUENCE", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "DYNAMIC", + "GLOBAL", + "METHOD", + "UNDER", + "AGGREGATE", + "LIKE_REGEX", + "NOT", + "STRUCTURE", + "SUBSTRING", + "CURRENT_PATH", + "SEMANTICSIMILARITYTABLE", + "CURRENT_CATALOG", + "OCCURRENCES_REGEX", + "TRANSLATION", + "WITHIN", + "XMLAGG", + "REGR_R2", + "XMLFOREST", + "SMALLINT", + "ALL", + "CHARACTER", + "CLASS", + "COLLECT", + "CURRENT_USER", + "DOMAIN", + "FIRST", + "VARYING", + "ASENSITIVE", + "INTEGER", + "ORDER", + "SQLEXCEPTION", + "USING", + "FOUND", + "LINENO", + "DROP", + "FULL", + "PRECISION", + "SAVEPOINT", + "TRANSLATE_REGEX", + "VARIABLE", + "XMLCAST", + "DEC", + "FOR", + "HOLDLOCK", + "RESULT", + "OPTION", + "PASCAL", + "READTEXT", + "RESTORE", + "SAVE", + "XMLTEXT", + "BIT", + "MODIFIES", + 
"SQLERROR", + "KEY", + "NULL", + "SPECIFIC", + "SUM", + "NCLOB", + "NORMALIZE", + "REGR_AVGY", + "TRUE", + "WITHINGROUP", + "CONNECTION", + "CUBE", + "INITIALLY", + "POSITION", + "ROW", + "SESSION", + "XMLSERIALIZE", + "LATERAL", + "XMLEXISTS", + "IDENTITY", + "LESS", + "MATCH", + "PERCENTILE_DISC", + "STDDEV_SAMP", + "COMPLETION", + "DEFAULT", + "GOTO", + "INPUT", + "PRIMARY", + "UESCAPE", + "FLOAT", + "LEADING", + "REAL", + "USAGE", + "CORRESPONDING", + "DAY", + "RETURN", + "XMLNAMESPACES", + "XMLPI", + "BY", + "JOIN", + "TERMINATE", + "HOLD", + "ROWS", + "BLOB", + "CYCLE", + "DOUBLE", + "GROUP", + "LOCALTIME", + "PARTIAL", + "ROLE", + "CATALOG", + "LOAD", + "PARAMETER", + "XMLDOCUMENT", + "END", + "GROUPING", + "PREORDER", + "CAST", + "XMLATTRIBUTES", + "XMLBINARY", + "COLUMN", + "REGR_SYY", + "CONTAINS", + "CURRENT_SCHEMA", + "NO", + "ROWCOUNT", + "LIKE", + "OUTER", + "BEFORE", + "FROM", + "ORDINALITY", + "STATEMENT", + "BINARY", + "COVAR_POP", + "LAST", + "WORK", + "HAVING", + "KILL", + "SECTION", + "WITHOUT", + "GET", + "LEFT", + "TOP", + "TRANSLATE", + "TABLE", + "BOOLEAN", + "DISK", + "INNER", + "MONTH", + "NATIONAL", + "REGR_COUNT", + "ROLLBACK", + "VIEW", + "CHAR", + "MOD", + "PLAN", + "SETS", + "REF", + "TRANSACTION", + "YEAR", + "OVERLAPS", + "TRY_CONVERT", + "DATABASE", + "IDENTITY_INSERT", + "PERCENT", + "WIDTH_BUCKET", + "CURRENT", + "CURRENT_TIME", + "EQUALS", + "FILTER", + "INTERSECT", + "SOME", + "WHEN", + "XMLPARSE", + "CUME_DIST", + "LOCAL", + "XMLITERATE", + "DIAGNOSTICS", + "PERCENT_RANK", + "REGR_AVGX", + "REGR_INTERCEPT", + "RETURNS", + "PAD", + "OPERATION", + "ARRAY", + "AT", + "COLLATE", + "CROSS", + "DENY", + "EXIT", + "NONCLUSTERED", + "WHENEVER", + "CASCADE", + "FORTRAN", + "OR", + "UNKNOWN", + "CASCADED", + "HOST", + "PRIOR", + "NOCHECK", + "OF", + "READ", + "SQLCODE", + "TREAT", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "REFERENCING", + "TIMEZONE_HOUR", + "UPDATETEXT", + "USER", + "PRINT", + "REGR_SXY", + "SEMANTICKEYPHRASETABLE", + 
"SESSION_USER", + "VAR_POP", + "ADMIN", + "BACKUP", + "EXCEPTION", + "FILLFACTOR", + "LN", + "READS", + "ADA", + "ALIAS", + "CLOSE", + "ISOLATION", + "SQLCA", + "STDDEV_POP", + "WRITETEXT", + "DESC", + "SET", + "UNIQUE", + "ABSOLUTE", + "OUTPUT", + "RULE", + "ALTER", + "SQLSTATE", + "TRIM", + "BREADTH", + "MAP", + "COMMIT", + "FREE", + "RAISERROR", + "SUBSTRING_REGEX", + "WITH", + "XMLTABLE", + "AVG", + "DEFERRED", + "DISTINCT", + "REGR_SXX", + "MODIFY", + "STATISTICS", + "ADD", + "EXISTS", + "LANGUAGE", + "PREFIX", + "DEPTH", + "CHECK", + "CLUSTERED", + "IGNORE", + "ROUTINE", + "TIME" + ], + "POSTGRES": [ + "NATURAL", + "SIMILAR", + "ASC", + "BINARY", + "DEFERRABLE", + "EXCEPT", + "FREEZE", + "LEADING", + "LEFT", + "ANALYZE", + "CURRENT_ROLE", + "CURRENT_TIME", + "DO", + "IS", + "OVERLAPS", + "OUTER", + "UNIQUE", + "VERBOSE", + "CURRENT_DATE", + "CURRENT_USER", + "NOT", + "NOTNULL", + "PRIMARY", + "SESSION_USER", + "BETWEEN", + "CONSTRAINT", + "FOREIGN", + "ORDER", + "ONLY", + "OR", + "COLUMN", + "DEFAULT", + "INNER", + "OFFSET", + "GROUP", + "LOCALTIME", + "NEW", + "AND", + "AUTHORIZATION", + "CREATE", + "DISTINCT", + "CHECK", + "ILIKE", + "ASYMMETRIC", + "CURRENT_TIMESTAMP", + "INTERSECT", + "SYMMETRIC", + "TABLE", + "USER", + "ELSE", + "FULL", + "IN", + "SELECT", + "FROM", + "GRANT", + "INTO", + "LIKE", + "ALL", + "ANALYSE", + "END", + "FALSE", + "RIGHT", + "SOME", + "OLD", + "TRUE", + "WHEN", + "BOTH", + "HAVING", + "LOCALTIMESTAMP", + "OFF", + "CROSS", + "UNION", + "CASE", + "DESC", + "REFERENCES", + "THEN", + "ON", + "PLACING", + "USING", + "WHERE", + "ANY", + "ISNULL", + "JOIN", + "LIMIT", + "FOR", + "INITIALLY", + "NULL", + "TO", + "ARRAY", + "AS", + "CAST", + "COLLATE", + "TRAILING" + ], + "RS": [ + "TRUNCATECOLUMNS", + "UNIQUE", + "TEXT32K", + "COLLATE", + "IN", + "OLD", + "REFERENCES", + "AES256", + "LZOP", + "RESPECT", + "WALLET", + "AND", + "TOP", + "SYSDATE", + "IGNORE", + "LEADING", + "MOSTLY32", + "NOTNULL", + "OUTER", + "TRAILING", + "WHEN", + 
"BACKUP", + "GRANT", + "WITH", + "ANY", + "FOREIGN", + "GLOBALDICT256", + "ILIKE", + "RESORT", + "BETWEEN", + "DESC", + "LOCALTIMESTAMP", + "NULL", + "RECOVER", + "CURRENT_DATE", + "BOTH", + "CASE", + "CREDENTIALS", + "NEW", + "OVERLAPS", + "AZ64", + "ASC", + "DISABLE", + "EXPLICIT", + "ISNULL", + "AS", + "DELTA32K", + "FREEZE", + "INTO", + "LUN", + "MOSTLY8", + "BZIP2", + "ALL", + "CAST", + "DEFLATE", + "DELTA", + "END", + "PERMISSIONS", + "SESSION_USER", + "AES128", + "ENABLE", + "OPEN", + "RESTORE", + "ELSE", + "DEFAULT", + "ONLY", + "TEXT255", + "CURRENT_USER", + "EXCEPT", + "GROUP", + "IS", + "TDES", + "VERBOSE", + "ANALYZE", + "ENCODE", + "ENCRYPT ", + "FROM", + "SNAPSHOT ", + "UNION", + "ALLOWOVERWRITE", + "LZO", + "OR", + "TABLE", + "TAG", + "BLANKSASNULL", + "CURRENT_TIME", + "ENCRYPTION", + "FALSE", + "JOIN", + "BYTEDICT", + "NULLS", + "DEFRAG", + "LUNS", + "OFF", + "OFFSET", + "SOME", + "COLUMN", + "DO", + "FULL", + "BINARY", + "LIKE", + "OFFLINE", + "THEN", + "INNER", + "CHECK", + "NOT", + "ARRAY", + "DISTINCT", + "INITIALLY", + "ON", + "PERCENT", + "RAW", + "SYSTEM", + "DEFERRABLE", + "CURRENT_USER_ID", + "FOR", + "INTERSECT", + "LEFT", + "PLACING", + "USER", + "CROSS", + "LIMIT", + "PARALLEL", + "EMPTYASNULL", + "OID", + "REJECTLOG", + "CREATE", + "HAVING", + "MOSTLY13", + "PRIMARY", + "READRATIO", + "RIGHT", + "USING", + "WHERE", + "GZIP", + "GLOBALDICT64K", + "IDENTITY", + "LANGUAGE", + "MINUS", + "NATURAL", + "AUTHORIZATION", + "CONSTRAINT", + "LOCALTIME", + "PARTITION", + "TRUE", + "ANALYSE", + "TO", + "UUID", + "SELECT", + "ORDER", + "SIMILAR", + "WITHOUT", + "CURRENT_TIMESTAMP" + ], + "S3_DATALAKE": [ + "AS", + "CHAR", + "COMMIT", + "FOLLOWING", + "REDUCE", + "ROW", + "WHERE", + "BY", + "LOCAL", + "CURRENT_DATE", + "JOIN", + "OUT", + "PRESERVE", + "READS", + "ELSE", + "WITH", + "CROSS", + "EXISTS", + "GRANT", + "PRECEDING", + "TIMESTAMP", + "UNBOUNDED", + "DROP", + "START", + "TABLE", + "FLOOR", + "FULL", + "HAVING", + "IF", + "LESS", + "RIGHT", 
+ "ROLLUP", + "UTC_TIMESTAMP", + "CURRENT_TIMESTAMP", + "OR", + "OVER", + "BETWEEN", + "DISTINCT", + "INTEGER", + "BIGINT", + "COLUMN", + "FETCH", + "MACRO", + "UNIQUEJOIN", + "ARRAY", + "CASHE", + "EXTRACT", + "REFERENCES", + "INSERT", + "LIKE", + "VALUES", + "TRUE", + "BOOLEAN", + "EXCHANGE", + "IN", + "INNER", + "LATERAL", + "PRECISION", + "TRIGGER", + "BINARY", + "CAST", + "DOUBLE", + "END", + "FALSE", + "ON", + "PRIMARY", + "CREATE", + "DECIMAL", + "FLOAT", + "NULL", + "TABLESAMPLE", + "THEN", + "REVOKE", + "AND", + "DELETE", + "LEFT", + "NONE", + "PERCENT", + "PROCEDURE", + "REGEXP", + "TRUNCATE", + "UNION", + "BOTH", + "MORE", + "VARCHAR", + "ALL", + "DATE", + "SELECT", + "CURSOR", + "FUNCTION", + "RLIKE", + "NUMERIC", + "OF", + "AUTHORIZATION", + "CASE", + "CONF", + "FOR", + "GROUP", + "INTERSECT", + "ROLLBACK", + "DATABASE", + "INT", + "NOT", + "SMALLINT", + "WHEN", + "CONSTRAINT", + "IMPORT", + "OUTER", + "RANGE", + "TIME", + "EXTENDED", + "INTO", + "CUBE", + "FOREIGN", + "INTERVAL", + "PARTIALSCAN", + "SET", + "TO", + "CURRENT", + "FROM", + "VIEWS", + "ALTER", + "DESCRIBE", + "EXTERNAL", + "GROUPING", + "IS", + "PARTITION", + "USING", + "DAYOFWEEK", + "MAP", + "TRANSFORM", + "USER", + "ONLY", + "ROWS", + "ORDER", + "UPDATE", + "WINDOW" + ], + "SNOWFLAKE": [ + "SET", + "TABLE", + "GRANT", + "INNER", + "MINUS", + "REGEXP", + "CONNECTION", + "CROSS", + "START", + "IN", + "DELETE", + "DISTINCT", + "EXISTS", + "FROM", + "UPDATE", + "DATABASE", + "FULL", + "GROUP", + "INTO", + "FALSE", + "INSERT", + "NOT", + "ORDER", + "ACCOUNT", + "ANY", + "BETWEEN", + "CAST", + "TRIGGER", + "USING", + "ILIKE", + "NULL", + "ON", + "OR", + "ALL", + "CONNECT", + "REVOKE", + "WHERE", + "SAMPLE", + "SELECT", + "CHECK", + "JOIN", + "RLIKE", + "ROW", + "CURRENT", + "LATERAL", + "NATURAL", + "WHENEVER", + "LIKE", + "SOME", + "TRUE", + "VIEW", + "CASE", + "CURRENT_TIMESTAMP", + "INCREMENT", + "IS", + "ELSE", + "FOLLOWING", + "UNION", + "AND", + "BY", + "CREATE", + "CURRENT_TIME", + 
// TransformerError is an error value that pairs a message with the HTTP
// status code that should be reported for it.
type TransformerError struct {
	message string
	code    int
}

// Compile-time check that *TransformerError satisfies the error interface.
var _ error = (*TransformerError)(nil)

// NewTransformerError returns a TransformerError holding the given message
// and status code.
func NewTransformerError(message string, statusCode int) *TransformerError {
	transformerErr := new(TransformerError)
	transformerErr.message = message
	transformerErr.code = statusCode
	return transformerErr
}

// Error returns the message the error was created with.
func (e *TransformerError) Error() string {
	return e.message
}

// StatusCode returns the HTTP status code the error was created with.
func (e *TransformerError) StatusCode() int {
	return e.code
}

// Sentinel errors returned by the warehouse transformer.
var (
	ErrInternalServer         = NewTransformerError("Internal Server Error", http.StatusInternalServerError)
	ErrMergePropertiesMissing = NewTransformerError("either or both identifiers missing in mergeProperties", http.StatusBadRequest)
	ErrMergePropertyOneInvalid = NewTransformerError("mergeProperties contains null values for expected inputs", http.StatusBadRequest)
	ErrEmptyTableName         = NewTransformerError("Table name cannot be empty.", http.StatusBadRequest)
	ErrEmptyColumnName        = NewTransformerError("Column name cannot be empty.", http.StatusBadRequest)
	ErrRecordIDEmpty          = NewTransformerError("recordId cannot be empty for cloud sources events", http.StatusBadRequest)
	ErrContextNotMap          = NewTransformerError("context is not a map", http.StatusInternalServerError)
	ErrExtractEventNameEmpty  = NewTransformerError("cannot create event table with empty event name, event name is missing in the payload", http.StatusInternalServerError)
)

// Aliases: each name below shares the very same error value (and therefore the
// same message and status code) as the sentinel it is assigned from.
var (
	ErrMergePropertiesNotSufficient = ErrMergePropertiesMissing
	ErrMergePropertyTwoInvalid      = ErrMergePropertyOneInvalid
	ErrMergePropertyEmpty           = ErrMergePropertyOneInvalid
	ErrMergePropertiesNotArray      = ErrMergePropertyOneInvalid
	ErrRecordIDObject               = ErrRecordIDEmpty
)
"github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" +) + +type FunctionalRules func(event *ptrans.TransformerEvent) (any, error) + +var ( + DefaultRules = map[string]FunctionalRules{ + "id": staticRule("messageId"), + "anonymous_id": staticRule("anonymousId"), + "user_id": staticRule("userId"), + "sent_at": staticRule("sentAt"), + "timestamp": staticRule("timestamp"), + "received_at": staticRule("receivedAt"), + "original_timestamp": staticRule("originalTimestamp"), + "channel": staticRule("channel"), + "context_ip": func(event *ptrans.TransformerEvent) (any, error) { + return firstValidValue(event.Message, []string{"context.ip", "request_ip"}), nil + }, + "context_request_ip": staticRule("request_ip"), + "context_passed_ip": staticRule("context.ip"), + } + + TrackRules = map[string]FunctionalRules{ + "event_text": staticRule("event"), + } + TrackEventTableRules = map[string]FunctionalRules{ + "id": func(event *ptrans.TransformerEvent) (any, error) { + eventType := event.Metadata.EventType + canUseRecordID := utils.CanUseRecordID(event.Metadata.SourceCategory) + if eventType == "track" && canUseRecordID { + return extractCloudRecordID(event.Message, &event.Metadata, event.Metadata.MessageID) + } + return event.Metadata.MessageID, nil + }, + } + TrackTableRules = map[string]FunctionalRules{ + "record_id": func(event *ptrans.TransformerEvent) (any, error) { + eventType := event.Metadata.EventType + canUseRecordID := utils.CanUseRecordID(event.Metadata.SourceCategory) + if eventType == "track" && canUseRecordID { + cr, err := extractCloudRecordID(event.Message, &event.Metadata, nil) + if err != nil { + return nil, fmt.Errorf("extracting cloud record id: %w", err) + } + return utils.ToString(cr), nil + } + return nil, nil // nolint: nilnil + }, + } + + IdentifyRules = map[string]FunctionalRules{ + "context_ip": func(event *ptrans.TransformerEvent) (any, error) { + return firstValidValue(event.Message, []string{"context.ip", "request_ip"}), nil + 
}, + "context_request_ip": staticRule("request_ip"), + "context_passed_ip": staticRule("context.ip"), + } + IdentifyRulesNonDataLake = map[string]FunctionalRules{ + "context_ip": func(event *ptrans.TransformerEvent) (any, error) { + return firstValidValue(event.Message, []string{"context.ip", "request_ip"}), nil + }, + "context_request_ip": staticRule("request_ip"), + "context_passed_ip": staticRule("context.ip"), + "sent_at": staticRule("sentAt"), + "timestamp": staticRule("timestamp"), + "original_timestamp": staticRule("originalTimestamp"), + } + + PageRules = map[string]FunctionalRules{ + "name": func(event *ptrans.TransformerEvent) (any, error) { + return firstValidValue(event.Message, []string{"name", "properties.name"}), nil + }, + } + + ScreenRules = map[string]FunctionalRules{ + "name": func(event *ptrans.TransformerEvent) (any, error) { + return firstValidValue(event.Message, []string{"name", "properties.name"}), nil + }, + } + + AliasRules = map[string]FunctionalRules{ + "previous_id": staticRule("previousId"), + } + + GroupRules = map[string]FunctionalRules{ + "group_id": staticRule("groupId"), + } + + ExtractRules = map[string]FunctionalRules{ + "id": func(event *ptrans.TransformerEvent) (any, error) { + return extractRecordID(&event.Metadata) + }, + "received_at": staticRule("receivedAt"), + "event": staticRule("event"), + } +) + +func staticRule(value string) FunctionalRules { + return func(*ptrans.TransformerEvent) (any, error) { + return value, nil + } +} + +var rudderReservedColumns = map[string]map[string]struct{}{ + "track": createReservedColumns(DefaultRules, TrackRules, TrackTableRules, TrackEventTableRules), + "identify": createReservedColumns(DefaultRules, IdentifyRules), + "page": createReservedColumns(DefaultRules, PageRules), + "screen": createReservedColumns(DefaultRules, ScreenRules), + "group": createReservedColumns(DefaultRules, GroupRules), + "alias": createReservedColumns(DefaultRules, AliasRules), + "extract": 
createReservedColumns(ExtractRules), +} + +func createReservedColumns(rules ...map[string]FunctionalRules) map[string]struct{} { + return lo.MapEntries(lo.Assign(rules...), func(key string, _ FunctionalRules) (string, struct{}) { + return key, struct{}{} + }) +} + +func firstValidValue(message map[string]any, props []string) any { + for _, prop := range props { + propKeys := strings.Split(prop, ".") + if val := misc.MapLookup(message, propKeys...); val != nil && !utils.IsBlank(val) { + return val + } + } + return nil +} + +func extractRecordID(metadata *ptrans.Metadata) (any, error) { + if metadata.RecordID == nil || utils.IsBlank(metadata.RecordID) { + return nil, response.ErrRecordIDEmpty + } + if utils.IsObject(metadata.RecordID) { + return nil, response.ErrRecordIDObject + } + return metadata.RecordID, nil +} + +func extractCloudRecordID(message types.SingularEventT, metadata *ptrans.Metadata, fallbackValue any) (any, error) { + if sv := misc.MapLookup(message, "context", "sources", "version"); sv != nil && !utils.IsBlank(sv) { + return extractRecordID(metadata) + } + return fallbackValue, nil +} + +func IsRudderReservedColumn(eventType, columnName string) bool { + lowerEventType := strings.ToLower(eventType) + if _, ok := rudderReservedColumns[lowerEventType]; !ok { + return false + } + lowerColumnName := strings.ToLower(columnName) + if _, ok := rudderReservedColumns[lowerEventType][lowerColumnName]; ok { + return true + } + return false +} diff --git a/warehouse/transformer/internal/rules/rules_test.go b/warehouse/transformer/internal/rules/rules_test.go new file mode 100644 index 0000000000..ae436cba71 --- /dev/null +++ b/warehouse/transformer/internal/rules/rules_test.go @@ -0,0 +1,85 @@ +package rules + +import ( + "testing" + + "github.com/stretchr/testify/require" + + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/utils/types" + 
"github.com/rudderlabs/rudder-server/warehouse/transformer/internal/response" +) + +func TestIsRudderReservedColumn(t *testing.T) { + testCases := []struct { + name string + eventType string + columnName string + expected bool + }{ + {name: "track", eventType: "track", columnName: "id", expected: true}, + {name: "page", eventType: "page", columnName: "id", expected: true}, + {name: "screen", eventType: "screen", columnName: "id", expected: true}, + {name: "identify", eventType: "identify", columnName: "id", expected: true}, + {name: "group", eventType: "group", columnName: "id", expected: true}, + {name: "alias", eventType: "alias", columnName: "id", expected: true}, + {name: "extract", eventType: "extract", columnName: "id", expected: true}, + {name: "not reserved event type", eventType: "not reserved", columnName: "id", expected: false}, + {name: "not reserved column name", eventType: "track", columnName: "not reserved", expected: false}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, IsRudderReservedColumn(tc.eventType, tc.columnName)) + }) + } +} + +func TestExtractRecordID(t *testing.T) { + testCases := []struct { + name string + metadata ptrans.Metadata + expectedRecordID any + expectedError error + }{ + {name: "recordId is nil", metadata: ptrans.Metadata{RecordID: nil}, expectedRecordID: nil, expectedError: response.ErrRecordIDEmpty}, + {name: "recordId is empty", metadata: ptrans.Metadata{RecordID: ""}, expectedRecordID: nil, expectedError: response.ErrRecordIDEmpty}, + {name: "recordId is not empty", metadata: ptrans.Metadata{RecordID: "123"}, expectedRecordID: "123", expectedError: nil}, + {name: "recordId is an object", metadata: ptrans.Metadata{RecordID: map[string]any{"key": "value"}}, expectedRecordID: nil, expectedError: response.ErrRecordIDObject}, + {name: "recordId is a string", metadata: ptrans.Metadata{RecordID: "123"}, expectedRecordID: "123", expectedError: nil}, + {name: "recordId is 
a number", metadata: ptrans.Metadata{RecordID: 123}, expectedRecordID: 123, expectedError: nil}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + recordID, err := extractRecordID(&tc.metadata) + require.Equal(t, tc.expectedError, err) + require.Equal(t, tc.expectedRecordID, recordID) + }) + } +} + +func TestExtractCloudRecordID(t *testing.T) { + testCases := []struct { + name string + message types.SingularEventT + metadata ptrans.Metadata + fallbackValue any + expectedRecordID any + expectedError error + }{ + {name: "sources version is nil", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": nil}}}, metadata: ptrans.Metadata{}, fallbackValue: "fallback", expectedRecordID: "fallback", expectedError: nil}, + {name: "sources version is empty", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": ""}}}, metadata: ptrans.Metadata{}, fallbackValue: "fallback", expectedRecordID: "fallback", expectedError: nil}, + {name: "sources version is not empty", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{RecordID: "123"}, fallbackValue: "fallback", expectedRecordID: "123", expectedError: nil}, + {name: "recordId is nil", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{}, fallbackValue: "fallback", expectedRecordID: nil, expectedError: response.ErrRecordIDEmpty}, + {name: "recordId is empty", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{RecordID: ""}, fallbackValue: "fallback", expectedRecordID: nil, expectedError: response.ErrRecordIDEmpty}, + {name: "recordId is an object", message: types.SingularEventT{"context": map[string]any{"sources": map[string]any{"version": "1.0"}}}, metadata: ptrans.Metadata{RecordID: 
// Building blocks for the word-splitting regular expressions compiled below.
// Each constant is a regexp2 pattern fragment; the "Range" constants are meant
// to be placed inside a character class, the others are complete sub-patterns.
//
// NOTE(review): the names and patterns look like a port of lodash's unicode
// word-splitting regexes — confirm against the reference implementation.
// NOTE(review): several fragments (rsAstralRange, rsFitz, rsRegional,
// rsSurrPair) are written in terms of UTF-16 surrogates; regexp2 presumably
// matches over runes, in which case they never match as pairs — confirm.
const (
	// Used to compose unicode character classes.
	rsAstralRange               = "\\ud800-\\udfff"
	rsComboMarksRange           = "\\u0300-\\u036f"
	reComboHalfMarksRange       = "\\ufe20-\\ufe2f"
	rsComboSymbolsRange         = "\\u20d0-\\u20ff"
	rsComboMarksExtendedRange   = "\\u1ab0-\\u1aff"
	rsComboMarksSupplementRange = "\\u1dc0-\\u1dff"
	rsComboRange                = rsComboMarksRange + reComboHalfMarksRange + rsComboSymbolsRange + rsComboMarksExtendedRange + rsComboMarksSupplementRange
	rsDingbatRange              = "\\u2700-\\u27bf"
	rsLowerRange                = "a-z\\xdf-\\xf6\\xf8-\\xff"
	rsMathOpRange               = "\\xac\\xb1\\xd7\\xf7"
	rsNonCharRange              = "\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf"
	rsPunctuationRange          = "\\u2000-\\u206f"
	rsSpaceRange                = " \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000"
	rsUpperRange                = "A-Z\\xc0-\\xd6\\xd8-\\xde"
	rsVarRange                  = "\\ufe0e\\ufe0f"
	// rsBreakRange is every character treated as a word separator: math
	// operators, non-alphanumeric ASCII/Latin-1, general punctuation and spaces.
	rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange

	// Used to compose unicode capture groups
	rsApos    = "['\u2019]"
	rsBreak   = "[" + rsBreakRange + "]"
	rsCombo   = "[" + rsComboRange + "]"
	rsDigit   = "\\d"
	rsDingbat = "[" + rsDingbatRange + "]"
	rsLower   = "[" + rsLowerRange + "]"
	// rsMisc matches any character that is not a surrogate, break, digit,
	// dingbat, lowercase or uppercase character as defined by the ranges above.
	rsMisc      = "[^" + rsAstralRange + rsBreakRange + rsDigit + rsDingbatRange + rsLowerRange + rsUpperRange + "]"
	rsFitz      = "\\ud83c[\\udffb-\\udfff]"
	rsModifier  = "(?:" + rsCombo + "|" + rsFitz + ")"
	rsNonAstral = "[^" + rsAstralRange + "]"
	rsRegional  = "(?:\\ud83c[\\udde6-\\uddff]){2}"
	rsSurrPair  = "[\\ud800-\\udbff][\\udc00-\\udfff]"
	rsUpper     = "[" + rsUpperRange + "]"
	rsZWJ       = "\\u200d"

	// Used to compose unicode regexes
	rsMiscLower = "(?:" + rsLower + "|" + rsMisc + ")"
	rsMiscUpper = "(?:" + rsUpper + "|" + rsMisc + ")"
	// Optional contraction suffixes: an apostrophe followed by d, ll, m, re, s,
	// t or ve (lowercase and uppercase variants respectively).
	rsOptContrLower = "(?:" + rsApos + "(?:d|ll|m|re|s|t|ve))?"
	rsOptContrUpper = "(?:" + rsApos + "(?:D|LL|M|RE|S|T|VE))?"
	reOptMod        = rsModifier + "?"
	rsOptVar        = "[" + rsVarRange + "]?"
	rsOptJoin       = "(?:" + rsZWJ + "(?:" + rsNonAstral + "|" + rsRegional + "|" + rsSurrPair + ")" + rsOptVar + reOptMod + ")*"
	// Ordinals such as 1st/2nd/3rd/4th; the (?![123]) lookahead keeps "1th",
	// "2th" and "3th" from matching, and the trailing lookahead requires a word
	// boundary or a letter of the opposite case (or underscore) to follow.
	rsOrdLower = "\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])"
	rsOrdUpper = "\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])"
	rsSeq      = rsOptVar + reOptMod + rsOptJoin
	rsEmoji    = "(?:" + rsDingbat + "|" + rsRegional + "|" + rsSurrPair + ")" + rsSeq
)

var (
	// reUnicodeWords matches one word per match. The alternatives are joined
	// with "|" in the order listed below.
	reUnicodeWords = regexp2.MustCompile(
		strings.Join(
			[]string{
				rsUpper + "?" + rsLower + "+" + rsOptContrLower + "(?=" + rsBreak + "|" + rsUpper + "|" + "$)",               // Regular words, lowercase letters followed by optional contractions
				rsMiscUpper + "+" + rsOptContrUpper + "(?=" + rsBreak + "|" + rsUpper + rsMiscLower + "|" + "$)",             // Miscellaneous uppercase characters with optional contractions
				rsUpper + "?" + rsMiscLower + "+" + rsOptContrLower,                                                          // Miscellaneous lowercase sequences with optional contractions
				rsUpper + "+" + rsOptContrUpper,                                                                              // All uppercase words with optional contractions (e.g., "THIS")
				rsOrdUpper,                                                                                                   // Ordinals for uppercase (e.g., "1ST", "2ND")
				rsOrdLower,                                                                                                   // Ordinals for lowercase (e.g., "1st", "2nd")
				rsDigit + "+",                                                                                                // Pure digits (e.g., "123")
				rsEmoji,                                                                                                      // Emojis (e.g., 😀, ❤️)
			},
			"|",
		),
		regexp2.None,
	)
	// reUnicodeWordsWithNumbers is reUnicodeWords preceded by four alternatives
	// that keep a run of letters and an adjacent run of digits in one match.
	reUnicodeWordsWithNumbers = regexp2.MustCompile(
		strings.Join(
			[]string{
				rsUpper + "?" + rsLower + "+" + rsDigit + "+",                                                                // Lowercase letters followed by digits (e.g., "abc123")
				rsUpper + "+" + rsDigit + "+",                                                                                // Uppercase letters followed by digits (e.g., "ABC123")
				rsDigit + "+" + rsUpper + "?" + rsLower + "+",                                                                // Digits followed by lowercase letters (e.g., "123abc")
				rsDigit + "+" + rsUpper + "+",                                                                                // Digits followed by uppercase letters (e.g., "123ABC")
				rsUpper + "?" + rsLower + "+" + rsOptContrLower + "(?=" + rsBreak + "|" + rsUpper + "|" + "$)",               // Regular words, lowercase letters followed by optional contractions
				rsMiscUpper + "+" + rsOptContrUpper + "(?=" + rsBreak + "|" + rsUpper + rsMiscLower + "|" + "$)",             // Miscellaneous uppercase characters with optional contractions
				rsUpper + "?" + rsMiscLower + "+" + rsOptContrLower,                                                          // Miscellaneous lowercase sequences with optional contractions
				rsUpper + "+" + rsOptContrUpper,                                                                              // All uppercase words with optional contractions (e.g., "THIS")
				rsOrdUpper,                                                                                                   // Ordinals for uppercase (e.g., "1ST", "2ND")
				rsOrdLower,                                                                                                   // Ordinals for lowercase (e.g., "1st", "2nd")
				rsDigit + "+",                                                                                                // Pure digits (e.g., "123")
				rsEmoji,                                                                                                      // Emojis (e.g., 😀, ❤️)
			},
			"|",
		),
		regexp2.None,
	)
	// reAsciiWord matches a run of characters that are none of: ASCII control
	// characters, space, ASCII punctuation/symbols, or DEL.
	reAsciiWord = regexp2.MustCompile(`[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+`, regexp2.None)
	// reHasUnicodeWord matches when a string contains a lower-to-upper case
	// transition, two uppercase letters followed by a lowercase one, a digit
	// next to a letter, or any character other than ASCII letters, digits and
	// space.
	reHasUnicodeWord = regexp2.MustCompile(
		`[a-z][A-Z]|[A-Z]{2}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]`, regexp2.None,
	)
)

// ToSnakeCase converts a string to snake_case using regular word separation.
// It delegates to snakeCase with extractWords as the word splitter.
func ToSnakeCase(s string) string {
	return snakeCase(s, extractWords)
}
+func ToSnakeCaseWithNumbers(s string) string { + return snakeCase(s, extractWordsWithNumbers) +} + +func extractWords(s string) []string { + if hasUnicodeWord(s) { + return unicodeWords(s) + } + return asciiWords(s) +} + +func hasUnicodeWord(s string) bool { + isMatch, _ := reHasUnicodeWord.MatchString(s) + return isMatch +} + +func extractWordsWithNumbers(s string) []string { + if hasUnicodeWord(s) { + return unicodeWordsWithNumbers(s) + } + return asciiWords(s) +} + +func unicodeWords(s string) []string { + return regexp2FindAllString(reUnicodeWords, s) +} + +func unicodeWordsWithNumbers(s string) []string { + return regexp2FindAllString(reUnicodeWordsWithNumbers, s) +} + +func asciiWords(s string) []string { + return regexp2FindAllString(reAsciiWord, s) +} + +func regexp2FindAllString(re *regexp2.Regexp, s string) []string { + var matches []string + m, _ := re.FindStringMatch(s) + for m != nil { + matches = append(matches, m.String()) + m, _ = re.FindNextMatch(m) + } + return matches +} + +// snakeCase converts a string to snake_case based on a word extraction function. +func snakeCase(s string, wordExtractor func(s string) []string) string { + s = strings.NewReplacer("'", "", "\u2019", "").Replace(s) + words := wordExtractor(s) + words = lo.Map(words, func(word string, _ int) string { + return strings.ToLower(word) + }) + return strings.Join(words, "_") +} diff --git a/warehouse/transformer/internal/snakecase/snakecase_test.go b/warehouse/transformer/internal/snakecase/snakecase_test.go new file mode 100644 index 0000000000..6a8a18c391 --- /dev/null +++ b/warehouse/transformer/internal/snakecase/snakecase_test.go @@ -0,0 +1,232 @@ +package snakecase + +import ( + "strings" + "testing" + + "github.com/samber/lo" + "github.com/stretchr/testify/require" +) + +var burredLetters = []rune{ + // Latin-1 Supplement letters. 
+ '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd8', + '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', '\xe0', + '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', '\xe8', + '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', '\xf0', + '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf8', '\xf9', + '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', + + // Latin Extended-A letters. + '\u0100', '\u0101', '\u0102', '\u0103', '\u0104', '\u0105', '\u0106', '\u0107', + '\u0108', '\u0109', '\u010a', '\u010b', '\u010c', '\u010d', '\u010e', '\u010f', + '\u0110', '\u0111', '\u0112', '\u0113', '\u0114', '\u0115', '\u0116', '\u0117', + '\u0118', '\u0119', '\u011a', '\u011b', '\u011c', '\u011d', '\u011e', '\u011f', + '\u0120', '\u0121', '\u0122', '\u0123', '\u0124', '\u0125', '\u0126', '\u0127', + '\u0128', '\u0129', '\u012a', '\u012b', '\u012c', '\u012d', '\u012e', '\u012f', + '\u0130', '\u0131', '\u0132', '\u0133', '\u0134', '\u0135', '\u0136', '\u0137', + '\u0138', '\u0139', '\u013a', '\u013b', '\u013c', '\u013d', '\u013e', '\u013f', + '\u0140', '\u0141', '\u0142', '\u0143', '\u0144', '\u0145', '\u0146', '\u0147', + '\u0148', '\u0149', '\u014a', '\u014b', '\u014c', '\u014d', '\u014e', '\u014f', + '\u0150', '\u0151', '\u0152', '\u0153', '\u0154', '\u0155', '\u0156', '\u0157', + '\u0158', '\u0159', '\u015a', '\u015b', '\u015c', '\u015d', '\u015e', '\u015f', + '\u0160', '\u0161', '\u0162', '\u0163', '\u0164', '\u0165', '\u0166', '\u0167', + '\u0168', '\u0169', '\u016a', '\u016b', '\u016c', '\u016d', '\u016e', '\u016f', + '\u0170', '\u0171', '\u0172', '\u0173', '\u0174', '\u0175', '\u0176', '\u0177', + '\u0178', '\u0179', '\u017a', '\u017b', '\u017c', '\u017d', '\u017e', '\u017f', +} + +func TestToSnakeCase(t *testing.T) { + t.Run("extractWords", func(t *testing.T) { + t.Run("should match words containing Latin Unicode 
letters", func(t *testing.T) { + for _, letter := range burredLetters { + require.Equal(t, []string{string(letter)}, extractWords(string(letter))) + } + }) + t.Run("should work with compound words", func(t *testing.T) { + require.Equal(t, []string{"12", "ft"}, extractWords("12ft")) + require.Equal(t, []string{"aeiou", "Are", "Vowels"}, extractWords("aeiouAreVowels")) + require.Equal(t, []string{"enable", "6", "h", "format"}, extractWords("enable 6h format")) + require.Equal(t, []string{"enable", "24", "H", "format"}, extractWords("enable 24H format")) + require.Equal(t, []string{"is", "ISO", "8601"}, extractWords("isISO8601")) + require.Equal(t, []string{"LETTERS", "Aeiou", "Are", "Vowels"}, extractWords("LETTERSAeiouAreVowels")) + require.Equal(t, []string{"too", "Legit", "2", "Quit"}, extractWords("tooLegit2Quit")) + require.Equal(t, []string{"walk", "500", "Miles"}, extractWords("walk500Miles")) + require.Equal(t, []string{"xhr", "2", "Request"}, extractWords("xhr2Request")) + require.Equal(t, []string{"XML", "Http"}, extractWords("XMLHttp")) + require.Equal(t, []string{"Xml", "HTTP"}, extractWords("XmlHTTP")) + require.Equal(t, []string{"Xml", "Http"}, extractWords("XmlHttp")) + }) + t.Run("should work with compound words containing diacritical marks", func(t *testing.T) { + require.Equal(t, []string{"LETTERS", "Æiou", "Are", "Vowels"}, extractWords("LETTERSÆiouAreVowels")) + require.Equal(t, []string{"æiou", "Are", "Vowels"}, extractWords("æiouAreVowels")) + require.Equal(t, []string{"æiou", "2", "Consonants"}, extractWords("æiou2Consonants")) + }) + t.Run("should not treat contractions as separate words", func(t *testing.T) { + for _, apos := range []string{"'", string('\u2019')} { + t.Run("ToLower", func(t *testing.T) { + for _, postfix := range []string{"d", "ll", "m", "re", "s", "t", "ve"} { + input := "a b" + apos + postfix + " c" + actual := extractWords(strings.ToLower(input)) + expected := lo.Map([]string{"a", "b" + apos + postfix, "c"}, func(item 
string, index int) string { + return strings.ToLower(item) + }) + require.Equal(t, expected, actual) + } + }) + t.Run("ToUpper", func(t *testing.T) { + for _, postfix := range []string{"d", "ll", "m", "re", "s", "t", "ve"} { + input := "a b" + apos + postfix + " c" + actual := extractWords(strings.ToUpper(input)) + expected := lo.Map([]string{"a", "b" + apos + postfix, "c"}, func(item string, index int) string { + return strings.ToUpper(item) + }) + require.Equal(t, expected, actual) + } + }) + } + }) + t.Run("should not treat ordinal numbers as separate words", func(t *testing.T) { + ordinals := []string{"1st", "2nd", "3rd", "4th"} + for _, ordinal := range ordinals { + expected := []string{strings.ToLower(ordinal)} + actual := extractWords(strings.ToLower(ordinal)) + require.Equal(t, expected, actual) + + expected = []string{strings.ToUpper(ordinal)} + actual = extractWords(strings.ToUpper(ordinal)) + require.Equal(t, expected, actual) + } + }) + }) + t.Run("extractWords", func(t *testing.T) { + t.Run("should match words containing Latin Unicode letters", func(t *testing.T) { + for _, letter := range burredLetters { + require.Equal(t, []string{string(letter)}, extractWordsWithNumbers(string(letter))) + } + }) + t.Run("should work with compound words", func(t *testing.T) { + require.Equal(t, []string{"12ft"}, extractWordsWithNumbers("12ft")) + require.Equal(t, []string{"aeiou", "Are", "Vowels"}, extractWordsWithNumbers("aeiouAreVowels")) + require.Equal(t, []string{"enable", "6h", "format"}, extractWordsWithNumbers("enable 6h format")) + require.Equal(t, []string{"enable", "24H", "format"}, extractWordsWithNumbers("enable 24H format")) + require.Equal(t, []string{"is", "ISO8601"}, extractWordsWithNumbers("isISO8601")) + require.Equal(t, []string{"LETTERS", "Aeiou", "Are", "Vowels"}, extractWordsWithNumbers("LETTERSAeiouAreVowels")) + require.Equal(t, []string{"too", "Legit2", "Quit"}, extractWordsWithNumbers("tooLegit2Quit")) + require.Equal(t, []string{"walk500", 
"Miles"}, extractWordsWithNumbers("walk500Miles")) + require.Equal(t, []string{"xhr2", "Request"}, extractWordsWithNumbers("xhr2Request")) + require.Equal(t, []string{"XML", "Http"}, extractWordsWithNumbers("XMLHttp")) + require.Equal(t, []string{"Xml", "HTTP"}, extractWordsWithNumbers("XmlHTTP")) + require.Equal(t, []string{"Xml", "Http"}, extractWordsWithNumbers("XmlHttp")) + }) + t.Run("should work with compound words containing diacritical marks", func(t *testing.T) { + require.Equal(t, []string{"LETTERS", "Æiou", "Are", "Vowels"}, extractWordsWithNumbers("LETTERSÆiouAreVowels")) + require.Equal(t, []string{"æiou", "Are", "Vowels"}, extractWordsWithNumbers("æiouAreVowels")) + require.Equal(t, []string{"æiou2", "Consonants"}, extractWordsWithNumbers("æiou2Consonants")) + }) + t.Run("should not treat contractions as separate words", func(t *testing.T) { + for _, apos := range []string{"'", string('\u2019')} { + t.Run("ToLower", func(t *testing.T) { + for _, postfix := range []string{"d", "ll", "m", "re", "s", "t", "ve"} { + input := "a b" + apos + postfix + " c" + actual := extractWordsWithNumbers(strings.ToLower(input)) + expected := lo.Map([]string{"a", "b" + apos + postfix, "c"}, func(item string, index int) string { + return strings.ToLower(item) + }) + require.Equal(t, expected, actual) + } + }) + t.Run("ToUpper", func(t *testing.T) { + for _, postfix := range []string{"d", "ll", "m", "re", "s", "t", "ve"} { + input := "a b" + apos + postfix + " c" + actual := extractWordsWithNumbers(strings.ToUpper(input)) + expected := lo.Map([]string{"a", "b" + apos + postfix, "c"}, func(item string, index int) string { + return strings.ToUpper(item) + }) + require.Equal(t, expected, actual) + } + }) + } + }) + t.Run("should not treat ordinal numbers as separate words", func(t *testing.T) { + ordinals := []string{"1st", "2nd", "3rd", "4th"} + for _, ordinal := range ordinals { + expected := []string{strings.ToLower(ordinal)} + actual := 
extractWordsWithNumbers(strings.ToLower(ordinal)) + require.Equal(t, expected, actual) + + expected = []string{strings.ToUpper(ordinal)} + actual = extractWordsWithNumbers(strings.ToUpper(ordinal)) + require.Equal(t, expected, actual) + } + }) + }) + t.Run("ToSnakeCase", func(t *testing.T) { + t.Run("should remove Latin mathematical operators", func(t *testing.T) { + require.Equal(t, ToSnakeCase(string('\xd7')), "") + }) + t.Run("should coerce `string` to a string", func(t *testing.T) { + require.Equal(t, ToSnakeCase("foo bar"), "foo_bar") + }) + t.Run("should return an empty string for empty values", func(t *testing.T) { + require.Equal(t, ToSnakeCase(""), "") + }) + t.Run("should remove contraction apostrophes", func(t *testing.T) { + for _, apos := range []string{"'", string('\u2019')} { + for _, postfix := range []string{"d", "ll", "m", "re", "s", "t", "ve"} { + input := "a b" + apos + postfix + " c" + require.Equal(t, "a_b"+postfix+"_c", ToSnakeCase(input)) + } + } + }) + t.Run("should convert `string` to caseName case", func(t *testing.T) { + testCases := []string{"foo bar", "Foo bar", "foo Bar", "Foo Bar", "FOO BAR", "fooBar", "--foo-bar--", "__foo_bar__"} + expected := []string{"foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar"} + for i, input := range testCases { + require.Equal(t, expected[i], ToSnakeCase(input)) + } + }) + t.Run("should handle double-converting strings", func(t *testing.T) { + testCases := []string{"foo bar", "Foo bar", "foo Bar", "Foo Bar", "FOO BAR", "fooBar", "--foo-bar--", "__foo_bar__"} + expected := []string{"foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar"} + for i, input := range testCases { + require.Equal(t, expected[i], ToSnakeCase(ToSnakeCase(input))) + } + }) + }) + t.Run("ToSnakeCaseWithNumbers", func(t *testing.T) { + t.Run("should remove Latin mathematical operators", func(t *testing.T) { + require.Equal(t, ToSnakeCaseWithNumbers(string('\xd7')), "") 
+ }) + t.Run("should coerce `string` to a string", func(t *testing.T) { + require.Equal(t, ToSnakeCaseWithNumbers("foo bar"), "foo_bar") + }) + t.Run("should return an empty string for empty values", func(t *testing.T) { + require.Equal(t, ToSnakeCaseWithNumbers(""), "") + }) + t.Run("should remove contraction apostrophes", func(t *testing.T) { + for _, apos := range []string{"'", string('\u2019')} { + for _, postfix := range []string{"d", "ll", "m", "re", "s", "t", "ve"} { + input := "a b" + apos + postfix + " c" + require.Equal(t, "a_b"+postfix+"_c", ToSnakeCaseWithNumbers(input)) + } + } + }) + t.Run("should convert `string` to caseName case", func(t *testing.T) { + testCases := []string{"foo bar", "Foo bar", "foo Bar", "Foo Bar", "FOO BAR", "fooBar", "--foo-bar--", "__foo_bar__"} + expected := []string{"foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar"} + for i, input := range testCases { + require.Equal(t, expected[i], ToSnakeCaseWithNumbers(input)) + } + }) + t.Run("should handle double-converting strings", func(t *testing.T) { + testCases := []string{"foo bar", "Foo bar", "foo Bar", "Foo Bar", "FOO BAR", "fooBar", "--foo-bar--", "__foo_bar__"} + expected := []string{"foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar", "foo_bar"} + for i, input := range testCases { + require.Equal(t, expected[i], ToSnakeCaseWithNumbers(ToSnakeCaseWithNumbers(input))) + } + }) + }) +} diff --git a/warehouse/transformer/internal/utils/stringlikeobject.go b/warehouse/transformer/internal/utils/stringlikeobject.go new file mode 100644 index 0000000000..49b505facb --- /dev/null +++ b/warehouse/transformer/internal/utils/stringlikeobject.go @@ -0,0 +1,62 @@ +package utils + +import ( + "sort" + "strconv" + "strings" + + "github.com/samber/lo" +) + +func IsStringLikeObject(obj map[string]any) bool { + if len(obj) == 0 { + return false + } + + minKey, maxKey := int(^uint(0)>>1), -1 // Initialize minKey as max int, maxKey as 
-1 + + for key, value := range obj { + if !isNonNegativeInteger(key) { + return false + } + + strValue, ok := value.(string) + if !ok || len(strValue) != 1 { + return false + } + + numKey, err := strconv.Atoi(key) + if err != nil { + return false + } + + minKey = min(minKey, numKey) + maxKey = max(maxKey, numKey) + } + + for i := minKey; i <= maxKey; i++ { + if _, exists := obj[strconv.Itoa(i)]; !exists { + return false + } + } + return (minKey == 0 || minKey == 1) && maxKey-minKey+1 == len(obj) +} + +func isNonNegativeInteger(str string) bool { + return lo.EveryBy([]rune(str), func(c rune) bool { + return c >= '0' && c <= '9' + }) +} + +func StringLikeObjectToString(obj map[string]any) string { + keys := lo.Map(lo.Keys(obj), func(key string, _ int) int { + numKey, _ := strconv.Atoi(key) + return numKey + }) + sort.Ints(keys) + + values := lo.Map(keys, func(key, _ int) string { + return ToString(obj[strconv.Itoa(key)]) + }) + return strings.Join(values, "") +} diff --git a/warehouse/transformer/internal/utils/stringlikeobject_test.go b/warehouse/transformer/internal/utils/stringlikeobject_test.go new file mode 100644 index 0000000000..1fb30444eb --- /dev/null +++ b/warehouse/transformer/internal/utils/stringlikeobject_test.go @@ -0,0 +1,141 @@ +package utils + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestIsStringLikeObject(t *testing.T) { + testCases := []struct { + name string + input map[string]any + expected bool + }{ + { + name: "empty map", + input: map[string]any{}, + expected: false, + }, + { + name: "valid string-like object with 0 and 1", + input: map[string]any{ + "0": "a", + "1": "b", + }, + expected: true, + }, + { + name: "valid string-like object with 1 and 2", + input: map[string]any{ + "1": "x", + "2": "y", + }, + expected: true, + }, + { + name: "empty key", + input: map[string]any{ + "": "", + "1": "x", + "2": "y", + }, + expected: false, + }, + { + name: "invalid key type", + input: map[string]any{ + "0": "a", + 
"one": "b", + }, + expected: false, + }, + { + name: "value is not a string", + input: map[string]any{ + "0": 123, + }, + expected: false, + }, + { + name: "value string length not 1", + input: map[string]any{ + "0": "ab", + }, + expected: false, + }, + { + name: "missing key (1) in sequence", + input: map[string]any{ + "0": "a", + "2": "b", + }, + expected: false, + }, + { + name: "non-consecutive keys (1 ia missing)", + input: map[string]any{ + "0": "a", + "2": "b", + "3": "c", + }, + expected: false, + }, + { + name: "valid string-like object with non-negative integer keys", + input: map[string]any{ + "0": "a", + "1": "b", + "2": "c", + }, + expected: true, + }, + { + name: "valid string-like object with gaps (at 3)", + input: map[string]any{ + "1": "x", + "2": "y", + "4": "z", + }, + expected: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, IsStringLikeObject(tc.input)) + }) + } +} + +func TestStringLikeObjectToString(t *testing.T) { + testCases := []struct { + name string + input map[string]any + expected any + }{ + { + name: "valid string-like object with non-negative integer keys", + input: map[string]any{ + "0": "a", + "1": "b", + "2": "c", + }, + expected: "abc", + }, + { + name: "valid string-like object with 1 and 2", + input: map[string]any{ + "1": "x", + "2": "y", + }, + expected: "xy", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, StringLikeObjectToString(tc.input)) + }) + } +} diff --git a/warehouse/transformer/internal/utils/utils.go b/warehouse/transformer/internal/utils/utils.go new file mode 100644 index 0000000000..ce4b85523e --- /dev/null +++ b/warehouse/transformer/internal/utils/utils.go @@ -0,0 +1,152 @@ +package utils + +import ( + "fmt" + "regexp" + "strings" + "time" + + "github.com/araddon/dateparse" + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-server/utils/misc" + 
"github.com/rudderlabs/rudder-server/warehouse/internal/model" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +var ( + rudderCreatedTables = sliceToMap([]string{"tracks", "pages", "screens", "aliases", "groups", "accounts"}) + rudderIsolatedTables = sliceToMap([]string{"users", "identifies"}) + sourceCategoriesToUseRecordID = sliceToMap([]string{"cloud", "singer-protocol"}) + identityEnabledWarehouses = sliceToMap([]string{whutils.SNOWFLAKE, whutils.BQ}) + destinationSupportJSONPathAsPartOfConfig = sliceToMap([]string{whutils.POSTGRES, whutils.RS, whutils.SNOWFLAKE, whutils.SnowpipeStreaming, whutils.BQ}) + + supportedJSONPathPrefixes = []string{"track.", "identify.", "page.", "screen.", "alias.", "group.", "extract."} + fullEventColumnTypeByDestType = map[string]string{ + whutils.SNOWFLAKE: model.JSONDataType, + whutils.SnowpipeStreaming: model.JSONDataType, + whutils.RS: model.TextDataType, + whutils.BQ: model.StringDataType, + whutils.POSTGRES: model.JSONDataType, + whutils.MSSQL: model.JSONDataType, + whutils.AzureSynapse: model.JSONDataType, + whutils.CLICKHOUSE: model.StringDataType, + whutils.S3Datalake: model.StringDataType, + whutils.DELTALAKE: model.StringDataType, + whutils.GCSDatalake: model.StringDataType, + whutils.AzureDatalake: model.StringDataType, + } + + reDateTime = regexp.MustCompile( + `^([+-]?\d{4})((-)((0[1-9]|1[0-2])(-([12]\d|0[1-9]|3[01])))([T\s]((([01]\d|2[0-3])((:)[0-5]\d))(:\d+)?)?(:[0-5]\d([.]\d+)?)?([zZ]|([+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)$`, + ) + + minTimeInMs = time.Date(1, 1, 1, 0, 0, 0, 0, time.UTC) + maxTimeInMs = time.Date(9999, 12, 31, 23, 59, 59, 999000000, time.UTC) +) + +func sliceToMap(slice []string) map[string]struct{} { + return lo.SliceToMap(slice, func(item string) (string, struct{}) { + return item, struct{}{} + }) +} + +func IsDataLake(destType string) bool { + switch destType { + case whutils.S3Datalake, whutils.GCSDatalake, whutils.AzureDatalake: + return true + default: + return false + } 
+} + +func IsRudderSources(event map[string]any) bool { + return event["channel"] == "sources" || event["CHANNEL"] == "sources" +} + +func IsRudderCreatedTable(tableName string) bool { + _, ok := rudderCreatedTables[strings.ToLower(tableName)] + return ok +} + +func IsRudderIsolatedTable(tableName string) bool { + _, ok := rudderIsolatedTables[strings.ToLower(tableName)] + return ok +} + +func IsObject(val any) bool { + _, ok := val.(map[string]any) + return ok +} + +func IsIdentityEnabled(destType string) bool { + _, ok := identityEnabledWarehouses[destType] + return ok +} + +func CanUseRecordID(sourceCategory string) bool { + _, ok := sourceCategoriesToUseRecordID[strings.ToLower(sourceCategory)] + return ok +} + +func HasJSONPathPrefix(jsonPath string) bool { + lowerJSONPath := strings.ToLower(jsonPath) + for _, prefix := range supportedJSONPathPrefixes { + if strings.HasPrefix(lowerJSONPath, prefix) { + return true + } + } + return false +} + +func GetFullEventColumnTypeByDestType(destType string) string { + return fullEventColumnTypeByDestType[destType] +} + +func ValidTimestamp(input string) bool { + if !reDateTime.MatchString(input) { + return false + } + t, err := dateparse.ParseAny(input) + if err != nil { + return false + } + return !t.Before(minTimeInMs) && !t.After(maxTimeInMs) +} + +func ToTimestamp(val any) any { + if strVal, ok := val.(string); ok { + t, err := dateparse.ParseAny(strVal) + if err != nil { + return val + } + return t.UTC().Format(misc.RFC3339Milli) + } + return val +} + +// ToString converts any value to a string representation. +// - If the value is nil, it returns an empty string. +// - If the value implements the fmt.Stringer interface, it returns the result of the String() method. +// - Otherwise, it returns a string representation using fmt.Sprintf. 
+func ToString(value interface{}) string { + if value == nil { + return "" + } + if str, ok := value.(fmt.Stringer); ok { + return str.String() + } + return fmt.Sprintf("%v", value) +} + +// IsBlank checks if the given value is considered "blank." +// - A value is considered blank if its string representation is an empty string. +// - The function first converts the value to its string representation using ToString and checks if its length is zero. +func IsBlank(value interface{}) bool { + return len(ToString(value)) == 0 +} + +func IsJSONPathSupportedAsPartOfConfig(destType string) bool { + _, ok := destinationSupportJSONPathAsPartOfConfig[destType] + return ok +} diff --git a/warehouse/transformer/internal/utils/utils_test.go b/warehouse/transformer/internal/utils/utils_test.go new file mode 100644 index 0000000000..b41339ce71 --- /dev/null +++ b/warehouse/transformer/internal/utils/utils_test.go @@ -0,0 +1,149 @@ +package utils + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestIsRudderSources(t *testing.T) { + testCases := []struct { + name string + event map[string]any + want bool + }{ + {name: "channel is sources", event: map[string]any{"channel": "sources"}, want: true}, + {name: "CHANNEL is sources", event: map[string]any{"CHANNEL": "sources"}, want: true}, + {name: "channel is not sources", event: map[string]any{"channel": "not-sources"}, want: false}, + {name: "CHANNEL is not sources", event: map[string]any{"CHANNEL": "not-sources"}, want: false}, + {name: "empty event", event: map[string]any{}, want: false}, + {name: "nil event", event: nil, want: false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.want, IsRudderSources(tc.event)) + }) + } +} + +func TestIsObject(t *testing.T) { + testCases := []struct { + name string + val any + want bool + }{ + {name: "map", val: map[string]any{}, want: true}, + 
{name: "not map", val: "not map", want: false}, + {name: "nil", val: nil, want: false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.want, IsObject(tc.val)) + }) + } +} + +func TestFullEventColumnTypeByDestTypeMapping(t *testing.T) { + for _, destType := range whutils.WarehouseDestinations { + require.NotNilf(t, fullEventColumnTypeByDestType[destType], "Full event column type not found for destination type %s", destType) + } +} + +func TestValidTimestamp(t *testing.T) { + testCases := []struct { + name, timestamp string + expected bool + }{ + {name: "Timestamp without timezone", timestamp: "2021-06-01T00:00:00.000Z", expected: true}, + {name: "Timestamp with timezone", timestamp: "2021-06-01T00:00:00.000+00:00", expected: true}, + {name: "Invalid timestamp", timestamp: "invalid-timestamp", expected: false}, + {name: "Invalid RFC3339 timestamp (day-month-year)", timestamp: "23-05-2024T10:00:00Z", expected: false}, + {name: "Invalid RFC3339 timestamp (Invalid hour)", timestamp: "2024-05-23T25:00:00Z", expected: false}, + {name: "Empty timestamp", timestamp: "", expected: false}, + {name: "Timestamps at bounds (minTimeInMs)", timestamp: "0001-01-01T00:00:00.000Z", expected: true}, + {name: "Timestamps at bounds (maxTimeInMs)", timestamp: "9999-12-31T23:59:59.999Z", expected: true}, + {name: "Time-only", timestamp: "05:23:59.244Z", expected: false}, + {name: "Date Time only", timestamp: "2021-06-01 00:00:00", expected: true}, + {name: "Date-only", timestamp: "2023-06-14", expected: true}, + {name: "Positive year and time input", timestamp: "+2023-06-14T05:23:59.244Z", expected: false}, + {name: "Negative year and time input", timestamp: "-2023-06-14T05:23:59.244Z", expected: false}, + {name: "Malicious string input should return false", timestamp: 
"%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216%u002e%u002e%u2216Windows%u2216win%u002ein", expected: false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, ValidTimestamp(tc.timestamp)) + }) + } +} + +type Person struct { + Name string + Age int +} + +func (p Person) String() string { + return fmt.Sprintf("Person(Name: %s, Age: %d)", p.Name, p.Age) +} + +func TestToString(t *testing.T) { + testCases := []struct { + input interface{} + expected string + }{ + {nil, ""}, // nil + {"", ""}, // empty string + {"Hello", "Hello"}, // non-empty string + {123, "123"}, // int + {123.45, "123.45"}, // float + {true, "true"}, // bool true + {false, "false"}, // bool false + {[]int{1, 2, 3}, "[1 2 3]"}, // slice + {map[string]int{"key": 1}, "map[key:1]"}, // map + {struct{}{}, "{}"}, // empty struct + {struct{ Field string }{"value"}, "{value}"}, // struct with field + {Person{Name: "Alice", Age: 30}, "Person(Name: Alice, Age: 30)"}, // struct with String method + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("ToString(%v)", tc.input), func(t *testing.T) { + require.Equal(t, tc.expected, ToString(tc.input)) + }) + } +} + +func TestIsBlank(t *testing.T) { + testCases := []struct { + name string + input interface{} + expected bool + }{ + {"NilValue", nil, true}, // nil + {"EmptyString", "", true}, // empty string + {"NonEmptyString", "Hello", false}, // non-empty string + {"IntZero", 0, false}, // integer zero + {"IntNonZero", 123, false}, // non-zero integer + {"FloatZero", 0.0, false}, // float zero + {"FloatNonZero", 123.45, false}, // non-zero float + {"BoolFalse", false, false}, // boolean false + {"BoolTrue", 
true, false}, // boolean true + {"EmptySlice", []int{}, false}, // empty slice + {"NonEmptySlice", []int{1, 2, 3}, false}, // non-empty slice + {"EmptyMap", map[string]int{}, false}, // empty map + {"NonEmptyMap", map[string]int{"key": 1}, false}, // non-empty map + {"EmptyStruct", struct{}{}, false}, // empty struct + {"StructWithField", struct{ Field string }{"value"}, false}, // non-empty struct + {"StructWithMethod", Person{Name: "Alice", Age: 30}, false}, // struct with String method + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, IsBlank(tc.input)) + }) + } +} diff --git a/warehouse/transformer/jsonpath.go b/warehouse/transformer/jsonpath.go new file mode 100644 index 0000000000..9b416aabbf --- /dev/null +++ b/warehouse/transformer/jsonpath.go @@ -0,0 +1,32 @@ +package transformer + +import ( + "strings" + + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" +) + +func extractJSONPathInfo(jsonPaths []string) jsonPathInfo { + jp := jsonPathInfo{ + keysMap: make(map[string]int), + legacyKeysMap: make(map[string]int), + } + for _, jsonPath := range jsonPaths { + if trimmedJSONPath := strings.TrimSpace(jsonPath); trimmedJSONPath != "" { + jp.processJSONPath(trimmedJSONPath) + } + } + return jp +} + +func (jp *jsonPathInfo) processJSONPath(jsonPath string) { + splitPaths := strings.Split(jsonPath, ".") + key := strings.Join(splitPaths, "_") + pos := len(splitPaths) - 1 + + if utils.HasJSONPathPrefix(jsonPath) { + jp.keysMap[key] = pos + return + } + jp.legacyKeysMap[key] = pos +} diff --git a/warehouse/transformer/jsonpath_test.go b/warehouse/transformer/jsonpath_test.go new file mode 100644 index 0000000000..69b3e39053 --- /dev/null +++ b/warehouse/transformer/jsonpath_test.go @@ -0,0 +1,148 @@ +package transformer + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestExtractJSONPathInfo(t *testing.T) { + testCases := []struct { + name string + 
jsonPaths []string + expected jsonPathInfo + }{ + { + name: "Valid JSON paths with track prefix", + jsonPaths: []string{"track.properties.name", "track.properties.age", "properties.name", "properties.age"}, + expected: jsonPathInfo{ + keysMap: map[string]int{"track_properties_name": 2, "track_properties_age": 2}, + legacyKeysMap: map[string]int{"properties_name": 1, "properties_age": 1}, + }, + }, + { + name: "Valid JSON paths with identify prefix", + jsonPaths: []string{"identify.traits.address.city", "identify.traits.address.zip", "traits.address.city", "traits.address.zip"}, + expected: jsonPathInfo{ + keysMap: map[string]int{"identify_traits_address_city": 3, "identify_traits_address_zip": 3}, + legacyKeysMap: map[string]int{"traits_address_city": 2, "traits_address_zip": 2}, + }, + }, + { + name: "Whitespace and empty path", + jsonPaths: []string{" ", "track.properties.name", ""}, + expected: jsonPathInfo{ + keysMap: map[string]int{"track_properties_name": 2}, + legacyKeysMap: make(map[string]int), + }, + }, + { + name: "Unknown prefix JSON paths", + jsonPaths: []string{"unknown.prefix.eventType.name", "unknown.prefix.eventType.value"}, + expected: jsonPathInfo{ + keysMap: make(map[string]int), + legacyKeysMap: map[string]int{"unknown_prefix_eventType_name": 3, "unknown_prefix_eventType_value": 3}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expected, extractJSONPathInfo(tc.jsonPaths)) + }) + } +} + +func TestIsValidJSONPathKey(t *testing.T) { + testCases := []struct { + name, key string + level int + isValid bool + }{ + { + name: "Valid JSON path key with track prefix", + key: "track_properties_name", + level: 2, + isValid: true, + }, + { + name: "Valid JSON path key with identify prefix", + key: "identify_traits_address_city", + level: 3, + isValid: true, + }, + { + name: "Valid JSON path key with unknown prefix", + key: "unknown_prefix_eventType_name", + level: 3, + isValid: false, + }, + { + 
name: "Invalid JSON path key", + key: "invalid_key", + level: 0, + isValid: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + pathsInfo := extractJSONPathInfo( + []string{ + "track.properties.name", "properties.name", + "identify.traits.address.city", "traits.address.city", + "unknown.prefix.eventType.name", + }, + ) + require.Equal(t, tc.isValid, isValidJSONPathKey(tc.key, tc.level, pathsInfo.keysMap)) + }) + } +} + +func TestIsValidLegacyJSONPathKey(t *testing.T) { + testCases := []struct { + name, key, eventType string + level int + isValid bool + }{ + { + name: "Valid JSON path key with track prefix", + key: "properties_name", + eventType: "track", + level: 1, + isValid: true, + }, + { + name: "Valid JSON path key with identify prefix", + key: "traits_address_city", + eventType: "identify", + level: 2, + isValid: false, + }, + { + name: "Valid JSON path key with unknown prefix", + key: "unknown_prefix_eventType_name", + eventType: "track", + level: 3, + isValid: true, + }, + { + name: "Invalid JSON path key", + key: "invalid_key", + eventType: "track", + level: 0, + isValid: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + pathsInfo := extractJSONPathInfo( + []string{ + "track.properties.name", "properties.name", + "identify.traits.address.city", "traits.address.city", + "unknown.prefix.eventType.name", + }, + ) + require.Equal(t, tc.isValid, isValidLegacyJSONPathKey(tc.eventType, tc.key, tc.level, pathsInfo.legacyKeysMap)) + }) + } +} diff --git a/warehouse/transformer/safe.go b/warehouse/transformer/safe.go new file mode 100644 index 0000000000..cd32919171 --- /dev/null +++ b/warehouse/transformer/safe.go @@ -0,0 +1,192 @@ +package transformer + +import ( + "fmt" + "regexp" + "strings" + + "github.com/iancoleman/strcase" + + "github.com/rudderlabs/rudder-go-kit/config" + + "github.com/rudderlabs/rudder-server/utils/misc" + 
"github.com/rudderlabs/rudder-server/warehouse/transformer/internal/reservedkeywords" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/response" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/snakecase" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +var ( + reLeadingUnderscores = regexp.MustCompile(`^_*`) + reNonAlphanumericOrDollar = regexp.MustCompile(`[^a-zA-Z0-9\\$]`) + reStartsWithLetterOrUnderscore = regexp.MustCompile(`^[a-zA-Z_].*`) +) + +// SafeNamespace returns a safe namespace for the given destination type and input namespace. +// The namespace is transformed by removing special characters, converting to snake case, +// and ensuring its safe (not starting with a digit, not empty, and not a reserved keyword). +func SafeNamespace(conf *config.Config, destType, input string) string { + namespace := strings.Join(extractAlphanumericValues(input), "_") + + if !shouldSkipSnakeCasing(conf, destType) { + namespace = strcase.ToSnake(namespace) + } + if startsWithDigit(namespace) { + namespace = "_" + namespace + } + if namespace == "" { + namespace = "stringempty" + } + if reservedkeywords.IsNamespace(destType, namespace) { + namespace = "_" + namespace + } + return misc.TruncateStr(namespace, 127) +} + +func extractAlphanumericValues(input string) []string { + var ( + extractedValues []string + currentValue strings.Builder + ) + + for _, c := range input { + if isAlphaAlphanumeric(c) { + currentValue.WriteRune(c) + } else if currentValue.Len() > 0 { + extractedValues = append(extractedValues, currentValue.String()) + currentValue.Reset() + } + } + if currentValue.Len() > 0 { + extractedValues = append(extractedValues, currentValue.String()) + } + return extractedValues +} + +func isAlphaAlphanumeric(c int32) bool { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') +} + +func 
shouldSkipSnakeCasing(conf *config.Config, destType string) bool { + configKey := fmt.Sprintf("Warehouse.%s.skipNamespaceSnakeCasing", whutils.WHDestNameMap[destType]) + return conf.GetBool(configKey, false) +} + +// SafeTableName processes the input table name based on the destination type and integration options. +// It applies case conversion, truncation, reserved keyword escaping, and table name length restrictions. +// For data lake providers, it avoids trimming the table name. +func SafeTableName(destType string, options integrationsOptions, tableName string) (string, error) { + if len(tableName) == 0 { + return "", response.ErrEmptyTableName + } + return safeName(destType, options, tableName), nil +} + +// SafeColumnName processes the input column name based on the destination type and integration options. +// It applies case conversion, truncation, reserved keyword escaping, and column name length restrictions. +// For data lake providers, it avoids trimming the column name. +func SafeColumnName(destType string, options integrationsOptions, columnName string) (string, error) { + if len(columnName) == 0 { + return "", response.ErrEmptyColumnName + } + return safeName(destType, options, columnName), nil +} + +func safeName(destType string, options integrationsOptions, name string) string { + switch destType { + case whutils.SNOWFLAKE, whutils.SnowpipeStreaming: + name = strings.ToUpper(name) + case whutils.POSTGRES: + name = misc.TruncateStr(name, 63) + name = strings.ToLower(name) + default: + name = strings.ToLower(name) + } + + if !options.skipReservedKeywordsEscaping && reservedkeywords.IsTableOrColumn(destType, name) { + name = "_" + name + } + if utils.IsDataLake(destType) { + return name + } + return misc.TruncateStr(name, 127) +} + +// TransformTableName applies transformation to the input table name based on the destination type and configuration options. +// If `useBlendoCasing` is enabled, it converts the table name to lowercase and trims spaces. 
+// Otherwise, it applies a more general transformation using the `transformName` function. +func TransformTableName(integrationsOptions integrationsOptions, destConfigOptions destConfigOptions, tableName string) string { + if integrationsOptions.useBlendoCasing { + return strings.TrimSpace(strings.ToLower(tableName)) + } + name := strings.Join(extractAlphanumericValues(tableName), "_") + + var snakeCaseFn func(s string) string + if destConfigOptions.underscoreDivideNumbers { + snakeCaseFn = snakecase.ToSnakeCase + } else { + snakeCaseFn = snakecase.ToSnakeCaseWithNumbers + } + if strings.HasPrefix(tableName, "_") { + name = reLeadingUnderscores.FindString(tableName) + snakeCaseFn(reLeadingUnderscores.ReplaceAllString(name, "")) + } else { + name = snakeCaseFn(name) + } + if startsWithDigit(name) { + name = "_" + name + } + return name +} + +// TransformColumnName applies transformation to the input column name based on the destination type and configuration options. +// If `useBlendoCasing` is enabled, it transforms the column name into Blendo casing. +// Otherwise, it applies a more general transformation using the `transformName` function. 
+func TransformColumnName(destType string, integrationsOptions integrationsOptions, destConfigOptions destConfigOptions, columnName string) string { + if integrationsOptions.useBlendoCasing { + return transformNameToBlendoCase(destType, columnName) + } + + name := strings.Join(extractAlphanumericValues(columnName), "_") + + var snakeCaseFn func(s string) string + if destConfigOptions.underscoreDivideNumbers { + snakeCaseFn = snakecase.ToSnakeCase + } else { + snakeCaseFn = snakecase.ToSnakeCaseWithNumbers + } + if strings.HasPrefix(columnName, "_") { + name = reLeadingUnderscores.FindString(columnName) + snakeCaseFn(reLeadingUnderscores.ReplaceAllString(name, "")) + } else { + name = snakeCaseFn(name) + } + if startsWithDigit(name) { + name = "_" + name + } + if destType == whutils.POSTGRES { + name = misc.TruncateStr(name, 63) + } + return name +} + +func startsWithDigit(name string) bool { + if len(name) > 0 && (rune(name[0]) >= '0' && rune(name[0]) <= '9') { + return true + } + return false +} + +// transformNameToBlendoCase converts the input string into Blendo case format by replacing non-alphanumeric characters with underscores. +// If the name does not start with a letter or underscore, it adds a leading underscore. +// The name is truncated to 63 characters for Postgres, and the result is converted to lowercase. 
+func transformNameToBlendoCase(destType, name string) string { + key := reNonAlphanumericOrDollar.ReplaceAllString(name, "_") + + if !reStartsWithLetterOrUnderscore.MatchString(key) { + key = "_" + key + } + if destType == whutils.POSTGRES { + key = misc.TruncateStr(name, 63) + } + return strings.ToLower(key) +} diff --git a/warehouse/transformer/safe_test.go b/warehouse/transformer/safe_test.go new file mode 100644 index 0000000000..a316ed6c06 --- /dev/null +++ b/warehouse/transformer/safe_test.go @@ -0,0 +1,858 @@ +package transformer + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestSafeNamespace(t *testing.T) { + testCases := []struct { + destType, namespace, expected string + }{ + {destType: whutils.RS, namespace: "omega", expected: "omega"}, + {destType: whutils.RS, namespace: "omega v2 ", expected: "omega_v_2"}, + {destType: whutils.RS, namespace: "9mega", expected: "_9_mega"}, + {destType: whutils.RS, namespace: "mega&", expected: "mega"}, + {destType: whutils.RS, namespace: "ome$ga", expected: "ome_ga"}, + {destType: whutils.RS, namespace: "omega$", expected: "omega"}, + {destType: whutils.RS, namespace: "ome_ ga", expected: "ome_ga"}, + {destType: whutils.RS, namespace: "9mega________-________90", expected: "_9_mega_90"}, + {destType: whutils.RS, namespace: "Cízǔ", expected: "c_z"}, + {destType: whutils.RS, namespace: "Rudderstack", expected: "rudderstack"}, + {destType: whutils.RS, namespace: "___", expected: "stringempty"}, + {destType: whutils.RS, namespace: "group", expected: "_group"}, + {destType: whutils.RS, namespace: "k3_namespace", expected: "k_3_namespace"}, + {destType: whutils.BQ, namespace: "k3_namespace", expected: "k3_namespace"}, + } + for _, tc := range testCases { + c := config.New() + c.Set("Warehouse.bigquery.skipNamespaceSnakeCasing", true) + + require.Equal(t, tc.expected, 
SafeNamespace(c, tc.destType, tc.namespace)) + } +} + +func TestSafeTableName(t *testing.T) { + testCases := []struct { + name, destType, tableName, expected string + options integrationsOptions + expectError bool + }{ + { + name: "Empty table name", + destType: whutils.SNOWFLAKE, + tableName: "", + expected: "", + expectError: true, // Should return response + }, + { + name: "Snowflake uppercase conversion", + destType: whutils.SNOWFLAKE, + tableName: "myTable", + expected: "MYTABLE", + expectError: false, + }, + { + name: "Postgres truncation and lowercase", + destType: whutils.POSTGRES, + tableName: "ThisIsAReallyLongTableNameThatExceedsThe63CharacterLimitForPostgresTables", + expected: "thisisareallylongtablenamethatexceedsthe63characterlimitforpost", + expectError: false, + }, + { + name: "Lowercase conversion for other destTypes", + destType: whutils.BQ, + tableName: "MyTableName", + expected: "mytablename", + expectError: false, + }, + { + name: "Reserved keyword escaping", + destType: whutils.SNOWFLAKE, + tableName: "SELECT", + expected: "_SELECT", // Should escape reserved keyword + expectError: false, + }, + { + name: "No reserved keyword escaping with skip option", + destType: whutils.SNOWFLAKE, + tableName: "SELECT", + options: integrationsOptions{skipReservedKeywordsEscaping: true}, + expected: "SELECT", // Should not escape reserved keyword + expectError: false, + }, + { + name: "Data lake, no trimming", + destType: whutils.S3Datalake, + tableName: "ThisIsAReallyLongTableNameThatExceedsThe63CharacterLimitForDatalakeTables", + expected: "thisisareallylongtablenamethatexceedsthe63characterlimitfordatalaketables", + expectError: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := SafeTableName(tc.destType, tc.options, tc.tableName) + + if tc.expectError { + require.Error(t, err) + require.Empty(t, result) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, result) + } + }) + } +} + 
+func TestSafeColumnName(t *testing.T) { + testCases := []struct { + name, destType, columnName, expected string + options integrationsOptions + expectError bool + }{ + { + name: "Empty column name", + destType: whutils.SNOWFLAKE, + columnName: "", + expected: "", + expectError: true, // Should return response + }, + { + name: "Snowflake uppercase conversion", + destType: whutils.SNOWFLAKE, + columnName: "myColumn", + expected: "MYCOLUMN", + expectError: false, + }, + { + name: "Postgres truncation and lowercase", + destType: whutils.POSTGRES, + columnName: "ThisIsAReallyLongColumnNameThatExceedsThe63CharacterLimitForPostgresTables", + expected: "thisisareallylongcolumnnamethatexceedsthe63characterlimitforpos", + expectError: false, + }, + { + name: "Lowercase conversion for other destTypes", + destType: whutils.BQ, + columnName: "MyColumnName", + expected: "mycolumnname", + expectError: false, + }, + { + name: "Reserved keyword escaping", + destType: whutils.SNOWFLAKE, + columnName: "SELECT", + expected: "_SELECT", // Should escape reserved keyword + expectError: false, + }, + { + name: "No reserved keyword escaping with skip option", + destType: whutils.SNOWFLAKE, + columnName: "SELECT", + options: integrationsOptions{skipReservedKeywordsEscaping: true}, + expected: "SELECT", // Should not escape reserved keyword + expectError: false, + }, + { + name: "Data lake, no trimming", + destType: whutils.S3Datalake, + columnName: "ThisIsAReallyLongColumnNameThatExceedsThe63CharacterLimitForDatalakeColumns", + expected: "thisisareallylongcolumnnamethatexceedsthe63characterlimitfordatalakecolumns", + expectError: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := SafeColumnName(tc.destType, tc.options, tc.columnName) + + if tc.expectError { + require.Error(t, err) + require.Empty(t, result) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, result) + } + }) + } +} + +func TestTransformTableName(t 
*testing.T) { + testCases := []struct { + name string + integrationsOptions integrationsOptions + destConfigOptions destConfigOptions + tableName string + expected string + }{ + { + name: "Blendo casing - table name trimmed and lowercased", + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: " TableName ", + expected: "tablename", + }, + { + name: "Blendo casing - mixedcased to lowercased", + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "CaMeLcAsE", + expected: "camelcase", + }, + { + name: "Blendo casing - mixedcased to lowercased", + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "Table@Name!", + expected: "table@name!", + }, + { + name: "Blendo casing - alphanumeric", + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "TableName123", + expected: "tablename123", + }, + + { + name: "Standard casing - underscoreDivideNumbers(true) - remove symbols and join continuous letters and numbers with a single underscore", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "&4yasdfa(84224_fs9##_____*3q", + expected: "_4_yasdfa_84224_fs_9_3_q", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - omega to omega", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - omega v2 to omega_v_2", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega v2", + expected: "omega_v_2", + }, + { + 
name: "Standard casing - underscoreDivideNumbers(true) - prepend underscore if name starts with a number", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "9mega", + expected: "_9_mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - remove trailing special characters", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "mega&", + expected: "mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - replace special character in the middle with underscore", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "ome$ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - remove trailing $ character", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega$", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - spaces and special characters by converting to underscores", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "ome_ ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscores and hyphens by reducing to single underscores", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "9mega________-________90", + expected: "_9_mega_90", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - non-ASCII characters by converting them to underscores", + integrationsOptions: 
integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "Cízǔ", + expected: "c_z", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - CamelCase123Key to camel_case_123_key", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "CamelCase123Key", + expected: "camel_case_123_key", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - numbers and commas", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "path to $1,00,000", + expected: "path_to_1_00_000", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - no valid characters", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "@#$%", + expected: "", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - underscores between letters and numbers", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "test123", + expected: "test_123", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "abc123def456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "abc_123_def_456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - 
underscoreDivideNumbers(true) - single underscore", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "__abc_123_def_456", + expected: "__abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "_abc_123_def_456", + expected: "_abc_123_def_456", + }, + + { + name: "Standard casing - underscoreDivideNumbers(false) - remove symbols and join continuous letters and numbers with a single underscore", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "&4yasdfa(84224_fs9##_____*3q", + expected: "_4yasdfa_84224_fs9_3q", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - omega to omega", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - omega v2 to omega_v_2", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega v2", + expected: "omega_v2", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - prepend underscore if name starts with a number", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "9mega", + expected: "_9mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - remove trailing special characters", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: 
destConfigOptions{underscoreDivideNumbers: false}, + tableName: "mega&", + expected: "mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - replace special character in the middle with underscore", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "ome$ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - remove trailing $ character", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega$", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - spaces and special characters by converting to underscores", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "ome_ ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscores and hyphens by reducing to single underscores", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "9mega________-________90", + expected: "_9mega_90", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - non-ASCII characters by converting them to underscores", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "Cízǔ", + expected: "c_z", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - CamelCase123Key to camel_case_123_key", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "CamelCase123Key", + expected: "camel_case123_key", + }, + { + name: "Standard 
casing - underscoreDivideNumbers(false) - numbers and commas", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "path to $1,00,000", + expected: "path_to_1_00_000", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - no valid characters", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "@#$%", + expected: "", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - underscores between letters and numbers", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "test123", + expected: "test123", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "abc123def456", + expected: "abc123_def456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore-number sequences", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "abc_123_def_456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - single underscore", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "__abc_123_def_456", + expected: "__abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore", + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: 
"_abc_123_def_456", + expected: "_abc_123_def_456", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tableName := TransformTableName(tc.integrationsOptions, tc.destConfigOptions, tc.tableName) + require.Equal(t, tc.expected, tableName) + }) + } +} + +func TestTransformColumnName(t *testing.T) { + testCases := []struct { + name string + destType string + integrationsOptions integrationsOptions + destConfigOptions destConfigOptions + tableName string + expected string + }{ + { + name: "Blendo casing - special characters other than \\ or $ to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "column@Name$1", + expected: "column_name$1", + }, + { + name: "Blendo casing - add underscore if name does not start with an alphabet or underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "1CComega", + expected: "_1ccomega", + }, + { + name: "Blendo casing - non-ASCII characters by converting to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "Cízǔ", + expected: "c_z_", + }, + { + name: "Blendo casing - CamelCase123Key to camelcase123key", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "CamelCase123Key", + expected: "camelcase123key", + }, + { + name: "Blendo casing - preserve \\ and $ characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "path to $1,00,000", + expected: "path_to_$1_00_000", + }, + { + name: "Blendo casing - mix of characters, numbers, and special characters", + 
destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: "CamelCase123Key_with$special\\chars", + expected: "camelcase123key_with$special\\chars", + }, + { + name: "Blendo casing - limit length to 63 characters for postgres provider", + destType: whutils.POSTGRES, + integrationsOptions: integrationsOptions{useBlendoCasing: true}, + destConfigOptions: destConfigOptions{}, + tableName: strings.Repeat("a", 70), + expected: strings.Repeat("a", 63), + }, + + { + name: "Standard casing - underscoreDivideNumbers(true) - remove symbols and join continuous letters and numbers with a single underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "&4yasdfa(84224_fs9##_____*3q", + expected: "_4_yasdfa_84224_fs_9_3_q", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - omega to omega", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - omega v2 to omega_v_2", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega v2", + expected: "omega_v_2", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - prepend underscore if name starts with a number", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "9mega", + expected: "_9_mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - remove trailing special characters", + 
destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "mega&", + expected: "mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - replace special character in the middle with underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "ome$ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - remove trailing $ character", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "omega$", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - spaces and special characters by converting to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "ome_ ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscores and hyphens by reducing to single underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "9mega________-________90", + expected: "_9_mega_90", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - non-ASCII characters by converting them to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "Cízǔ", + expected: "c_z", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - 
CamelCase123Key to camel_case_123_key", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "CamelCase123Key", + expected: "camel_case_123_key", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - numbers and commas", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "path to $1,00,000", + expected: "path_to_1_00_000", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - no valid characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "@#$%", + expected: "", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - underscores between letters and numbers", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "test123", + expected: "test_123", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "abc123def456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "abc_123_def_456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - single underscore", + 
destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "__abc_123_def_456", + expected: "__abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(true) - multiple underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: true}, + tableName: "_abc_123_def_456", + expected: "_abc_123_def_456", + }, + + { + name: "Standard casing - underscoreDivideNumbers(false) - remove symbols and join continuous letters and numbers with a single underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "&4yasdfa(84224_fs9##_____*3q", + expected: "_4yasdfa_84224_fs9_3q", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - omega to omega", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - omega v2 to omega_v_2", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega v2", + expected: "omega_v2", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - prepend underscore if name starts with a number", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "9mega", + expected: "_9mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - remove trailing 
special characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "mega&", + expected: "mega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - replace special character in the middle with underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "ome$ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - remove trailing $ character", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "omega$", + expected: "omega", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - spaces and special characters by converting to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "ome_ ga", + expected: "ome_ga", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscores and hyphens by reducing to single underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "9mega________-________90", + expected: "_9mega_90", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - non-ASCII characters by converting them to underscores", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "Cízǔ", + expected: "c_z", + }, + { + name: "Standard casing - 
underscoreDivideNumbers(false) - CamelCase123Key to camel_case_123_key", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "CamelCase123Key", + expected: "camel_case123_key", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - numbers and commas", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "path to $1,00,000", + expected: "path_to_1_00_000", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - no valid characters", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "@#$%", + expected: "", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - underscores between letters and numbers", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "test123", + expected: "test123", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "abc123def456", + expected: "abc123_def456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore-number sequences", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "abc_123_def_456", + expected: "abc_123_def_456", + }, + { + name: "Standard casing - 
underscoreDivideNumbers(false) - single underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "__abc_123_def_456", + expected: "__abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore", + destType: whutils.SNOWFLAKE, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: "_abc_123_def_456", + expected: "_abc_123_def_456", + }, + { + name: "Standard casing - underscoreDivideNumbers(false) - multiple underscore", + destType: whutils.POSTGRES, + integrationsOptions: integrationsOptions{useBlendoCasing: false}, + destConfigOptions: destConfigOptions{underscoreDivideNumbers: false}, + tableName: strings.Repeat("a", 70), + expected: strings.Repeat("a", 63), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tableName := TransformColumnName(tc.destType, tc.integrationsOptions, tc.destConfigOptions, tc.tableName) + require.Equal(t, tc.expected, tableName) + }) + } +} diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/0420b68b7550b8b8 b/warehouse/transformer/testdata/fuzz/FuzzTransformer/0420b68b7550b8b8 new file mode 100644 index 0000000000..60a403603c --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/0420b68b7550b8b8 @@ -0,0 +1,2 @@ +go test fuzz v1 +string("{\"tYpe\":\"00000\"}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/0970d1c03f60fb13 b/warehouse/transformer/testdata/fuzz/FuzzTransformer/0970d1c03f60fb13 new file mode 100644 index 0000000000..35f7f2846c --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/0970d1c03f60fb13 @@ -0,0 +1,4 @@ +go test fuzz v1 +string("RS") +string("{}") 
+string("{\"t7pe\":\"alias\",\"messageId\":\"d\",\"receivedAt\":\"2021-09-01T00:00:00Z\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\"}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/131dee6b2a9a976d b/warehouse/transformer/testdata/fuzz/FuzzTransformer/131dee6b2a9a976d new file mode 100644 index 0000000000..ac2462ac9b --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/131dee6b2a9a976d @@ -0,0 +1,2 @@ +go test fuzz v1 +string("{\"type\":\"pAge\",\"messageId\":\"0\",\"receivedAt\":\"0000-01-01T0:00:00Z\"}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/2719fcfcf5b4aa8f b/warehouse/transformer/testdata/fuzz/FuzzTransformer/2719fcfcf5b4aa8f new file mode 100644 index 0000000000..252bf8b963 --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/2719fcfcf5b4aa8f @@ -0,0 +1,2 @@ +go test fuzz v1 +string("{\"type\":\"pAge\"}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/2a2fdac69e4311b8 b/warehouse/transformer/testdata/fuzz/FuzzTransformer/2a2fdac69e4311b8 new file mode 100644 index 0000000000..4c5f9b2dd8 --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/2a2fdac69e4311b8 @@ -0,0 +1,2 @@ +go test fuzz v1 +string("\"\"") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/4d3139d3c6f4e739 b/warehouse/transformer/testdata/fuzz/FuzzTransformer/4d3139d3c6f4e739 new file mode 100644 index 0000000000..d1653a9cd9 --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/4d3139d3c6f4e739 @@ -0,0 +1,4 @@ +go test fuzz v1 +string("RS") +string("{}") +string("{\"type\":\"trACk\",\"messageId\":\"0001-01-01 \",\"receivedAt\":\"0000-01-01T0:00:00Z\"}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/5286e81c04e5fe1d b/warehouse/transformer/testdata/fuzz/FuzzTransformer/5286e81c04e5fe1d new file mode 100644 index 0000000000..89626c50a0 --- /dev/null +++ 
b/warehouse/transformer/testdata/fuzz/FuzzTransformer/5286e81c04e5fe1d @@ -0,0 +1,4 @@ +go test fuzz v1 +string("RS") +string("{}") +string("{\"type\":\"screen\",\"messageId\":\"d\",\"receivedAt\":\"2021-09-01T00:00:00Z\",\"properties\":{\"np\":\"4\"}\"") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/6f85c0fd8d883c63 b/warehouse/transformer/testdata/fuzz/FuzzTransformer/6f85c0fd8d883c63 new file mode 100644 index 0000000000..82ee3e5a65 --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/6f85c0fd8d883c63 @@ -0,0 +1,4 @@ +go test fuzz v1 +string("RS") +string("{}") +string("{\"type\":\"screen\",\"messageId\":\"messageId\",\"userId\":\"userId\",\"senple.cotAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"properties\":{\"name\":\"Main\",\"title\":\"Home | RudderStack\",\"url\":\"https://www.rudderstack.com\"},\"context\":{\"traits\":{\"name\":\"Richard Hendricks\",\"inner\":\"rhedricks@example.com\",\"logins\":2},\"ip\":\"1.2.3.4\"}}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/bca4fef86c7b2739 b/warehouse/transformer/testdata/fuzz/FuzzTransformer/bca4fef86c7b2739 new file mode 100644 index 0000000000..97e60613fc --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/bca4fef86c7b2739 @@ -0,0 +1,2 @@ +go test fuzz v1 +string("{\"type\":{}}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/c234464ba1289fcb b/warehouse/transformer/testdata/fuzz/FuzzTransformer/c234464ba1289fcb new file mode 100644 index 0000000000..1f7691f597 --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/c234464ba1289fcb @@ -0,0 +1,2 @@ +go test fuzz v1 +string("{\"tYpe\":\"trACk\"}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/cbb57a8a69d0fea8 b/warehouse/transformer/testdata/fuzz/FuzzTransformer/cbb57a8a69d0fea8 new file mode 100644 index 
0000000000..494f427430 --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/cbb57a8a69d0fea8 @@ -0,0 +1,2 @@ +go test fuzz v1 +string("{\"type\":\"AliAs\",\"messageId\":\"0001-01-01\",\"receivedAt\":\"0000-01-01T0:00:00Z\"}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/cbbad75732661f48 b/warehouse/transformer/testdata/fuzz/FuzzTransformer/cbbad75732661f48 new file mode 100644 index 0000000000..03e0101d5d --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/cbbad75732661f48 @@ -0,0 +1,2 @@ +go test fuzz v1 +string("{}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/d216d9ba19b7f97c b/warehouse/transformer/testdata/fuzz/FuzzTransformer/d216d9ba19b7f97c new file mode 100644 index 0000000000..11ecc53916 --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/d216d9ba19b7f97c @@ -0,0 +1,2 @@ +go test fuzz v1 +string("{\"tYpe\":\"track\"}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/d63af25c55029c3e b/warehouse/transformer/testdata/fuzz/FuzzTransformer/d63af25c55029c3e new file mode 100644 index 0000000000..70df00f21a --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/d63af25c55029c3e @@ -0,0 +1,4 @@ +go test fuzz v1 +string("RS") +string("{}") +string("{\"type\":\"trACk\",\"messageId\":\"2000-01-01T\",\"receivedAt\":\"2000-01-01T0:00:00Z\"}") diff --git a/warehouse/transformer/testdata/fuzz/FuzzTransformer/e1b0c6f73449bc0d b/warehouse/transformer/testdata/fuzz/FuzzTransformer/e1b0c6f73449bc0d new file mode 100644 index 0000000000..5af11635a5 --- /dev/null +++ b/warehouse/transformer/testdata/fuzz/FuzzTransformer/e1b0c6f73449bc0d @@ -0,0 +1,2 @@ +go test fuzz v1 +string("{\"type\":\"pAge\",\"messageId\":{}}") diff --git a/warehouse/transformer/testhelper/outputbuilder.go b/warehouse/transformer/testhelper/outputbuilder.go new file mode 100644 index 0000000000..672bb8aece --- /dev/null +++ 
// OutputBuilder is a chainable builder over a nested map[string]any.
//
// Its methods read and write three locations inside the receiver:
//
//	ob["data"]                 map[string]any written by SetDataField
//	ob["metadata"]["columns"]  map[string]any written by SetColumnField
//	ob["metadata"]["table"]    value written by SetTableName
//
// Every method mutates the receiver in place and returns it, so calls can be
// chained. The receiver itself must be a non-nil map: the setters assign into
// it, and assigning into a nil map panics.
type OutputBuilder map[string]any

// ensureChildMap returns the map[string]any stored under key in parent.
//
// When the key is missing, holds a nil interface value, or holds a nil
// map[string]any, a fresh map is created, stored under key, and returned.
// When the key holds a non-nil value of any other type, parent is left
// untouched and ok is false, so callers can skip the write instead of
// clobbering data they do not understand.
func ensureChildMap(parent map[string]any, key string) (child map[string]any, ok bool) {
	switch existing := parent[key].(type) {
	case map[string]any:
		if existing != nil {
			return existing, true
		}
		// A typed-nil map cannot be written to; replace it below.
	case nil:
		// Missing key or explicit nil: create the map below.
	default:
		return nil, false
	}

	created := make(map[string]any)
	parent[key] = created
	return created, true
}

// SetDataField stores value under key inside ob["data"], creating the "data"
// map when it is absent or nil. If "data" holds a non-map value the call is a
// no-op. It returns ob for chaining.
func (ob OutputBuilder) SetDataField(key string, value any) OutputBuilder {
	if data, ok := ensureChildMap(ob, "data"); ok {
		data[key] = value
	}
	return ob
}

// SetColumnField stores value under key inside ob["metadata"]["columns"],
// creating the "metadata" and "columns" maps when they are absent or nil. If
// either location holds a non-map value the call is a no-op. It returns ob for
// chaining.
func (ob OutputBuilder) SetColumnField(key string, value any) OutputBuilder {
	metadata, ok := ensureChildMap(ob, "metadata")
	if !ok {
		return ob
	}
	if columns, ok := ensureChildMap(metadata, "columns"); ok {
		columns[key] = value
	}
	return ob
}

// SetTableName stores tableName under ob["metadata"]["table"], creating the
// "metadata" map when it is absent or nil. If "metadata" holds a non-map value
// the call is a no-op. It returns ob for chaining.
func (ob OutputBuilder) SetTableName(tableName string) OutputBuilder {
	if metadata, ok := ensureChildMap(ob, "metadata"); ok {
		metadata["table"] = tableName
	}
	return ob
}

// RemoveDataFields deletes each of the given keys from ob["data"]. Keys that
// are not present are ignored, and nothing happens when "data" is missing or is
// not a map[string]any. It returns ob for chaining.
func (ob OutputBuilder) RemoveDataFields(fields ...string) OutputBuilder {
	if data, ok := ob["data"].(map[string]any); ok {
		for _, key := range fields {
			delete(data, key)
		}
	}
	return ob
}

// RemoveColumnFields deletes each of the given keys from
// ob["metadata"]["columns"]. Keys that are not present are ignored, and nothing
// happens when either nested map is missing or is not a map[string]any. It
// returns ob for chaining.
func (ob OutputBuilder) RemoveColumnFields(fields ...string) OutputBuilder {
	metadata, ok := ob["metadata"].(map[string]any)
	if !ok {
		return ob
	}
	if columns, ok := metadata["columns"].(map[string]any); ok {
		for _, key := range fields {
			delete(columns, key)
		}
	}
	return ob
}

// AddRandomEntries calls predicate once for every index in [0, count) and
// stores the returned pairs: (dataKey, dataValue) through SetDataField and
// (columnKey, columnValue) through SetColumnField. A count of zero or less adds
// nothing. It returns ob for chaining.
func (ob OutputBuilder) AddRandomEntries(count int, predicate func(index int) (dataKey, dataValue, columnKey, columnValue string)) OutputBuilder {
	for i := 0; i < count; i++ {
		dataKey, dataValue, columnKey, columnValue := predicate(i)
		ob.SetDataField(dataKey, dataValue)
		ob.SetColumnField(columnKey, columnValue)
	}
	return ob
}
b/warehouse/transformer/testhelper/testhelper.go new file mode 100644 index 0000000000..5aa3b822a8 --- /dev/null +++ b/warehouse/transformer/testhelper/testhelper.go @@ -0,0 +1,42 @@ +package testhelper + +import ( + "fmt" + "strings" + + "github.com/samber/lo" +) + +func AddRandomColumns(eventPayload string, numColumns int) string { + return fmt.Sprintf(eventPayload, strings.Join( + lo.RepeatBy(numColumns, func(index int) string { + return fmt.Sprintf(`"random_column_%d": "random_value_%d"`, index, index) + }), ",", + )) +} + +func AddLargeColumns(eventPayload string, numColumns int) string { + return fmt.Sprintf(eventPayload, strings.Join( + lo.RepeatBy(numColumns, func(index int) string { + return fmt.Sprintf(`"large_column_`+strings.Repeat("a", 1000)+`": "large_value_%d"`, index) + }), ",", + )) +} + +func AddNestedLevels(eventPayload string, numLevels int) string { + var nestedBuilder strings.Builder + + for i := numLevels; i > 0; i-- { + if i < numLevels { + nestedBuilder.WriteString(", ") + } + nestedBuilder.WriteString(fmt.Sprintf(`"nested_level_%d": {`, i)) + } + for i := 0; i < numLevels; i++ { + nestedBuilder.WriteString("}") + if i < numLevels-1 { + nestedBuilder.WriteString(", ") + } + } + return strings.Replace(eventPayload, "{}", "{"+nestedBuilder.String()+"}", 1) +} diff --git a/warehouse/transformer/testhelper/validate.go b/warehouse/transformer/testhelper/validate.go new file mode 100644 index 0000000000..848ac515e5 --- /dev/null +++ b/warehouse/transformer/testhelper/validate.go @@ -0,0 +1,111 @@ +package testhelper + +import ( + "context" + "encoding/json" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/utils/types" +) + +type EventInfo struct { + Payload []byte + Metadata ptrans.Metadata + Destination backendconfig.DestinationT +} + +func ValidateEvents(t *testing.T, infos 
[]EventInfo, pTransformer, dTransformer ptrans.DestinationTransformer, expectedResponse ptrans.Response) { + t.Helper() + + events := prepareEvents(t, infos) + + ctx := context.Background() + batchSize := 100 + + pResponse := pTransformer.Transform(ctx, events, batchSize) + wResponse := dTransformer.Transform(ctx, events, batchSize) + + validateResponseLengths(t, expectedResponse, pResponse, wResponse) + validateRudderEventIfExists(t, expectedResponse, pResponse, wResponse) + validateEventEquality(t, expectedResponse, pResponse, wResponse) + validateFailedEventEquality(t, expectedResponse, pResponse, wResponse) +} + +func prepareEvents(t *testing.T, infos []EventInfo) []ptrans.TransformerEvent { + var events []ptrans.TransformerEvent + for _, info := range infos { + var singularEvent types.SingularEventT + err := json.Unmarshal(info.Payload, &singularEvent) + require.NoError(t, err) + + events = append(events, ptrans.TransformerEvent{ + Message: singularEvent, + Metadata: info.Metadata, + Destination: info.Destination, + }) + } + return events +} + +func validateResponseLengths(t *testing.T, expectedResponse, pResponse, wResponse ptrans.Response) { + require.Equal(t, len(expectedResponse.Events), len(pResponse.Events)) + require.Equal(t, len(expectedResponse.Events), len(wResponse.Events)) + require.Equal(t, len(expectedResponse.FailedEvents), len(pResponse.FailedEvents)) + require.Equal(t, len(expectedResponse.FailedEvents), len(wResponse.FailedEvents)) +} + +func validateRudderEventIfExists(t *testing.T, expectedResponse, pResponse, wResponse ptrans.Response) { + for i := range pResponse.Events { + data, ok := expectedResponse.Events[i].Output["data"].(map[string]interface{}) + if !ok { + continue // No data to validate + } + + rudderEvent, ok := data["rudder_event"].(string) + if !ok { + continue // No rudder_event key, skip validation + } + + pEventData, ok := pResponse.Events[i].Output["data"].(map[string]interface{}) + require.True(t, ok, "pResponse data must 
be a map") + pRudderEvent, ok := pEventData["rudder_event"].(string) + require.True(t, ok, "pResponse rudder_event must be a string") + require.JSONEq(t, rudderEvent, pRudderEvent) + + wEventData, ok := wResponse.Events[i].Output["data"].(map[string]interface{}) + require.True(t, ok, "wResponse data must be a map") + wRudderEvent, ok := wEventData["rudder_event"].(string) + require.True(t, ok, "wResponse rudder_event must be a string") + require.JSONEq(t, rudderEvent, wRudderEvent) + + require.JSONEq(t, pRudderEvent, wRudderEvent) + + delete(pEventData, "rudder_event") + delete(wEventData, "rudder_event") + delete(data, "rudder_event") + } +} + +func validateEventEquality(t *testing.T, expectedResponse, pResponse, wResponse ptrans.Response) { + for i := range pResponse.Events { + require.EqualValues(t, expectedResponse.Events[i], pResponse.Events[i]) + require.EqualValues(t, expectedResponse.Events[i], wResponse.Events[i]) + require.EqualValues(t, wResponse.Events[i], pResponse.Events[i]) + } +} + +func validateFailedEventEquality(t *testing.T, expectedResponse, pResponse, wResponse ptrans.Response) { + for i := range pResponse.FailedEvents { + require.NotEmpty(t, pResponse.FailedEvents[i].Error) + require.NotEmpty(t, wResponse.FailedEvents[i].Error) + require.NotEmpty(t, expectedResponse.FailedEvents[i].Error) + + require.NotZero(t, pResponse.FailedEvents[i].StatusCode) + require.NotZero(t, wResponse.FailedEvents[i].StatusCode) + require.NotZero(t, expectedResponse.FailedEvents[i].StatusCode) + } +} diff --git a/warehouse/transformer/transformer.go b/warehouse/transformer/transformer.go new file mode 100644 index 0000000000..1123c9b9a8 --- /dev/null +++ b/warehouse/transformer/transformer.go @@ -0,0 +1,1260 @@ +package transformer + +import ( + "context" + "errors" + "fmt" + "net/http" + "strings" + "time" + + jsoniter "github.com/json-iterator/go" + "github.com/samber/lo" + + "github.com/rudderlabs/rudder-go-kit/config" + 
"github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/utils/misc" + "github.com/rudderlabs/rudder-server/utils/types" + "github.com/rudderlabs/rudder-server/warehouse/internal/model" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/response" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/rules" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/utils" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +const ( + violationErrors = "violationErrors" + redshiftStringLimit = 512 +) + +var json = jsoniter.ConfigCompatibleWithStandardLibrary + +func New(conf *config.Config, logger logger.Logger, statsFactory stats.Stats) ptrans.DestinationTransformer { + t := &transformer{ + conf: conf, + logger: logger.Child("warehouse-transformer"), + statsFactory: statsFactory, + now: time.Now, + } + + t.config.enableIDResolution = conf.GetReloadableBoolVar(false, "Warehouse.enableIDResolution") + t.config.populateSrcDestInfoInContext = conf.GetReloadableBoolVar(true, "WH_POPULATE_SRC_DEST_INFO_IN_CONTEXT") + t.config.maxColumnsInEvent = conf.GetReloadableIntVar(200, 1, "WH_MAX_COLUMNS_IN_EVENT") + return t +} + +func (t *transformer) Transform(_ context.Context, clientEvents []ptrans.TransformerEvent, _ int) (res ptrans.Response) { + if len(clientEvents) == 0 { + return + } + + startTime := t.now() + metadata := clientEvents[0].Metadata + + defer func() { + tags := stats.Tags{ + "workspaceId": metadata.WorkspaceID, + "sourceId": metadata.SourceID, + "sourceType": metadata.SourceType, + "destinationId": metadata.DestinationID, + "destinationType": metadata.DestinationType, + } + + t.statsFactory.NewTaggedStat("warehouse_dest_transform_request_latency", stats.TimerType, tags).Since(startTime) + t.statsFactory.NewTaggedStat("warehouse_dest_transform_requests", 
stats.CountType, tags).Increment() + t.statsFactory.NewTaggedStat("warehouse_dest_transform_input_events", stats.HistogramType, tags).Observe(float64(len(clientEvents))) + t.statsFactory.NewTaggedStat("warehouse_dest_transform_output_events", stats.HistogramType, tags).Observe(float64(len(res.Events))) + t.statsFactory.NewTaggedStat("warehouse_dest_transform_output_failed_events", stats.HistogramType, tags).Observe(float64(len(res.FailedEvents))) + }() + + for _, event := range clientEvents { + r, err := t.processWarehouseMessage(event) + if err != nil { + res.FailedEvents = append(res.FailedEvents, t.transformerResponseFromErr(event, err)) + continue + } + + res.Events = append(res.Events, lo.Map(r, func(item map[string]any, index int) ptrans.TransformerResponse { + return ptrans.TransformerResponse{ + Output: item, + Metadata: event.Metadata, + StatusCode: http.StatusOK, + } + })...) + } + return +} + +func (t *transformer) processWarehouseMessage(event ptrans.TransformerEvent) ([]map[string]any, error) { + if err := t.enhanceContextWithSourceDestInfo(event); err != nil { + return nil, fmt.Errorf("enhancing context with source and destination info: %w", err) + } + return t.handleEvent(event) +} + +func (t *transformer) enhanceContextWithSourceDestInfo(event ptrans.TransformerEvent) error { + if !t.config.populateSrcDestInfoInContext.Load() { + return nil + } + + messageContext, ok := event.Message["context"] + if !ok || messageContext == nil { + messageContext = map[string]any{} + event.Message["context"] = messageContext + } + messageContextMap, ok := messageContext.(map[string]any) + if !ok { + return response.ErrContextNotMap + } + messageContextMap["sourceId"] = event.Metadata.SourceID + messageContextMap["sourceType"] = event.Metadata.SourceType + messageContextMap["destinationId"] = event.Metadata.DestinationID + messageContextMap["destinationType"] = event.Metadata.DestinationType + + event.Message["context"] = messageContextMap + return nil +} + +func (t 
*transformer) handleEvent(event ptrans.TransformerEvent) ([]map[string]any, error) { + itrOpts := prepareIntegrationOptions(event) + dstOpts := prepareDestinationOptions(event.Metadata.DestinationType, event.Destination.Config) + jsonPathsInfo := extractJSONPathInfo(append(itrOpts.jsonPaths, dstOpts.jsonPaths...)) + eventType := strings.ToLower(event.Metadata.EventType) + + pi := &processingInfo{ + event: event, + itrOpts: itrOpts, + dstOpts: dstOpts, + jsonPathsInfo: jsonPathsInfo, + } + + switch eventType { + case "track": + return t.handleTrackEvent(pi) + case "identify": + return t.handleIdentifyEvent(pi) + case "page": + return t.handlePageEvent(pi) + case "screen": + return t.handleScreenEvent(pi) + case "alias": + return t.handleAliasEvent(pi) + case "group": + return t.handleGroupEvent(pi) + case "extract": + return t.handleExtractEvent(pi) + case "merge": + return t.handleMergeEvent(pi) + default: + return nil, response.NewTransformerError(fmt.Sprintf("Unknown event type: %q", eventType), http.StatusBadRequest) + } +} + +func prepareIntegrationOptions(event ptrans.TransformerEvent) (opts integrationsOptions) { + src := misc.MapLookup(event.Message, "integrations", event.Metadata.DestinationType, "options") + if src == nil || !utils.IsObject(src) { + return + } + var jsonPaths []any + + srcMap := src.(map[string]any) + + setOption(srcMap, "skipReservedKeywordsEscaping", &opts.skipReservedKeywordsEscaping) + setOption(srcMap, "useBlendoCasing", &opts.useBlendoCasing) + setOption(srcMap, "skipTracksTable", &opts.skipTracksTable) + setOption(srcMap, "skipUsersTable", &opts.skipUsersTable) + setOption(srcMap, "jsonPaths", &jsonPaths) + + for _, jp := range jsonPaths { + if jpStr, ok := jp.(string); ok { + opts.jsonPaths = append(opts.jsonPaths, jpStr) + } + } + return +} + +func prepareDestinationOptions(destType string, destConfig map[string]any) (opts destConfigOptions) { + var jsonPaths string + + setOption(destConfig, "skipTracksTable", 
&opts.skipTracksTable) + setOption(destConfig, "skipUsersTable", &opts.skipUsersTable) + setOption(destConfig, "underscoreDivideNumbers", &opts.underscoreDivideNumbers) + setOption(destConfig, "allowUsersContextTraits", &opts.allowUsersContextTraits) + setOption(destConfig, "storeFullEvent", &opts.storeFullEvent) + setOption(destConfig, "jsonPaths", &jsonPaths) + + if len(jsonPaths) > 0 && utils.IsJSONPathSupportedAsPartOfConfig(destType) { + opts.jsonPaths = strings.Split(jsonPaths, ",") + } + return +} + +func setOption[T any](src map[string]any, key string, dest *T) { + if val, ok := src[key].(T); ok { + *dest = val + } +} + +func (t *transformer) handleTrackEvent(pi *processingInfo) ([]map[string]any, error) { + commonProps, commonColumnTypes, transformerEventName, err := t.trackCommonProps(pi) + if err != nil { + return nil, fmt.Errorf("track common properties: %w", err) + } + + tracksResponse, err := t.tracksResponse(pi, commonProps, commonColumnTypes) + if err != nil { + return nil, fmt.Errorf("track response: %w", err) + } + + trackEventsResponse, err := t.trackEventsResponse(pi, transformerEventName, commonProps, commonColumnTypes) + if err != nil { + return nil, fmt.Errorf("track events response: %w", err) + } + return append(tracksResponse, trackEventsResponse...), nil +} + +func (t *transformer) trackCommonProps(pi *processingInfo) (map[string]any, map[string]string, string, error) { + commonProps := make(map[string]any) + commonColumnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], commonProps, commonColumnTypes, &prefixInfo{ + completePrefix: "track_context_", + completeLevel: 2, + prefix: "context_", + }); err != nil { + return nil, nil, "", fmt.Errorf("track common props: setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, commonProps, commonColumnTypes, rules.TrackRules); err != nil { + return nil, nil, "", fmt.Errorf("track common 
props: setting data and column types from rules: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, commonProps, commonColumnTypes, rules.DefaultRules); err != nil { + return nil, nil, "", fmt.Errorf("track common props: setting data and column types from rules: %w", err) + } + + eventColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "event") + if err != nil { + return nil, nil, "", fmt.Errorf("track common props: safe column name: %w", err) + } + eventTextColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "event_text") + if err != nil { + return nil, nil, "", fmt.Errorf("track common props: safe column name: %w", err) + } + + var eventName, transformerEventName string + if d, dok := commonProps[eventTextColName]; dok { + eventName, _ = d.(string) + } + transformerEventName = TransformTableName(pi.itrOpts, pi.dstOpts, eventName) + + commonProps[eventColName] = transformerEventName + commonColumnTypes[eventColName] = "string" + return commonProps, commonColumnTypes, transformerEventName, nil +} + +func (t *transformer) tracksResponse(pi *processingInfo, commonProps map[string]any, commonColumnTypes map[string]string) ([]map[string]any, error) { + if pi.itrOpts.skipTracksTable || pi.dstOpts.skipTracksTable { + return nil, nil + } + + event := make(map[string]any) + columnTypes := make(map[string]string) + + event = lo.Assign(event, commonProps) + + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.TrackTableRules); err != nil { + return nil, fmt.Errorf("tracks response: setting data and column types from rules: %w", err) + } + if err := storeRudderEvent(pi, event, columnTypes); err != nil { + return nil, fmt.Errorf("tracks response: storing rudder event: %w", err) + } + + table, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "tracks") + if err != nil { + return nil, fmt.Errorf("tracks response: safe table name: %w", err) + } + columns, err := 
t.getColumns(pi.event.Metadata.DestinationType, event, lo.Assign(columnTypes, commonColumnTypes)) + if err != nil { + return nil, fmt.Errorf("tracks response: getting columns: %w", err) + } + + output := map[string]any{ + "data": event, + "metadata": map[string]any{ + "table": table, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return []map[string]any{output}, nil +} + +func (t *transformer) trackEventsResponse(pi *processingInfo, transformerEventName string, commonProps map[string]any, commonColumnTypes map[string]string) ([]map[string]any, error) { + if len(transformerEventName) == 0 || len(strings.TrimSpace(transformerEventName)) == 0 { + return nil, nil + } + + event := make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["properties"], event, columnTypes, &prefixInfo{ + completePrefix: "track_properties_", + completeLevel: 2, + }); err != nil { + return nil, fmt.Errorf("track events response: setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["userProperties"], event, columnTypes, &prefixInfo{ + completePrefix: "track_userProperties_", + completeLevel: 2, + }); err != nil { + return nil, fmt.Errorf("track events response: setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, commonProps, commonColumnTypes, rules.TrackEventTableRules); err != nil { + return nil, fmt.Errorf("track events response: setting data and column types from rules: %w", err) + } + + eventTableEvent := lo.Assign(event, commonProps) + + columnName := TransformColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, pi.dstOpts, transformerEventName) + table, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, columnName) + if err != nil { + return nil, fmt.Errorf("track events response: safe table name: %w", err) + } + 
excludeTable := excludeRudderCreatedTableNames(table, pi.itrOpts.skipReservedKeywordsEscaping) + + columns, err := t.getColumns(pi.event.Metadata.DestinationType, eventTableEvent, lo.Assign(columnTypes, commonColumnTypes)) + if err != nil { + return nil, fmt.Errorf("track events response: getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("track events response: merge event: %w", err) + } + + trackOutput := map[string]any{ + "data": eventTableEvent, + "metadata": map[string]any{ + "table": excludeTable, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{trackOutput}, mergeEvents...), nil +} + +func (t *transformer) handleExtractEvent(pi *processingInfo) ([]map[string]any, error) { + event := make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], event, columnTypes, &prefixInfo{ + completePrefix: "extract_context_", + completeLevel: 2, + prefix: "context_", + }); err != nil { + return nil, fmt.Errorf("extract: setting data and column types from input: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["properties"], event, columnTypes, &prefixInfo{ + completePrefix: "extract_properties_", + completeLevel: 2, + }); err != nil { + return nil, fmt.Errorf("extract: setting data and column types from input: %w", err) + } + + eventColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "event") + if err != nil { + return nil, fmt.Errorf("extract: safe column name: %w", err) + } + + eventName, _ := pi.event.Message[eventColName].(string) + + event[eventColName] = TransformTableName(pi.itrOpts, pi.dstOpts, eventName) + columnTypes[eventColName] = model.StringDataType + + if err = t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.ExtractRules); err != nil { + return nil, 
fmt.Errorf("extract: setting data and column types from rules: %w", err) + } + + if val := event[eventColName]; val == nil || utils.IsBlank(val) { + return nil, response.ErrExtractEventNameEmpty + } + + columnName := TransformColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, pi.dstOpts, event[eventColName].(string)) + tableName, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, columnName) + if err != nil { + return nil, fmt.Errorf("extract: safe table name: %w", err) + } + excludeTableName := excludeRudderCreatedTableNames(tableName, pi.itrOpts.skipReservedKeywordsEscaping) + + columns, err := t.getColumns(pi.event.Metadata.DestinationType, event, columnTypes) + if err != nil { + return nil, fmt.Errorf("extract: getting columns: %w", err) + } + + extractOutput := map[string]any{ + "data": event, + "metadata": map[string]any{ + "table": excludeTableName, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return []map[string]any{extractOutput}, nil +} + +func (t *transformer) handleIdentifyEvent(pi *processingInfo) ([]map[string]any, error) { + commonProps, commonColumnTypes, err := t.identifyCommonProps(pi) + if err != nil { + return nil, fmt.Errorf("identifies: common properties: %w", err) + } + + identifiesResponse, err := t.identifiesResponse(pi, commonProps, commonColumnTypes) + if err != nil { + return nil, fmt.Errorf("identifies response: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("identifies: merge event: %w", err) + } + + usersResponse, err := t.usersResponse(pi, commonProps, commonColumnTypes) + if err != nil { + return nil, fmt.Errorf("identifies: users response: %w", err) + } + return append(append(identifiesResponse, mergeEvents...), usersResponse...), nil +} + +func (t *transformer) identifyCommonProps(pi *processingInfo) (map[string]any, map[string]string, error) { + commonProps := make(map[string]any) + commonColumnTypes 
:= make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["userProperties"], commonProps, commonColumnTypes, &prefixInfo{ + completePrefix: "identify_userProperties_", + completeLevel: 2, + }); err != nil { + return nil, nil, fmt.Errorf("identify common props: setting data and column types from message: %w", err) + } + if pi.dstOpts.allowUsersContextTraits { + contextTraits := misc.MapLookup(pi.event.Message, "context", "traits") + + if err := t.setDataAndColumnTypeFromInput(pi, contextTraits, commonProps, commonColumnTypes, &prefixInfo{ + completePrefix: "identify_context_traits_", + completeLevel: 3, + }); err != nil { + return nil, nil, fmt.Errorf("identify common props: setting data and column types from message: %w", err) + } + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["traits"], commonProps, commonColumnTypes, &prefixInfo{ + completePrefix: "identify_traits_", + completeLevel: 2, + }); err != nil { + return nil, nil, fmt.Errorf("identify common props: setting data and column types from message: %w", err) + } + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], commonProps, commonColumnTypes, &prefixInfo{ + completePrefix: "identify_context_", + completeLevel: 2, + prefix: "context_", + }); err != nil { + return nil, nil, fmt.Errorf("identify common props: setting data and column types from message: %w", err) + } + + userIDColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "user_id") + if err != nil { + return nil, nil, fmt.Errorf("identify common props: safe column name: %w", err) + } + if k, ok := commonProps[userIDColName]; ok && k != nil { + revUserIDCol := "_" + userIDColName + + commonProps[revUserIDCol] = commonProps[userIDColName] + commonColumnTypes[revUserIDCol] = commonColumnTypes[userIDColName] + + delete(commonProps, userIDColName) + delete(commonColumnTypes, userIDColName) + } + return commonProps, commonColumnTypes, nil +} + +func (t 
*transformer) identifiesResponse(pi *processingInfo, commonProps map[string]any, commonColumnTypes map[string]string) ([]map[string]any, error) { + event := make(map[string]any) + columnTypes := make(map[string]string) + + event = lo.Assign(event, commonProps) + + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.DefaultRules); err != nil { + return nil, fmt.Errorf("identifies response: setting data and column types from rules: %w", err) + } + if err := storeRudderEvent(pi, event, columnTypes); err != nil { + return nil, fmt.Errorf("identifies response: storing rudder event: %w", err) + } + + identifiesTable, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "identifies") + if err != nil { + return nil, fmt.Errorf("identifies response: safe table name: %w", err) + } + identifiesColumns, err := t.getColumns(pi.event.Metadata.DestinationType, event, lo.Assign(commonColumnTypes, columnTypes)) + if err != nil { + return nil, fmt.Errorf("identifies response: getting columns: %w", err) + } + + identifiesOutput := map[string]any{ + "data": event, + "metadata": map[string]any{ + "table": identifiesTable, + "columns": identifiesColumns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return []map[string]any{identifiesOutput}, nil +} + +func (t *transformer) usersResponse(pi *processingInfo, commonProps map[string]any, commonColumnTypes map[string]string) ([]map[string]any, error) { + userID := misc.MapLookup(pi.event.Message, "userId") + if userID == nil || utils.IsBlank(userID) { + return nil, nil + } + if pi.itrOpts.skipUsersTable || pi.dstOpts.skipUsersTable { + return nil, nil + } + + event := make(map[string]any) + columnTypes := make(map[string]string) + + event = lo.Assign(event, commonProps) + + var rulesMap map[string]rules.FunctionalRules + if utils.IsDataLake(pi.event.Metadata.DestinationType) { + rulesMap = rules.IdentifyRules + } else { + rulesMap = rules.IdentifyRulesNonDataLake + } + + if err := 
t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rulesMap); err != nil { + return nil, fmt.Errorf("users response: setting data and column types from rules: %w", err) + } + + idColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "id") + if err != nil { + return nil, fmt.Errorf("users response: safe column name: %w", err) + } + idDataType := t.getDataType(pi.event.Metadata.DestinationType, idColName, userID, false) + + event[idColName] = convertValIfDateTime(userID, idDataType) + columnTypes[idColName] = idDataType + + receivedAtColName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "received_at") + if err != nil { + return nil, fmt.Errorf("users response: safe column name: %w", err) + } + + event[receivedAtColName] = convertValIfDateTime(pi.event.Metadata.ReceivedAt, "datetime") + columnTypes[receivedAtColName] = "datetime" + + tableName, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "users") + if err != nil { + return nil, fmt.Errorf("users response: safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, event, lo.Assign(commonColumnTypes, columnTypes)) + if err != nil { + return nil, fmt.Errorf("users response: getting columns: %w", err) + } + + usersOutput := map[string]any{ + "data": event, + "metadata": map[string]any{ + "table": tableName, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return []map[string]any{usersOutput}, nil +} + +func (t *transformer) handlePageEvent(pi *processingInfo) ([]map[string]any, error) { + event := make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["properties"], event, columnTypes, &prefixInfo{ + completePrefix: "page_properties_", + completeLevel: 2, + }); err != nil { + return nil, fmt.Errorf("page: setting data and column types from input: %w", err) + } + if err := 
t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], event, columnTypes, &prefixInfo{ + completePrefix: "page_context_", + completeLevel: 2, + prefix: "context_", + }); err != nil { + return nil, fmt.Errorf("page: setting data and column types from input: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.DefaultRules); err != nil { + return nil, fmt.Errorf("page: setting data and column types from rules: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.PageRules); err != nil { + return nil, fmt.Errorf("page: setting data and column types from rules: %w", err) + } + + if err := storeRudderEvent(pi, event, columnTypes); err != nil { + return nil, fmt.Errorf("page: storing rudder event: %w", err) + } + + tableName, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "pages") + if err != nil { + return nil, fmt.Errorf("page: safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, event, columnTypes) + if err != nil { + return nil, fmt.Errorf("page: getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("page: merge event: %w", err) + } + + pageOutput := map[string]any{ + "data": event, + "metadata": map[string]any{ + "table": tableName, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{pageOutput}, mergeEvents...), nil +} + +func (t *transformer) handleScreenEvent(pi *processingInfo) ([]map[string]any, error) { + event := make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["properties"], event, columnTypes, &prefixInfo{ + completePrefix: "screen_properties_", + completeLevel: 2, + }); err != nil { + return nil, fmt.Errorf("screen: setting data and column types from input: %w", err) + } + if err := 
t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], event, columnTypes, &prefixInfo{ + completePrefix: "screen_context_", + completeLevel: 2, + prefix: "context_", + }); err != nil { + return nil, fmt.Errorf("screen: setting data and column types from input: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.DefaultRules); err != nil { + return nil, fmt.Errorf("screen: setting data and column types from rules: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.ScreenRules); err != nil { + return nil, fmt.Errorf("screen: setting data and column types from rules: %w", err) + } + + if err := storeRudderEvent(pi, event, columnTypes); err != nil { + return nil, fmt.Errorf("screen: storing rudder event: %w", err) + } + + tableName, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "screens") + if err != nil { + return nil, fmt.Errorf("screen: safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, event, columnTypes) + if err != nil { + return nil, fmt.Errorf("screen: getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("screen: merge event: %w", err) + } + + screenOutput := map[string]any{ + "data": event, + "metadata": map[string]any{ + "table": tableName, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{screenOutput}, mergeEvents...), nil +} + +func (t *transformer) handleGroupEvent(pi *processingInfo) ([]map[string]any, error) { + event := make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["traits"], event, columnTypes, &prefixInfo{ + completePrefix: "group_traits_", + completeLevel: 2, + }); err != nil { + return nil, fmt.Errorf("group: setting data and column types from input: %w", err) + } + if err := 
t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], event, columnTypes, &prefixInfo{ + completePrefix: "group_context_", + completeLevel: 2, + prefix: "context_", + }); err != nil { + return nil, fmt.Errorf("group: setting data and column types from input: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.DefaultRules); err != nil { + return nil, fmt.Errorf("group: setting data and column types from rules: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.GroupRules); err != nil { + return nil, fmt.Errorf("group: setting data and column types from rules: %w", err) + } + + if err := storeRudderEvent(pi, event, columnTypes); err != nil { + return nil, fmt.Errorf("group: storing rudder event: %w", err) + } + + tableName, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "groups") + if err != nil { + return nil, fmt.Errorf("group: safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, event, columnTypes) + if err != nil { + return nil, fmt.Errorf("group: getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("group: merge event: %w", err) + } + + groupOutput := map[string]any{ + "data": event, + "metadata": map[string]any{ + "table": tableName, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{groupOutput}, mergeEvents...), nil +} + +func (t *transformer) handleAliasEvent(pi *processingInfo) ([]map[string]any, error) { + event := make(map[string]any) + columnTypes := make(map[string]string) + + if err := t.setDataAndColumnTypeFromInput(pi, pi.event.Message["traits"], event, columnTypes, &prefixInfo{ + completePrefix: "alias_traits_", + completeLevel: 2, + }); err != nil { + return nil, fmt.Errorf("alias: setting data and column types from input: %w", err) + } + if err := 
t.setDataAndColumnTypeFromInput(pi, pi.event.Message["context"], event, columnTypes, &prefixInfo{ + completePrefix: "alias_context_", + completeLevel: 2, + prefix: "context_", + }); err != nil { + return nil, fmt.Errorf("alias: setting data and column types from input: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.DefaultRules); err != nil { + return nil, fmt.Errorf("alias: setting data and column types from rules: %w", err) + } + if err := t.setDataAndColumnTypeFromRules(pi, event, columnTypes, rules.AliasRules); err != nil { + return nil, fmt.Errorf("alias: setting data and column types from rules: %w", err) + } + + if err := storeRudderEvent(pi, event, columnTypes); err != nil { + return nil, fmt.Errorf("alias: storing rudder event: %w", err) + } + + tableName, err := SafeTableName(pi.event.Metadata.DestinationType, pi.itrOpts, "aliases") + if err != nil { + return nil, fmt.Errorf("alias: safe table name: %w", err) + } + columns, err := t.getColumns(pi.event.Metadata.DestinationType, event, columnTypes) + if err != nil { + return nil, fmt.Errorf("alias: getting columns: %w", err) + } + + mergeEvents, err := t.handleMergeEvent(pi) + if err != nil { + return nil, fmt.Errorf("merge event: %w", err) + } + + aliasOutput := map[string]any{ + "data": event, + "metadata": map[string]any{ + "table": tableName, + "columns": columns, + "receivedAt": pi.event.Metadata.ReceivedAt, + }, + "userId": "", + } + return append([]map[string]any{aliasOutput}, mergeEvents...), nil +} + +func (t *transformer) handleMergeEvent(pi *processingInfo) ([]map[string]any, error) { + if !t.config.enableIDResolution.Load() { + return nil, nil + } + if !utils.IsIdentityEnabled(pi.event.Metadata.DestinationType) { + return nil, nil + } + + mergeProp1, mergeProp2, err := mergeProps(pi.event.Message, pi.event.Metadata) + if err != nil { + return nil, fmt.Errorf("merge: merge properties: %w", err) + } + if isMergePropEmpty(mergeProp1) { + return nil, nil + } 
+ + tableName, err := mergeRuleTable(pi.event.Metadata.DestinationType, pi.itrOpts) + if err != nil { + return nil, fmt.Errorf("merge: merge rules table: %w", err) + } + columns, err := mergeRuleColumns(pi.event.Metadata.DestinationType, pi.itrOpts) + if err != nil { + return nil, fmt.Errorf("merge: merge columns: %w", err) + } + + event := map[string]any{ + columns.Prop1Type: utils.ToString(mergeProp1.Type), + columns.Prop1Value: utils.ToString(mergeProp1.Value), + } + columnTypes := map[string]any{ + columns.Prop1Type: model.StringDataType, + columns.Prop1Value: model.StringDataType, + } + metadata := map[string]any{ + "table": tableName, + "columns": columnTypes, + "isMergeRule": true, + "receivedAt": pi.event.Metadata.ReceivedAt, + "mergePropOne": event[columns.Prop1Value], + } + + if !isMergePropEmpty(mergeProp2) { + event[columns.Prop2Type] = utils.ToString(mergeProp2.Type) + event[columns.Prop2Value] = utils.ToString(mergeProp2.Value) + columnTypes[columns.Prop2Type] = model.StringDataType + columnTypes[columns.Prop2Value] = model.StringDataType + + metadata["mergePropTwo"] = event[columns.Prop2Value] + } + + mergeOutput := map[string]any{ + "data": event, + "metadata": metadata, + "userId": "", + } + return []map[string]any{mergeOutput}, nil +} + +func mergeProps(message types.SingularEventT, metadata ptrans.Metadata) (*mergeRule, *mergeRule, error) { + switch strings.ToLower(metadata.EventType) { + case "merge": + return mergePropsForMergeEventType(message) + case "alias": + return mergePropsForAliasEventType(message) + default: + return mergePropsForDefaultEventType(message) + } +} + +func mergePropsForMergeEventType(message types.SingularEventT) (*mergeRule, *mergeRule, error) { + mergeProperties := misc.MapLookup(message, "mergeProperties") + if mergeProperties == nil { + return nil, nil, response.ErrMergePropertiesMissing + } + mergePropertiesArr, ok := mergeProperties.([]any) + if !ok { + return nil, nil, response.ErrMergePropertiesNotArray + } + if 
len(mergePropertiesArr) != 2 { + return nil, nil, response.ErrMergePropertiesNotSufficient + } + + mergePropertiesMap0, ok := mergePropertiesArr[0].(map[string]any) + if !ok { + return nil, nil, response.ErrMergePropertyOneInvalid + } + mergePropertiesMap1, ok := mergePropertiesArr[1].(map[string]any) + if !ok { + return nil, nil, response.ErrMergePropertyTwoInvalid + } + + mergeProperties0Type := misc.MapLookup(mergePropertiesMap0, "type") + mergeProperties0Value := misc.MapLookup(mergePropertiesMap0, "value") + mergeProperties1Type := misc.MapLookup(mergePropertiesMap1, "type") + mergeProperties1Value := misc.MapLookup(mergePropertiesMap1, "value") + + if mergeProperties0Type == nil || mergeProperties0Value == nil || mergeProperties1Type == nil || mergeProperties1Value == nil { + return nil, nil, response.ErrMergePropertyEmpty + } + if utils.IsBlank(mergeProperties0Type) || utils.IsBlank(mergeProperties0Value) || utils.IsBlank(mergeProperties1Type) || utils.IsBlank(mergeProperties1Value) { + return nil, nil, response.ErrMergePropertyEmpty + } + + mergeProp1 := &mergeRule{Type: mergeProperties0Type, Value: mergeProperties0Value} + mergeProp2 := &mergeRule{Type: mergeProperties1Type, Value: mergeProperties1Value} + return mergeProp1, mergeProp2, nil +} + +func mergePropsForAliasEventType(message types.SingularEventT) (*mergeRule, *mergeRule, error) { + userID := misc.MapLookup(message, "userId") + previousID := misc.MapLookup(message, "previousId") + + mergeProp1 := &mergeRule{Type: "user_id", Value: userID} + mergeProp2 := &mergeRule{Type: "user_id", Value: previousID} + return mergeProp1, mergeProp2, nil +} + +func mergePropsForDefaultEventType(message types.SingularEventT) (*mergeRule, *mergeRule, error) { + anonymousID := misc.MapLookup(message, "anonymousId") + userID := misc.MapLookup(message, "userId") + + var mergeProp1, mergeProp2 *mergeRule + if anonymousID == nil || utils.IsBlank(anonymousID) { + mergeProp1 = &mergeRule{Type: "user_id", Value: userID} + 
} else { + mergeProp1 = &mergeRule{Type: "anonymous_id", Value: anonymousID} + mergeProp2 = &mergeRule{Type: "user_id", Value: userID} + } + return mergeProp1, mergeProp2, nil +} + +func mergeRuleTable(destType string, options integrationsOptions) (string, error) { + return SafeTableName(destType, options, "rudder_identity_merge_rules") +} + +func mergeRuleColumns(destType string, options integrationsOptions) (*mergeRulesColumns, error) { + var ( + columns [4]string + err error + ) + + for i, col := range []string{ + "merge_property_1_type", "merge_property_1_value", "merge_property_2_type", "merge_property_2_value", + } { + if columns[i], err = SafeColumnName(destType, options, col); err != nil { + return nil, fmt.Errorf("safe column name for %s: %w", col, err) + } + } + + rulesColumns := &mergeRulesColumns{ + Prop1Type: columns[0], Prop1Value: columns[1], Prop2Type: columns[2], Prop2Value: columns[3], + } + return rulesColumns, nil +} + +func isMergePropEmpty(mergeProp *mergeRule) bool { + return mergeProp == nil || mergeProp.Type == nil || mergeProp.Value == nil || utils.IsBlank(mergeProp.Type) || utils.IsBlank(mergeProp.Value) +} + +func (t *transformer) getDataType(destType, key string, val any, isJSONKey bool) string { + if typeName := getPrimitiveType(val); typeName != "" { + return typeName + } + if strVal, ok := val.(string); ok && utils.ValidTimestamp(strVal) { + return model.DateTimeDataType + } + if override := getDataTypeOverride(destType, key, val, isJSONKey); override != "" { + return override + } + return model.StringDataType +} + +func getPrimitiveType(val any) string { + switch v := val.(type) { + case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: + return model.IntDataType + case float64: + return getFloatType(v) + case float32: + return getFloatType(float64(v)) + case bool: + return model.BooleanDataType + default: + return "" + } +} + +func getFloatType(v float64) string { + if v == float64(int64(v)) { + return 
model.IntDataType + } + return model.FloatDataType +} + +func getDataTypeOverride(destType, key string, val any, isJSONKey bool) string { + switch destType { + case whutils.POSTGRES, whutils.SNOWFLAKE, whutils.SnowpipeStreaming: + if key == violationErrors || isJSONKey { + return model.JSONDataType + } + return model.StringDataType + case whutils.RS: + return getDataTypeOverrideForRedshift(val, isJSONKey) + default: + return "" + } +} + +func getDataTypeOverrideForRedshift(val any, isJSONKey bool) string { + if isJSONKey { + return model.JSONDataType + } + if val == nil { + return model.StringDataType + } + if jsonVal, _ := json.Marshal(val); len(jsonVal) > redshiftStringLimit { + return model.TextDataType + } + return model.StringDataType +} + +func (t *transformer) getColumns(destType string, data map[string]any, columnTypes map[string]string) (map[string]any, error) { + columns := make(map[string]any) + + // uuid_ts and loaded_at datatypes are passed from here to create appropriate columns. 
+ // Corresponding values are inserted when loading into the warehouse + uuidTS := "uuid_ts" + if destType == whutils.SNOWFLAKE || destType == whutils.SnowpipeStreaming { + uuidTS = "UUID_TS" + } + columns[uuidTS] = "datetime" + + if destType == whutils.BQ { + columns["loaded_at"] = "datetime" + } + + for key, value := range data { + if dataType, ok := columnTypes[key]; ok { + columns[key] = dataType + } else { + columns[key] = t.getDataType(destType, key, value, false) + } + } + if len(columns) > t.config.maxColumnsInEvent.Load() && !utils.IsRudderSources(data) && !utils.IsDataLake(destType) { + return nil, response.NewTransformerError(fmt.Sprintf("%s transformer: Too many columns outputted from the event", strings.ToLower(destType)), http.StatusBadRequest) + } + return columns, nil +} + +func (t *transformer) setDataAndColumnTypeFromInput( + processInfo *processingInfo, input any, + data map[string]any, columnType map[string]string, + prefixDetails *prefixInfo, +) error { + if input == nil || !utils.IsObject(input) { + return nil + } + + inputMap := input.(map[string]any) + + if len(inputMap) == 0 { + return nil + } + if t.shouldHandleStringLikeObject(prefixDetails, inputMap) { + return t.handleStringLikeObject(processInfo, prefixDetails, data, columnType, inputMap) + } + for key, val := range inputMap { + if val == nil || utils.IsBlank(val) { + continue + } + + if t.isValidJSONPath(processInfo, prefixDetails, key) { + if err := t.handleValidJSONPath(processInfo, prefixDetails, key, val, data, columnType); err != nil { + return fmt.Errorf("handling valid JSON path: %w", err) + } + } else if t.shouldProcessNestedObject(processInfo, prefixDetails, val) { + if err := t.processNestedObject(processInfo, val.(map[string]any), data, columnType, prefixDetails, key); err != nil { + return fmt.Errorf("processing nested object: %w", err) + } + } else { + if err := t.processNonNestedObject(processInfo, prefixDetails, key, val, data, columnType); err != nil { + return 
fmt.Errorf("handling non-nested object: %w", err) + } + } + } + return nil +} + +func (t *transformer) shouldHandleStringLikeObject(prefixDetails *prefixInfo, inputMap map[string]any) bool { + return (strings.HasSuffix(prefixDetails.completePrefix, "context_traits_") || prefixDetails.completePrefix == "group_traits_") && utils.IsStringLikeObject(inputMap) +} + +func (t *transformer) handleStringLikeObject(processInfo *processingInfo, prefixDetails *prefixInfo, data map[string]any, columnType map[string]string, inputMap map[string]any) error { + if prefixDetails.prefix == "context_traits_" { + err := t.addColumnTypeAndValue(processInfo, prefixDetails.prefix, utils.StringLikeObjectToString(inputMap), false, data, columnType) + if err != nil { + return fmt.Errorf("adding column type and value: %w", err) + } + return nil + } + return nil +} + +func (t *transformer) isValidJSONPath(processInfo *processingInfo, prefixDetails *prefixInfo, key string) bool { + validLegacyJSONPath := isValidLegacyJSONPathKey(processInfo.event.Metadata.EventType, prefixDetails.prefix+key, prefixDetails.level, processInfo.jsonPathsInfo.legacyKeysMap) + validJSONPath := isValidJSONPathKey(prefixDetails.completePrefix+key, prefixDetails.completeLevel, processInfo.jsonPathsInfo.keysMap) + return validLegacyJSONPath || validJSONPath +} + +func (t *transformer) handleValidJSONPath(processInfo *processingInfo, prefixDetails *prefixInfo, key string, val any, data map[string]any, columnType map[string]string) error { + valJSON, err := json.Marshal(val) + if err != nil { + return fmt.Errorf("marshalling value: %w", err) + } + return t.addColumnTypeAndValue(processInfo, prefixDetails.prefix+key, string(valJSON), true, data, columnType) +} + +func (t *transformer) shouldProcessNestedObject(processInfo *processingInfo, prefixDetails *prefixInfo, val any) bool { + return utils.IsObject(val) && (processInfo.event.Metadata.SourceCategory != "cloud" || prefixDetails.level < 3) +} + +func (t *transformer) 
processNestedObject( + processInfo *processingInfo, val map[string]any, + data map[string]any, columnType map[string]string, + prefixDetails *prefixInfo, key string, +) error { + newPrefixDetails := &prefixInfo{ + completePrefix: prefixDetails.completePrefix + key + "_", + completeLevel: prefixDetails.completeLevel + 1, + prefix: prefixDetails.prefix + key + "_", + level: prefixDetails.level + 1, + } + return t.setDataAndColumnTypeFromInput(processInfo, val, data, columnType, newPrefixDetails) +} + +func (t *transformer) processNonNestedObject(processInfo *processingInfo, prefixDetails *prefixInfo, key string, val any, data map[string]any, columnType map[string]string) error { + finalValue := val + if processInfo.event.Metadata.SourceCategory == "cloud" && prefixDetails.level >= 3 && utils.IsObject(val) { + jsonData, err := json.Marshal(val) + if err != nil { + return fmt.Errorf("marshalling value: %w", err) + } + finalValue = string(jsonData) + } + return t.addColumnTypeAndValue(processInfo, prefixDetails.prefix+key, finalValue, false, data, columnType) +} + +func (t *transformer) addColumnTypeAndValue(pi *processingInfo, key string, val any, isJSONKey bool, data map[string]any, columnType map[string]string) error { + columnName := TransformColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, pi.dstOpts, key) + if len(columnName) == 0 { + return nil + } + + safeKey, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, columnName) + if err != nil { + return fmt.Errorf("transforming column name: %w", err) + } + + if rules.IsRudderReservedColumn(pi.event.Metadata.EventType, safeKey) { + return nil + } + + dataType := t.getDataType(pi.event.Metadata.DestinationType, key, val, isJSONKey) + + data[safeKey] = convertValIfDateTime(val, dataType) + columnType[safeKey] = dataType + return nil +} + +func (t *transformer) setDataAndColumnTypeFromRules( + pi *processingInfo, + data map[string]any, columnType map[string]string, + functionalRules 
map[string]rules.FunctionalRules, +) error { + for colKey, functionalRule := range functionalRules { + columnName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, colKey) + if err != nil { + return fmt.Errorf("safe column name: %w", err) + } + + delete(data, columnName) + delete(columnType, columnName) + + colVal, err := functionalRule(&pi.event) + if err != nil { + return fmt.Errorf("applying functional rule: %w", err) + } + if colVal == nil || utils.IsBlank(colVal) || utils.IsObject(colVal) { + continue + } + + dataType := t.getDataType(pi.event.Metadata.DestinationType, colKey, colVal, false) + + data[columnName] = convertValIfDateTime(colVal, dataType) + columnType[columnName] = dataType + } + return nil +} + +func convertValIfDateTime(val any, colType string) any { + if colType == model.DateTimeDataType { + return utils.ToTimestamp(val) + } + return val +} + +func (t *transformer) transformerResponseFromErr(event ptrans.TransformerEvent, err error) ptrans.TransformerResponse { + var te *response.TransformerError + if ok := errors.As(err, &te); ok { + return ptrans.TransformerResponse{ + Output: nil, + Metadata: event.Metadata, + Error: te.Error(), + StatusCode: te.StatusCode(), + } + } + + return ptrans.TransformerResponse{ + Output: nil, + Metadata: event.Metadata, + Error: response.ErrInternalServer.Error(), + StatusCode: response.ErrInternalServer.StatusCode(), + } +} + +func storeRudderEvent(pi *processingInfo, output map[string]any, columnType map[string]string) error { + if !pi.dstOpts.storeFullEvent { + return nil + } + + columnName, err := SafeColumnName(pi.event.Metadata.DestinationType, pi.itrOpts, "rudder_event") + if err != nil { + return fmt.Errorf("safe column name: %w", err) + } + + eventJSON, err := json.Marshal(pi.event.Message) + if err != nil { + return fmt.Errorf("marshalling event: %w", err) + } + + output[columnName] = string(eventJSON) + columnType[columnName] = 
// isValidJSONPathKey reports whether key is registered in jsonKeys at exactly
// the given nesting level. Unregistered keys are never valid, even when level
// is zero.
func isValidJSONPathKey(key string, level int, jsonKeys map[string]int) bool {
	registeredLevel, registered := jsonKeys[key]
	return registered && registeredLevel == level
}
`([+-]?\d{4})((-)((0[1-9]|1[0-2])(-([12]\d|0[1-9]|3[01])))([T\s]((([01]\d|2[0-3])((:)[0-5]\d))(:\d+)?)?(:[0-5]\d([.]\d+)?)?([zZ]|([+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)`, + ) + reValidDateTime = regexp.MustCompile(`^(19[7-9]\d|20\d{2})-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])(?:[T\s]([01]\d|2[0-3]):([0-5]\d):([0-5]\d)(\.\d{0,9})?(Z)?)?$`) +) + +var errInvalidDateTime = errors.New("invalid datetime format") + +func FuzzTransformer(f *testing.F) { + pool, err := dockertest.NewPool("") + require.NoError(f, err) + + transformerResource, err := transformertest.Setup(pool, f) + require.NoError(f, err) + + for _, destType := range whutils.WarehouseDestinations { + f.Log("Providing seed corpus for event types for destination type: ", destType) + f.Add(destType, `{}`, `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`) + f.Add(destType, `{}`, `{"type":"alias","messageId":"messageId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"title":"Home | 
RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"page","name":"Home","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`) + f.Add(destType, `{}`, `{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"screen","name":"Main","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, 
`{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`) + f.Add(destType, `{}`, `{"type":"screen","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`) + f.Add(destType, `{}`, `{"type":"group","messageId":"messageId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"title":"Home | 
RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"},{"type":"mobile","value":"+1-202-555-0146"}]}`) + f.Add(destType, `{}`, `{"type":"merge"}`) + f.Add(destType, `{}`, `{"type":"merge", "mergeProperties": "invalid"}`) + f.Add(destType, `{}`, `{"type":"merge", "mergeProperties": []}`) + f.Add(destType, `{}`, `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"}]}`) + f.Add(destType, `{}`, `{"type":"merge","mergeProperties":["invalid",{"type":"email","value":"alex@example.com"}]}`) + f.Add(destType, `{}`, `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"},"invalid"]}`) + f.Add(destType, `{}`, `{"type":"merge","mergeProperties":[{"type1":"email","value1":"alex@example.com"},{"type1":"mobile","value1":"+1-202-555-0146"}]}`) + f.Add(destType, `{}`, `{"type":"merge","mergeProperties":[{"type1":"email","value1":"alex@example.com"},{"type1":"mobile","value1":"+1-202-555-0146"}]}`) + f.Add(destType, `{}`, `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"","previousId":"","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | 
RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"accounts","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"accounts","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipReservedKeywordsEscaping":true}}}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"users","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","event":"event","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipTracksTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"user_id":"user_id","rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipUsersTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`) + f.Add(destType, `{}`, `{"type":"extract","recordId":"recordID","event":"accounts","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"extract","recordId":"recordID","event":"accounts","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipReservedKeywordsEscaping":true}}}}`) + f.Add(destType, `{}`, `{"type":"extract","recordId":"recordID","event":"users","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + + f.Log("Providing seed corpus for column names for destination type: ", destType) + columnNames := []string{ + // SQL keywords and reserved words + "select", "from", "where", "and", "or", "not", "insert", "update", "delete", + "create", "alter", "drop", "table", 
"index", "view", "primary", "foreign", + "key", "constraint", "default", "null", "unique", "check", "references", + "join", "inner", "outer", "left", "right", "full", "on", "group", "by", + "having", "order", "asc", "desc", "limit", "offset", "union", "all", + "distinct", "as", "in", "between", "like", "is", "null", "true", "false", + + // Data types (which can vary by database system) + "int", "integer", "bigint", "smallint", "tinyint", "decimal", "numeric", + "float", "real", "double", "precision", "char", "varchar", "text", "date", + "time", "timestamp", "datetime", "boolean", "blob", "clob", "binary", + + // Names starting with numbers or special characters + "1column", "2_column", "@column", "#column", "$column", + + // Names with spaces or special characters + "column name", "column-name", "column.name", "column@name", "column#name", + "column$name", "column%name", "column&name", "column*name", "column+name", + "column/name", "column\\name", "column'name", "column\"name", "column`name", + + // Names with non-ASCII characters + "columnñame", "colûmnname", "columnнаме", "列名", "カラム名", + + // Very long names (may exceed maximum length in some databases) + "this_is_a_very_long_column_name_that_exceeds_the_maximum_allowed_length_in_many_database_systems", + + // Names that could be confused with functions + "count", "sum", "avg", "max", "min", "first", "last", "now", "current_timestamp", + + // Names with potential encoding issues + "column\u0000name", "column\ufffdname", + + // Names that might conflict with ORM conventions + "id", "_id", "created_at", "updated_at", "deleted_at", + + // Names that might conflict with common programming conventions + "class", "interface", "enum", "struct", "function", "var", "let", "const", + + // Names with emoji or other Unicode symbols + "column😀name", "column→name", "column★name", + + // Names with mathematical symbols + "column+name", "column-name", "column*name", "column/name", "column^name", + "column=name", "columnname", 
"column≠name", "column≈name", + + // Names with comment-like syntax + "column--name", "column/*name*/", + + // Names that might be interpreted as operators + "column||name", "column&&name", "column!name", "column?name", + + // Names with control characters + "column\tname", "column\nname", "column\rname", + + // Names that might conflict with schema notation + "schema.column", "database.schema.column", + + // Names with brackets or parentheses + "column(name)", "column[name]", "column{name}", + + // Names with quotes + "'column'", "\"column\"", "`column`", + + // Names that might be interpreted as aliases + "column as alias", + + // Names that might conflict with database-specific features + "rowid", "oid", "xmin", "ctid", // These are specific to certain databases + + // Names that might conflict with common column naming conventions + "fk_", "idx_", "pk_", "ck_", "uq_", + + // Names with invisible characters + "column\u200bname", // Zero-width space + "column\u00A0name", // Non-breaking space + + // Names with combining characters + "columǹ", // 'n' with combining grave accent + + // Names with bidirectional text + "column\u202Ename\u202C", // Using LTR and RTL markers + + // Names with unusual capitalization + "COLUMN", "Column", "cOlUmN", + + // Names that are empty or only whitespace + "", " ", "\t", "\n", + + // Names with currency symbols + "column¢name", "column£name", "column€name", "column¥name", + + // Names with less common punctuation + "column·name", "column…name", "column•name", "column‽name", + + // Names with fractions or other numeric forms + "column½name", "column²name", "columnⅣname", + } + for _, columnName := range columnNames { + f.Add(destType, `{}`, 
`{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"`+columnName+`":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","`+columnName+`":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"screen","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","`+columnName+`":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"group","messageId":"messageId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"`+columnName+`":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, 
`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","`+columnName+`":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipTracksTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","`+columnName+`":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipUsersTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"extract","recordId":"recordID","event":"users","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","`+columnName+`":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + } + + f.Log("Providing seed corpus for event names for destination type: ", destType) + eventNames := []string{ + "omega", + "omega v2 ", + "9mega", + "mega&", + "ome$ga", + "omega$", + "ome_ ga", + "9mega________-________90", + "Cízǔ", + "Rudderstack", + "___", + "group", + "k3_namespace", + "k3_namespace", + "select", + "drop", + "create", + "alter", + "index", + "table", + "from", + "where", + "join", + "union", + "insert", + "update", + "delete", + "truncate", + "1invalid", + "invalid-name", + "invalid.name", + "name with spaces", + "name@with@special@chars", + "verylongnamethatiswaytoolongforadatabasetablenameandexceedsthemaximumlengthallowed", + "ñáme_wíth_áccents", + "schema.tablename", + "'quoted_name'", + "name--with--comments", + "name/*with*/comments", + "name;with;semicolons", + "name,with,commas", + "name(with)parentheses", + "name[with]brackets", + "name{with}braces", + "name+with+plus", + "name=with=equals", + "nameangle_brackets", + "name|with|pipes", + "name\\with\\backslashes", + "name/with/slashes", + "name\"with\"quotes", + "name'with'single_quotes", + "name`with`backticks", + "name!with!exclamation", + "name?with?question", + "name#with#hash", + "name%with%percent", + "name^with^caret", + "name~with~tilde", + "primary", + "foreign", + "key", + "constraint", + "default", + "null", + "not null", + "auto_increment", + "identity", + "unique", + "check", + "references", + "on delete", + "on 
update", + "cascade", + "restrict", + "set null", + "set default", + "temporary", + "temp", + "view", + "function", + "procedure", + "trigger", + "序列化", // Chinese for "serialization" + "テーブル", // Japanese for "table" + "таблица", // Russian for "table" + "0day", + "_system", + "__hidden__", + "name:with:colons", + "name★with★stars", + "name→with→arrows", + "name•with•bullets", + "name‼with‼double_exclamation", + "name⁉with⁉interrobang", + "name‽with‽interrobang", + "name⚠with⚠warning", + "name☢with☢radiation", + "name❗with❗exclamation", + "name❓with❓question", + "name⏎with⏎return", + "name⌘with⌘command", + "name⌥with⌥option", + "name⇧with⇧shift", + "name⌃with⌃control", + "name⎋with⎋escape", + "name␣with␣space", + "name⍽with⍽space", + "name¶with¶pilcrow", + "name§with§section", + "name‖with‖double_vertical_bar", + "name¦with¦broken_bar", + "name¬with¬negation", + "name¤with¤currency", + "name‰with‰permille", + "name‱with‱permyriad", + "name∞with∞infinity", + "name≠with≠not_equal", + "name≈with≈approximately_equal", + "name≡with≡identical", + "name√with√square_root", + "name∛with∛cube_root", + "name∜with∜fourth_root", + "name∫with∫integral", + "name∑with∑sum", + "name∏with∏product", + "name∀with∀for_all", + "name∃with∃exists", + "name∄with∄does_not_exist", + "name∅with∅empty_set", + "name∈with∈element_of", + "name∉with∉not_element_of", + "name∋with∋contains", + "name∌with∌does_not_contain", + "name∩with∩intersection", + "name∪with∪union", + "name⊂with⊂subset", + "name⊃with⊃superset", + "name⊄with⊄not_subset", + "name⊅with⊅not_superset", + } + for _, eventName := range eventNames { + f.Add(destType, `{}`, 
`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"`+eventName+`","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipTracksTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"extract","recordId":"recordID","event":"`+eventName+`","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + } + + f.Log("Providing seed corpus for random columns for destination type: ", destType) + f.Add(destType, `{}`, testhelper.AddRandomColumns(`{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 500)) + f.Add(destType, `{}`, testhelper.AddRandomColumns(`{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home 
| RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 500)) + f.Add(destType, `{}`, testhelper.AddRandomColumns(`{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 500)) + f.Add(destType, `{}`, testhelper.AddRandomColumns(`{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 500)) + f.Add(destType, `{}`, testhelper.AddRandomColumns(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 500)) + f.Add(destType, `{}`, testhelper.AddRandomColumns(`{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 500)) + f.Add(destType, `{}`, testhelper.AddRandomColumns(`{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 500)) + + f.Log("Providing seed corpus for big columns for destination type: ", destType) + f.Add(destType, `{}`, testhelper.AddLargeColumns(`{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, 
testhelper.AddLargeColumns(`{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, testhelper.AddLargeColumns(`{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, testhelper.AddLargeColumns(`{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, 
testhelper.AddLargeColumns(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, testhelper.AddLargeColumns(`{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, testhelper.AddLargeColumns(`{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + + f.Log("Providing seed corpus for nested levels for destination type: ", destType) + f.Add(destType, `{}`, testhelper.AddNestedLevels(`{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, testhelper.AddNestedLevels(`{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, 
testhelper.AddNestedLevels(`{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, testhelper.AddNestedLevels(`{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, testhelper.AddNestedLevels(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, testhelper.AddNestedLevels(`{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + f.Add(destType, `{}`, testhelper.AddNestedLevels(`{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 10)) + + f.Log("Providing seed corpus for channel for destination type: ", destType) + for _, channel := range []string{"web", "sources", "android", "ios", "server", "backend", "frontend", "mobile", "desktop", "webapp", "mobileapp", "desktopapp", "website"} { + f.Add(destType, `{}`, `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"`+channel+`","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, 
`{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"`+channel+`","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"screen","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"`+channel+`","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"group","messageId":"messageId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"`+channel+`","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"`+channel+`","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipTracksTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"`+channel+`","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipUsersTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"extract","recordId":"recordID","event":"users","receivedAt":"2021-09-01T00:00:00.000Z","channel":"`+channel+`","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + } + + f.Log("Providing seed corpus for caps key names for destination type: ", destType) + f.Add(destType, `{}`, `{"TYPE":"alias","MESSAGEID":"messageId","ANONYMOUSID":"anonymousId","USERID":"userId","PREVIOUSID":"previousId","SENTAT":"2021-09-01T00:00:00.000Z","TIMESTAMP":"2021-09-01T00:00:00.000Z","RECEIVEDAT":"2021-09-01T00:00:00.000Z","ORIGINALTIMESTAMP":"2021-09-01T00:00:00.000Z","CHANNEL":"web","REQUEST_IP":"5.6.7.8","CONTEXT":{"TRAITS":{"EMAIL":"rhedrICKS@example.com","LOGINS":2},"IP":"1.2.3.4"}}`) + f.Add(destType, `{}`, 
`{"TYPE":"page","MESSAGEID":"messageId","USERID":"userId","SENTAT":"2021-09-01T00:00:00.000Z","TIMESTAMP":"2021-09-01T00:00:00.000Z","RECEIVEDAT":"2021-09-01T00:00:00.000Z","ORIGINALTIMESTAMP":"2021-09-01T00:00:00.000Z","PROPERTIES":{"NAME":"Home","TITLE":"Home | RudderStack","URL":"https://www.rudderstack.com"},"CONTEXT":{"TRAITS":{"NAME":"Richard Hendricks","EMAIL":"rhedrICKS@example.com","LOGINS":2},"IP":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"TYPE":"screen","MESSAGEID":"messageId","USERID":"userId","SENTAT":"2021-09-01T00:00:00.000Z","TIMESTAMP":"2021-09-01T00:00:00.000Z","RECEIVEDAT":"2021-09-01T00:00:00.000Z","ORIGINALTIMESTAMP":"2021-09-01T00:00:00.000Z","PROPERTIES":{"NAME":"Main","TITLE":"Home | RudderStack","URL":"https://www.rudderstack.com"},"CONTEXT":{"TRAITS":{"NAME":"Richard Hendricks","EMAIL":"rhedrICKS@example.com","LOGINS":2},"IP":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"TYPE":"group","MESSAGEID":"messageId","USERID":"userId","GROUPID":"groupId","SENTAT":"2021-09-01T00:00:00.000Z","TIMESTAMP":"2021-09-01T00:00:00.000Z","RECEIVEDAT":"2021-09-01T00:00:00.000Z","ORIGINALTIMESTAMP":"2021-09-01T00:00:00.000Z","TRAITS":{"TITLE":"Home | RudderStack","URL":"https://www.rudderstack.com"},"CONTEXT":{"TRAITS":{"EMAIL":"rhedrICKS@example.com","LOGINS":2},"IP":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"TYPE":"track","MESSAGEID":"messageId","ANONYMOUSID":"anonymousId","USERID":"userId","SENTAT":"2021-09-01T00:00:00.000Z","TIMESTAMP":"2021-09-01T00:00:00.000Z","RECEIVEDAT":"2021-09-01T00:00:00.000Z","ORIGINALTIMESTAMP":"2021-09-01T00:00:00.000Z","CHANNEL":"web","EVENT":"event","REQUEST_IP":"5.6.7.8","PROPERTIES":{"REVIEW_ID":"86ac1cd43","PRODUCT_ID":"9578257311"},"USERPROPERTIES":{"RATING":3.0,"REVIEW_BODY":"OK for the price. 
It works but the material feels flimsy."},"CONTEXT":{"TRAITS":{"NAME":"Richard Hendricks","EMAIL":"rhedrICKS@example.com","LOGINS":2},"IP":"1.2.3.4"},"INTEGRATIONS":{"POSTGRES":{"OPTIONS":{"SKIPTRACKSTABLE":true}}}}`) + f.Add(destType, `{}`, `{"TYPE":"identify","MESSAGEID":"messageId","ANONYMOUSID":"anonymousId","USERID":"userId","SENTAT":"2021-09-01T00:00:00.000Z","TIMESTAMP":"2021-09-01T00:00:00.000Z","RECEIVEDAT":"2021-09-01T00:00:00.000Z","ORIGINALTIMESTAMP":"2021-09-01T00:00:00.000Z","CHANNEL":"web","REQUEST_IP":"5.6.7.8","TRAITS":{"REVIEW_ID":"86ac1cd43","PRODUCT_ID":"9578257311"},"USERPROPERTIES":{"RATING":3.0,"REVIEW_BODY":"OK for the price. It works but the material feels flimsy."},"CONTEXT":{"TRAITS":{"NAME":"Richard Hendricks","EMAIL":"rhedrICKS@example.com","LOGINS":2},"IP":"1.2.3.4"},"INTEGRATIONS":{"POSTGRES":{"OPTIONS":{"SKIPUSERSTABLE":true}}}}`) + f.Add(destType, `{}`, `{"TYPE":"extract","RECORDID":"recordID","EVENT":"users","RECEIVEDAT":"2021-09-01T00:00:00.000Z","PROPERTIES":{"NAME":"Home","TITLE":"Home | RudderStack","URL":"https://www.rudderstack.com"},"CONTEXT":{"TRAITS":{"NAME":"Richard Hendricks","EMAIL":"rhedrICKS@example.com","LOGINS":2},"IP":"1.2.3.4"}}`) + + f.Log("Providing seed corpus for caps event type for destination type: ", destType) + f.Add(destType, `{}`, `{"type":"ALIAS","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"PAGE","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | 
RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"SCREEN","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"GROUP","messageId":"messageId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"TRACK","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipTracksTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"IDENTIFY","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipUsersTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"EXTRACT","recordId":"recordID","event":"users","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + + f.Log("Providing seed corpus for integrations options for destination type: ", destType) + integrationOpts := []string{ + `{"options":{"skipReservedKeywordsEscaping":true}}`, + `{"options":{"skipReservedKeywordsEscaping":false}}`, + `{"options":{"useBlendoCasing":false}}`, + `{"options":{"useBlendoCasing":true}}`, + `{"options":{"skipTracksTable":true}}`, + `{"options":{"skipTracksTable":false}}`, + `{"options":{"skipUsersTable":true}}`, + `{"options":{"skipUsersTable":false}}`, + `{"options":{"jsonPaths":["context", "properties", "userProperties", "context.traits"]}}`, + `{"options":{"jsonPaths":["track.context", "track.properties", "track.userProperties", "track.context.traits"]}}`, + `{"options":{"jsonPaths":["properties", "context.traits"]}}`, + 
`{"options":{"jsonPaths":["page.properties", "page.context.traits"]}}`, + `{"options":{"jsonPaths":["screen.properties", "screen.context.traits"]}}`, + `{"options":{"jsonPaths":["alias.properties", "alias.context.traits"]}}`, + `{"options":{"jsonPaths":["group.properties", "group.context.traits"]}}`, + `{"options":{"jsonPaths":["extract.properties", "extract.context.traits"]}}`, + `{"options":{"jsonPaths":["identify.traits", "identify.context.traits", "identify.userProperties"]}}`, + `{"options":{"skipReservedKeywordsEscaping":true,"useBlendoCasing":true,"skipTracksTable":true,"skipUsersTable":true,"jsonPaths":["context", "properties", "userProperties", "context.traits"]}}`, + `{"options":{"skipReservedKeywordsEscaping":false,"useBlendoCasing":false,"skipTracksTable":false,"skipUsersTable":false,"jsonPaths":["track.context", "track.properties", "track.userProperties", "track.context.traits"]}}`, + `{"options":{"skipReservedKeywordsEscaping":true,"useBlendoCasing":true,"skipTracksTable":true,"skipUsersTable":true,"jsonPaths":["properties", "context.traits"]}}`, + `{"options":{"skipReservedKeywordsEscaping":false,"useBlendoCasing":false,"skipTracksTable":false,"skipUsersTable":false,"jsonPaths":["page.properties", "page.context.traits"]}}`, + `{"options":{"skipReservedKeywordsEscaping":true,"useBlendoCasing":true,"skipTracksTable":true,"skipUsersTable":true,"jsonPaths":["screen.properties", "screen.context.traits"]}}`, + `{"options":{"skipReservedKeywordsEscaping":false,"useBlendoCasing":false,"skipTracksTable":false,"skipUsersTable":false,"jsonPaths":["alias.properties", "alias.context.traits"]}}`, + `{"options":{"skipReservedKeywordsEscaping":true,"useBlendoCasing":true,"skipTracksTable":true,"skipUsersTable":true,"jsonPaths":["group.properties", "group.context.traits"]}}`, + `{"options":{"skipReservedKeywordsEscaping":false,"useBlendoCasing":false,"skipTracksTable":false,"skipUsersTable":false,"jsonPaths":["extract.properties", "extract.context.traits"]}}`, + 
`{"options":{"skipReservedKeywordsEscaping":true,"useBlendoCasing":true,"skipTracksTable":true,"skipUsersTable":true,"jsonPaths":["identify.traits", "identify.context.traits", "identify.userProperties"]}}`, + } + for _, destType := range whutils.WarehouseDestinations { + for _, opt := range integrationOpts { + itrOptsPayload := fmt.Sprintf(`"integrations":{"%s":%s}`, destType, opt) + f.Add(destType, `{}`, fmt.Sprintf(`{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},%s}`, itrOptsPayload)) + f.Add(destType, `{}`, fmt.Sprintf(`{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},%s}`, itrOptsPayload)) + f.Add(destType, `{}`, fmt.Sprintf(`{"type":"screen","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},%s}`, itrOptsPayload)) + f.Add(destType, `{}`, 
fmt.Sprintf(`{"type":"group","messageId":"messageId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},%s}`, itrOptsPayload)) + f.Add(destType, `{}`, fmt.Sprintf(`{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},%s}`, itrOptsPayload)) + f.Add(destType, `{}`, fmt.Sprintf(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},%s}`, itrOptsPayload)) + f.Add(destType, `{}`, fmt.Sprintf(`{"type":"IDENTIFY","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},%s}`, itrOptsPayload)) + f.Add(destType, `{}`, fmt.Sprintf(`{"type":"EXTRACT","recordId":"recordID","event":"users","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},%s}`, itrOptsPayload)) + } + + f.Log("Providing seed corpus for destination configurations options for destination type: ", destType) + destConfigOpts := []string{ + `{"skipTracksTable":true}`, + `{"skipTracksTable":false}`, + `{"skipUsersTable":true}`, + `{"skipUsersTable":false}`, + `{"underscoreDivideNumbers":true}`, + `{"underscoreDivideNumbers":false}`, + `{"allowUsersContextTraits":true}`, + `{"allowUsersContextTraits":false}`, + `{"jsonPaths":"context,properties,userProperties,context.traits"}`, + `{"jsonPaths":"track.context,track.properties,track.userProperties,track.context.traits"}`, + `{"jsonPaths":"properties,context.traits"}`, + `{"jsonPaths":"page.properties,page.context.traits"}`, + `{"jsonPaths":"screen.properties,screen.context.traits"}`, + `{"jsonPaths":"alias.properties,alias.context.traits"}`, + 
`{"jsonPaths":"group.properties,group.context.traits"}`, + `{"jsonPaths":"extract.properties,extract.context.traits"}`, + `{"jsonPaths":"identify.traits,identify.context.traits,identify.userProperties"}`, + `{"skipTracksTable":true,"underscoreDivideNumbers":true,"allowUsersContextTraits":true,"jsonPaths":"context,properties,userProperties,context.traits"}`, + `{"skipTracksTable":false,"underscoreDivideNumbers":false,"allowUsersContextTraits":false,"jsonPaths":"track.context,track.properties,track.userProperties,track.context.traits"}`, + `{"skipUsersTable":true,"underscoreDivideNumbers":true,"allowUsersContextTraits":true,"jsonPaths":"properties,context.traits"}`, + `{"skipUsersTable":false,"underscoreDivideNumbers":false,"allowUsersContextTraits":false,"jsonPaths":"page.properties,page.context.traits"}`, + `{"skipTracksTable":true,"underscoreDivideNumbers":true,"allowUsersContextTraits":true,"jsonPaths":"screen.properties,screen.context.traits"}`, + `{"skipTracksTable":false,"underscoreDivideNumbers":false,"allowUsersContextTraits":false,"jsonPaths":"alias.properties,alias.context.traits"}`, + `{"skipUsersTable":true,"underscoreDivideNumbers":true,"allowUsersContextTraits":true,"jsonPaths":"group.properties,group.context.traits"}`, + `{"skipUsersTable":false,"underscoreDivideNumbers":false,"allowUsersContextTraits":false,"jsonPaths":"extract.properties,extract.context.traits"}`, + `{"skipTracksTable":true,"underscoreDivideNumbers":true,"allowUsersContextTraits":true,"jsonPaths":"identify.traits,identify.context.traits,identify.userProperties"}`, + } + for _, destConfig := range destConfigOpts { + f.Add(destType, destConfig, 
`{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, destConfig, `{"type":"page","name":"Home","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, destConfig, `{"type":"screen","name":"Main","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, destConfig, `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, destConfig, `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"},{"type":"mobile","value":"+1-202-555-0146"}]}`) + f.Add(destType, destConfig, 
`{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, destConfig, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, destConfig, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, destConfig, 
`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipTracksTable":true}}}}`) + f.Add(destType, destConfig, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, destConfig, `{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + } + + f.Log("Providing seed corpus for date formats for destination type: ", destType) + dateFormats := []string{ + "2006-01-02", + "2006-01-02T15:04:05", + "2006-01-02T15:04:05.000", + "2006-01-02T15:04:05.000000", + "2006-01-02T15:04:05.000000000", + "2006-01-02T15:04:05Z", + "2006-01-02T15:04:05.000Z", + "2006-01-02T15:04:05.000000Z", + "2006-01-02T15:04:05.000000000Z", + "2006-01-02T15:04:05+07:00", + "2006-01-02T15:04:05.000+07:00", + "2006-01-02T15:04:05-03:00", + "2006-01-02 15:04:05", + "2006-01-02 15:04:05.000", + "2006-W01-2", + "2006-001", + "2006-001T15:04:05Z", + "2006-01-02T15:04:05.123456789Z", + "2006-01-02T15:04:05.0000000000Z", + "2006-01-02T24:00:00Z", + "2023-02-29", + "2020-02-29", + } + for _, dateFormat := range dateFormats { + f.Add(destType, `{}`, `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"`+dateFormat+`","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"`+dateFormat+`","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard 
Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"screen","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"`+dateFormat+`","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"group","messageId":"messageId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"`+dateFormat+`","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + f.Add(destType, `{}`, `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"`+dateFormat+`","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipTracksTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"`+dateFormat+`","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipUsersTable":true}}}}`) + f.Add(destType, `{}`, `{"type":"extract","recordId":"recordID","event":"users","receivedAt":"`+dateFormat+`","channel":"web","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`) + } + } + + f.Fuzz(func(t *testing.T, destType, destConfigJSON, payload string) { + if _, exist := whutils.WarehouseDestinationMap[destType]; !exist { + return + } + + var destConfig map[string]any + err = json.Unmarshal([]byte(destConfigJSON), &destConfig) + if err != nil { + return + } + + sanitizedPayload, err := sanitizePayload(payload) + if err != nil { + if !errors.Is(err, errInvalidDateTime) { + return + } + + t.Log("Destination Type: ", destType, "Destination Config: ", destConfigJSON, "Payload: ", payload) + t.Skip() + } + + var ( + eventType = gjson.Get(sanitizedPayload, "type").String() + eventName = gjson.Get(sanitizedPayload, "event").String() + messageID = gjson.Get(sanitizedPayload, "messageId").String() + receivedAt = gjson.Get(sanitizedPayload, "receivedAt").Time() + 
recordID = gjson.Get(sanitizedPayload, "recordId").Value() + ) + + if len(messageID) == 0 || receivedAt.IsZero() { + return + } + + sanitizedPayload, err = sjson.Set(sanitizedPayload, "receivedAt", receivedAt.Format(misc.RFC3339Milli)) + if err != nil { + return + } + + conf := setupConfig(transformerResource, map[string]any{}) + + destinationTransformer := ptrans.NewTransformer(conf, logger.NOP, stats.Default) + warehouseTransformer := New(conf, logger.NOP, stats.NOP) + + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(sanitizedPayload), + Metadata: ptrans.Metadata{ + EventType: eventType, + EventName: eventName, + DestinationType: destType, + ReceivedAt: receivedAt.Format(misc.RFC3339Milli), + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: messageID, + RecordID: recordID, + }, + Destination: backendconfig.DestinationT{ + Name: destType, + Config: destConfig, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: destType, + }, + }, + }, + } + + cmpEvents(t, eventsInfos, destinationTransformer, warehouseTransformer) + }) + } +} + +func cmpEvents(t *testing.T, infos []testhelper.EventInfo, pTransformer, dTransformer ptrans.DestinationTransformer) { + t.Helper() + + var events []ptrans.TransformerEvent + for _, info := range infos { + var singularEvent types.SingularEventT + err := json.Unmarshal(info.Payload, &singularEvent) + require.NoError(t, err) + + events = append(events, ptrans.TransformerEvent{ + Message: singularEvent, + Metadata: info.Metadata, + Destination: info.Destination, + }) + } + + ctx := context.Background() + batchSize := 100 + + pResponse := pTransformer.Transform(ctx, events, batchSize) + wResponse := dTransformer.Transform(ctx, events, batchSize) + + require.Equal(t, len(wResponse.Events), len(pResponse.Events)) + require.Equal(t, len(wResponse.FailedEvents), len(pResponse.FailedEvents)) + + for i := range pResponse.Events { + require.EqualValues(t, 
wResponse.Events[i], pResponse.Events[i]) + } + for i := range pResponse.FailedEvents { + require.NotEmpty(t, pResponse.FailedEvents[i].Error) + require.NotEmpty(t, wResponse.FailedEvents[i].Error) + + require.NotZero(t, pResponse.FailedEvents[i].StatusCode) + require.NotZero(t, wResponse.FailedEvents[i].StatusCode) + } +} + +func sanitizePayload(input string) (string, error) { + sanitized := strings.ReplaceAll(input, `\u0000`, "") + if len(strings.TrimSpace(sanitized)) == 0 { + return "{}", nil + } + + // Checking for valid datetime formats in the payload + // JS converts new Date('0001-01-01 00:00').toISOString() to 2001-01-01T00:00:00.000Z + // https://www.programiz.com/online-compiler/1P7KHTw0ClE9R + dateTimes := reDateTime.FindAllString(sanitized, -1) + for _, dateTime := range dateTimes { + _, err := dateparse.ParseAny(dateTime, dateparse.PreferMonthFirst(true), dateparse.RetryAmbiguousDateWithSwap(true)) + if err != nil { + return "", fmt.Errorf("invalid datetime format for %s: %w", dateTime, errInvalidDateTime) + } + if !reValidDateTime.MatchString(dateTime) { + return "", fmt.Errorf("invalid datetime format for %s: %w", dateTime, errInvalidDateTime) + } + } + + var result types.SingularEventT + if err := json.Unmarshal([]byte(sanitized), &result); err != nil { + return "", errors.New("invalid JSON format") + } + output, err := json.Marshal(result) + if err != nil { + return "", fmt.Errorf("marshalling error: %w", err) + } + return string(output), nil +} diff --git a/warehouse/transformer/transformer_test.go b/warehouse/transformer/transformer_test.go new file mode 100644 index 0000000000..b26af3e4ad --- /dev/null +++ b/warehouse/transformer/transformer_test.go @@ -0,0 +1,3311 @@ +package transformer + +import ( + "fmt" + "net/http" + "testing" + + "github.com/ory/dockertest/v3" + "github.com/stretchr/testify/require" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" 
+ transformertest "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/transformer" + + backendconfig "github.com/rudderlabs/rudder-server/backend-config" + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" + "github.com/rudderlabs/rudder-server/warehouse/transformer/internal/response" + "github.com/rudderlabs/rudder-server/warehouse/transformer/testhelper" + whutils "github.com/rudderlabs/rudder-server/warehouse/utils" +) + +func TestIdentify(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "identify (POSTGRES)", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (S3_DATALAKE)", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("S3_DATALAKE"), + destination: getDestination("S3_DATALAKE", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + SetDataField("_timestamp", "2021-09-01T00:00:00.000Z"). + SetColumnField("_timestamp", "datetime"). + RemoveDataFields("timestamp"). + RemoveColumnFields("timestamp"). + SetDataField("context_destination_type", "S3_DATALAKE"), + Metadata: getIdentifyMetadata("S3_DATALAKE"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(). + RemoveDataFields("timestamp", "original_timestamp", "sent_at"). 
+ RemoveColumnFields("timestamp", "original_timestamp", "sent_at"). + SetDataField("context_destination_type", "S3_DATALAKE"), + Metadata: getIdentifyMetadata("S3_DATALAKE"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) without traits", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + RemoveDataFields("product_id", "review_id"). + RemoveColumnFields("product_id", "review_id"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(). + RemoveDataFields("product_id", "review_id"). 
+ RemoveColumnFields("product_id", "review_id"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) without userProperties", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + RemoveDataFields("rating", "review_body"). + RemoveColumnFields("rating", "review_body"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(). + RemoveDataFields("rating", "review_body"). + RemoveColumnFields("rating", "review_body"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) without context.traits", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + RemoveDataFields("context_traits_email", "context_traits_logins", "context_traits_name", "email", "logins", "name"). + RemoveColumnFields("context_traits_email", "context_traits_logins", "context_traits_name", "email", "logins", "name"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(). + RemoveDataFields("context_traits_email", "context_traits_logins", "context_traits_name", "email", "logins", "name"). + RemoveColumnFields("context_traits_email", "context_traits_logins", "context_traits_name", "email", "logins", "name"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) without context", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). 
// overriding the default value + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name", "email", "logins", "name"). + RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name", "email", "logins", "name"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). // overriding the default value + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name", "email", "logins", "name"). + RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name", "email", "logins", "name"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) not allowUsersContextTraits", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + RemoveDataFields("email", "logins", "name"). + RemoveColumnFields("email", "logins", "name"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(). 
+ RemoveDataFields("email", "logins", "name"). + RemoveColumnFields("email", "logins", "name"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) user_id already exists", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"user_id":"user_id","rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) store rudder event", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "storeFullEvent": true, + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + SetDataField("rudder_event", "{\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"ip\":\"1.2.3.4\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2,\"name\":\"Richard Hendricks\"},\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\"},\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"traits\":{\"product_id\":\"9578257311\",\"review_id\":\"86ac1cd43\"},\"type\":\"identify\",\"userId\":\"userId\",\"userProperties\":{\"rating\":3,\"review_body\":\"OK for the price. It works but the material feels flimsy.\"}}"). + SetColumnField("rudder_event", "json"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) partial rules", + eventPayload: `{"type":"identify","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + RemoveDataFields("anonymous_id", "channel", "context_request_ip"). + RemoveColumnFields("anonymous_id", "channel", "context_request_ip"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(). + RemoveDataFields("context_request_ip"). + RemoveColumnFields("context_request_ip"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) no userID", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + RemoveDataFields("user_id"). 
+ RemoveColumnFields("user_id"), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) skipUsersTable (dstOpts)", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: backendconfig.DestinationT{ + Name: "POSTGRES", + Config: map[string]any{ + "allowUsersContextTraits": true, + "skipUsersTable": true, + }, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: "POSTGRES", + }, + }, + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (POSTGRES) skipUsersTable (itrOpts)", + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipUsersTable":true}}}}`, + metadata: getIdentifyMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(), + Metadata: getIdentifyMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "identify (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"identify","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipUsersTable":true}}}}`, + metadata: getIdentifyMetadata("BQ"), + destination: getDestination("BQ", map[string]any{ + "allowUsersContextTraits": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getIdentifyDefaultOutput(). + SetDataField("context_destination_type", "BQ"). + SetColumnField("loaded_at", "datetime"), + Metadata: getIdentifyMetadata("BQ"), + StatusCode: http.StatusOK, + }, + { + Output: getIdentifyDefaultMergeOutput(), + Metadata: getIdentifyMetadata("BQ"), + StatusCode: http.StatusOK, + }, + { + Output: getUserDefaultOutput(). + SetDataField("context_destination_type", "BQ"). 
+ SetColumnField("loaded_at", "datetime"), + Metadata: getIdentifyMetadata("BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := setupConfig(transformerResource, tc.configOverride) + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(tc.eventPayload), + Metadata: tc.metadata, + Destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testhelper.ValidateEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} + +func getIdentifyDefaultOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "messageId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "identifies", + }, + "userId": "", + } +} + +func getUserDefaultOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "email": "rhedricks@example.com", + "id": "userId", + "logins": float64(2), + "name": "Richard Hendricks", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "email": "string", + "id": "string", + "logins": "int", + "name": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "users", + }, + "userId": "", + } +} + +func getIdentifyDefaultMergeOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "merge_property_1_type": "string", + "merge_property_1_value": "string", + "merge_property_2_type": "string", + "merge_property_2_value": "string", + }, + "isMergeRule": true, + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "rudder_identity_merge_rules", + }, + "userId": "", + } +} + +func getIdentifyMetadata(destinationType string) ptrans.Metadata { + return ptrans.Metadata{ + EventType: "identify", + DestinationType: destinationType, + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + } 
+} + +func TestAlias(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "alias (Postgres)", + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getAliasMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getAliasDefaultOutput(), + Metadata: getAliasMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (Postgres) without traits", + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getAliasMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getAliasDefaultOutput(). + RemoveDataFields("title", "url"). 
+ RemoveColumnFields("title", "url"), + Metadata: getAliasMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (Postgres) without context", + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`, + metadata: getAliasMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getAliasDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). // overriding the default value + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins"). + RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins"), + Metadata: getAliasMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (Postgres) store rudder event", + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getAliasMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "storeFullEvent": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getAliasDefaultOutput(). 
+ SetDataField("rudder_event", "{\"type\":\"alias\",\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\",\"ip\":\"1.2.3.4\",\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2}},\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"previousId\":\"previousId\",\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"traits\":{\"title\":\"Home | RudderStack\",\"url\":\"https://www.rudderstack.com\"},\"userId\":\"userId\"}"). + SetColumnField("rudder_event", "json"), + Metadata: getAliasMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (Postgres) partial rules", + eventPayload: `{"type":"alias","messageId":"messageId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getAliasMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getAliasDefaultOutput(). + RemoveDataFields("anonymous_id", "channel", "context_request_ip"). 
+ RemoveColumnFields("anonymous_id", "channel", "context_request_ip"), + Metadata: getAliasMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getAliasMetadata("BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getAliasDefaultOutput(). + SetDataField("context_destination_type", "BQ"). 
+ SetColumnField("loaded_at", "datetime"), + Metadata: getAliasMetadata("BQ"), + StatusCode: http.StatusOK, + }, + { + Output: getAliasDefaultMergeOutput(), + Metadata: getAliasMetadata("BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := setupConfig(transformerResource, tc.configOverride) + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(tc.eventPayload), + Metadata: tc.metadata, + Destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testhelper.ValidateEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} + +func getAliasDefaultOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "https://www.rudderstack.com", + "user_id": "userId", + "previous_id": "previousId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + 
"context_traits_email": "string", + "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "previous_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "aliases", + }, + "userId": "", + } +} + +func getAliasDefaultMergeOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "merge_property_1_type": "user_id", + "merge_property_1_value": "userId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "previousId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "userId", + "mergePropTwo": "previousId", + }, + "userId": "", + } +} + +func getAliasMetadata(destinationType string) ptrans.Metadata { + return ptrans.Metadata{ + EventType: "alias", + DestinationType: destinationType, + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + } +} + +func TestExtract(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testCases := []struct { + name string + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "extract (Postgres)", + eventPayload: `{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | 
RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getExtractMetadata(), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getExtractDefaultOutput(), + Metadata: getExtractMetadata(), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) without properties", + eventPayload: `{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getExtractMetadata(), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getExtractDefaultOutput(). + RemoveDataFields("name", "title", "url"). + RemoveColumnFields("name", "title", "url"), + Metadata: getExtractMetadata(), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) without context", + eventPayload: `{"type":"extract","recordId":"recordID","event":"event","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`, + metadata: getExtractMetadata(), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getExtractDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). // overriding the default value + RemoveDataFields("context_ip", "context_traits_email", "context_traits_logins", "context_traits_name"). 
+ RemoveColumnFields("context_ip", "context_traits_email", "context_traits_logins", "context_traits_name"), + Metadata: getExtractMetadata(), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) RudderCreatedTable", + eventPayload: `{"type":"extract","recordId":"recordID","event":"accounts","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getExtractMetadata(), + destination: getDestination("POSTGRES", map[string]any{ + "storeFullEvent": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getExtractDefaultOutput(). + SetDataField("event", "accounts"). + SetTableName("_accounts"), + Metadata: getExtractMetadata(), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) RudderCreatedTable with skipReservedKeywordsEscaping", + eventPayload: `{"type":"extract","recordId":"recordID","event":"accounts","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipReservedKeywordsEscaping":true}}}}`, + metadata: getExtractMetadata(), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getExtractDefaultOutput(). + SetDataField("event", "accounts"). 
+ SetTableName("accounts"), + Metadata: getExtractMetadata(), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "extract (Postgres) RudderIsolatedTable", + eventPayload: `{"type":"extract","recordId":"recordID","event":"users","receivedAt":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getExtractMetadata(), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getExtractDefaultOutput(). + SetDataField("event", "users"). + SetTableName("_users"), + Metadata: getExtractMetadata(), + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := setupConfig(transformerResource, map[string]any{}) + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(tc.eventPayload), + Metadata: tc.metadata, + Destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testhelper.ValidateEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} + +func getExtractDefaultOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "name": "Home", + "context_ip": "1.2.3.4", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "recordID", + "event": "event", + "received_at": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "https://www.rudderstack.com", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": 
"sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "event": "string", + "received_at": "datetime", + "title": "string", + "url": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + } +} + +func getExtractMetadata() ptrans.Metadata { + return ptrans.Metadata{ + EventType: "extract", + DestinationType: "POSTGRES", + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + RecordID: "recordID", + } +} + +func TestPageEvents(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "page (Postgres)", + eventPayload: `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getPageMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: 
[]ptrans.TransformerResponse{ + { + Output: getPageDefaultOutput(), + Metadata: getPageMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (Postgres) without properties", + eventPayload: `{"type":"page","name":"Home","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getPageMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getPageDefaultOutput(). + RemoveDataFields("title", "url"). + RemoveColumnFields("title", "url"), + Metadata: getPageMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (Postgres) without context", + eventPayload: `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`, + metadata: getPageMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getPageDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). // overriding the default value + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"). 
+ RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"), + Metadata: getPageMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (Postgres) store rudder event", + eventPayload: `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getPageMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "storeFullEvent": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getPageDefaultOutput(). + SetDataField("rudder_event", "{\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"ip\":\"1.2.3.4\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2,\"name\":\"Richard Hendricks\"},\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\"},\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"properties\":{\"name\":\"Home\",\"title\":\"Home | RudderStack\",\"url\":\"https://www.rudderstack.com\"},\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"type\":\"page\",\"userId\":\"userId\"}"). 
+ SetColumnField("rudder_event", "json"), + Metadata: getPageMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (Postgres) partial rules", + eventPayload: `{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getPageMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getPageDefaultOutput(). + RemoveDataFields("anonymous_id", "channel", "context_request_ip"). + RemoveColumnFields("anonymous_id", "channel", "context_request_ip"), + Metadata: getPageMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getPageMetadata("BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getPageDefaultOutput(). + SetDataField("context_destination_type", "BQ"). 
+ SetColumnField("loaded_at", "datetime"), + Metadata: getPageMetadata("BQ"), + StatusCode: http.StatusOK, + }, + { + Output: getPageDefaultMergeOutput(), + Metadata: getPageMetadata("BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := setupConfig(transformerResource, tc.configOverride) + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(tc.eventPayload), + Metadata: tc.metadata, + Destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testhelper.ValidateEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} + +func getPageDefaultOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "name": "Home", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "https://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": 
"string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "pages", + }, + "userId": "", + } +} + +func getPageDefaultMergeOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + }, + "userId": "", + } +} + +func getPageMetadata(destinationType string) ptrans.Metadata { + return ptrans.Metadata{ + EventType: "page", + DestinationType: destinationType, + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + } +} + +func TestScreen(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "screen (Postgres)", + eventPayload: 
`{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getScreenMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getScreenDefaultOutput(), + Metadata: getScreenMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (Postgres) without properties", + eventPayload: `{"type":"screen","name":"Main","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getScreenMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getScreenDefaultOutput(). + RemoveDataFields("title", "url"). 
+ RemoveColumnFields("title", "url"), + Metadata: getScreenMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (Postgres) without context", + eventPayload: `{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`, + metadata: getScreenMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getScreenDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). // overriding the default value + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"). + RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"), + Metadata: getScreenMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (Postgres) store rudder event", + eventPayload: `{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getScreenMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "storeFullEvent": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: 
getScreenDefaultOutput(). + SetDataField("rudder_event", "{\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"ip\":\"1.2.3.4\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2,\"name\":\"Richard Hendricks\"},\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\"},\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"properties\":{\"name\":\"Main\",\"title\":\"Home | RudderStack\",\"url\":\"https://www.rudderstack.com\"},\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"type\":\"screen\",\"userId\":\"userId\"}"). + SetColumnField("rudder_event", "json"), + Metadata: getScreenMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (Postgres) partial rules", + eventPayload: `{"type":"screen","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getScreenMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getScreenDefaultOutput(). + RemoveDataFields("anonymous_id", "channel", "context_request_ip"). 
+ RemoveColumnFields("anonymous_id", "channel", "context_request_ip"), + Metadata: getScreenMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "screen (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"screen","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Main","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getScreenMetadata("BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getScreenDefaultOutput(). + SetDataField("context_destination_type", "BQ"). 
+ SetColumnField("loaded_at", "datetime"), + Metadata: getScreenMetadata("BQ"), + StatusCode: http.StatusOK, + }, + { + Output: getScreenDefaultMergeOutput(), + Metadata: getScreenMetadata("BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := setupConfig(transformerResource, tc.configOverride) + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(tc.eventPayload), + Metadata: tc.metadata, + Destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testhelper.ValidateEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} + +func getScreenDefaultOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "name": "Main", + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "https://www.rudderstack.com", + "user_id": "userId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "name": "string", + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": 
"string", + "context_request_ip": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "screens", + }, + "userId": "", + } +} + +func getScreenDefaultMergeOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + }, + "userId": "", + } +} + +func getScreenMetadata(destinationType string) ptrans.Metadata { + return ptrans.Metadata{ + EventType: "screen", + DestinationType: destinationType, + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + } +} + +func TestMerge(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "merge (Postgres)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge"}`, + 
metadata: getMergeMetadata("merge", "POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{}, + }, + { + name: "merge (BQ)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"},{"type":"mobile","value":"+1-202-555-0146"}]}`, + metadata: getMergeMetadata("merge", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "email", + "merge_property_1_value": "alex@example.com", + "merge_property_2_type": "mobile", + "merge_property_2_value": "+1-202-555-0146", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "alex@example.com", + "mergePropTwo": "+1-202-555-0146", + }, + "userId": "", + }, + Metadata: getMergeMetadata("merge", "BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "merge (BQ) not enableIDResolution", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": false, + }, + eventPayload: `{"type":"merge"}`, + metadata: getMergeMetadata("merge", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{}, + }, + { + name: "merge (BQ) missing mergeProperties", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge"}`, + metadata: getMergeMetadata("merge", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: 
response.ErrMergePropertiesMissing.Error(), + StatusCode: response.ErrMergePropertiesMissing.StatusCode(), + Metadata: getMergeMetadata("merge", "BQ"), + }, + }, + }, + }, + { + name: "merge (BQ) invalid mergeProperties", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge", "mergeProperties": "invalid"}`, + metadata: getMergeMetadata("merge", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: response.ErrMergePropertiesNotArray.Error(), + StatusCode: response.ErrMergePropertiesNotArray.StatusCode(), + Metadata: getMergeMetadata("merge", "BQ"), + }, + }, + }, + }, + { + name: "merge (BQ) empty mergeProperties", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge", "mergeProperties": []}`, + metadata: getMergeMetadata("merge", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: response.ErrMergePropertiesNotSufficient.Error(), + StatusCode: response.ErrMergePropertiesNotSufficient.StatusCode(), + Metadata: getMergeMetadata("merge", "BQ"), + }, + }, + }, + }, + { + name: "merge (BQ) single mergeProperties", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"}]}`, + metadata: getMergeMetadata("merge", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: response.ErrMergePropertiesNotSufficient.Error(), + StatusCode: response.ErrMergePropertiesNotSufficient.StatusCode(), + Metadata: getMergeMetadata("merge", "BQ"), + }, + }, + }, + }, + { + name: "merge (BQ) invalid merge property one", + configOverride: map[string]any{ + 
"Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":["invalid",{"type":"email","value":"alex@example.com"}]}`, + metadata: getMergeMetadata("merge", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: response.ErrMergePropertyOneInvalid.Error(), + StatusCode: response.ErrMergePropertyOneInvalid.StatusCode(), + Metadata: getMergeMetadata("merge", "BQ"), + }, + }, + }, + }, + { + name: "merge (BQ) invalid merge property two", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"},"invalid"]}`, + metadata: getMergeMetadata("merge", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: response.ErrMergePropertyTwoInvalid.Error(), + StatusCode: response.ErrMergePropertyTwoInvalid.StatusCode(), + Metadata: getMergeMetadata("merge", "BQ"), + }, + }, + }, + }, + { + name: "merge (BQ) missing mergeProperty", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"merge","mergeProperties":[{"type1":"email","value1":"alex@example.com"},{"type1":"mobile","value1":"+1-202-555-0146"}]}`, + metadata: getMergeMetadata("merge", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: response.ErrMergePropertyEmpty.Error(), + StatusCode: response.ErrMergePropertyEmpty.StatusCode(), + Metadata: getMergeMetadata("merge", "BQ"), + }, + }, + }, + }, + { + name: "merge (SNOWFLAKE)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: 
`{"type":"merge","mergeProperties":[{"type":"email","value":"alex@example.com"},{"type":"mobile","value":"+1-202-555-0146"}]}`, + metadata: getMergeMetadata("merge", "SNOWFLAKE"), + destination: getDestination("SNOWFLAKE", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: map[string]any{ + "data": map[string]any{ + "MERGE_PROPERTY_1_TYPE": "email", + "MERGE_PROPERTY_1_VALUE": "alex@example.com", + "MERGE_PROPERTY_2_TYPE": "mobile", + "MERGE_PROPERTY_2_VALUE": "+1-202-555-0146", + }, + "metadata": map[string]any{ + "table": "RUDDER_IDENTITY_MERGE_RULES", + "columns": map[string]any{"MERGE_PROPERTY_1_TYPE": "string", "MERGE_PROPERTY_1_VALUE": "string", "MERGE_PROPERTY_2_TYPE": "string", "MERGE_PROPERTY_2_VALUE": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "alex@example.com", + "mergePropTwo": "+1-202-555-0146", + }, + "userId": "", + }, + Metadata: getMergeMetadata("merge", "SNOWFLAKE"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (BQ)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","previousId":"previousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getMergeMetadata("alias", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getAliasDefaultOutput(). + SetDataField("context_destination_type", "BQ"). 
+ SetColumnField("loaded_at", "datetime"), + Metadata: getMergeMetadata("alias", "BQ"), + StatusCode: http.StatusOK, + }, + { + Output: getAliasDefaultMergeOutput(), + Metadata: getMergeMetadata("alias", "BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (BQ) no userId and previousId", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getMergeMetadata("alias", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getAliasDefaultOutput(). + SetDataField("context_destination_type", "BQ"). + SetColumnField("loaded_at", "datetime"). + RemoveDataFields("user_id", "previous_id"). 
+ RemoveColumnFields("user_id", "previous_id"), + Metadata: getMergeMetadata("alias", "BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "alias (BQ) empty userId and previousId", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"alias","messageId":"messageId","anonymousId":"anonymousId","userId":"","previousId":"","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getMergeMetadata("alias", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getAliasDefaultOutput(). + SetDataField("context_destination_type", "BQ"). + SetColumnField("loaded_at", "datetime"). + RemoveDataFields("user_id", "previous_id"). 
+ RemoveColumnFields("user_id", "previous_id"), + Metadata: getMergeMetadata("alias", "BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (BQ)", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"page","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getMergeMetadata("page", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getPageDefaultOutput(). + SetDataField("context_destination_type", "BQ"). 
+ SetColumnField("loaded_at", "datetime"), + Metadata: getMergeMetadata("page", "BQ"), + StatusCode: http.StatusOK, + }, + { + Output: getPageDefaultMergeOutput(), + Metadata: getMergeMetadata("page", "BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "page (BQ) no anonymousID", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"page","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"name":"Home","title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getMergeMetadata("page", "BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getPageDefaultOutput(). + SetDataField("context_destination_type", "BQ"). + SetColumnField("loaded_at", "datetime"). + RemoveDataFields("anonymous_id"). 
+ RemoveColumnFields("anonymous_id"), + Metadata: getMergeMetadata("page", "BQ"), + StatusCode: http.StatusOK, + }, + { + Output: map[string]any{ + "data": map[string]any{ + "merge_property_1_type": "user_id", + "merge_property_1_value": "userId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "userId", + }, + "userId": "", + }, + Metadata: getMergeMetadata("page", "BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := setupConfig(transformerResource, tc.configOverride) + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(tc.eventPayload), + Metadata: tc.metadata, + Destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testhelper.ValidateEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} + +func getMergeMetadata(eventType, destinationType string) ptrans.Metadata { + return ptrans.Metadata{ + EventType: eventType, + DestinationType: destinationType, + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + } +} + +func TestTrack(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "track (POSTGRES)", + eventPayload: 
`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) without properties", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). 
+ RemoveDataFields("product_id", "review_id"). + RemoveColumnFields("product_id", "review_id"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) without userProperties", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + RemoveDataFields("rating", "review_body"). + RemoveColumnFields("rating", "review_body"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) without context", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). // overriding the default value + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"). + RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). // overriding the default value + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"). + RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) RudderCreatedTable", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"accounts","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). 
+ SetDataField("event", "accounts"). + SetDataField("event_text", "accounts"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + SetDataField("event", "accounts"). + SetDataField("event_text", "accounts"). + SetTableName("_accounts"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) RudderCreatedTable with skipReservedKeywordsEscaping", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"accounts","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipReservedKeywordsEscaping":true}}}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("event", "accounts"). + SetDataField("event_text", "accounts"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + SetDataField("event", "accounts"). + SetDataField("event_text", "accounts"). 
+ SetTableName("accounts"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) RudderIsolatedTable", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"users","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("event", "users"). + SetDataField("event_text", "users"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + SetDataField("event", "users"). + SetDataField("event_text", "users"). + SetTableName("_users"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) empty event", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("event", ""). + RemoveDataFields("event_text"). + RemoveColumnFields("event_text"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) no event", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("event", ""). + RemoveDataFields("event_text"). 
+ RemoveColumnFields("event_text"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) store rudder event", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{ + "storeFullEvent": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("rudder_event", "{\"type\":\"track\",\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\",\"ip\":\"1.2.3.4\",\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2,\"name\":\"Richard Hendricks\"}},\"event\":\"event\",\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"properties\":{\"product_id\":\"9578257311\",\"review_id\":\"86ac1cd43\"},\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"userId\":\"userId\",\"userProperties\":{\"rating\":3,\"review_body\":\"OK for the price. It works but the material feels flimsy.\"}}"). 
+ SetColumnField("rudder_event", "json"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) partial rules", + eventPayload: `{"type":"track","messageId":"messageId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","event":"event","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + RemoveDataFields("anonymous_id", "channel", "context_request_ip"). + RemoveColumnFields("anonymous_id", "channel", "context_request_ip"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + RemoveDataFields("anonymous_id", "channel", "context_request_ip"). 
+ RemoveColumnFields("anonymous_id", "channel", "context_request_ip"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) skipTracksTable (dstOpts)", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{ + "skipTracksTable": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getEventDefaultOutput(), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (POSTGRES) skipTracksTable (itrOpts)", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"},"integrations":{"POSTGRES":{"options":{"skipTracksTable":true}}}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getEventDefaultOutput(), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "track (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("BQ", "webhook"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("context_destination_type", "BQ"). + SetColumnField("loaded_at", "datetime"), + Metadata: getTrackMetadata("BQ", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + SetDataField("context_destination_type", "BQ"). 
+ SetColumnField("loaded_at", "datetime"), + Metadata: getTrackMetadata("BQ", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getTrackDefaultMergeOutput(), + Metadata: getTrackMetadata("BQ", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := setupConfig(transformerResource, tc.configOverride) + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(tc.eventPayload), + Metadata: tc.metadata, + Destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testhelper.ValidateEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} + +func getTrackDefaultOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + 
"context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "tracks", + }, + "userId": "", + } +} + +func getEventDefaultOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "context_traits_name": "Richard Hendricks", + "event": "event", + "event_text": "event", + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "product_id": "9578257311", + "rating": 3.0, + "received_at": "2021-09-01T00:00:00.000Z", + "review_body": "OK for the price. 
It works but the material feels flimsy.", + "review_id": "86ac1cd43", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "user_id": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_traits_email": "string", + "context_traits_logins": "int", + "context_traits_name": "string", + "event": "string", + "event_text": "string", + "id": "string", + "original_timestamp": "datetime", + "product_id": "string", + "rating": "int", + "received_at": "datetime", + "review_body": "string", + "review_id": "string", + "sent_at": "datetime", + "timestamp": "datetime", + "user_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "event", + }, + "userId": "", + } +} + +func getTrackDefaultMergeOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "merge_property_1_type": "string", + "merge_property_1_value": "string", + "merge_property_2_type": "string", + "merge_property_2_value": "string", + }, + "isMergeRule": true, + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "rudder_identity_merge_rules", + }, + "userId": "", + } +} + +func getTrackMetadata(destinationType, sourceCategory string) ptrans.Metadata { + return ptrans.Metadata{ + EventType: "track", + DestinationType: destinationType, + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + 
DestinationID: "destinationID", + SourceType: "sourceType", + SourceCategory: sourceCategory, + MessageID: "messageId", + } +} + +func TestGroup(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + transformerResource, err := transformertest.Setup(pool, t) + require.NoError(t, err) + + testCases := []struct { + name string + configOverride map[string]any + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "group (Postgres)", + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getGroupMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getGroupDefaultOutput(), + Metadata: getGroupMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (Postgres) without traits", + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getGroupMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: 
getGroupDefaultOutput(). + RemoveDataFields("title", "url"). + RemoveColumnFields("title", "url"), + Metadata: getGroupMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (Postgres) without context", + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"}}`, + metadata: getGroupMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getGroupDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). // overriding the default value + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins"). + RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins"), + Metadata: getGroupMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (Postgres) store rudder event", + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getGroupMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{ + "storeFullEvent": true, + }), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getGroupDefaultOutput(). 
+ SetDataField("rudder_event", "{\"type\":\"group\",\"anonymousId\":\"anonymousId\",\"channel\":\"web\",\"context\":{\"destinationId\":\"destinationID\",\"destinationType\":\"POSTGRES\",\"ip\":\"1.2.3.4\",\"sourceId\":\"sourceID\",\"sourceType\":\"sourceType\",\"traits\":{\"email\":\"rhedricks@example.com\",\"logins\":2}},\"groupId\":\"groupId\",\"messageId\":\"messageId\",\"originalTimestamp\":\"2021-09-01T00:00:00.000Z\",\"receivedAt\":\"2021-09-01T00:00:00.000Z\",\"request_ip\":\"5.6.7.8\",\"sentAt\":\"2021-09-01T00:00:00.000Z\",\"timestamp\":\"2021-09-01T00:00:00.000Z\",\"traits\":{\"title\":\"Home | RudderStack\",\"url\":\"https://www.rudderstack.com\"},\"userId\":\"userId\"}"). + SetColumnField("rudder_event", "json"), + Metadata: getGroupMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (Postgres) partial rules", + eventPayload: `{"type":"group","messageId":"messageId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getGroupMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getGroupDefaultOutput(). + RemoveDataFields("anonymous_id", "channel", "context_request_ip"). 
+ RemoveColumnFields("anonymous_id", "channel", "context_request_ip"), + Metadata: getGroupMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "group (BQ) merge event", + configOverride: map[string]any{ + "Warehouse.enableIDResolution": true, + }, + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getGroupMetadata("BQ"), + destination: getDestination("BQ", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getGroupDefaultOutput(). + SetDataField("context_destination_type", "BQ"). + SetColumnField("loaded_at", "datetime"). 
+ SetTableName("_groups"), + Metadata: getGroupMetadata("BQ"), + StatusCode: http.StatusOK, + }, + { + Output: getGroupDefaultMergeOutput(), + Metadata: getGroupMetadata("BQ"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := setupConfig(transformerResource, tc.configOverride) + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(tc.eventPayload), + Metadata: tc.metadata, + Destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testhelper.ValidateEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} + +func getGroupDefaultOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "anonymous_id": "anonymousId", + "channel": "web", + "context_ip": "1.2.3.4", + "context_passed_ip": "1.2.3.4", + "context_request_ip": "5.6.7.8", + "context_traits_email": "rhedricks@example.com", + "context_traits_logins": float64(2), + "id": "messageId", + "original_timestamp": "2021-09-01T00:00:00.000Z", + "received_at": "2021-09-01T00:00:00.000Z", + "sent_at": "2021-09-01T00:00:00.000Z", + "timestamp": "2021-09-01T00:00:00.000Z", + "title": "Home | RudderStack", + "url": "https://www.rudderstack.com", + "user_id": "userId", + "group_id": "groupId", + "context_destination_id": "destinationID", + "context_destination_type": "POSTGRES", + "context_source_id": "sourceID", + "context_source_type": "sourceType", + }, + "metadata": map[string]any{ + "columns": map[string]any{ + "anonymous_id": "string", + "channel": "string", + "context_destination_id": "string", + "context_destination_type": "string", + "context_source_id": "string", + "context_source_type": "string", + "context_ip": "string", + "context_passed_ip": "string", + "context_request_ip": "string", + "context_traits_email": "string", 
+ "context_traits_logins": "int", + "id": "string", + "original_timestamp": "datetime", + "received_at": "datetime", + "sent_at": "datetime", + "timestamp": "datetime", + "title": "string", + "url": "string", + "user_id": "string", + "group_id": "string", + "uuid_ts": "datetime", + }, + "receivedAt": "2021-09-01T00:00:00.000Z", + "table": "groups", + }, + "userId": "", + } +} + +func getGroupDefaultMergeOutput() testhelper.OutputBuilder { + return testhelper.OutputBuilder{ + "data": map[string]any{ + "merge_property_1_type": "anonymous_id", + "merge_property_1_value": "anonymousId", + "merge_property_2_type": "user_id", + "merge_property_2_value": "userId", + }, + "metadata": map[string]any{ + "table": "rudder_identity_merge_rules", + "columns": map[string]any{"merge_property_1_type": "string", "merge_property_1_value": "string", "merge_property_2_type": "string", "merge_property_2_value": "string"}, + "isMergeRule": true, + "receivedAt": "2021-09-01T00:00:00.000Z", + "mergePropOne": "anonymousId", + "mergePropTwo": "userId", + }, + "userId": "", + } +} + +func getGroupMetadata(destinationType string) ptrans.Metadata { + return ptrans.Metadata{ + EventType: "group", + DestinationType: destinationType, + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + } +} + +func TestTransformer(t *testing.T) { + testCases := []struct { + name string + configOverride map[string]any + envOverride []string + eventPayload string + metadata ptrans.Metadata + destination backendconfig.DestinationT + expectedResponse ptrans.Response + }{ + { + name: "Unknown event", + eventPayload: `{"type":"unknown"}`, + metadata: getMetadata("unknown", "POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: "Unknown event type: \"unknown\"", + Metadata: getMetadata("unknown", "POSTGRES"), + StatusCode: 
http.StatusBadRequest, + }, + }, + }, + }, + // TODO: Enable this once we have the https://github.com/rudderlabs/rudder-transformer/pull/3806 changes in latest + //{ + // name: "Not populateSrcDestInfoInContext", + // configOverride: map[string]any{ + // "Warehouse.populateSrcDestInfoInContext": false, + // }, + // envOverride: []string{"WH_POPULATE_SRC_DEST_INFO_IN_CONTEXT=false"}, + // eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + // metadata: getMetadata("track", "POSTGRES"), + // destination: getDestination("POSTGRES", map[string]any{}), + // expectedResponse: ptrans.Response{ + // Events: []ptrans.TransformerResponse{ + // { + // Output: getTrackDefaultOutput(). + // RemoveDataFields("context_destination_id", "context_destination_type", "context_source_id", "context_source_type"). + // RemoveColumnFields("context_destination_id", "context_destination_type", "context_source_id", "context_source_type"), + // Metadata: getMetadata("track", "POSTGRES"), + // StatusCode: http.StatusOK, + // }, + // { + // Output: getEventDefaultOutput(). + // RemoveDataFields("context_destination_id", "context_destination_type", "context_source_id", "context_source_type"). 
+ // RemoveColumnFields("context_destination_id", "context_destination_type", "context_source_id", "context_source_type"), + // Metadata: getMetadata("track", "POSTGRES"), + // StatusCode: http.StatusOK, + // }, + // }, + // }, + //}, + { + name: "Too many columns", + eventPayload: testhelper.AddRandomColumns(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","context":{%s},"ip":"1.2.3.4"}`, 500), + metadata: getMetadata("track", "POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: "postgres transformer: Too many columns outputted from the event", + Metadata: getMetadata("track", "POSTGRES"), + StatusCode: http.StatusBadRequest, + }, + }, + }, + }, + { + name: "Too many columns (DataLake)", + eventPayload: testhelper.AddRandomColumns(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 500), + metadata: getMetadata("track", "GCS_DATALAKE"), + destination: getDestination("GCS_DATALAKE", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput().SetDataField("context_destination_type", "GCS_DATALAKE").AddRandomEntries(500, func(index int) (string, string, string, string) { + return fmt.Sprintf("context_random_column_%d", index), fmt.Sprintf("random_value_%d", index), fmt.Sprintf("context_random_column_%d", index), "string" + }), + Metadata: getMetadata("track", "GCS_DATALAKE"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput().SetDataField("context_destination_type", "GCS_DATALAKE").AddRandomEntries(500, func(index int) (string, string, string, string) { + return fmt.Sprintf("context_random_column_%d", index), fmt.Sprintf("random_value_%d", index), fmt.Sprintf("context_random_column_%d", index), "string" + }), + Metadata: getMetadata("track", "GCS_DATALAKE"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "Too many columns channel as sources", + eventPayload: testhelper.AddRandomColumns(`{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"sources","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3.0,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},%s,"ip":"1.2.3.4"}}`, 500), + metadata: getMetadata("track", "POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput().SetDataField("channel", "sources").AddRandomEntries(500, func(index int) (string, string, string, string) { + return fmt.Sprintf("context_random_column_%d", index), fmt.Sprintf("random_value_%d", index), fmt.Sprintf("context_random_column_%d", index), "string" + }), + Metadata: getMetadata("track", "POSTGRES"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput().SetDataField("channel", "sources").AddRandomEntries(500, func(index int) (string, string, string, string) { + return fmt.Sprintf("context_random_column_%d", index), fmt.Sprintf("random_value_%d", index), fmt.Sprintf("context_random_column_%d", index), "string" + }), + Metadata: getMetadata("track", "POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "StringLikeObject for context traits", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"0":"a","1":"b","2":"c"},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). 
+ RemoveDataFields("context_traits_email", "context_traits_logins", "context_traits_name"). + RemoveColumnFields("context_traits_email", "context_traits_logins", "context_traits_name"). + SetDataField("context_traits", "abc"). + SetColumnField("context_traits", "string"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + RemoveDataFields("context_traits_email", "context_traits_logins", "context_traits_name"). + RemoveColumnFields("context_traits_email", "context_traits_logins", "context_traits_name"). + SetDataField("context_traits", "abc"). + SetColumnField("context_traits", "string"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "StringLikeObject for group traits", + eventPayload: `{"type":"group","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","groupId":"groupId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","request_ip":"5.6.7.8","traits":{"title":"Home | RudderStack","url":"https://www.rudderstack.com"},"context":{"traits":{"0":"a","1":"b","2":"c"},"ip":"1.2.3.4"}}`, + metadata: getGroupMetadata("POSTGRES"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getGroupDefaultOutput(). + RemoveDataFields("context_traits_email", "context_traits_logins", "context_traits_name"). + RemoveColumnFields("context_traits_email", "context_traits_logins", "context_traits_name"). + SetDataField("context_traits", "abc"). 
+ SetColumnField("context_traits", "string"), + Metadata: getGroupMetadata("POSTGRES"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "Not StringLikeObject for context properties", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"0":"a","1":"b","2":"c"},"userProperties":{"rating":3,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + RemoveDataFields("product_id", "review_id"). + RemoveColumnFields("product_id", "review_id"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + RemoveDataFields("product_id", "review_id"). + RemoveColumnFields("product_id", "review_id"). + SetDataField("_0", "a").SetColumnField("_0", "string"). + SetDataField("_1", "b").SetColumnField("_1", "string"). 
+ SetDataField("_2", "c").SetColumnField("_2", "string"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "context, properties and userProperties as null", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":null,"userProperties":null,"context":null}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"). + RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name", "product_id", "rating", "review_body", "review_id"). 
+ RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name", "product_id", "rating", "review_body", "review_id"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "context, properties and userProperties as not a object", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":"properties","userProperties":"userProperties","context":"context"}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + FailedEvents: []ptrans.TransformerResponse{ + { + Error: response.ErrContextNotMap.Error(), + StatusCode: response.ErrContextNotMap.StatusCode(), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + }, + }, + }, + }, + { + name: "context, properties and userProperties as empty map", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{},"userProperties":{},"context":{}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"). 
+ RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + SetDataField("context_ip", "5.6.7.8"). + RemoveDataFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name", "product_id", "rating", "review_body", "review_id"). + RemoveColumnFields("context_passed_ip", "context_traits_email", "context_traits_logins", "context_traits_name", "product_id", "rating", "review_body", "review_id"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "Nested object level with no limit when source category is not cloud", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3,"review_body":"OK for the price. It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2,"location":{"city":"Palo Alto","state":"California","country":"USA","coordinates":{"latitude":37.4419,"longitude":-122.143,"geo":{"altitude":30.5,"accuracy":5,"details":{"altitudeUnits":"meters","accuracyUnits":"meters"}}}}},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "webhook"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("context_traits_location_city", "Palo Alto"). + SetDataField("context_traits_location_state", "California"). 
+ SetDataField("context_traits_location_country", "USA"). + SetDataField("context_traits_location_coordinates_latitude", 37.4419). + SetDataField("context_traits_location_coordinates_longitude", -122.143). + SetDataField("context_traits_location_coordinates_geo_altitude", 30.5). + SetDataField("context_traits_location_coordinates_geo_accuracy", 5.0). + SetDataField("context_traits_location_coordinates_geo_details_altitude_units", "meters"). + SetDataField("context_traits_location_coordinates_geo_details_accuracy_units", "meters"). + SetColumnField("context_traits_location_city", "string"). + SetColumnField("context_traits_location_state", "string"). + SetColumnField("context_traits_location_country", "string"). + SetColumnField("context_traits_location_coordinates_latitude", "float"). + SetColumnField("context_traits_location_coordinates_longitude", "float"). + SetColumnField("context_traits_location_coordinates_geo_altitude", "float"). + SetColumnField("context_traits_location_coordinates_geo_accuracy", "int"). + SetColumnField("context_traits_location_coordinates_geo_details_altitude_units", "string"). + SetColumnField("context_traits_location_coordinates_geo_details_accuracy_units", "string"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + SetDataField("context_traits_location_city", "Palo Alto"). + SetDataField("context_traits_location_state", "California"). + SetDataField("context_traits_location_country", "USA"). + SetDataField("context_traits_location_coordinates_latitude", 37.4419). + SetDataField("context_traits_location_coordinates_longitude", -122.143). + SetDataField("context_traits_location_coordinates_geo_altitude", 30.5). + SetDataField("context_traits_location_coordinates_geo_accuracy", 5.0). + SetDataField("context_traits_location_coordinates_geo_details_altitude_units", "meters"). 
+ SetDataField("context_traits_location_coordinates_geo_details_accuracy_units", "meters"). + SetColumnField("context_traits_location_city", "string"). + SetColumnField("context_traits_location_state", "string"). + SetColumnField("context_traits_location_country", "string"). + SetColumnField("context_traits_location_coordinates_latitude", "float"). + SetColumnField("context_traits_location_coordinates_longitude", "float"). + SetColumnField("context_traits_location_coordinates_geo_altitude", "float"). + SetColumnField("context_traits_location_coordinates_geo_accuracy", "int"). + SetColumnField("context_traits_location_coordinates_geo_details_altitude_units", "string"). + SetColumnField("context_traits_location_coordinates_geo_details_accuracy_units", "string"), + Metadata: getTrackMetadata("POSTGRES", "webhook"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + { + name: "Nested object level limits to 3 when source category is cloud", + eventPayload: `{"type":"track","messageId":"messageId","anonymousId":"anonymousId","userId":"userId","sentAt":"2021-09-01T00:00:00.000Z","timestamp":"2021-09-01T00:00:00.000Z","receivedAt":"2021-09-01T00:00:00.000Z","originalTimestamp":"2021-09-01T00:00:00.000Z","channel":"web","event":"event","request_ip":"5.6.7.8","properties":{"review_id":"86ac1cd43","product_id":"9578257311"},"userProperties":{"rating":3,"review_body":"OK for the price. 
It works but the material feels flimsy."},"context":{"traits":{"name":"Richard Hendricks","email":"rhedricks@example.com","logins":2,"location":{"city":"Palo Alto","state":"California","country":"USA","coordinates":{"latitude":37.4419,"longitude":-122.143,"geo":{"altitude":30.5,"accuracy":5,"details":{"altitudeUnits":"meters","accuracyUnits":"meters"}}}}},"ip":"1.2.3.4"}}`, + metadata: getTrackMetadata("POSTGRES", "cloud"), + destination: getDestination("POSTGRES", map[string]any{}), + expectedResponse: ptrans.Response{ + Events: []ptrans.TransformerResponse{ + { + Output: getTrackDefaultOutput(). + SetDataField("context_traits_location_city", "Palo Alto"). + SetDataField("context_traits_location_state", "California"). + SetDataField("context_traits_location_country", "USA"). + SetDataField("context_traits_location_coordinates_latitude", 37.4419). + SetDataField("context_traits_location_coordinates_longitude", -122.143). + SetDataField("context_traits_location_coordinates_geo", `{"accuracy":5,"altitude":30.5,"details":{"accuracyUnits":"meters","altitudeUnits":"meters"}}`). + SetColumnField("context_traits_location_city", "string"). + SetColumnField("context_traits_location_state", "string"). + SetColumnField("context_traits_location_country", "string"). + SetColumnField("context_traits_location_coordinates_latitude", "float"). + SetColumnField("context_traits_location_coordinates_geo", "string"). + SetColumnField("context_traits_location_coordinates_longitude", "float"), + Metadata: getTrackMetadata("POSTGRES", "cloud"), + StatusCode: http.StatusOK, + }, + { + Output: getEventDefaultOutput(). + SetDataField("context_traits_location_city", "Palo Alto"). + SetDataField("context_traits_location_state", "California"). + SetDataField("context_traits_location_country", "USA"). + SetDataField("context_traits_location_coordinates_latitude", 37.4419). + SetDataField("context_traits_location_coordinates_longitude", -122.143). 
+ SetDataField("context_traits_location_coordinates_geo", `{"accuracy":5,"altitude":30.5,"details":{"accuracyUnits":"meters","altitudeUnits":"meters"}}`). + SetColumnField("context_traits_location_city", "string"). + SetColumnField("context_traits_location_state", "string"). + SetColumnField("context_traits_location_country", "string"). + SetColumnField("context_traits_location_coordinates_latitude", "float"). + SetColumnField("context_traits_location_coordinates_geo", "string"). + SetColumnField("context_traits_location_coordinates_longitude", "float"), + Metadata: getTrackMetadata("POSTGRES", "cloud"), + StatusCode: http.StatusOK, + }, + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + var opts []transformertest.Option + for _, envOverride := range tc.envOverride { + opts = append(opts, transformertest.WithEnv(envOverride)) + } + transformerResource, err := transformertest.Setup(pool, t, opts...) 
+ require.NoError(t, err) + + c := setupConfig(transformerResource, tc.configOverride) + eventsInfos := []testhelper.EventInfo{ + { + Payload: []byte(tc.eventPayload), + Metadata: tc.metadata, + Destination: tc.destination, + }, + } + destinationTransformer := ptrans.NewTransformer(c, logger.NOP, stats.Default) + warehouseTransformer := New(c, logger.NOP, stats.NOP) + + testhelper.ValidateEvents(t, eventsInfos, destinationTransformer, warehouseTransformer, tc.expectedResponse) + }) + } +} + +func setupConfig(resource *transformertest.Resource, configOverride map[string]any) *config.Config { + c := config.New() + c.Set("DEST_TRANSFORM_URL", resource.TransformerURL) + c.Set("USER_TRANSFORM_URL", resource.TransformerURL) + + for k, v := range configOverride { + c.Set(k, v) + } + return c +} + +func getDestination(destinationType string, config map[string]any) backendconfig.DestinationT { + return backendconfig.DestinationT{ + Name: destinationType, + Config: config, + DestinationDefinition: backendconfig.DestinationDefinitionT{ + Name: destinationType, + }, + } +} + +func getMetadata(eventType, destinationType string) ptrans.Metadata { + return ptrans.Metadata{ + EventType: eventType, + DestinationType: destinationType, + ReceivedAt: "2021-09-01T00:00:00.000Z", + SourceID: "sourceID", + DestinationID: "destinationID", + SourceType: "sourceType", + MessageID: "messageId", + } +} + +func TestGetDataType(t *testing.T) { + testCases := []struct { + name, destType, key string + val any + isJSONKey bool + expected string + }{ + // Primitive types + {"Primitive Type Int", whutils.POSTGRES, "someKey", 42, false, "int"}, + {"Primitive Type Float", whutils.POSTGRES, "someKey", 42.0, false, "int"}, + {"Primitive Type Float (non-int)", whutils.POSTGRES, "someKey", 42.5, false, "float"}, + {"Primitive Type Bool", whutils.POSTGRES, "someKey", true, false, "boolean"}, + + // Valid timestamp + {"Valid Timestamp String", whutils.POSTGRES, "someKey", "2022-10-05T14:48:00.000Z", false, 
"datetime"}, + + // JSON Key cases for different destinations + {"Postgres JSON Key", whutils.POSTGRES, "someKey", "someValue", true, "json"}, + {"Snowflake JSON Key", whutils.SNOWFLAKE, "someKey", "someValue", true, "json"}, + {"Redshift JSON Key", whutils.RS, "someKey", "someValue", true, "json"}, + + // Redshift with text and string types + {"Redshift Text Type", whutils.RS, "someKey", string(make([]byte, 513)), false, "text"}, + {"Redshift String Type", whutils.RS, "someKey", "shortValue", false, "string"}, + {"Redshift String Type", whutils.RS, "someKey", nil, false, "string"}, + + // Empty string values + {"Empty String Value", whutils.POSTGRES, "someKey", "", false, "string"}, + {"Empty String with JSON Key", whutils.POSTGRES, "someKey", "", true, "json"}, + + // Unsupported types (should default to string) + {"Unsupported Type Struct", whutils.POSTGRES, "someKey", struct{}{}, false, "string"}, + {"Unsupported Type Map", whutils.POSTGRES, "someKey", map[string]any{"key": "value"}, false, "string"}, + + // Special string values + {"Special Timestamp-like String", whutils.POSTGRES, "someKey", "not-a-timestamp", false, "string"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + destinationTransformer := &transformer{} + + actual := destinationTransformer.getDataType(tc.destType, tc.key, tc.val, tc.isJSONKey) + require.Equal(t, tc.expected, actual) + }) + } +} + +func TestGetColumns(t *testing.T) { + testCases := []struct { + name string + destType string + data map[string]any + columnTypes map[string]string + maxColumns int32 + expected map[string]any + wantError bool + }{ + { + name: "Basic data types", + destType: whutils.POSTGRES, + data: map[string]any{ + "field1": "value1", "field2": 123, "field3": true, + }, + columnTypes: map[string]string{ + "field1": "string", "field2": "int", + }, + maxColumns: 10, + expected: map[string]any{ + "uuid_ts": "datetime", "field1": "string", "field2": "int", "field3": "boolean", + }, + }, + { 
+ name: "Basic data types (BQ)", + destType: whutils.BQ, + data: map[string]any{ + "field1": "value1", "field2": 123, "field3": true, + }, + columnTypes: map[string]string{ + "field1": "string", "field2": "int", + }, + maxColumns: 10, + expected: map[string]any{ + "uuid_ts": "datetime", "field1": "string", "field2": "int", "field3": "boolean", "loaded_at": "datetime", + }, + }, + { + name: "Basic data types (SNOWFLAKE)", + destType: whutils.SNOWFLAKE, + data: map[string]any{ + "FIELD1": "value1", "FIELD2": 123, "FIELD3": true, + }, + columnTypes: map[string]string{ + "FIELD1": "string", "FIELD2": "int", + }, + maxColumns: 10, + expected: map[string]any{ + "UUID_TS": "datetime", "FIELD1": "string", "FIELD2": "int", "FIELD3": "boolean", + }, + }, + { + name: "Key not in columnTypes", + destType: whutils.POSTGRES, + data: map[string]any{ + "field1": "value1", "field2": 123, "field3": true, + }, + columnTypes: map[string]string{}, + maxColumns: 10, + expected: map[string]any{ + "uuid_ts": "datetime", "field1": "string", "field2": "int", "field3": "boolean", + }, + }, + { + name: "Too many columns", + destType: whutils.POSTGRES, + data: map[string]any{ + "field1": "value1", "field2": 123, "field3": true, "field4": "extra", + }, + columnTypes: map[string]string{ + "field1": "string", "field2": "int", + }, + maxColumns: 3, + expected: nil, + wantError: true, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + trans := &transformer{} + trans.config.maxColumnsInEvent = config.SingleValueLoader(int(tc.maxColumns)) + + columns, err := trans.getColumns(tc.destType, tc.data, tc.columnTypes) + if tc.wantError { + require.Error(t, err) + require.Nil(t, columns) + return + } + require.NoError(t, err) + require.Equal(t, tc.expected, columns) + }) + } +} + +func TestIntegrationOptions(t *testing.T) { + t.Run("AllOptionsSet", func(t *testing.T) { + event := ptrans.TransformerEvent{ + Message: map[string]any{ + "integrations": map[string]any{ + 
"destinationType": map[string]any{ + "options": map[string]any{ + "skipReservedKeywordsEscaping": true, + "useBlendoCasing": false, + "skipTracksTable": true, + "skipUsersTable": false, + "jsonPaths": []any{"path1", "path2", "path3"}, + }, + }, + }, + }, + Metadata: ptrans.Metadata{ + DestinationType: "destinationType", + }, + } + + opts := prepareIntegrationOptions(event) + + require.True(t, opts.skipReservedKeywordsEscaping) + require.False(t, opts.useBlendoCasing) + require.True(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.Equal(t, []string{"path1", "path2", "path3"}, opts.jsonPaths) + }) + t.Run("MissingOptions", func(t *testing.T) { + event := ptrans.TransformerEvent{ + Message: map[string]any{ + "integrations": map[string]any{ + "destinationType": map[string]any{ + "options": map[string]any{}, + }, + }, + }, + Metadata: ptrans.Metadata{ + DestinationType: "destinationType", + }, + } + opts := prepareIntegrationOptions(event) + + require.False(t, opts.skipReservedKeywordsEscaping) + require.False(t, opts.useBlendoCasing) + require.False(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.Empty(t, opts.jsonPaths) + }) + t.Run("NilIntegrationOptions", func(t *testing.T) { + event := ptrans.TransformerEvent{ + Message: map[string]any{ + "integrations": map[string]any{ + "destinationType": map[string]any{ + "options": nil, + }, + }, + }, + Metadata: ptrans.Metadata{ + DestinationType: "destinationType", + }, + } + opts := prepareIntegrationOptions(event) + + require.False(t, opts.skipReservedKeywordsEscaping) + require.False(t, opts.useBlendoCasing) + require.False(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.Empty(t, opts.jsonPaths) + }) + t.Run("PartialOptionsSet", func(t *testing.T) { + event := ptrans.TransformerEvent{ + Message: map[string]any{ + "integrations": map[string]any{ + "destinationType": map[string]any{ + "options": map[string]any{ + "skipUsersTable": true, + 
"jsonPaths": []any{"path1"}, + }, + }, + }, + }, + Metadata: ptrans.Metadata{ + DestinationType: "destinationType", + }, + } + + opts := prepareIntegrationOptions(event) + + require.True(t, opts.skipUsersTable) + require.False(t, opts.skipReservedKeywordsEscaping) + require.False(t, opts.useBlendoCasing) + require.False(t, opts.skipTracksTable) + require.Equal(t, []string{"path1"}, opts.jsonPaths) + }) +} + +func TestDestinationOptions(t *testing.T) { + t.Run("AllOptionsSet", func(t *testing.T) { + destConfig := map[string]any{ + "skipTracksTable": true, + "skipUsersTable": false, + "underscoreDivideNumbers": true, + "allowUsersContextTraits": false, + "storeFullEvent": true, + "jsonPaths": "path1,path2", + } + + opts := prepareDestinationOptions(whutils.POSTGRES, destConfig) + + require.True(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.True(t, opts.underscoreDivideNumbers) + require.False(t, opts.allowUsersContextTraits) + require.True(t, opts.storeFullEvent) + require.Equal(t, []string{"path1", "path2"}, opts.jsonPaths) + }) + t.Run("MissingOptions", func(t *testing.T) { + destConfig := map[string]any{} + + opts := prepareDestinationOptions(whutils.POSTGRES, destConfig) + + require.False(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.False(t, opts.underscoreDivideNumbers) + require.False(t, opts.allowUsersContextTraits) + require.False(t, opts.storeFullEvent) + require.Empty(t, opts.jsonPaths) + }) + t.Run("NilDestinationConfig", func(t *testing.T) { + opts := prepareDestinationOptions(whutils.POSTGRES, nil) + + require.False(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.False(t, opts.underscoreDivideNumbers) + require.False(t, opts.allowUsersContextTraits) + require.False(t, opts.storeFullEvent) + require.Empty(t, opts.jsonPaths) + }) + t.Run("PartialOptionsSet", func(t *testing.T) { + destConfig := map[string]any{ + "skipTracksTable": true, + "jsonPaths": "path1,path2", + 
"allowUsersContextTraits": true, + } + + opts := prepareDestinationOptions(whutils.POSTGRES, destConfig) + + require.True(t, opts.skipTracksTable) + require.False(t, opts.skipUsersTable) + require.False(t, opts.underscoreDivideNumbers) + require.True(t, opts.allowUsersContextTraits) + require.False(t, opts.storeFullEvent) + require.Equal(t, []string{"path1", "path2"}, opts.jsonPaths) + }) + t.Run("JSONPathSupported", func(t *testing.T) { + destConfig := map[string]any{ + "jsonPaths": "path1,path2", + } + + require.Equal(t, []string{"path1", "path2"}, prepareDestinationOptions(whutils.POSTGRES, destConfig).jsonPaths) + require.Empty(t, prepareDestinationOptions(whutils.CLICKHOUSE, destConfig).jsonPaths) + }) +} diff --git a/warehouse/transformer/types.go b/warehouse/transformer/types.go new file mode 100644 index 0000000000..2576d1bd1b --- /dev/null +++ b/warehouse/transformer/types.go @@ -0,0 +1,95 @@ +package transformer + +import ( + "time" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-go-kit/stats" + + ptrans "github.com/rudderlabs/rudder-server/processor/transformer" +) + +type ( + transformer struct { + now func() time.Time + + conf *config.Config + logger logger.Logger + statsFactory stats.Stats + + config struct { + enableIDResolution config.ValueLoader[bool] + populateSrcDestInfoInContext config.ValueLoader[bool] + maxColumnsInEvent config.ValueLoader[int] + } + } + + processingInfo struct { + event ptrans.TransformerEvent + itrOpts integrationsOptions + dstOpts destConfigOptions + jsonPathsInfo jsonPathInfo + } + + integrationsOptions struct { + // skipReservedKeywordsEscaping when set to true, will skip the escaping of reserved keywords + skipReservedKeywordsEscaping bool + + // useBlendoCasing when set to true, will use the casing as per Blendo's requirement + useBlendoCasing bool + + // jsonPaths is a list of json paths that should be extracted from the event and stored as 
raw instead of normalizing them + jsonPaths []string + + // skipTracksTable when set to true, will skip the tracks event + skipTracksTable bool + + // skipUsersTable when set to true, will skip the users event + skipUsersTable bool + } + destConfigOptions struct { + // skipTracksTable when set to true, will skip the tracks event + skipTracksTable bool + + // skipUsersTable when set to true, will skip the users event + skipUsersTable bool + + // storeFullEvent when set to true, will store the full event as rudder_event (JSON) + storeFullEvent bool + + // jsonPaths is a list of json paths that should be extracted from the event and stored as raw instead of normalizing them + jsonPaths []string + + // underscoreDivideNumbers when set to false, if a column has a format like "_v_3_", it will be formatted to "_v3_" + // underscoreDivideNumbers when set to true, if a column has a format like "_v_3_", we keep it like that + // For older destinations, it will come as true and for new destinations this config will not be present which means we will treat it as false. + underscoreDivideNumbers bool + + // allowUsersContextTraits when set to true, if context.traits.* is present, it will be added as context_traits_* and *, + // e.g., for context.traits.name, context_traits_name and name will be added to the user's table. + // allowUsersContextTraits when set to false, if context.traits.* is present, it will be added only as context_traits_* + // e.g., for context.traits.name, only context_traits_name will be added to the user's table. + // For older destinations, it will come as true, and for new destinations this config will not be present, which means we will treat it as false. 
+ allowUsersContextTraits bool + } + + mergeRule struct { + Type, Value any + } + mergeRulesColumns struct { + Prop1Type, Prop1Value, Prop2Type, Prop2Value string + } + + prefixInfo struct { + completePrefix string + completeLevel int + prefix string + level int + } + + jsonPathInfo struct { + keysMap map[string]int + legacyKeysMap map[string]int + } +)