[CF-557] Changelog updates should wait for transaction commit #137

Open · wants to merge 16 commits into master
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
-FROM golang:1.20-alpine
+FROM golang:1.20-alpine3.18
ENV SRC github.com/segmentio/ctlstore
ARG VERSION

28 changes: 15 additions & 13 deletions pkg/changelog/changelog_writer.go
@@ -2,8 +2,8 @@ package changelog

import (
"encoding/json"

"github.com/pkg/errors"
"github.com/segmentio/ctlstore/pkg/schema"
"github.com/segmentio/events/v2"
)

@@ -16,25 +16,27 @@ type (
WriteLine WriteLine
}
ChangelogEntry struct {
-		Seq    int64
-		Family string
-		Table  string
-		Key    []interface{}
+		Seq         int64
+		Family      string
+		Table       string
+		Key         []interface{}
+		LedgerSeq   schema.DMLSequence
+		Transaction bool
}
)

func NewChangelogEntry(seq int64, family string, table string, key []interface{}) *ChangelogEntry {
return &ChangelogEntry{Seq: seq, Family: family, Table: table, Key: key}
}

func (w *ChangelogWriter) WriteChange(e ChangelogEntry) error {
structure := struct {
Contributor:
I don't really get why we inline this struct definition instead of just putting the JSON tags on ChangelogEntry.

Collaborator (author):
This is a good question -- I don't get it either.

It reminds me that I'm unsure about why we have 2 separate structs for the changelog entries. Felt like I was having to sprinkle the same fields all over the place to get it through the pipes.

  • writing (this code):
    func (w *ChangelogWriter) WriteChange(e ChangelogEntry) error {
    structure := struct {
    Seq int64 `json:"seq"`
    Family string `json:"family"`
    Table string `json:"table"`
    Key []interface{} `json:"key"`
    }{
  • reading:
    // entry represents a single row in the changelog
    // e.g.
    // {"seq":1,"family":"fam","table":"foo","key":[{"name":"id","type":"int","value":1}]}
    type entry struct {
    Seq int64 `json:"seq"`
    Family string `json:"family"`
    Table string `json:"table"`
    Key []Key `json:"key"`
    }

Contributor:
I wonder if that's a consequence of calling the root of this pkg vs internal

Contributor:
Feels like there should be just one definition of this data.

Contributor:
With the json annotations too.

Contributor:
FWIW, answers by Copilot:

What would you suspect is the reason why, in this file, the structure in the WriteChange function duplicates ChangelogEntry? Why would the developer who wrote the code not reuse ChangelogEntry?

The developer might have chosen to define a separate structure within the WriteChange function for a few reasons:

  1. Field Tagging: The inline structure allows the developer to add struct tags (json:"seq", json:"family", etc.) which are used for JSON marshalling. These tags define how the struct fields are encoded to JSON, which is not specified in the ChangelogEntry struct.

  2. Encapsulation: The ChangelogEntry struct might be used in various parts of the codebase, and the developer might not want to tie its implementation too closely to the JSON encoding used in this specific function. By using a separate structure for the JSON encoding, changes to the ChangelogEntry struct will not directly impact the WriteChange function.

  3. Flexibility: If in the future, the WriteChange function needs to include additional data or transform the data before writing, having a separate structure within the function makes it easier to do so without affecting the ChangelogEntry struct.

In general, this approach provides a level of decoupling between the ChangelogEntry data structure and the specific requirements of the WriteChange function.

-		Seq    int64         `json:"seq"`
-		Family string        `json:"family"`
-		Table  string        `json:"table"`
-		Key    []interface{} `json:"key"`
+		Seq         int64         `json:"seq"`
+		LedgerSeq   int64         `json:"ledgerSeq"`
+		Transaction bool          `json:"tx"`
+		Family      string        `json:"family"`
+		Table       string        `json:"table"`
+		Key         []interface{} `json:"key"`
}{
e.Seq,
e.LedgerSeq.Int(),
e.Transaction,
e.Family,
e.Table,
e.Key,
2 changes: 1 addition & 1 deletion pkg/changelog/changelog_writer_test.go
@@ -32,5 +32,5 @@ func TestWriteChange(t *testing.T) {
})
require.NoError(t, err)
require.EqualValues(t, 1, len(mock.Lines))
-	require.Equal(t, `{"seq":42,"family":"family1","table":"table1","key":[18014398509481984,"foo"]}`, mock.Lines[0])
+	require.Equal(t, `{"seq":42,"ledgerSeq":0,"tx":false,"family":"family1","table":"table1","key":[18014398509481984,"foo"]}`, mock.Lines[0])
}
2 changes: 1 addition & 1 deletion pkg/cmd/ctlstore/main.go
@@ -114,7 +114,7 @@ type supervisorCliConfig struct {
type ledgerHealthConfig struct {
Disable bool `conf:"disable" help:"disable ledger latency health attributing (DEPRECATED: use disable-ecs-behavior instead)"`
DisableECSBehavior bool `conf:"disable-ecs-behavior" help:"disable ledger latency health attributing"`
MaxHealthyLatency time.Duration `conf:"max-healty-latency" help:"Max latency considered healthy"`
Contributor:
One of the annoying aspects of this being OSS is that we can't verify this change doesn't break anything for external users. Internally, I didn't see any hits for "healty" related to ctlstore, though.

@erikdw (Collaborator, author), Jan 16, 2024:
I searched internally too -- it's a good call out that this could break something externally, but my belief is that no one is actually using this system outside of Segment, so I'm not too concerned.

That being said, I could be convinced to add an "unsupported" entry that has the typo'ed configuration name and then panics. i.e., with this change if someone had the typo'ed configuration knob it would just be silently ignored AFAIU.

Or even have logic to have that typo'ed configuration entry configure the "right" one and spit out a loud deprecation warning message. We don't have real releases so it's a bit unclear what the best strategy is.

Contributor:
I'm fine just doing a breaking change. It may not be great OSS stewardship, but I have a hard time believing there's even a single other user of ctlstore outside of Segment.

MaxHealthyLatency time.Duration `conf:"max-healthy-latency" help:"Max latency considered healthy"`
AttributeName string `conf:"attribute-name" help:"The name of the attribute"`
HealthyAttributeValue string `conf:"healthy-attribute-value" help:"The value of the attribute if healthy"`
UnhealthyAttributeValue string `conf:"unhealth-attribute-value" help:"The value of the attribute if unhealthy"`
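The deprecated-alias idea from the thread above could be sketched like this. This is an assumption about how it might look, not the PR's code; resolveMaxHealthyLatency is a hypothetical helper, and the real conf-library wiring would differ.

```go
package main

import "fmt"

// resolveMaxHealthyLatency is a hypothetical shim: honor the old,
// typo'ed "max-healty-latency" value if only it is set, and complain
// loudly so operators migrate to "max-healthy-latency".
func resolveMaxHealthyLatency(deprecated, current string) string {
	if deprecated != "" && current == "" {
		fmt.Println(`warning: "max-healty-latency" is deprecated; use "max-healthy-latency"`)
		return deprecated
	}
	return current
}

func main() {
	fmt.Println(resolveMaxHealthyLatency("5s", ""))  // old knob still honored, with a warning
	fmt.Println(resolveMaxHealthyLatency("", "10s")) // new knob wins
}
```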
2 changes: 1 addition & 1 deletion pkg/event/changelog_test.go
@@ -135,7 +135,7 @@ func TestChangelog(t *testing.T) {
numEvents: 10000,
rotateAfterBytes: 1024 * 128,
writeDelay: 100 * time.Microsecond,
-			mustRotateN:      8,
+			mustRotateN:      9,
},
} {
t.Run(test.name, func(t *testing.T) {
17 changes: 11 additions & 6 deletions pkg/event/entry.go
@@ -2,18 +2,23 @@ package event

// entry represents a single row in the changelog
// e.g.
-// {"seq":1,"family":"fam","table":"foo","key":[{"name":"id","type":"int","value":1}]}
+//
+// {"seq":1,"ledgerSeq":42,"tx":false,"family":"fam","table":"foo","key":[{"name":"id","type":"int","value":1}]}
type entry struct {
-	Seq    int64  `json:"seq"`
-	Family string `json:"family"`
-	Table  string `json:"table"`
-	Key    []Key  `json:"key"`
+	Seq         int64  `json:"seq"`
+	LedgerSeq   int64  `json:"ledgerSeq"`
+	Transaction bool   `json:"tx"`
+	Family      string `json:"family"`
+	Table       string `json:"table"`
+	Key         []Key  `json:"key"`
}

// event converts the entry into an event for the iterator to return
func (e entry) event() Event {
return Event{
-		Sequence:  e.Seq,
+		Sequence:       e.Seq,
+		LedgerSequence: e.LedgerSeq,
+		Transaction:    e.Transaction,
RowUpdate: RowUpdate{
FamilyName: e.Family,
TableName: e.Table,
6 changes: 4 additions & 2 deletions pkg/event/event.go
@@ -2,8 +2,10 @@ package event

// Event is the type that the Iterator produces
type Event struct {
-	Sequence  int64
-	RowUpdate RowUpdate
+	Sequence       int64
+	LedgerSequence int64
+	Transaction    bool
+	RowUpdate      RowUpdate
}

// RowUpdate represents a single row update
2 changes: 1 addition & 1 deletion pkg/executive/db_executive.go
@@ -319,7 +319,7 @@ func (e *dbExecutive) AddFields(familyName string, tableName string, fieldNames
}

// We first write the column modification to the DML ledger within the transaction.
-// It's important that this is done befored the DDL is applied to the ctldb, as
+// It's important that this is done before the DDL is applied to the ctldb, as
// the DDL is not able to be rolled back. In this way, if the DDL fails, the DML
// can be rolled back.
dlw := dmlLedgerWriter{Tx: tx, TableName: dmlLedgerTableName}
4 changes: 4 additions & 0 deletions pkg/ldb/ldbs.go
@@ -121,3 +121,7 @@ func FetchSeqFromLdb(ctx context.Context, db *sql.DB) (schema.DMLSequence, error
}
return schema.DMLSequence(seq), err
}

func IsInternalTable(name string) bool {
return name == LDBSeqTableName || name == LDBLastUpdateTableName
}
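A self-contained sketch of how the new IsInternalTable helper can be used, e.g. to skip internal bookkeeping tables when iterating an LDB. The constant values here are placeholders, not necessarily ctlstore's real table names.

```go
package main

import "fmt"

// Placeholder values; the real constants live in pkg/ldb.
const (
	LDBSeqTableName        = "_ldb_seq"
	LDBLastUpdateTableName = "_ldb_last_update"
)

// IsInternalTable mirrors the helper added in this PR: internal
// bookkeeping tables should not be treated as user data.
func IsInternalTable(name string) bool {
	return name == LDBSeqTableName || name == LDBLastUpdateTableName
}

func main() {
	for _, t := range []string{LDBSeqTableName, "fam___mytable"} {
		if IsInternalTable(t) {
			fmt.Println("skipping internal table:", t)
			continue
		}
		fmt.Println("processing table:", t)
	}
}
```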
10 changes: 6 additions & 4 deletions pkg/ldbwriter/changelog_callback.go
@@ -37,10 +37,12 @@ func (c *ChangelogCallback) LDBWritten(ctx context.Context, data LDBWriteMetadat
for _, key := range keys {
seq := atomic.AddInt64(&c.Seq, 1)
err = c.ChangelogWriter.WriteChange(changelog.ChangelogEntry{
-				Seq:    seq,
-				Family: fam.Name,
-				Table:  tbl.Name,
-				Key:    key,
+				Seq:         seq,
+				LedgerSeq:   change.LedgerSequence,
+				Transaction: data.Transaction,
+				Family:      fam.Name,
+				Table:       tbl.Name,
+				Key:         key,
})
if err != nil {
events.Log("Skipped logging change to %{family}s.%{table}s:%{key}v: %{err}v",
84 changes: 76 additions & 8 deletions pkg/ldbwriter/ldb_callback_writer.go
@@ -3,33 +3,101 @@ package ldbwriter
import (
"context"
"database/sql"

"github.com/segmentio/ctlstore/pkg/schema"
"github.com/segmentio/ctlstore/pkg/sqlite"
"github.com/segmentio/events/v2"
"github.com/segmentio/stats/v4"
)

// CallbackWriter is an LDBWriter that delegates to another
// writer and then, upon a successful write, executes N callbacks.
type CallbackWriter struct {
-	DB        *sql.DB
-	Delegate  LDBWriter
-	Callbacks []LDBWriteCallback
+	DB        *sql.DB
+	Delegate  LDBWriter
+	Callbacks []LDBWriteCallback
+	// Buffer between SQLite preupdate Hook and this code
+	ChangeBuffer *sqlite.SQLChangeBuffer
+	// Accumulated changes across multiple ApplyDMLStatement calls
+	transactionChanges []sqlite.SQLiteWatchChange
Contributor:
should we consider a cap on the size of a transaction so we don't buffer too much?

@erikdw (Collaborator, author), Jan 16, 2024:
Interesting proposal; issues around that are where my head was at as I was adding the metrics.

This consideration relates to the "support SoR transactions" project. We don't yet know how large SoR transactions can get, especially for those that we care about for that project. Putting some cap here with a loud complaint that we exceeded it (log + metric) and then dumping the currently buffered changes to the changelog (i.e., invoking callbacks) seems a fine behavior for now.

We know that the changes would not be greater than 200 right now based on hardcoded limits elsewhere & the behavior of the REPLACE INTO, so perhaps just setting it to like 500 for now would be good. i.e.,

  • REPLACE INTO is translated by SQLite into a DELETE op then an INSERT op ➡️ 2 changes.
  • max of 100 entries in a ledger transaction:
    // Reject requests that are too large
    if len(requests) > limits.LimitMaxMutateRequestCount {
    return &errs.PayloadTooLargeError{Err: "Number of requests exceeds maximum"}
    }
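The cap discussed in this thread could be sketched as follows. The limit of 500 and the flush-early behavior are assumptions drawn from the discussion, not what the PR implements.

```go
package main

import "fmt"

// maxBufferedChanges is an assumed cap: the thread suggests ~500, derived
// from the 100-request mutate limit times 2 ops per REPLACE INTO, with
// headroom.
const maxBufferedChanges = 500

type change struct{ table string }

type callbackWriter struct {
	transactionChanges []change
}

// accumulateChanges buffers changes for the open transaction; if the
// buffer exceeds the cap, it complains loudly and flushes early rather
// than growing without bound.
func (w *callbackWriter) accumulateChanges(changes []change) (flushed []change) {
	w.transactionChanges = append(w.transactionChanges, changes...)
	if len(w.transactionChanges) > maxBufferedChanges {
		fmt.Printf("warning: transaction exceeded %d buffered changes; flushing early\n", maxBufferedChanges)
		flushed = w.transactionChanges
		w.transactionChanges = nil
	}
	return
}

func main() {
	w := &callbackWriter{}
	flushed := w.accumulateChanges(make([]change, maxBufferedChanges+1))
	fmt.Println(len(flushed), len(w.transactionChanges)) // 501 0
}
```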

Contributor:
that sounds good to me

Contributor:
This concern isn't really about today's transactions, since they are size-limited, but about the future, when we support SoR transactions. A sane cap on statements within a transaction is smart, but if we add one we need to make sure the limit is well understood externally. Once we say we support transactions, we need to be sure we really do.

}

func (w *CallbackWriter) inTransaction() bool {
return w.transactionChanges != nil
}

func (w *CallbackWriter) beginTransaction(ledgerSequence schema.DMLSequence) {
if len(w.transactionChanges) > 0 {
// This should never happen, but just in case...
stats.Add("ldb_changes_abandoned", len(w.transactionChanges))
events.Log("error: abandoned %{count}d changes from incomplete transaction, current statement's ledger sequence: %{sequence}d",
len(w.transactionChanges), ledgerSequence)
}
w.transactionChanges = make([]sqlite.SQLiteWatchChange, 0)
}

// Transaction done! Return the accumulated changes including the latest ones
func (w *CallbackWriter) endTransaction(changes []sqlite.SQLiteWatchChange) (transactionChanges []sqlite.SQLiteWatchChange) {
w.accumulateChanges(changes)
transactionChanges = w.transactionChanges
w.transactionChanges = nil
return
}

// Transaction isn't over yet, save the latest changes
func (w *CallbackWriter) accumulateChanges(changes []sqlite.SQLiteWatchChange) {
w.transactionChanges = append(w.transactionChanges, changes...)
}

// ApplyDMLStatement
//
// It is not obvious, but this code executes synchronously:
// 1. Delegate.ApplyDMLStatement executes the DML statement against the SQLite LDB.
// (⚠️ WARNING: That's what the code is wired up to do today, January 2024, though the Delegate
// could be doing other things since the code is so flexible.)
// 2. When SQLite processes the statement it invokes our preupdate hook (see sqlite_watch.go).
// 3. Our preupdate hook writes the changes to the change buffer.
// 4. The code returns here, and we decide whether to process the change buffer immediately or
// wait until the end of the ledger transaction.
func (w *CallbackWriter) ApplyDMLStatement(ctx context.Context, statement schema.DMLStatement) error {
err := w.Delegate.ApplyDMLStatement(ctx, statement)
if err != nil {
return err
}

// If beginning a transaction then start accumulating changes, don't send them out yet
if statement.Statement == schema.DMLTxBeginKey {
w.beginTransaction(statement.Sequence)
return nil
}

changes := w.ChangeBuffer.Pop()

// Record the responsible ledger sequence in each change so that the callback can use it
for i := range changes {
changes[i].LedgerSequence = statement.Sequence
}

var transaction bool
if w.inTransaction() {
transaction = true
if statement.Statement == schema.DMLTxEndKey {
// Transaction done, let's send what we have accumulated
changes = w.endTransaction(changes)
} else {
// Transaction not over, continue accumulating
w.accumulateChanges(changes)
return nil
}
}

stats.Observe("ldb_changes_written", len(changes))
for _, callback := range w.Callbacks {
-		events.Debug("Writing DML callback for %{cb}T", callback)
+		events.Debug("Writing DML callback for %{cb}T with %{changeCount}d changes", callback, len(changes))
callback.LDBWritten(ctx, LDBWriteMetadata{
-			DB:        w.DB,
-			Statement: statement,
-			Changes:   changes,
+			DB:          w.DB,
+			Statement:   statement,
+			Changes:     changes,
+			Transaction: transaction,
})
}
return nil