From d216c362bb285ad0081e95b1f6e67956b57d64fd Mon Sep 17 00:00:00 2001 From: chirst Date: Wed, 24 Jul 2024 21:50:17 -0600 Subject: [PATCH 1/3] refactor --- planner/insert.go | 119 ++++++++++++++++++++++++++++++--------------- planner/node.go | 31 +++++++++--- planner/plan.go | 18 +++++-- planner/planner.go | 4 +- planner/select.go | 67 +++++++++++++++---------- 5 files changed, 164 insertions(+), 75 deletions(-) diff --git a/planner/insert.go b/planner/insert.go index 07c1f09..b605081 100644 --- a/planner/insert.go +++ b/planner/insert.go @@ -10,9 +10,11 @@ import ( "github.com/chirst/cdb/vm" ) -var errTableNotExist = errors.New("table does not exist") -var errValuesNotMatch = errors.New("values list did not match columns list") -var errMissingColumnName = errors.New("missing column") +var ( + errTableNotExist = errors.New("table does not exist") + errValuesNotMatch = errors.New("values list did not match columns list") + errMissingColumnName = errors.New("missing column") +) // insertCatalog defines the catalog methods needed by the insert planner type insertCatalog interface { @@ -22,29 +24,50 @@ type insertCatalog interface { GetPrimaryKeyColumn(tableName string) (string, error) } +// insertPlanner consists of planners capable of generating a logical query plan +// tree and bytecode execution plan for a insert statement. type insertPlanner struct { - qp *insertQueryPlanner - ep *insertExecutionPlanner + // The query planner generates a logical query plan tree made up of nodes + // similar to relational algebra operators. The query planner performs + // validation while building the tree. Otherwise known as binding. + queryPlanner *insertQueryPlanner + // The executionPlanner transforms the logical query plan tree to a bytecode + // execution plan that can be ran by the virtual machine. + executionPlanner *insertExecutionPlanner } +// insertQueryPlanner converts the AST generated by the compiler to a logical +// query plan tree. 
It is also responsible for validating the AST against the +// system catalog. type insertQueryPlanner struct { - catalog insertCatalog - stmt *compiler.InsertStmt + // catalog contains the schema. + catalog insertCatalog + // stmt contains the AST. + stmt *compiler.InsertStmt + // queryPlan contains the query plan being constructed. For an insert, the + // root node must be an insertNode. queryPlan *insertNode } +// insertExecutionPlanner converts the logical query plan to a bytecode routine +// to be ran by the vm. type insertExecutionPlanner struct { - queryPlan *insertNode + // queryPlan contains the query plan generated by the query planner's + // QueryPlan method. + queryPlan *insertNode + // executionPlan contains the execution plan generated by calling + // ExecutionPlan. executionPlan *vm.ExecutionPlan } +// NewInsert returns an instance of an insert planner for the given AST. func NewInsert(catalog insertCatalog, stmt *compiler.InsertStmt) *insertPlanner { return &insertPlanner{ - qp: &insertQueryPlanner{ + queryPlanner: &insertQueryPlanner{ catalog: catalog, stmt: stmt, }, - ep: &insertExecutionPlanner{ + executionPlanner: &insertExecutionPlanner{ executionPlan: vm.NewExecutionPlan( catalog.GetVersion(), stmt.Explain, @@ -53,8 +76,17 @@ func NewInsert(catalog insertCatalog, stmt *compiler.InsertStmt) *insertPlanner } } -func (ip *insertPlanner) QueryPlan() (*QueryPlan, error) { - p := ip.qp +// QueryPlan generates the query plan tree for the planner. 
+func (p *insertPlanner) QueryPlan() (*QueryPlan, error) { + qp, err := p.queryPlanner.getQueryPlan() + if err != nil { + return nil, err + } + p.executionPlanner.queryPlan = p.queryPlanner.queryPlan + return qp, err +} + +func (p *insertQueryPlanner) getQueryPlan() (*QueryPlan, error) { rootPage, err := p.catalog.GetRootPageNumber(p.stmt.TableName) if err != nil { return nil, errTableNotExist @@ -78,66 +110,73 @@ func (ip *insertPlanner) QueryPlan() (*QueryPlan, error) { colValues: p.stmt.ColValues, } p.queryPlan = insertNode - ip.ep.queryPlan = insertNode return newQueryPlan(insertNode, p.stmt.ExplainQueryPlan), nil } -func (ip *insertPlanner) ExecutionPlan() (*vm.ExecutionPlan, error) { - if ip.qp.queryPlan == nil { - _, err := ip.QueryPlan() +// ExecutionPlan returns the bytecode routine for the planner. Calling QueryPlan +// is not a prerequisite to calling ExecutionPlan as QueryPlan will be called +// as needed. +func (p *insertPlanner) ExecutionPlan() (*vm.ExecutionPlan, error) { + if p.queryPlanner.queryPlan == nil { + _, err := p.QueryPlan() if err != nil { return nil, err } } - ep := ip.ep - cursorId := 1 - ep.executionPlan.Append(&vm.InitCmd{P2: 1}) - ep.executionPlan.Append(&vm.TransactionCmd{P2: 1}) - ep.executionPlan.Append(&vm.OpenWriteCmd{P1: cursorId, P2: ep.queryPlan.rootPage}) + return p.executionPlanner.getExecutionPlan() +} - for valueIdx := range len(ep.queryPlan.colValues) / len(ep.queryPlan.colNames) { +func (p *insertExecutionPlanner) getExecutionPlan() (*vm.ExecutionPlan, error) { + p.buildInit() + cursorId := 1 + p.executionPlan.Append(&vm.OpenWriteCmd{P1: cursorId, P2: p.queryPlan.rootPage}) + for valueIdx := range len(p.queryPlan.colValues) / len(p.queryPlan.colNames) { keyRegister := 1 statementIDIdx := -1 - if ep.queryPlan.pkColumn != "" { - statementIDIdx = slices.IndexFunc(ep.queryPlan.colNames, func(s string) bool { - return s == ep.queryPlan.pkColumn + if p.queryPlan.pkColumn != "" { + statementIDIdx = 
slices.IndexFunc(p.queryPlan.colNames, func(s string) bool { + return s == p.queryPlan.pkColumn }) } if statementIDIdx == -1 { - ep.executionPlan.Append(&vm.NewRowIdCmd{P1: ep.queryPlan.rootPage, P2: keyRegister}) + p.executionPlan.Append(&vm.NewRowIdCmd{P1: p.queryPlan.rootPage, P2: keyRegister}) } else { - rowId, err := strconv.Atoi(ep.queryPlan.colValues[statementIDIdx+valueIdx*len(ep.queryPlan.colNames)]) + rowId, err := strconv.Atoi(p.queryPlan.colValues[statementIDIdx+valueIdx*len(p.queryPlan.colNames)]) if err != nil { return nil, err } - integerCmdIdx := len(ep.executionPlan.Commands) + 2 - ep.executionPlan.Append(&vm.NotExistsCmd{P1: ep.queryPlan.rootPage, P2: integerCmdIdx, P3: rowId}) - ep.executionPlan.Append(&vm.HaltCmd{P1: 1, P4: "pk unique constraint violated"}) - ep.executionPlan.Append(&vm.IntegerCmd{P1: rowId, P2: keyRegister}) + integerCmdIdx := len(p.executionPlan.Commands) + 2 + p.executionPlan.Append(&vm.NotExistsCmd{P1: p.queryPlan.rootPage, P2: integerCmdIdx, P3: rowId}) + p.executionPlan.Append(&vm.HaltCmd{P1: 1, P4: "pk unique constraint violated"}) + p.executionPlan.Append(&vm.IntegerCmd{P1: rowId, P2: keyRegister}) } registerIdx := keyRegister - for _, catalogColumnName := range ep.queryPlan.catalogColumnNames { - if catalogColumnName != "" && catalogColumnName == ep.queryPlan.pkColumn { + for _, catalogColumnName := range p.queryPlan.catalogColumnNames { + if catalogColumnName != "" && catalogColumnName == p.queryPlan.pkColumn { continue } registerIdx += 1 vIdx := -1 - for i, statementColumnName := range ep.queryPlan.colNames { + for i, statementColumnName := range p.queryPlan.colNames { if statementColumnName == catalogColumnName { - vIdx = i + (valueIdx * len(ep.queryPlan.colNames)) + vIdx = i + (valueIdx * len(p.queryPlan.colNames)) } } if vIdx == -1 { return nil, fmt.Errorf("%w %s", errMissingColumnName, catalogColumnName) } - ep.executionPlan.Append(&vm.StringCmd{P1: registerIdx, P4: ep.queryPlan.colValues[vIdx]}) + 
p.executionPlan.Append(&vm.StringCmd{P1: registerIdx, P4: p.queryPlan.colValues[vIdx]}) } - ep.executionPlan.Append(&vm.MakeRecordCmd{P1: 2, P2: registerIdx - 1, P3: registerIdx + 1}) - ep.executionPlan.Append(&vm.InsertCmd{P1: ep.queryPlan.rootPage, P2: registerIdx + 1, P3: keyRegister}) + p.executionPlan.Append(&vm.MakeRecordCmd{P1: 2, P2: registerIdx - 1, P3: registerIdx + 1}) + p.executionPlan.Append(&vm.InsertCmd{P1: p.queryPlan.rootPage, P2: registerIdx + 1, P3: keyRegister}) } + p.executionPlan.Append(&vm.HaltCmd{}) + return p.executionPlan, nil +} - ep.executionPlan.Append(&vm.HaltCmd{}) - return ep.executionPlan, nil +func (p *insertExecutionPlanner) buildInit() { + p.executionPlan.Append(&vm.InitCmd{P2: 1}) + p.executionPlan.Append(&vm.TransactionCmd{P2: 1}) } func checkValuesMatchColumns(s *compiler.InsertStmt) error { diff --git a/planner/node.go b/planner/node.go index 48004d6..1d9c2e5 100644 --- a/planner/node.go +++ b/planner/node.go @@ -1,5 +1,7 @@ package planner +// This file defines the relational nodes in a logical query plan. + // logicalNode defines the interface for a node in the query plan tree. type logicalNode interface { children() []logicalNode @@ -12,8 +14,11 @@ type projectNode struct { child logicalNode } +// projection is part of the sum of projections in a project node. type projection struct { + // isCount signifies the projection is the count function. isCount bool + // colName is the name of the column to be projected. colName string } @@ -43,12 +48,19 @@ type countNode struct { rootPage int } +// TODO joinNode is unused, but remains as a prototype binary operation node. type joinNode struct { - left logicalNode - right logicalNode + // left is the left subtree of the join. + left logicalNode + // right is the right subtree of the join. + right logicalNode + // TODO operation is the type of join to be performed. Possibly left, right + // or inner join. Could also have a field for join algorithm i.e. loop. 
operation string } +// createNode represents an operation to create an object in the system catalog. +// For example a table, index, or trigger. type createNode struct { // objectName is the name of the index, trigger, or table. objectName string @@ -60,10 +72,17 @@ type createNode struct { schema string } +// insertNode represents an insert operation. type insertNode struct { - rootPage int + // rootPage is the rootPage of the table the insert is performed on. + rootPage int + // catalogColumnNames are the names of columns associated with the table. catalogColumnNames []string - pkColumn string - colNames []string - colValues []string + // pkColumn is the name of the primary key column. The value is empty if + // there is no user defined pk. + pkColumn string + // colNames are the names of columns specified in the insert statement. + colNames []string + // colValues are the values specified in the insert statement. + colValues []string } diff --git a/planner/plan.go b/planner/plan.go index becb5f9..c414e3f 100644 --- a/planner/plan.go +++ b/planner/plan.go @@ -6,11 +6,15 @@ import ( "unicode/utf8" ) -// QueryPlan contains the query plan tree. It is capable of converting the tree +// QueryPlan contains the query plan tree. It is capable of converting the tree // to a string representation for a query prefixed with `EXPLAIN QUERY PLAN`. type QueryPlan struct { - plan string - root logicalNode + // plan holds the string representation also known as the tree. + plan string + // root holds the root node of the query plan + root logicalNode + // ExplainQueryPlan is a flag indicating if the SQL asked for the query plan + // to be printed as a string representation with `EXPLAIN QUERY PLAN`. ExplainQueryPlan bool } @@ -21,6 +25,7 @@ func newQueryPlan(root logicalNode, explainQueryPlan bool) *QueryPlan { } } +// ToString evaluates and returns the query plan as a string representation. 
func (p *QueryPlan) ToString() string { qp := &QueryPlan{} qp.walk(p.root, 0) @@ -48,6 +53,7 @@ func (p *QueryPlan) visit(ln logicalNode, depth int) { p.plan += fmt.Sprintf("%s%s\n", padding, ln.print()) } +// trimLeft performs extra formatting after the initial walk is completed. func (p *QueryPlan) trimLeft() { trimBy := 4 newPlan := []string{} @@ -61,6 +67,12 @@ func (p *QueryPlan) trimLeft() { p.plan = strings.Join(newPlan, "\n") } +// connectSiblings is a messy method to perform extra formatting after the +// initial recursive walk is completed. connectSiblings goes over the string +// representation in reverse row order and forwards column order. When a '└' +// character is found connectSiblings moves upwards on the current column making +// replacements until the top is reached. Once reached the column and row search +// continue. func (p *QueryPlan) connectSiblings() string { planMatrix := strings.Split(p.plan, "\n") for rowIdx := len(planMatrix) - 1; 0 < rowIdx; rowIdx -= 1 { diff --git a/planner/planner.go b/planner/planner.go index fa41e95..0f8ee78 100644 --- a/planner/planner.go +++ b/planner/planner.go @@ -1,3 +1,5 @@ // planner generates a query plan from an AST (abstract syntax tree) generated -// by the compiler. This plan is then fed to the vm (virtual machine) to be ran. +// by the compiler. The query plan is a tree structure similar to relational +// algebra. The query plan is then converted to bytecode and fed to the vm +// (virtual machine) to be ran. package planner diff --git a/planner/select.go b/planner/select.go index 3b36eb9..3939acc 100644 --- a/planner/select.go +++ b/planner/select.go @@ -1,8 +1,6 @@ package planner import ( - "errors" - "github.com/chirst/cdb/compiler" "github.com/chirst/cdb/vm" ) @@ -17,12 +15,15 @@ type selectCatalog interface { // selectPlanner is capable of generating a logical query plan and a physical // execution plan for a select statement. The planners within are separated by -// their responsibility. 
Notice a statement or catalog is not shared with with -// the execution planner. This is by design since the logical query planner also -// performs binding. +// their responsibility. type selectPlanner struct { - qp *selectQueryPlanner - ep *selectExecutionPlanner + // queryPlanner is responsible for transforming the AST to a logical query + // plan tree. This tree is made up of nodes that map closely to a relational + // algebra tree. The query planner also performs binding and validation. + queryPlanner *selectQueryPlanner + // executionPlanner transforms the logical query tree to a bytecode routine, + // built to be ran by the virtual machine. + executionPlanner *selectExecutionPlanner } // selectQueryPlanner converts an AST to a logical query plan. Along the way it @@ -33,7 +34,8 @@ type selectQueryPlanner struct { catalog selectCatalog // stmt contains the AST stmt *compiler.SelectStmt - // queryPlan contains the logical plan. The root node must be a projection. + // queryPlan contains the logical plan being built. The root node must be a + // projection. queryPlan *projectNode } @@ -51,11 +53,11 @@ type selectExecutionPlanner struct { // NewSelect returns an instance of a select planner for the given AST. func NewSelect(catalog selectCatalog, stmt *compiler.SelectStmt) *selectPlanner { return &selectPlanner{ - qp: &selectQueryPlanner{ + queryPlanner: &selectQueryPlanner{ catalog: catalog, stmt: stmt, }, - ep: &selectExecutionPlanner{ + executionPlanner: &selectExecutionPlanner{ executionPlan: vm.NewExecutionPlan( catalog.GetVersion(), stmt.Explain, @@ -66,14 +68,23 @@ func NewSelect(catalog selectCatalog, stmt *compiler.SelectStmt) *selectPlanner // QueryPlan generates the query plan tree for the planner. 
func (p *selectPlanner) QueryPlan() (*QueryPlan, error) { - tableName := p.qp.stmt.From.TableName - rootPageNumber, err := p.qp.catalog.GetRootPageNumber(tableName) + qp, err := p.queryPlanner.getQueryPlan() + if err != nil { + return nil, err + } + p.executionPlanner.queryPlan = p.queryPlanner.queryPlan + return qp, err +} + +func (p *selectQueryPlanner) getQueryPlan() (*QueryPlan, error) { + tableName := p.stmt.From.TableName + rootPageNumber, err := p.catalog.GetRootPageNumber(tableName) if err != nil { return nil, err } var child logicalNode - if p.qp.stmt.ResultColumn.All { - scanColumns, err := p.qp.getScanColumns() + if p.stmt.ResultColumn.All { + scanColumns, err := p.getScanColumns() if err != nil { return nil, err } @@ -88,16 +99,15 @@ func (p *selectPlanner) QueryPlan() (*QueryPlan, error) { rootPage: rootPageNumber, } } - projections, err := p.qp.getProjections() + projections, err := p.getProjections() if err != nil { return nil, err } - p.qp.queryPlan = &projectNode{ + p.queryPlan = &projectNode{ projections: projections, child: child, } - p.ep.queryPlan = p.qp.queryPlan - return newQueryPlan(p.qp.queryPlan, p.qp.stmt.ExplainQueryPlan), nil + return newQueryPlan(p.queryPlan, p.stmt.ExplainQueryPlan), nil } func (p *selectQueryPlanner) getScanColumns() ([]scanColumn, error) { @@ -139,30 +149,33 @@ func (p *selectQueryPlanner) getProjections() ([]projection, error) { }) } return projections, nil - } else if p.stmt.ResultColumn.Count { + } + if p.stmt.ResultColumn.Count { return []projection{ { isCount: true, }, }, nil } - return nil, errors.New("unhandled projection") + panic("unhandled projection") } // ExecutionPlan returns the bytecode execution plan for the planner. Calling // QueryPlan is not a prerequisite to this method as it will be called by // ExecutionPlan if needed. 
func (sp *selectPlanner) ExecutionPlan() (*vm.ExecutionPlan, error) { - if sp.qp.queryPlan == nil { + if sp.queryPlanner.queryPlan == nil { _, err := sp.QueryPlan() if err != nil { return nil, err } } - p := sp.ep - p.resultHeader() - p.buildInit() + return sp.executionPlanner.getExecutionPlan() +} +func (p *selectExecutionPlanner) getExecutionPlan() (*vm.ExecutionPlan, error) { + p.setResultHeader() + p.buildInit() switch c := p.queryPlan.child.(type) { case *scanNode: if err := p.buildScan(c); err != nil { @@ -177,7 +190,7 @@ func (sp *selectPlanner) ExecutionPlan() (*vm.ExecutionPlan, error) { return p.executionPlan, nil } -func (p *selectExecutionPlanner) resultHeader() { +func (p *selectExecutionPlanner) setResultHeader() { resultHeader := []string{} for _, p := range p.queryPlan.projections { resultHeader = append(resultHeader, p.colName) @@ -213,6 +226,10 @@ func (p *selectExecutionPlanner) buildScan(n *scanNode) error { return nil } +// buildOptimizedCountScan is a special optimization made when a table only has +// a count aggregate and no other projections. Since the optimized scan +// aggregates the count of tuples on each page, but does not look at individual +// tuples. func (p *selectExecutionPlanner) buildOptimizedCountScan(n *countNode) { const cursorId = 1 p.executionPlan.Append(&vm.OpenReadCmd{P1: cursorId, P2: n.rootPage}) From 5169981a86d4c6e660dd9a26dc8bfba9d2acc4f1 Mon Sep 17 00:00:00 2001 From: chirst Date: Wed, 24 Jul 2024 22:46:30 -0600 Subject: [PATCH 2/3] insert refactors --- planner/insert.go | 87 +++++++++++++++++++++++++++++++---------------- planner/node.go | 7 ++-- 2 files changed, 62 insertions(+), 32 deletions(-) diff --git a/planner/insert.go b/planner/insert.go index b605081..e7bb595 100644 --- a/planner/insert.go +++ b/planner/insert.go @@ -16,6 +16,10 @@ var ( errMissingColumnName = errors.New("missing column") ) +// pkConstraint is the error message displayed when a primary key constraint is +// violated. 
+const pkConstraint = "pk unique constraint violated" + // insertCatalog defines the catalog methods needed by the insert planner type insertCatalog interface { GetColumns(tableOrIndexName string) ([]string, error) @@ -128,44 +132,23 @@ func (p *insertPlanner) ExecutionPlan() (*vm.ExecutionPlan, error) { func (p *insertExecutionPlanner) getExecutionPlan() (*vm.ExecutionPlan, error) { p.buildInit() - cursorId := 1 - p.executionPlan.Append(&vm.OpenWriteCmd{P1: cursorId, P2: p.queryPlan.rootPage}) + p.openWrite() for valueIdx := range len(p.queryPlan.colValues) / len(p.queryPlan.colNames) { - keyRegister := 1 - statementIDIdx := -1 - if p.queryPlan.pkColumn != "" { - statementIDIdx = slices.IndexFunc(p.queryPlan.colNames, func(s string) bool { - return s == p.queryPlan.pkColumn - }) - } - if statementIDIdx == -1 { - p.executionPlan.Append(&vm.NewRowIdCmd{P1: p.queryPlan.rootPage, P2: keyRegister}) - } else { - rowId, err := strconv.Atoi(p.queryPlan.colValues[statementIDIdx+valueIdx*len(p.queryPlan.colNames)]) - if err != nil { - return nil, err - } - integerCmdIdx := len(p.executionPlan.Commands) + 2 - p.executionPlan.Append(&vm.NotExistsCmd{P1: p.queryPlan.rootPage, P2: integerCmdIdx, P3: rowId}) - p.executionPlan.Append(&vm.HaltCmd{P1: 1, P4: "pk unique constraint violated"}) - p.executionPlan.Append(&vm.IntegerCmd{P1: rowId, P2: keyRegister}) + // For simplicity, the primary key is in the first register. + const keyRegister = 1 + if err := p.buildPrimaryKey(keyRegister, valueIdx); err != nil { + return nil, err } registerIdx := keyRegister for _, catalogColumnName := range p.queryPlan.catalogColumnNames { if catalogColumnName != "" && catalogColumnName == p.queryPlan.pkColumn { + // Skip the primary key column since it is handled before. 
continue } registerIdx += 1 - vIdx := -1 - for i, statementColumnName := range p.queryPlan.colNames { - if statementColumnName == catalogColumnName { - vIdx = i + (valueIdx * len(p.queryPlan.colNames)) - } - } - if vIdx == -1 { - return nil, fmt.Errorf("%w %s", errMissingColumnName, catalogColumnName) + if err := p.buildNonPkValue(valueIdx, registerIdx, catalogColumnName); err != nil { + return nil, err } - p.executionPlan.Append(&vm.StringCmd{P1: registerIdx, P4: p.queryPlan.colValues[vIdx]}) } p.executionPlan.Append(&vm.MakeRecordCmd{P1: 2, P2: registerIdx - 1, P3: registerIdx + 1}) p.executionPlan.Append(&vm.InsertCmd{P1: p.queryPlan.rootPage, P2: registerIdx + 1, P3: keyRegister}) @@ -179,6 +162,52 @@ func (p *insertExecutionPlanner) buildInit() { p.executionPlan.Append(&vm.TransactionCmd{P2: 1}) } +func (p *insertExecutionPlanner) openWrite() { + const cursorId = 1 + p.executionPlan.Append(&vm.OpenWriteCmd{P1: cursorId, P2: p.queryPlan.rootPage}) +} + +func (p *insertExecutionPlanner) buildPrimaryKey(keyRegister int, valueIdx int) error { + // If the table has a user defined pk column it needs to be looked up in the + // user defined column list. If the user has defined the pk column the + // execution plan will involve checking the uniqueness of the pk during + // execution. Otherwise the system guarantees a unique key. 
+ statementPkIdx := -1 + if p.queryPlan.pkColumn != "" { + statementPkIdx = slices.IndexFunc(p.queryPlan.colNames, func(s string) bool { + return s == p.queryPlan.pkColumn + }) + } + if statementPkIdx == -1 { + p.executionPlan.Append(&vm.NewRowIdCmd{P1: p.queryPlan.rootPage, P2: keyRegister}) + return nil + } + rowId, err := strconv.Atoi(p.queryPlan.colValues[statementPkIdx+valueIdx*len(p.queryPlan.colNames)]) + if err != nil { + return err + } + integerCmdIdx := len(p.executionPlan.Commands) + 2 + p.executionPlan.Append(&vm.NotExistsCmd{P1: p.queryPlan.rootPage, P2: integerCmdIdx, P3: rowId}) + p.executionPlan.Append(&vm.HaltCmd{P1: 1, P4: pkConstraint}) + p.executionPlan.Append(&vm.IntegerCmd{P1: rowId, P2: keyRegister}) + return nil +} + +func (p *insertExecutionPlanner) buildNonPkValue(valueIdx, registerIdx int, catalogColumnName string) error { + // Get the statement index of the column name. Because the name positions + // can mismatch the table column positions. + stmtColIdx := slices.IndexFunc(p.queryPlan.colNames, func(stmtColName string) bool { + return stmtColName == catalogColumnName + }) + // Requires the statement to define a value for each column in the table. + if stmtColIdx == -1 { + return fmt.Errorf("%w %s", errMissingColumnName, catalogColumnName) + } + valuesListIdx := stmtColIdx + (valueIdx * len(p.queryPlan.colNames)) + p.executionPlan.Append(&vm.StringCmd{P1: registerIdx, P4: p.queryPlan.colValues[valuesListIdx]}) + return nil +} + func checkValuesMatchColumns(s *compiler.InsertStmt) error { // TODO need to enhance for INSERT INTO foo (name) VALUES ('n1', 'n2') vl := len(s.ColValues) diff --git a/planner/node.go b/planner/node.go index 1d9c2e5..8b63fe3 100644 --- a/planner/node.go +++ b/planner/node.go @@ -76,10 +76,11 @@ type createNode struct { type insertNode struct { // rootPage is the rootPage of the table the insert is performed on. rootPage int - // catalogColumnNames are the names of columns associated with the table. 
+ // catalogColumnNames are all of the names of columns associated with the + // table. catalogColumnNames []string - // pkColumn is the name of the primary key column. The value is empty if - // there is no user defined pk. + // pkColumn is the name of the primary key column in the catalog. The value + // is empty if no user defined pk. pkColumn string // colNames are the names of columns specified in the insert statement. colNames []string From ea3347b9e3ca083dcb72a96394941103666b0743 Mon Sep 17 00:00:00 2001 From: chirst Date: Wed, 24 Jul 2024 22:58:26 -0600 Subject: [PATCH 3/3] create refactors --- planner/create.go | 78 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/planner/create.go b/planner/create.go index e2cface..377850e 100644 --- a/planner/create.go +++ b/planner/create.go @@ -9,9 +9,11 @@ import ( "github.com/chirst/cdb/vm" ) -var errInvalidPKColumnType = errors.New("primary key must be INTEGER type") -var errTableExists = errors.New("table exists") -var errMoreThanOnePK = errors.New("more than one primary key specified") +var ( + errInvalidPKColumnType = errors.New("primary key must be INTEGER type") + errTableExists = errors.New("table exists") + errMoreThanOnePK = errors.New("more than one primary key specified") +) // createCatalog defines the catalog methods needed by the create planner type createCatalog interface { @@ -21,29 +23,52 @@ type createCatalog interface { GetVersion() string } +// createPlanner is capable of generating a logical query plan and a physical +// executionPlan for a create statement. The planners within are separated by +// their responsibility. type createPlanner struct { - qp *createQueryPlanner - ep *createExecutionPlanner + // queryPlanner is responsible for transforming the AST to a logical query + // plan tree. This tree is made up of nodes similar to a relational algebra + // tree. The query planner also performs binding and validation. 
+ queryPlanner *createQueryPlanner + // executionPlanner is responsible for converting the logical query plan + // tree to a bytecode execution plan capable of being run by the virtual + // machine. + executionPlanner *createExecutionPlanner } +// createQueryPlanner converts the AST to a logical query plan. Along the way it +// validates the statement makes sense with the catalog, a process known as +// binding. type createQueryPlanner struct { - catalog createCatalog - stmt *compiler.CreateStmt + // catalog contains the schema + catalog createCatalog + // stmt contains the AST + stmt *compiler.CreateStmt + // queryPlan contains the query plan being constructed. The root node must + // be createNode. queryPlan *createNode } +// createExecutionPlanner converts logical nodes to a bytecode execution plan +// that can be run by the vm. type createExecutionPlanner struct { - queryPlan *createNode + // queryPlan contains the logical query plan. This is populated by calling + // QueryPlan. + queryPlan *createNode + // executionPlan contains the bytecode execution plan being constructed. + // This is populated by calling ExecutionPlan. executionPlan *vm.ExecutionPlan } +// NewCreate creates a planner for the given create statement. func NewCreate(catalog createCatalog, stmt *compiler.CreateStmt) *createPlanner { return &createPlanner{ - qp: &createQueryPlanner{ + queryPlanner: &createQueryPlanner{ catalog: catalog, stmt: stmt, }, - ep: &createExecutionPlanner{ + executionPlanner: &createExecutionPlanner{ executionPlan: vm.NewExecutionPlan( catalog.GetVersion(), stmt.Explain, @@ -52,12 +77,22 @@ func NewCreate(catalog createCatalog, stmt *compiler.CreateStmt) *createPlanner } } +// QueryPlan generates the query plan for the planner. 
func (p *createPlanner) QueryPlan() (*QueryPlan, error) { - tableName, err := p.qp.ensureTableDoesNotExist() + qp, err := p.queryPlanner.getQueryPlan() if err != nil { return nil, err } - jSchema, err := p.qp.getSchemaString() + p.executionPlanner.queryPlan = p.queryPlanner.queryPlan + return qp, err +} + +func (p *createQueryPlanner) getQueryPlan() (*QueryPlan, error) { + tableName, err := p.ensureTableDoesNotExist() + if err != nil { + return nil, err + } + jSchema, err := p.getSchemaString() if err != nil { return nil, err } @@ -67,9 +102,8 @@ func (p *createPlanner) QueryPlan() (*QueryPlan, error) { tableName: tableName, schema: jSchema, } - qp := newQueryPlan(createNode, p.qp.stmt.ExplainQueryPlan) - p.ep.queryPlan = createNode - return qp, nil + p.queryPlan = createNode + return newQueryPlan(createNode, p.stmt.ExplainQueryPlan), nil } func (p *createQueryPlanner) ensureTableDoesNotExist() (string, error) { @@ -141,14 +175,20 @@ func (p *createQueryPlanner) schemaFrom() *kv.TableSchema { return &schema } -func (cp *createPlanner) ExecutionPlan() (*vm.ExecutionPlan, error) { - if cp.qp.queryPlan == nil { - _, err := cp.QueryPlan() +// ExecutionPlan returns the bytecode execution plan for the planner. Calling +// QueryPlan is not a prerequisite to this method as it will be called by +// ExecutionPlan if needed. +func (p *createPlanner) ExecutionPlan() (*vm.ExecutionPlan, error) { + if p.queryPlanner.queryPlan == nil { + _, err := p.QueryPlan() if err != nil { return nil, err } } - p := cp.ep + return p.executionPlanner.getExecutionPlan() +} + +func (p *createExecutionPlanner) getExecutionPlan() (*vm.ExecutionPlan, error) { p.executionPlan.Append(&vm.InitCmd{P2: 1}) p.executionPlan.Append(&vm.TransactionCmd{P2: 1}) p.executionPlan.Append(&vm.CreateBTreeCmd{P2: 1})