From a81f62b1e3ba479af06605898f5a0b9d44db2460 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 5 Oct 2024 12:07:27 +0300 Subject: [PATCH] stuff --- core/pseudo.rs | 12 +- core/storage/btree.rs | 181 +++++++++++++++++++++-- core/translate/emitter.rs | 152 ++++++++++++++----- core/translate/optimizer.rs | 281 +++++++++++++++++++----------------- core/translate/plan.rs | 15 +- core/translate/planner.rs | 2 - core/types.rs | 6 +- core/vdbe/explain.rs | 2 + core/vdbe/mod.rs | 130 ++++++++++++++--- core/vdbe/sorter.rs | 12 +- testing/cmdlineshell.test | 3 +- 11 files changed, 574 insertions(+), 222 deletions(-) diff --git a/core/pseudo.rs b/core/pseudo.rs index 78a80afc6..3ef849c9e 100644 --- a/core/pseudo.rs +++ b/core/pseudo.rs @@ -52,11 +52,19 @@ impl Cursor for PseudoCursor { unimplemented!(); } - fn seek_ge(&mut self, _: &OwnedRecord) -> Result> { + fn seek_ge_rowid(&mut self, _: u64) -> Result> { unimplemented!(); } - fn seek_gt(&mut self, _: &OwnedRecord) -> Result> { + fn seek_gt_rowid(&mut self, _: u64) -> Result> { + unimplemented!(); + } + + fn seek_ge_index(&mut self, _: &OwnedRecord) -> Result> { + unimplemented!(); + } + + fn seek_gt_index(&mut self, _: &OwnedRecord) -> Result> { unimplemented!(); } diff --git a/core/storage/btree.rs b/core/storage/btree.rs index dfb9d313c..87261681a 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -24,7 +24,7 @@ const BTREE_HEADER_OFFSET_FRAGMENTED: usize = 7; /* number of fragmented bytes - const BTREE_HEADER_OFFSET_RIGHTMOST: usize = 8; /* if internalnode, pointer right most pointer (saved separately from cells) -> u32 */ #[derive(Clone)] -pub enum IndexSeekOp { +pub enum SeekOp { GT, GE, } @@ -152,6 +152,7 @@ impl BTreeCursor { return Ok(CursorResult::Ok((Some(*_rowid), Some(record)))); } BTreeCell::IndexInteriorCell(IndexInteriorCell { + payload, left_child_page, .. }) => { mem_page.advance(); @@ -176,7 +177,7 @@ impl BTreeCursor { fn btree_index_seek( &mut self, key: &OwnedRecord, - op: IndexSeekOp, + op: SeekOp, ) -> Result, Option)>> { self.move_to_index_leaf(key, op.clone())?; @@ -203,8 +204,8 @@ impl BTreeCursor { mem_page.advance(); let record = crate::storage::sqlite3_ondisk::read_record(payload)?; let comparison = match op { - IndexSeekOp::GT => record > *key, - IndexSeekOp::GE => record >= *key, + SeekOp::GT => record > *key, + SeekOp::GE => record >= *key, }; if comparison { let rowid = match record.values.get(1) { @@ -222,11 +223,63 @@ impl BTreeCursor { Ok(CursorResult::Ok((None, None))) } + fn btree_table_seek( + &mut self, + rowid: u64, + op: SeekOp, + ) -> Result, Option)>> { + self.move_to_table_leaf(rowid, op.clone())?; + + let mem_page = self.get_mem_page(); + let page_idx = mem_page.page_idx; + let page = self.pager.read_page(page_idx)?; + let page = RefCell::borrow(&page); + if page.is_locked() { + return Ok(CursorResult::IO); + } + + let page = page.contents.read().unwrap(); + let page = page.as_ref().unwrap(); + + for cell_idx in 0..page.cell_count() { + match &page.cell_get( + cell_idx, + self.pager.clone(), + self.max_local(page.page_type()), + self.min_local(page.page_type()), + self.usable_space(), + )? { + BTreeCell::TableLeafCell(TableLeafCell { + _rowid: cell_rowid, + _payload: payload, + first_overflow_page: _, + }) => { + mem_page.advance(); + let comparison = match op { + SeekOp::GT => *cell_rowid > rowid, + SeekOp::GE => *cell_rowid >= rowid, + }; + if comparison { + let record = crate::storage::sqlite3_ondisk::read_record(payload)?; + return Ok(CursorResult::Ok((Some(*cell_rowid), Some(record)))); + } + } + cell_type => { + unreachable!("unexpected cell type: {:?}", cell_type); + } + } + } + Ok(CursorResult::Ok((None, None))) + } + fn btree_seek_rowid( &mut self, rowid: u64, ) -> Result, Option)>> { - self.move_to(rowid)?; + match self.move_to(rowid)? { + CursorResult::Ok(_) => {} + CursorResult::IO => return Ok(CursorResult::IO), + }; let mem_page = self.get_mem_page(); @@ -266,6 +319,80 @@ impl BTreeCursor { Ok(CursorResult::Ok((None, None))) } + fn move_to_table_leaf(&mut self, rowid: u64, cmp: SeekOp) -> Result> { + self.move_to_root(); + + loop { + let mem_page = self.get_mem_page(); + let page_idx = mem_page.page_idx; + let page = self.pager.read_page(page_idx)?; + let page = RefCell::borrow(&page); + if page.is_locked() { + return Ok(CursorResult::IO); + } + + let page = page.contents.read().unwrap(); + let page = page.as_ref().unwrap(); + if page.is_leaf() { + return Ok(CursorResult::Ok(())); + } + + let mut found_cell = false; + for cell_idx in 0..page.cell_count() { + match &page.cell_get( + cell_idx, + self.pager.clone(), + self.max_local(page.page_type()), + self.min_local(page.page_type()), + self.usable_space(), + )? { + BTreeCell::TableInteriorCell(TableInteriorCell { + _left_child_page, + _rowid, + }) => { + let comparison = match cmp { + SeekOp::GT => *_rowid > rowid, + SeekOp::GE => *_rowid >= rowid, + }; + if comparison { + mem_page.advance(); + let mem_page = + MemPage::new(Some(mem_page.clone()), *_left_child_page as usize, 0); + self.page.replace(Some(Rc::new(mem_page))); + found_cell = true; + break; + } + } + BTreeCell::TableLeafCell(_) => { + unreachable!("we don't iterate leaf cells while trying to move to a leaf cell"); + } + BTreeCell::IndexInteriorCell(_) => { + unimplemented!(); + } + BTreeCell::IndexLeafCell(_) => { + unimplemented!(); + } + } + } + + if !found_cell { + let parent = mem_page.clone(); + match page.rightmost_pointer() { + Some(right_most_pointer) => { + let mem_page = MemPage::new(Some(parent), right_most_pointer as usize, 0); + self.page.replace(Some(Rc::new(mem_page))); + continue; + } + None => { + unreachable!("we shall not go back up! The only way is down the slope"); + } + } + } + + return Ok(CursorResult::Ok(())); + } + } + fn move_to_root(&mut self) { self.page .replace(Some(Rc::new(MemPage::new(None, self.root_page, 0)))); @@ -407,7 +534,7 @@ impl BTreeCursor { fn move_to_index_leaf( &mut self, key: &OwnedRecord, - cmp: IndexSeekOp, + cmp: SeekOp, ) -> Result> { self.move_to_root(); loop { @@ -443,8 +570,8 @@ impl BTreeCursor { let record = crate::storage::sqlite3_ondisk::read_record(payload)?; let comparison = match cmp { - IndexSeekOp::GT => record > *key, - IndexSeekOp::GE => record >= *key, + SeekOp::GT => record > *key, + SeekOp::GE => record >= *key, }; if comparison { mem_page.advance(); @@ -1410,6 +1537,12 @@ impl Cursor for BTreeCursor { fn next(&mut self) -> Result> { match self.get_next_record()? { CursorResult::Ok((rowid, next)) => { + { + let curr_rowid = self.rowid.borrow(); + if curr_rowid.is_some() && curr_rowid.unwrap() >= 8000 && rowid.is_some() && rowid.unwrap() < 8000 { + println!("curr_rowid: {:?}, rowid: {:?}, next: {:?}", curr_rowid.unwrap(), rowid.unwrap(), next); + } + } self.rowid.replace(rowid); self.record.replace(next); Ok(CursorResult::Ok(())) @@ -1438,11 +1571,32 @@ impl Cursor for BTreeCursor { } } - fn seek_ge(&mut self, key: &OwnedRecord) -> Result> { - match self.btree_index_seek(key, IndexSeekOp::GE)? { + fn seek_ge_index(&mut self, key: &OwnedRecord) -> Result> { + match self.btree_index_seek(key, SeekOp::GE)? { + CursorResult::Ok((rowid, record)) => { + self.rowid.replace(rowid); + self.record.replace(record); + Ok(CursorResult::Ok(rowid.is_some())) + } + CursorResult::IO => Ok(CursorResult::IO), + } + } + + fn seek_gt_index(&mut self, key: &OwnedRecord) -> Result> { + match self.btree_index_seek(key, SeekOp::GT)? { + CursorResult::Ok((rowid, record)) => { + self.rowid.replace(rowid); + self.record.replace(record); + Ok(CursorResult::Ok(rowid.is_some())) + } + CursorResult::IO => Ok(CursorResult::IO), + } + } + + fn seek_ge_rowid(&mut self, rowid: u64) -> Result> { + match self.btree_table_seek(rowid, SeekOp::GE)? { CursorResult::Ok((rowid, record)) => { self.rowid.replace(rowid); - println!("seek_ge: {:?}", record); self.record.replace(record); Ok(CursorResult::Ok(rowid.is_some())) } @@ -1450,11 +1604,10 @@ impl Cursor for BTreeCursor { } } - fn seek_gt(&mut self, key: &OwnedRecord) -> Result> { - match self.btree_index_seek(key, IndexSeekOp::GT)? { + fn seek_gt_rowid(&mut self, rowid: u64) -> Result> { + match self.btree_table_seek(rowid, SeekOp::GT)? { CursorResult::Ok((rowid, record)) => { self.rowid.replace(rowid); - println!("seek_gt: {:?}", record); self.record.replace(record); Ok(CursorResult::Ok(rowid.is_some())) } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 22dc0a149..e5b4635f9 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -250,7 +250,7 @@ impl Emitter for Operator { _ => Ok(OpStepResult::Done), } } - Operator::IndexScan { + Operator::Search { table, table_identifier, index, @@ -262,18 +262,25 @@ impl Emitter for Operator { .. } => { *step += 1; - const INDEX_SCAN_OPEN_AND_SEEK: usize = 1; - const INDEX_SCAN_NEXT: usize = 2; + const SEARCH_OPEN_READ: usize = 1; + const SEARCH_SEEK_AND_CONDITIONS: usize = 2; + const SEARCH_NEXT: usize = 3; match *step { - INDEX_SCAN_OPEN_AND_SEEK => { + SEARCH_OPEN_READ => { let table_cursor_id = program.alloc_cursor_id( Some(table_identifier.clone()), Some(Table::BTree(table.clone())), ); - let index_cursor_id = program.alloc_cursor_id( - Some(index.name.clone()), - Some(Table::Index(index.clone())), - ); + + let index_cursor_id = if let Some(index) = index { + program.alloc_cursor_id( + Some(index.name.clone()), + Some(Table::Index(index.clone())), + ) + } else { + table_cursor_id + }; + let next_row_label = program.allocate_label(); m.next_row_labels.insert(*id, next_row_label); let rewind_label = program.allocate_label(); @@ -283,12 +290,24 @@ impl Emitter for Operator { root_page: table.root_page, }); program.emit_insn(Insn::OpenReadAwait); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); + if let Some(index) = index { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + } + Ok(OpStepResult::Continue) + } + SEARCH_SEEK_AND_CONDITIONS => { + let table_cursor_id = program.resolve_cursor_id(table_identifier, None); + let index_cursor_id = if let Some(index) = index { + program.resolve_cursor_id(&index.name, None) + } else { + table_cursor_id + }; + let rewind_label = *m.rewind_labels.last().unwrap(); let cmp_reg = program.alloc_register(); // TODO this only handles ascending indexes match seek_cmp { @@ -316,6 +335,7 @@ impl Emitter for Operator { match seek_cmp { ast::Operator::Equals | ast::Operator::GreaterEquals => { Insn::SeekGE { + is_index: index.is_some(), cursor_id: index_cursor_id, start_reg: cmp_reg, num_regs: 1, @@ -325,6 +345,7 @@ impl Emitter for Operator { ast::Operator::Greater | ast::Operator::Less | ast::Operator::LessEquals => Insn::SeekGT { + is_index: index.is_some(), cursor_id: index_cursor_id, start_reg: cmp_reg, num_regs: 1, @@ -349,50 +370,113 @@ impl Emitter for Operator { program.defer_label_resolution(rewind_label, program.offset() as usize); - // We are currently only handling ascending indexes. + // TODO: We are currently only handling ascending indexes. // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. + // + // For primary key searches we emit RowId and then compare it to the seek value. - let abort_jump_target = *m.termination_label_stack.last().unwrap(); + let abort_jump_target = *m.next_row_labels.get(id).unwrap_or(m.termination_label_stack.last().unwrap()); match seek_cmp { ast::Operator::Equals | ast::Operator::LessEquals => { - program.emit_insn_with_label_dependency( - Insn::IdxGT { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); + if index.is_some() { + program.emit_insn_with_label_dependency( + Insn::IdxGT { + cursor_id: index_cursor_id, + start_reg: cmp_reg, + num_regs: 1, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } } ast::Operator::Less => { + if index.is_some() { program.emit_insn_with_label_dependency( Insn::IdxGE { cursor_id: index_cursor_id, start_reg: cmp_reg, num_regs: 1, target_pc: abort_jump_target, - }, - abort_jump_target, - ); + }, + abort_jump_target, + ); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } } _ => {} } - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id, - }); + if index.is_some() { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } + + let jump_label = m + .next_row_labels + .get(id) + .unwrap_or(m.termination_label_stack.last().unwrap()); + if let Some(predicates) = predicates { + for predicate in predicates.iter() { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } Ok(OpStepResult::ReadyToEmit) } - INDEX_SCAN_NEXT => { - let cursor_id = program.resolve_cursor_id(&index.name, None); + SEARCH_NEXT => { + let cursor_id = if let Some(index) = index { + program.resolve_cursor_id(&index.name, None) + } else { + program.resolve_cursor_id(table_identifier, None) + }; program .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); program.emit_insn(Insn::NextAsync { cursor_id }); @@ -1354,7 +1438,7 @@ impl Emitter for Operator { Ok(start_reg) } - Operator::IndexScan { + Operator::Search { table, table_identifier, .. diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 71ceb5765..54b7e083b 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -53,7 +53,7 @@ fn use_indexes( available_indexes: &[Rc], ) -> Result<()> { match operator { - Operator::IndexScan { .. } => Ok(()), + Operator::Search { .. } => Ok(()), Operator::Scan { table, predicates: filter, @@ -66,75 +66,59 @@ fn use_indexes( } let fs = filter.as_mut().unwrap(); - let mut i = 0; - let mut maybe_rowid_predicate = None; - while i < fs.len() { + for i in 0..fs.len() { let f = fs[i].take_ownership(); - let table_index = referenced_tables + let table = referenced_tables .iter() - .position(|(t, t_id)| Rc::ptr_eq(t, table) && t_id == table_identifier) + .find(|(t, t_id)| Rc::ptr_eq(t, table) && t_id == table_identifier) .unwrap(); - let (can_use, expr) = - try_extract_rowid_comparison_expression(f, table_index, referenced_tables)?; - if can_use { - maybe_rowid_predicate = Some(expr); - fs.remove(i); - break; - } else { - fs[i] = expr; - i += 1; - } - } - - if let Some(rowid_predicate) = maybe_rowid_predicate { - let predicates_owned = if fs.is_empty() { - None - } else { - Some(std::mem::take(fs)) - }; - *operator = Operator::SeekRowid { - table: table.clone(), - table_identifier: table_identifier.clone(), - rowid_predicate, - predicates: predicates_owned, - id: *id, - step: 0, - }; - return Ok(()); - } - - let mut maybe_index_predicate = None; - let mut maybe_index_idx = None; - let fs = filter.as_mut().unwrap(); - for i in 0..fs.len() { - let mut f = fs[i].take_ownership(); - let index_idx = f.check_index_scan(available_indexes)?; - if index_idx.is_some() { - maybe_index_predicate = Some(f); - maybe_index_idx = index_idx; - fs.remove(i); - break; - } - } - - if let Some(index_idx) = maybe_index_idx { - let index_predicate = maybe_index_predicate.unwrap(); - match index_predicate { - ast::Expr::Binary(lhs, op, rhs) => { - *operator = Operator::IndexScan { - table: table.clone(), - index: available_indexes[index_idx].clone(), - index_predicate: ast::Expr::Binary(lhs, op, rhs.clone()), - predicates: Some(std::mem::take(fs)), - seek_cmp: op, - seek_expr: *rhs, - table_identifier: table_identifier.clone(), - id: *id, - step: 0, - }; + match try_extract_expr_that_utilizes_index(f, table, available_indexes)? { + Either::Left(non_index_using_expr) => { + fs[i] = non_index_using_expr; } - _ => { - crate::bail_parse_error!("Unsupported index predicate"); + Either::Right(index_using_expr) => { + match index_using_expr { + SearchableExpr::IndexSearch { index, cmp_op, cmp_expr } => { + fs.remove(i); + *operator = Operator::Search { + table: table.0.clone(), + index: Some(index.clone()), + predicates: Some(std::mem::take(fs)), + seek_cmp: cmp_op, + seek_expr: cmp_expr, + table_identifier: table_identifier.clone(), + id: *id, + step: 0, + }; + return Ok(()); + } + SearchableExpr::PrimaryKeySearch { table, cmp_op, cmp_expr } => { + fs.remove(i); + *operator = Operator::Search { + table, + index: None, + predicates: Some(std::mem::take(fs)), + seek_cmp: cmp_op, + seek_expr: cmp_expr, + table_identifier: table_identifier.clone(), + id: *id, + step: 0, + }; + return Ok(()); + } + SearchableExpr::PrimaryKeyEq { cmp_expr, table } => { + fs.remove(i); + *operator = Operator::SeekRowid { + table, + table_identifier: table_identifier.clone(), + rowid_predicate: cmp_expr, + predicates: Some(std::mem::take(fs)), + id: *id, + step: 0, + }; + return Ok(()); + } + } } } } @@ -325,7 +309,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result Ok(ConstantConditionEliminationResult::Continue), + Operator::Search { .. } => Ok(ConstantConditionEliminationResult::Continue), Operator::Nothing => Ok(ConstantConditionEliminationResult::Continue), } } @@ -426,7 +410,7 @@ fn push_predicates( Ok(()) } Operator::Scan { .. } => Ok(()), - Operator::IndexScan { .. } => Ok(()), + Operator::Search { .. } => Ok(()), Operator::Nothing => Ok(()), } } @@ -472,7 +456,7 @@ fn push_predicate( Ok(None) } - Operator::IndexScan { .. } => Ok(Some(predicate)), + Operator::Search { .. } => Ok(Some(predicate)), Operator::Filter { source, predicates: ps, @@ -733,7 +717,7 @@ fn find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_o mask } Operator::Scan { .. } => 0, - Operator::IndexScan { .. } => 0, + Operator::Search { .. } => 0, Operator::Nothing => 0, }; @@ -933,7 +917,7 @@ fn find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_o find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) } Operator::Scan { .. } => {} - Operator::IndexScan { .. } => {} + Operator::Search { .. } => {} Operator::Nothing => {} } } @@ -962,71 +946,40 @@ pub trait Optimizable { .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) } // if the expression is the primary key of a table, returns the index of the table - fn check_primary_key( + fn is_primary_key_of( &self, - referenced_tables: &[(Rc, String)], - ) -> Result>; - fn check_index_scan(&mut self, available_indexes: &[Rc]) -> Result>; + table: &(Rc, String), + ) -> bool; + fn check_index_scan(&mut self, table: &(Rc, String), available_indexes: &[Rc]) -> Result>; } impl Optimizable for ast::Expr { - fn check_primary_key( + fn is_primary_key_of( &self, - referenced_tables: &[(Rc, String)], - ) -> Result> { + table: &(Rc, String), + ) -> bool { match self { ast::Expr::Id(ident) => { let ident = normalize_ident(&ident.0); - let tables = referenced_tables - .iter() - .enumerate() - .filter_map(|(i, (t, _))| { - if t.get_column(&ident).map_or(false, |(_, c)| c.primary_key) { - Some(i) - } else { - None - } - }); - - let mut matches = 0; - let mut matching_tbl = None; - - for tbl in tables { - matching_tbl = Some(tbl); - matches += 1; - if matches > 1 { - crate::bail_parse_error!("ambiguous column name {}", ident) - } - } - - Ok(matching_tbl) + table.0.get_column(&ident).map_or(false, |(_, c)| c.primary_key) } ast::Expr::Qualified(tbl, ident) => { let tbl = normalize_ident(&tbl.0); let ident = normalize_ident(&ident.0); - let table = referenced_tables.iter().enumerate().find(|(_, (t, t_id))| { - *t_id == tbl && t.get_column(&ident).map_or(false, |(_, c)| c.primary_key) - }); - - if table.is_none() { - return Ok(None); - } - - let table = table.unwrap(); - - Ok(Some(table.0)) + + tbl == table.1 && table.0.get_column(&ident).map_or(false, |(_, c)| c.primary_key) } - _ => Ok(None), + _ => false, } } - fn check_index_scan(&mut self, available_indexes: &[Rc]) -> Result> { + fn check_index_scan(&mut self, table: &(Rc, String), available_indexes: &[Rc]) -> Result> { match self { ast::Expr::Id(ident) => { let ident = normalize_ident(&ident.0); let indexes = available_indexes .iter() .enumerate() - .filter(|(_, i)| i.columns.iter().any(|c| c.name == ident)) + .filter(|(_, i)| i.table_name == table.1 && i.columns.iter().any(|c| c.name == ident)) .collect::>(); if indexes.is_empty() { return Ok(None); @@ -1040,9 +993,10 @@ impl Optimizable for ast::Expr { let tbl = normalize_ident(&tbl.0); let ident = normalize_ident(&ident.0); let index = available_indexes.iter().enumerate().find(|(_, i)| { - let normalized_tbl = normalize_ident(&i.table_name); - normalized_tbl == tbl - && i.columns.iter().any(|c| normalize_ident(&c.name) == ident) + if i.table_name != tbl { + return false; + } + i.columns.iter().any(|c| normalize_ident(&c.name) == ident) }); if index.is_none() { return Ok(None); @@ -1050,11 +1004,11 @@ impl Optimizable for ast::Expr { Ok(Some(index.unwrap().0)) } ast::Expr::Binary(lhs, op, rhs) => { - let lhs_index = lhs.check_index_scan(available_indexes)?; + let lhs_index = lhs.check_index_scan(table, available_indexes)?; if lhs_index.is_some() { return Ok(lhs_index); } - let rhs_index = rhs.check_index_scan(available_indexes)?; + let rhs_index = rhs.check_index_scan(table, available_indexes)?; if rhs_index.is_some() { // swap lhs and rhs let lhs_new = rhs.take_ownership(); @@ -1186,28 +1140,89 @@ impl Optimizable for ast::Expr { } } -pub fn try_extract_rowid_comparison_expression( +pub enum Either { + Left(T), + Right(U), +} + +/// An expression that can be used to search for a row in a table using an index +/// (i.e. a primary key or a secondary index) +/// +pub enum SearchableExpr { + /// A primary key equality search. This is a special case of the primary key search + /// that uses the SeekRowid operator and bytecode instruction. + PrimaryKeyEq { + table: Rc, + cmp_expr: ast::Expr, + }, + /// A primary key search. This uses the Search operator and uses bytecode instructions like SeekGT, SeekGE etc. + PrimaryKeySearch { + table: Rc, + cmp_op: ast::Operator, + cmp_expr: ast::Expr, + }, + /// A secondary index search. This uses the Search operator and uses bytecode instructions like SeekGE, SeekGT etc. + IndexSearch { + index: Rc, + cmp_op: ast::Operator, + cmp_expr: ast::Expr, + }, +} + +pub fn try_extract_expr_that_utilizes_index( expr: ast::Expr, - table_index: usize, - referenced_tables: &[(Rc, String)], -) -> Result<(bool, ast::Expr)> { + table: &(Rc, String), + available_indexes: &[Rc], +) -> Result> { match expr { - ast::Expr::Binary(lhs, ast::Operator::Equals, rhs) => { - if let Some(lhs_table_index) = lhs.check_primary_key(referenced_tables)? { - if lhs_table_index == table_index { - return Ok((true, *rhs)); + ast::Expr::Binary(mut lhs, operator, mut rhs) => { + if lhs.is_primary_key_of(table) { + match operator { + ast::Operator::Equals => { + return Ok(Either::Right(SearchableExpr::PrimaryKeyEq { table: table.0.clone(), cmp_expr: *rhs })); + } + ast::Operator::Greater | ast::Operator::GreaterEquals | ast::Operator::Less | ast::Operator::LessEquals => { + return Ok(Either::Right(SearchableExpr::PrimaryKeySearch { table: table.0.clone(), cmp_op: operator, cmp_expr: *rhs })); + } + _ => {} } } - if let Some(rhs_table_index) = rhs.check_primary_key(referenced_tables)? { - if rhs_table_index == table_index { - return Ok((true, *lhs)); + if rhs.is_primary_key_of(table) { + match operator { + ast::Operator::Equals => { + return Ok(Either::Right(SearchableExpr::PrimaryKeyEq { table: table.0.clone(), cmp_expr: *lhs })); + } + ast::Operator::Greater | ast::Operator::GreaterEquals | ast::Operator::Less | ast::Operator::LessEquals => { + return Ok(Either::Right(SearchableExpr::PrimaryKeySearch { table: table.0.clone(), cmp_op: operator, cmp_expr: *lhs })); + } + _ => {} + } + } + + if let Some(index_index) = lhs.check_index_scan(table, available_indexes)? { + match operator { + ast::Operator::Equals | + ast::Operator::Greater | ast::Operator::GreaterEquals | ast::Operator::Less | ast::Operator::LessEquals => { + return Ok(Either::Right(SearchableExpr::IndexSearch { index: available_indexes[index_index].clone(), cmp_op: operator, cmp_expr: *rhs })); + } + _ => {} + } + } + + if let Some(index_index) = rhs.check_index_scan(table, available_indexes)? { + match operator { + ast::Operator::Equals | + ast::Operator::Greater | ast::Operator::GreaterEquals | ast::Operator::Less | ast::Operator::LessEquals => { + return Ok(Either::Right(SearchableExpr::IndexSearch { index: available_indexes[index_index].clone(), cmp_op: operator, cmp_expr: *lhs })); + } + _ => {} } } - Ok((false, ast::Expr::Binary(lhs, ast::Operator::Equals, rhs))) + Ok(Either::Left(ast::Expr::Binary(lhs, operator, rhs))) } - _ => Ok((false, expr)), + _ => Ok(Either::Left(expr)), } } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index e7e02f23c..24a3b6513 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -129,12 +129,11 @@ pub enum Operator { predicates: Option>, step: usize, }, - IndexScan { + Search { id: usize, - index: Rc, + index: Option>, seek_cmp: ast::Operator, seek_expr: ast::Expr, - index_predicate: ast::Expr, table: Rc, table_identifier: String, predicates: Option>, @@ -189,7 +188,7 @@ impl Operator { .map(|e| e.column_count(referenced_tables)) .sum(), Operator::Scan { table, .. } => table.columns.len(), - Operator::IndexScan { table, .. } => table.columns.len(), + Operator::Search { table, .. } => table.columns.len(), Operator::Nothing => 0, } } @@ -244,7 +243,7 @@ impl Operator { }) .collect(), Operator::Scan { table, .. } => table.columns.iter().map(|c| c.name.clone()).collect(), - Operator::IndexScan { table, .. } => { + Operator::Search { table, .. } => { table.columns.iter().map(|c| c.name.clone()).collect() } Operator::Nothing => vec![], @@ -261,7 +260,7 @@ impl Operator { Operator::Order { id, .. } => *id, Operator::Projection { id, .. } => *id, Operator::Scan { id, .. } => *id, - Operator::IndexScan { id, .. } => *id, + Operator::Search { id, .. } => *id, Operator::Nothing => unreachable!(), } } @@ -451,7 +450,7 @@ impl Display for Operator { }?; Ok(()) } - Operator::IndexScan { table, .. } => { + Operator::Search { table, .. } => { writeln!(f, "{}INDEX SCAN {}", indent, table.name)?; Ok(()) } @@ -515,7 +514,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( .position(|(t, _)| Rc::ptr_eq(t, table)) .unwrap(); } - Operator::IndexScan { table, .. } => { + Operator::Search { table, .. } => { table_refs_mask |= 1 << tables .iter() diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 1c3dba4e6..deca4497c 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -277,8 +277,6 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

Result<()>; fn rowid(&self) -> Result>; fn seek_rowid(&mut self, rowid: u64) -> Result>; - fn seek_ge(&mut self, key: &OwnedRecord) -> Result>; - fn seek_gt(&mut self, key: &OwnedRecord) -> Result>; + fn seek_ge_rowid(&mut self, rowid: u64) -> Result>; + fn seek_gt_rowid(&mut self, rowid: u64) -> Result>; + fn seek_ge_index(&mut self, key: &OwnedRecord) -> Result>; + fn seek_gt_index(&mut self, key: &OwnedRecord) -> Result>; fn seek_to_last(&mut self) -> Result>; fn record(&self) -> Result>>; fn insert( diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 25b6de9ca..669c159b0 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -534,6 +534,7 @@ pub fn insn_to_str( "".to_string(), ), Insn::SeekGT { + is_index, cursor_id, start_reg, num_regs, @@ -548,6 +549,7 @@ pub fn insn_to_str( "".to_string(), ), Insn::SeekGE { + is_index, cursor_id, start_reg, num_regs, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 5ca0c83ba..0d5c714dc 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -305,16 +305,22 @@ pub enum Insn { table_cursor_id: CursorID, }, + // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. // Seek to the first index entry that is greater than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. SeekGE { + is_index: bool, cursor_id: CursorID, start_reg: usize, num_regs: usize, target_pc: BranchOffset, }, + // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. // Seek to the first index entry that is greater than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. SeekGT { + is_index: bool, cursor_id: CursorID, start_reg: usize, num_regs: usize, @@ -1190,21 +1196,59 @@ impl Program { start_reg, num_regs, target_pc, + is_index, } => { - let cursor = cursors.get_mut(cursor_id).unwrap(); - let record_from_regs: OwnedRecord = - make_owned_record(&state.registers, start_reg, num_regs); - match cursor.seek_ge(&record_from_regs)? { - CursorResult::Ok(found) => { - if !found { - state.pc = *target_pc; - } else { - state.pc += 1; + if *is_index { + let cursor = cursors.get_mut(cursor_id).unwrap(); + let record_from_regs: OwnedRecord = + make_owned_record(&state.registers, start_reg, num_regs); + match cursor.seek_ge_index(&record_from_regs)? { + CursorResult::Ok(found) => { + if !found { + state.pc = *target_pc; + } else { + state.pc += 1; + } + } + CursorResult::IO => { + // If there is I/O, the instruction is restarted. + return Ok(StepResult::IO); } } - CursorResult::IO => { - // If there is I/O, the instruction is restarted. - return Ok(StepResult::IO); + } else { + let cursor = cursors.get_mut(cursor_id).unwrap(); + let rowid = match &state.registers[*start_reg] { + OwnedValue::Null => { + // All integer values are greater than null so we just rewind the cursor + match cursor.rewind()? { + CursorResult::Ok(()) => {} + CursorResult::IO => { + // If there is I/O, the instruction is restarted. + return Ok(StepResult::IO); + } + } + state.pc += 1; + continue; + } + OwnedValue::Integer(rowid) => *rowid as u64, + _ => { + return Err(LimboError::InternalError( + "SeekRowid: the value in the register is not an integer".into(), + )); + } + }; + match cursor.seek_ge_rowid(rowid)? { + CursorResult::Ok(found) => { + if !found { + state.pc = *target_pc; + } else { + state.pc += 1; + } + } + CursorResult::IO => { + // If there is I/O, the instruction is restarted. + return Ok(StepResult::IO); + } } } } @@ -1213,21 +1257,59 @@ impl Program { start_reg, num_regs, target_pc, + is_index, } => { - let cursor = cursors.get_mut(cursor_id).unwrap(); - let record_from_regs: OwnedRecord = - make_owned_record(&state.registers, start_reg, num_regs); - match cursor.seek_gt(&record_from_regs)? { - CursorResult::Ok(found) => { - if !found { - state.pc = *target_pc; - } else { - state.pc += 1; + if *is_index { + let cursor = cursors.get_mut(cursor_id).unwrap(); + let record_from_regs: OwnedRecord = + make_owned_record(&state.registers, start_reg, num_regs); + match cursor.seek_gt_index(&record_from_regs)? { + CursorResult::Ok(found) => { + if !found { + state.pc = *target_pc; + } else { + state.pc += 1; + } + } + CursorResult::IO => { + // If there is I/O, the instruction is restarted. + return Ok(StepResult::IO); } } - CursorResult::IO => { - // If there is I/O, the instruction is restarted. - return Ok(StepResult::IO); + } else { + let cursor = cursors.get_mut(cursor_id).unwrap(); + let rowid = match &state.registers[*start_reg] { + OwnedValue::Null => { + // All integer values are greater than null so we just rewind the cursor + match cursor.rewind()? { + CursorResult::Ok(()) => {} + CursorResult::IO => { + // If there is I/O, the instruction is restarted. + return Ok(StepResult::IO); + } + } + state.pc += 1; + continue; + } + OwnedValue::Integer(rowid) => *rowid as u64, + _ => { + return Err(LimboError::InternalError( + "SeekRowid: the value in the register is not an integer".into(), + )); + } + }; + match cursor.seek_gt_rowid(rowid)? { + CursorResult::Ok(found) => { + if !found { + state.pc = *target_pc; + } else { + state.pc += 1; + } + } + CursorResult::IO => { + // If there is I/O, the instruction is restarted. + return Ok(StepResult::IO); + } } } } diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 75980f222..e75650f26 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -79,11 +79,19 @@ impl Cursor for Sorter { unimplemented!(); } - fn seek_ge(&mut self, key: &OwnedRecord) -> Result> { + fn seek_ge_rowid(&mut self, _: u64) -> Result> { unimplemented!(); } - fn seek_gt(&mut self, key: &OwnedRecord) -> Result> { + fn seek_gt_rowid(&mut self, _: u64) -> Result> { + unimplemented!(); + } + + fn seek_ge_index(&mut self, _: &OwnedRecord) -> Result> { + unimplemented!(); + } + + fn seek_gt_index(&mut self, _: &OwnedRecord) -> Result> { unimplemented!(); } diff --git a/testing/cmdlineshell.test b/testing/cmdlineshell.test index 599dcfeb4..d68a341a0 100755 --- a/testing/cmdlineshell.test +++ b/testing/cmdlineshell.test @@ -21,7 +21,8 @@ CREATE TABLE products ( id INTEGER PRIMARY KEY, name TEXT, price REAL - );}} + );} +"CREATE INDEX age_idx on users (age);"} do_execsql_test schema-1 { .schema users