Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(core): Primary key index scans and single-column secondary index scans #350

Closed
wants to merge 32 commits into from
Closed
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f02da18
index scan wip foo doesnt work yet
jussisaurio Sep 29, 2024
3d56fbd
stuff
jussisaurio Oct 5, 2024
e118b70
fmt
jussisaurio Oct 5, 2024
99871bb
yield on io
jussisaurio Oct 5, 2024
ff236c7
Fix not advancing the cell index of pages
jussisaurio Oct 5, 2024
ed19f47
fix
jussisaurio Oct 5, 2024
3a11887
fixerinos
jussisaurio Oct 5, 2024
02d6fa3
Fix .schema users not displaying indexes on the users table
jussisaurio Oct 5, 2024
fe90aac
Handle CursorResult in deferred seek
jussisaurio Oct 5, 2024
9169f6e
same thing
jussisaurio Oct 5, 2024
43015f6
Workaround for compat test
jussisaurio Oct 5, 2024
d8a695a
rename tests
jussisaurio Oct 5, 2024
db0e2ea
Change another compat test to work around sqlite's weird choice to us…
jussisaurio Oct 5, 2024
d2233d6
Dont assume the rowid is the second column - it's the last
jussisaurio Oct 5, 2024
3826d4e
Add comment about code duplication
jussisaurio Oct 5, 2024
d22dbe9
remove garbage comment
jussisaurio Oct 5, 2024
d3e797f
rewind_labels was renamed to scan_loop_body_labels
jussisaurio Oct 5, 2024
47534cb
Get rid of Seekrowid operator in favor of a unified Search operator
jussisaurio Oct 5, 2024
dde10d2
Better EXPLAIN QUERY PLAN for Operator::Search
jussisaurio Oct 5, 2024
bb1c8b6
fmt
jussisaurio Oct 5, 2024
37f8771
Reduce duplication in btree.rs
jussisaurio Oct 5, 2024
1ae8d28
Use same move_to() function for tables and indexes
jussisaurio Oct 5, 2024
af9a751
Single seek function
jussisaurio Oct 5, 2024
6e7db36
reorder
jussisaurio Oct 5, 2024
15a66ea
single seek function in cursor trait
jussisaurio Oct 6, 2024
e5cf052
Why do sqlite btree child keys have <= keys and not < keys
jussisaurio Oct 6, 2024
fc71f2b
traverse index properly
jussisaurio Oct 7, 2024
8563d62
renaming
jussisaurio Oct 7, 2024
93a8110
dont assume index key has rowid in the second column: its the last
jussisaurio Oct 7, 2024
572db69
Add TODO comment about index corner case
jussisaurio Oct 7, 2024
43038cb
Handle seek() edge case with index seek
jussisaurio Oct 8, 2024
556f4b7
Refine edge case handling: add optional predicate to get_next_record()
jussisaurio Oct 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ fn display_schema(
) -> anyhow::Result<()> {
let sql = match table {
Some(table_name) => format!(
"SELECT sql FROM sqlite_schema WHERE type='table' AND name = '{}' AND name NOT LIKE 'sqlite_%'",
"SELECT sql FROM sqlite_schema WHERE type IN ('table', 'index') AND tbl_name = '{}' AND name NOT LIKE 'sqlite_%'",
table_name
),
None => String::from(
Expand Down
7 changes: 5 additions & 2 deletions core/pseudo.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use crate::Result;
use crate::{
types::{SeekKey, SeekOp},
Result,
};
use std::cell::{Ref, RefCell};

use crate::types::{Cursor, CursorResult, OwnedRecord, OwnedValue};
Expand Down Expand Up @@ -48,7 +51,7 @@ impl Cursor for PseudoCursor {
Ok(x)
}

fn seek_rowid(&mut self, _: u64) -> Result<CursorResult<bool>> {
fn seek(&mut self, _: SeekKey<'_>, _: SeekOp) -> Result<CursorResult<bool>> {
unimplemented!();
}

Expand Down
14 changes: 13 additions & 1 deletion core/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ impl Schema {
#[derive(Clone, Debug)]
pub enum Table {
BTree(Rc<BTreeTable>),
Index(Rc<Index>),
Pseudo(Rc<PseudoTable>),
}

Expand All @@ -57,20 +58,23 @@ impl Table {
pub fn get_rowid_alias_column(&self) -> Option<(usize, &Column)> {
match self {
Table::BTree(table) => table.get_rowid_alias_column(),
Table::Index(_) => None,
Table::Pseudo(_) => None,
}
}

pub fn column_is_rowid_alias(&self, col: &Column) -> bool {
match self {
Table::BTree(table) => table.column_is_rowid_alias(col),
Table::Index(_) => false,
Table::Pseudo(_) => false,
}
}

pub fn get_name(&self) -> &str {
match self {
Table::BTree(table) => &table.name,
Table::Index(index) => &index.name,
Table::Pseudo(_) => "",
}
}
Expand All @@ -81,6 +85,10 @@ impl Table {
Some(column) => Some(&column.name),
None => None,
},
Table::Index(i) => match i.columns.get(index) {
Some(column) => Some(&column.name),
None => None,
},
Table::Pseudo(table) => match table.columns.get(index) {
Some(column) => Some(&column.name),
None => None,
Expand All @@ -91,28 +99,32 @@ impl Table {
pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> {
match self {
Table::BTree(table) => table.get_column(name),
Table::Index(index) => unimplemented!(),
Table::Pseudo(table) => table.get_column(name),
}
}

pub fn get_column_at(&self, index: usize) -> &Column {
match self {
Table::BTree(table) => table.columns.get(index).unwrap(),
Table::Index(index) => unimplemented!(),
Table::Pseudo(table) => table.columns.get(index).unwrap(),
}
}

pub fn columns(&self) -> &Vec<Column> {
match self {
Table::BTree(table) => &table.columns,
Table::Index(index) => unimplemented!(),
Table::Pseudo(table) => &table.columns,
}
}

pub fn has_rowid(&self) -> bool {
match self {
Table::BTree(table) => table.has_rowid,
Table::Pseudo(_) => todo!(),
Table::Index(_) => unimplemented!(),
Table::Pseudo(_) => unimplemented!(),
}
}
}
Expand Down
148 changes: 120 additions & 28 deletions core/storage/btree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ use crate::storage::sqlite3_ondisk::{
read_btree_cell, read_varint, write_varint, BTreeCell, DatabaseHeader, PageContent, PageType,
TableInteriorCell, TableLeafCell,
};
use crate::types::{Cursor, CursorResult, OwnedRecord, OwnedValue};
use crate::types::{Cursor, CursorResult, OwnedRecord, OwnedValue, SeekKey, SeekOp};
use crate::Result;

use std::cell::{Ref, RefCell};
use std::rc::Rc;

use super::sqlite3_ondisk::{write_varint_to_vec, OverflowCell};
use super::sqlite3_ondisk::{write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell};

/*
These are offsets of fields in the header of a b-tree page.
Expand All @@ -23,6 +23,7 @@ const BTREE_HEADER_OFFSET_CELL_CONTENT: usize = 5; /* pointer to first byte of c
const BTREE_HEADER_OFFSET_FRAGMENTED: usize = 7; /* number of fragmented bytes -> u8 */
const BTREE_HEADER_OFFSET_RIGHTMOST: usize = 8; /* if internalnode, pointer right most pointer (saved separately from cells) -> u32 */

#[derive(Debug)]
pub struct MemPage {
parent: Option<Rc<MemPage>>,
page_idx: usize,
Expand Down Expand Up @@ -56,6 +57,7 @@ pub struct BTreeCursor {
record: RefCell<Option<OwnedRecord>>,
null_flag: bool,
database_header: Rc<RefCell<DatabaseHeader>>,
going_upwards: bool,
}

impl BTreeCursor {
Expand All @@ -72,6 +74,7 @@ impl BTreeCursor {
record: RefCell::new(None),
null_flag: false,
database_header,
going_upwards: false,
}
}

Expand Down Expand Up @@ -109,6 +112,7 @@ impl BTreeCursor {
}
None => match parent {
Some(ref parent) => {
self.going_upwards = true;
self.page.replace(Some(parent.clone()));
continue;
}
Expand Down Expand Up @@ -145,57 +149,114 @@ impl BTreeCursor {
let record = crate::storage::sqlite3_ondisk::read_record(_payload)?;
return Ok(CursorResult::Ok((Some(*_rowid), Some(record))));
}
BTreeCell::IndexInteriorCell(_) => {
unimplemented!();
BTreeCell::IndexInteriorCell(IndexInteriorCell {
payload,
left_child_page,
..
}) => {
if self.going_upwards {
self.going_upwards = false;
mem_page.advance();
let record = crate::storage::sqlite3_ondisk::read_record(payload)?;
let rowid = match record.values.last() {
Some(OwnedValue::Integer(rowid)) => *rowid as u64,
_ => unreachable!("index cells should have an integer rowid"),
};
return Ok(CursorResult::Ok((Some(rowid), Some(record))));
} else {
let mem_page =
MemPage::new(Some(mem_page.clone()), *left_child_page as usize, 0);
self.page.replace(Some(Rc::new(mem_page)));
continue;
}
}
BTreeCell::IndexLeafCell(_) => {
unimplemented!();
BTreeCell::IndexLeafCell(IndexLeafCell { payload, .. }) => {
mem_page.advance();
let record = crate::storage::sqlite3_ondisk::read_record(payload)?;
let rowid = match record.values.last() {
Some(OwnedValue::Integer(rowid)) => *rowid as u64,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't index keys also blobs/strings/whatever?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are getting the rowid here and AFAIK it's always the last value in the payload record

_ => unreachable!("index cells should have an integer rowid"),
};

return Ok(CursorResult::Ok((Some(rowid), Some(record))));
}
}
}
}

fn btree_seek_rowid(
fn seek<'a>(
&mut self,
rowid: u64,
key: SeekKey<'a>,
op: SeekOp,
) -> Result<CursorResult<(Option<u64>, Option<OwnedRecord>)>> {
self.move_to(rowid)?;
match self.move_to(key.clone(), op.clone())? {
CursorResult::Ok(_) => {}
CursorResult::IO => return Ok(CursorResult::IO),
};

let mem_page = self.get_mem_page();

let page_idx = mem_page.page_idx;
let page = self.pager.read_page(page_idx)?;
let page = RefCell::borrow(&page);
if page.is_locked() {
return Ok(CursorResult::IO);
}

let page = page.contents.read().unwrap();
let page = page.as_ref().unwrap();

for cell_idx in 0..page.cell_count() {
match &page.cell_get(
let cell = page.cell_get(
cell_idx,
self.pager.clone(),
self.max_local(page.page_type()),
self.min_local(page.page_type()),
self.usable_space(),
)? {
)?;
match &cell {
BTreeCell::TableLeafCell(TableLeafCell {
_rowid: cell_rowid,
_payload: p,
_payload: payload,
first_overflow_page: _,
}) => {
if *cell_rowid == rowid {
let record = crate::storage::sqlite3_ondisk::read_record(p)?;
let SeekKey::TableRowId(rowid_key) = key else {
unreachable!("table seek key should be a rowid");
};
mem_page.advance();
let comparison = match op {
SeekOp::GT => *cell_rowid > rowid_key,
SeekOp::GE => *cell_rowid >= rowid_key,
SeekOp::EQ => *cell_rowid == rowid_key,
};
if comparison {
let record = crate::storage::sqlite3_ondisk::read_record(payload)?;
return Ok(CursorResult::Ok((Some(*cell_rowid), Some(record))));
}
}
BTreeCell::IndexLeafCell(IndexLeafCell { payload, .. }) => {
let SeekKey::IndexKey(index_key) = key else {
unreachable!("index seek key should be a record");
};
mem_page.advance();
let record = crate::storage::sqlite3_ondisk::read_record(payload)?;
let comparison = match op {
SeekOp::GT => record > *index_key,
SeekOp::GE => record >= *index_key,
SeekOp::EQ => record == *index_key,
};
if comparison {
let rowid = match record.values.get(1) {
Some(OwnedValue::Integer(rowid)) => *rowid as u64,
_ => unreachable!("index cells should have an integer rowid"),
};
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get this part. We assume record.values.get(1) is the rowid. But I imagine there could be indexes with multiple columns right? Shouldn't the rowid be the last value? Furthermore, should we maybe change rowid name to key or something similar because it can be eitther rowid and primary key I think.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats a bug i forgot to refactor. it should be last

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in penberg@93a8110

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left the rowid naming, copypasted from discord:

I think i will leave the rowid naming because it reflects current reality better, doesnt our code pretty much assume we dont have WITHOUT ROWID tables right now? And I think WITHOUT ROWID tables are pretty much btree indexes in that they store everything in the key area? https://www.sqlite.org/withoutrowid.html#:~:text=A%20WITHOUT%20ROWID%20table%20uses,binary%20search%20on%20the%20rowid.

return Ok(CursorResult::Ok((Some(rowid), Some(record))));
}
}
cell_type => {
unreachable!("unexpected cell type: {:?}", cell_type);
}
}
}

Ok(CursorResult::Ok((None, None)))
}

Expand Down Expand Up @@ -240,7 +301,7 @@ impl BTreeCursor {
}
}

pub fn move_to(&mut self, key: u64) -> Result<CursorResult<()>> {
pub fn move_to<'a>(&mut self, key: SeekKey<'a>, cmp: SeekOp) -> Result<CursorResult<()>> {
// For a table with N rows, we can find any row by row id in O(log(N)) time by starting at the root page and following the B-tree pointers.
// B-trees consist of interior pages and leaf pages. Interior pages contain pointers to other pages, while leaf pages contain the actual row data.
//
Expand Down Expand Up @@ -294,8 +355,16 @@ impl BTreeCursor {
_left_child_page,
_rowid,
}) => {
if key < *_rowid {
mem_page.advance();
let SeekKey::TableRowId(rowid_key) = key else {
unreachable!("table seek key should be a rowid");
};
mem_page.advance();
let comparison = match cmp {
SeekOp::GT => rowid_key < *_rowid,
SeekOp::GE => rowid_key <= *_rowid,
SeekOp::EQ => rowid_key <= *_rowid,
};
if comparison {
let mem_page =
MemPage::new(Some(mem_page.clone()), *_left_child_page as usize, 0);
self.page.replace(Some(Rc::new(mem_page)));
Expand All @@ -312,20 +381,43 @@ impl BTreeCursor {
"we don't iterate leaf cells while trying to move to a leaf cell"
);
}
BTreeCell::IndexInteriorCell(_) => {
unimplemented!();
BTreeCell::IndexInteriorCell(IndexInteriorCell {
left_child_page,
payload,
..
}) => {
let SeekKey::IndexKey(index_key) = key else {
unreachable!("index seek key should be a record");
};
let record = crate::storage::sqlite3_ondisk::read_record(payload)?;
let comparison = match cmp {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we rename comparison to found?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

penberg@8563d62

Renamed to found in seek() and to target_leaf_page_is_in_left_subtree in move_to()

SeekOp::GT => index_key < &record,
SeekOp::GE => index_key <= &record,
SeekOp::EQ => index_key <= &record,
};
if comparison {
let mem_page =
MemPage::new(Some(mem_page.clone()), *left_child_page as usize, 0);
self.page.replace(Some(Rc::new(mem_page)));
found_cell = true;
break;
} else {
mem_page.advance();
}
}
BTreeCell::IndexLeafCell(_) => {
unimplemented!();
unreachable!(
"we don't iterate leaf cells while trying to move to a leaf cell"
);
}
}
}

if !found_cell {
let parent = mem_page.clone();
let parent = mem_page.parent.clone();
match page.rightmost_pointer() {
Some(right_most_pointer) => {
let mem_page = MemPage::new(Some(parent), right_most_pointer as usize, 0);
let mem_page = MemPage::new(parent, right_most_pointer as usize, 0);
self.page.replace(Some(Rc::new(mem_page)));
continue;
}
Expand Down Expand Up @@ -1285,8 +1377,8 @@ impl Cursor for BTreeCursor {
Ok(*self.rowid.borrow())
}

fn seek_rowid(&mut self, rowid: u64) -> Result<CursorResult<bool>> {
match self.btree_seek_rowid(rowid)? {
fn seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result<CursorResult<bool>> {
match self.seek(key, op)? {
CursorResult::Ok((rowid, record)) => {
self.rowid.replace(rowid);
self.record.replace(record);
Expand All @@ -1311,7 +1403,7 @@ impl Cursor for BTreeCursor {
_ => unreachable!("btree tables are indexed by integers!"),
};
if !moved_before {
match self.move_to(*int_key as u64)? {
match self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)? {
CursorResult::Ok(_) => {}
CursorResult::IO => return Ok(CursorResult::IO),
};
Expand All @@ -1336,7 +1428,7 @@ impl Cursor for BTreeCursor {
OwnedValue::Integer(i) => i,
_ => unreachable!("btree tables are indexed by integers!"),
};
match self.move_to(*int_key as u64)? {
match self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)? {
CursorResult::Ok(_) => {}
CursorResult::IO => return Ok(CursorResult::IO),
};
Expand Down
Loading
Loading