Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace buffer management with Arrow buffers #173

Open
wants to merge 43 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
906d507
Replace buffer management with Arrow buffers
davisp Sep 20, 2024
905e5c5
Cleanup error handling
davisp Oct 4, 2024
a0df708
Show external reference error
davisp Oct 9, 2024
a8c75c3
Merge remote-tracking branch 'origin/main' into pd/experiment/arrow-b…
rroelke Nov 14, 2024
6ce17e7
Split arrow queries out to new tiledb-query-core crate
rroelke Nov 15, 2024
274446c
Remove SizeInfo, introduce struct QueryBuffer
rroelke Nov 15, 2024
48bc1fc
build.rs
rroelke Nov 15, 2024
5fa66b0
Config::capi behind raw feature
rroelke Nov 15, 2024
a2a7951
Configure query-core to do arrow-shaped offsets
rroelke Nov 15, 2024
1a2bae8
Fix empty offsets
rroelke Nov 15, 2024
3ffbbcb
Example typo
rroelke Nov 15, 2024
92ec59c
Move raw ptr methods into QueryBuffer
rroelke Nov 15, 2024
4f84284
clippy except for fn len
rroelke Nov 15, 2024
47b8edd
is_empty for clippy
rroelke Nov 15, 2024
29fef07
Add tiledb_query_field_t to sys
rroelke Nov 19, 2024
25b0a86
sys tiledb_field_get_nullable
rroelke Nov 20, 2024
a6d45cf
AggregateFunctionHandle
rroelke Nov 20, 2024
76cfe85
Add aggregate to query-core and a bunch of Error refactoring
rroelke Nov 20, 2024
3443d7d
Add Cells -> RecordBatch conversion
rroelke Nov 21, 2024
170cb84
Fix cargo check --tests for cells crate
rroelke Nov 21, 2024
566c2dd
arrow.rs => datatype/mod.rs
rroelke Nov 26, 2024
8cb083f
Use DimensionKey for SubarrayBuilder
rroelke Nov 26, 2024
5228a43
SharedBuffers is type alias instead of wrapper
rroelke Nov 26, 2024
891a1bf
Query::subarray
rroelke Nov 26, 2024
089c59a
Should be part of DimensionKey commit
rroelke Nov 26, 2024
c1caf7d
Add query_roundtrip tests, not passing yet
rroelke Nov 26, 2024
a0d433d
Make target query field available to MutableOrShared
rroelke Nov 26, 2024
be27400
Alloc validity buffer for writes when the source is not nullable and …
rroelke Nov 26, 2024
74a75d4
datatype mod cleanly separates default type mapping vs. is compatible
rroelke Nov 27, 2024
535cdc2
Change test name
rroelke Nov 27, 2024
d05c1c1
Eq for Mode
rroelke Nov 27, 2024
27043b8
fixed_offsets for reading/writing var-size tiledb field using FixedSi…
rroelke Nov 27, 2024
2021b28
Steal WithoutReplacement strategy extension from tables
rroelke Nov 27, 2024
fa29848
arrow-proptest-strategies, stolen from tables
rroelke Nov 27, 2024
2f7cf50
arrow-proptest-strategies tweaks
rroelke Nov 27, 2024
8d0fe42
proptest_list_buffers_roundtrip_var
rroelke Nov 27, 2024
8c51932
Fix PrimitiveBuffers for all primitive types
rroelke Nov 27, 2024
351a1f8
Fix a few datatype cases
rroelke Nov 27, 2024
e28cbe5
Fill in arrow array FixedSizeList case
rroelke Nov 27, 2024
9aaf2d5
proptest_list_buffers_roundtrip_fixed
rroelke Nov 27, 2024
358c674
enum Capacity
rroelke Dec 2, 2024
15cd3ae
Add stdx-binary-search crate for searching Range
rroelke Dec 13, 2024
d93fb81
proptest_capacity_limits passes
rroelke Dec 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
[workspace]
resolver = "2"
members = [
"stdx/binary-search",
"tiledb/api",
"tiledb/common",
"tiledb/pod",
"tiledb/proc-macro",
"tiledb/queries",
"tiledb/query-core",
"tiledb/sys",
"tiledb/sys-cfg",
"tiledb/sys-defs",
"tiledb/utils",
"test-utils/arrow-proptest-strategies",
"test-utils/cells",
"test-utils/proptest-config",
"test-utils/signal",
Expand All @@ -23,6 +26,7 @@ default-members = [
"tiledb/pod",
"tiledb/proc-macro",
"tiledb/queries",
"tiledb/query-core",
"tiledb/utils",
]

Expand All @@ -35,10 +39,14 @@ version = "0.1.0"
anyhow = "1.0"
armerge = "2"
arrow = { version = "52.0.0", features = ["prettyprint"] }
arrow-array = { version = "52.0.0" }
arrow-buffer = { version = "52.0.0" }
arrow-proptest-strategies = { path = "test-utils/arrow-proptest-strategies" }
arrow-schema = { version = "52.0.0" }
bindgen = "0.70"
cells = { path = "test-utils/cells", version = "0.1.0" }
cmake = "0.1"
half = { version = "2.2.1", default-features = false }
itertools = "0"
num-traits = "0.2"
paste = "1.0"
Expand All @@ -47,6 +55,7 @@ regex = "1"
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1", features = ["float_roundtrip"] }
signal = { path = "test-utils/signal", version = "0.1.0" }
stdx-binary-search = { path = "stdx/binary-search", version = "0.1.0" }
strategy-ext = { path = "test-utils/strategy-ext", version = "0.1.0" }
tempfile = { version = "3" }
thiserror = { version = "1" }
Expand Down
11 changes: 11 additions & 0 deletions stdx/binary-search/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "stdx-binary-search"
edition.workspace = true
rust-version.workspace = true
version.workspace = true

[dependencies]
num-traits = { workspace = true }

[dev-dependencies]
proptest = { workspace = true }
153 changes: 153 additions & 0 deletions stdx/binary-search/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
use std::ops::Range;

use num_traits::{FromPrimitive, ToPrimitive};

/// Outcome of searching a space of values for the point where a `property`
/// stops being `true`.
///
/// The descriptions below reflect how the `Range` implementations of
/// `Search` in this file produce each variant, assuming `property` is `true`
/// on a leading segment of the space and `false` on the rest.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Bisect<T> {
/// `property` was `false` for the first value of the search space.
NeverTrue,
/// `property` was `true` up to and including the contained value,
/// and `false` for the value after it.
UpperBound(T),
/// `property` was not `false` for any value of the search space.
/// This is also the result for an empty search space.
AlwaysTrue,
}

/// A type which represents a searchable space of values.
pub trait Search {
/// The type of value contained in the search space.
type Item;

/// Performs an efficient search over the items of `self` to find the upper bound
/// where `property` is true.
///
/// `property` is some function which bisects the search space,
/// returning `true` on each value in the first segment and `false` on each value in the second.
///
/// The result is a [`Bisect`]: `AlwaysTrue` when the second segment is empty
/// (including when `self` has no items at all), `NeverTrue` when the first
/// segment is empty, and otherwise `UpperBound` holding the last item of the
/// first segment.
///
/// NOTE(review): the `Range` implementations in this file do not check that
/// `property` actually bisects the space; if it does not, the result is
/// whatever the search happens to converge on.
fn upper_bound<F>(&self, property: F) -> Bisect<Self::Item>
where
F: Fn(&Self::Item) -> bool;
}

/// Implements [`Search`] for `Range<T>` for every integer type `T` passed in.
///
/// The generated `upper_bound` binary-searches the half-open range, probing
/// `property` only at midpoints (plus at most one final check of the lower
/// bound), so it relies on `property` being `true` on a leading segment of the
/// range and `false` afterwards.
macro_rules! binary_search_impl {
    ($($ITYPE:ty),+) => {
        $(
            impl Search for Range<$ITYPE> {
                type Item = $ITYPE;

                fn upper_bound<F>(&self, property: F) -> Bisect<Self::Item>
                where
                    F: Fn(&Self::Item) -> bool,
                {
                    // Nothing to test: vacuously true everywhere.
                    if self.is_empty() {
                        return Bisect::AlwaysTrue;
                    }

                    // A single value is either entirely true or entirely false;
                    // the loop below never probes it, so decide it here.
                    if self.start + 1 == self.end {
                        let holds = property(&self.start);
                        return if holds {
                            Bisect::AlwaysTrue
                        } else {
                            Bisect::NeverTrue
                        };
                    }

                    // Narrow `lo..hi` until it spans exactly one value.
                    // `lo` only moves onto values where `property` held,
                    // `hi` only moves onto values where it did not.
                    let (mut lo, mut hi) = (self.start, self.end);
                    while lo + 1 < hi {
                        let mid = midpoint(&(lo..hi));
                        if property(&mid) {
                            lo = mid;
                        } else {
                            hi = mid;
                        }
                    }

                    if hi == self.end {
                        // The upper edge never moved: no probe returned false.
                        Bisect::AlwaysTrue
                    } else if property(&lo) {
                        Bisect::UpperBound(lo)
                    } else {
                        // `lo` is still the untested range start, and it fails.
                        Bisect::NeverTrue
                    }
                }
            }
        )+
    };
}

/// Returns the midpoint of `range`: `(start + end) / 2`, rounded toward zero.
///
/// The average is computed without ever forming `start + end`, so it cannot
/// overflow even when both bounds are near the limits of `i128`. Bounds that
/// do not fit in an `i128` (i.e. `u128` values above `i128::MAX`) are averaged
/// as `u128` instead of panicking.
///
/// # Panics
///
/// Panics if a bound of `range` is representable as neither `i128` nor
/// `u128`, or if the computed midpoint cannot be converted back to `T`.
fn midpoint<T>(range: &Range<T>) -> T
where
    T: Copy + FromPrimitive + ToPrimitive,
{
    let mid = match (range.start.to_i128(), range.end.to_i128()) {
        (Some(start), Some(end)) => {
            // start + end == 2 * (start & end) + (start ^ end), so this is
            // floor((start + end) / 2) without the overflowing addition.
            let floor = (start & end) + ((start ^ end) >> 1);
            // Integer division rounds toward zero, not down: bump the floor
            // by one when the (virtual) sum is both negative and odd.
            let sum_is_odd = (start ^ end) & 1 == 1;
            let toward_zero = if floor < 0 && sum_is_odd {
                floor + 1
            } else {
                floor
            };
            T::from_i128(toward_zero)
        }
        _ => {
            // At least one bound exceeds `i128::MAX`; only unsigned 128-bit
            // values can do that, so both bounds are expected to fit `u128`.
            let start = range
                .start
                .to_u128()
                .expect("range start fits in neither i128 nor u128");
            let end = range
                .end
                .to_u128()
                .expect("range end fits in neither i128 nor u128");
            T::from_u128((start & end) + ((start ^ end) >> 1))
        }
    };
    mid.expect("midpoint lies between the range bounds, so it is representable as T")
}

// Generate `Search` implementations for `Range<T>` over each integer type listed.
binary_search_impl!(
u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize
);

#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use super::*;

    /// Reference implementation of `Search::upper_bound`: walks `range` in
    /// order and reports where `property` first stops being true.
    ///
    /// Returns `NeverTrue` if the first value fails, `UpperBound(v)` with the
    /// last passing value `v` if a later value fails, and `AlwaysTrue` if no
    /// value fails (including when `range` is empty).
    fn linear_search<T, F>(range: Range<T>, property: F) -> Bisect<T>
    where
        Range<T>: Iterator<Item = T>,
        F: Fn(&T) -> bool,
    {
        let mut prev = None;
        for i in range {
            if property(&i) {
                prev = Some(i);
            } else if let Some(prev) = prev {
                return Bisect::UpperBound(prev);
            } else {
                return Bisect::NeverTrue;
            }
        }
        Bisect::AlwaysTrue
    }

    /// Runs both the linear reference search and the binary search over the
    /// same `range` and `property`, returning `(linear, binary)` results.
    fn search_results<T, F>(
        range: Range<T>,
        property: F,
    ) -> (Bisect<T>, Bisect<T>)
    where
        Range<T>: Iterator<Item = T> + Search<Item = T>,
        T: Clone + PartialEq,
        F: Clone + Fn(&T) -> bool,
    {
        let linear_search_result =
            linear_search(range.clone(), property.clone());
        let binary_search_result = range.upper_bound(property);

        (linear_search_result, binary_search_result)
    }

    /// Exhaustively compares both searches on every sub-range of `0..10`.
    #[test]
    fn example_simple_less_than() {
        let cmp = |value: &usize| *value < 5;

        for i in 0..10 {
            for j in i..10 {
                let (linear, binary) = search_results(i..j, &cmp);
                assert_eq!(linear, binary, "i..j = {:?}", i..j)
            }
        }
    }

    proptest! {
        /// Checks each `Bisect` outcome against the known threshold `target`.
        #[test]
        fn proptest_simple_less_than(target in any::<usize>(), range in any::<Range<usize>>()) {
            match range.upper_bound(|value: &usize| *value < target) {
                // An empty range is `AlwaysTrue` regardless of `target`.
                Bisect::AlwaysTrue => {
                    assert!(range.is_empty() || range.end <= target)
                }
                // `start < target` is false exactly when `target <= start`,
                // so equality must be accepted here.
                Bisect::NeverTrue => assert!(target <= range.start),
                Bisect::UpperBound(bound) => {
                    assert_eq!(target - 1, bound);
                }
            }
        }

        /// Compares binary search against the linear reference on small ranges.
        #[test]
        fn proptest_search_compare(target in any::<u8>(), range in any::<Range<u8>>()) {
            let (linear, binary) = search_results(range, |value: &u8| *value < target);
            assert_eq!(linear, binary);
        }
    }
}
13 changes: 13 additions & 0 deletions test-utils/arrow-proptest-strategies/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[package]
name = "arrow-proptest-strategies"
edition.workspace = true
rust-version.workspace = true
version.workspace = true

[dependencies]
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-schema = { workspace = true }
half = { workspace = true }
proptest = { workspace = true }
strategy-ext = { workspace = true }
Loading