diff --git a/.github/actions/setup-rust/action.yml b/.github/actions/setup-rust/action.yml
index 45f96b3ddd..e3a8a9d4da 100644
--- a/.github/actions/setup-rust/action.yml
+++ b/.github/actions/setup-rust/action.yml
@@ -14,7 +14,7 @@ runs:
       if: steps.rustup-cache.outputs.cache-hit != 'true'
       with:
         toolchain: "${{ steps.rust-version.outputs.version }}"
-        components: clippy, rustfmt
+        components: clippy, rustfmt, miri
 
     - name: Rust Dependency Cache
       uses: Swatinem/rust-cache@v2
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 72027219d6..9f69203947 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,13 +10,15 @@ permissions:
   actions: read
   contents: read
 
+env:
+  CARGO_TERM_COLOR: always
+
 jobs:
   build:
     name: 'build'
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-
       - uses: ./.github/actions/cleanup
 
       - name: Install Protoc
@@ -40,9 +42,32 @@ jobs:
         run: cargo test --workspace --all-features
       - name: Rust Build
         run: cargo build --all-features --all-targets
+
       - name: License Check
         run: cargo install --locked cargo-deny && cargo deny check
+      - uses: rustsec/audit-check@v1.4.1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
       - name: Pytest - PyVortex
         run: rye run pytest --benchmark-disable test/
         working-directory: pyvortex/
 
+
+  miri:
+    name: 'miri'
+    runs-on: ubuntu-latest
+    env:
+      RUST_BACKTRACE: 1
+      MIRIFLAGS: -Zmiri-strict-provenance -Zmiri-symbolic-alignment-check -Zmiri-backtrace=full
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: ./.github/actions/cleanup
+
+      - name: Install Protoc
+        uses: arduino/setup-protoc@v3
+
+      - uses: ./.github/actions/setup-rust
+
+      - name: Run tests with Miri
+        run: cargo miri test
diff --git a/.github/workflows/rustsec-audit.yml b/.github/workflows/rustsec-audit.yml
deleted file mode 100644
index 4c179e4457..0000000000
--- a/.github/workflows/rustsec-audit.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-name: RustSec audit
-on:
-  schedule:
-    - cron: '0 0 * * *'
-jobs:
-  audit:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: rustsec/audit-check@v1.4.1
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/encodings/fastlanes/src/bitpacking/compress.rs b/encodings/fastlanes/src/bitpacking/compress.rs
index b07a81ae1d..0a3f5480bc 100644
--- a/encodings/fastlanes/src/bitpacking/compress.rs
+++ b/encodings/fastlanes/src/bitpacking/compress.rs
@@ -320,8 +320,13 @@ mod test {
     }
 
     #[test]
-    fn test_compression_roundtrip() {
+    fn test_compression_roundtrip_fast() {
         compression_roundtrip(125);
+    }
+
+    #[test]
+    #[cfg_attr(miri, ignore)] // This test is too slow on miri
+    fn test_compression_roundtrip() {
         compression_roundtrip(1024);
         compression_roundtrip(10_000);
         compression_roundtrip(10_240);
diff --git a/encodings/fastlanes/src/bitpacking/compute/take.rs b/encodings/fastlanes/src/bitpacking/compute/take.rs
index de0540b586..ff355b662d 100644
--- a/encodings/fastlanes/src/bitpacking/compute/take.rs
+++ b/encodings/fastlanes/src/bitpacking/compute/take.rs
@@ -172,6 +172,7 @@ mod test {
     }
 
     #[test]
+    #[cfg_attr(miri, ignore)] // This test is too slow on miri
     fn take_random_indices() {
         let num_patches: usize = 128;
         let values = (0..u16::MAX as u32 + num_patches as u32).collect::<Vec<_>>();
diff --git a/vortex-array/src/array/varbinview/compute.rs b/vortex-array/src/array/varbinview/compute.rs
index 938236d7d9..a966a8981d 100644
--- a/vortex-array/src/array/varbinview/compute.rs
+++ b/vortex-array/src/array/varbinview/compute.rs
@@ -6,7 +6,7 @@ use crate::array::varbinview::{VarBinViewArray, VIEW_SIZE};
 use crate::compute::unary::ScalarAtFn;
 use crate::compute::{slice, ArrayCompute, SliceFn};
 use crate::validity::ArrayValidity;
-use crate::{Array, ArrayDType, IntoArray, IntoArrayData};
+use crate::{Array, ArrayDType, IntoArray};
 
 impl ArrayCompute for VarBinViewArray {
     fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
@@ -32,9 +32,7 @@ impl ScalarAtFn for VarBinViewArray {
 impl SliceFn for VarBinViewArray {
     fn slice(&self, start: usize, stop: usize) -> VortexResult<Array> {
         Ok(Self::try_new(
-            slice(&self.views(), start * VIEW_SIZE, stop * VIEW_SIZE)?
-                .into_array_data()
-                .into_array(),
+            slice(&self.views(), start * VIEW_SIZE, stop * VIEW_SIZE)?,
             (0..self.metadata().data_lens.len())
                 .map(|i| self.bytes(i))
                 .collect::<Vec<_>>(),
diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs
index 3aef8f6cca..12addc709e 100644
--- a/vortex-array/src/array/varbinview/mod.rs
+++ b/vortex-array/src/array/varbinview/mod.rs
@@ -31,13 +31,12 @@ mod variants;
 
 #[derive(Clone, Copy, Debug)]
 #[repr(C, align(8))]
-struct Inlined {
+pub struct Inlined {
     size: u32,
     data: [u8; BinaryView::MAX_INLINED_SIZE],
 }
 
 impl Inlined {
-    #[allow(dead_code)]
     pub fn new(value: &[u8]) -> Self {
         assert!(
             value.len() <= BinaryView::MAX_INLINED_SIZE,
@@ -55,7 +54,7 @@
 
 #[derive(Clone, Copy, Debug)]
 #[repr(C, align(8))]
-struct Ref {
+pub struct Ref {
     size: u32,
     prefix: [u8; 4],
     buffer_index: u32,
@@ -105,7 +104,8 @@ impl Debug for BinaryView {
     }
 }
 
-pub const VIEW_SIZE: usize = mem::size_of::<BinaryView>();
+// reminder: views are 16 bytes with 8-byte alignment
+pub(crate) const VIEW_SIZE: usize = mem::size_of::<BinaryView>();
 
 impl_encoding!("vortex.varbinview", 5u16, VarBinView);
 
@@ -140,10 +140,9 @@ impl VarBinViewArray {
             vortex_bail!("incorrect validity {:?}", validity);
         }
 
-        let length = views.len() / VIEW_SIZE;
-
+        let num_views = views.len() / VIEW_SIZE;
         let metadata = VarBinViewMetadata {
-            validity: validity.to_metadata(views.len() / VIEW_SIZE)?,
+            validity: validity.to_metadata(num_views)?,
             data_lens: data.iter().map(|a| a.len()).collect_vec(),
         };
 
@@ -154,7 +153,7 @@ impl VarBinViewArray {
             children.push(a)
         }
 
-        Self::try_from_parts(dtype, length, metadata, children.into(), StatsSet::new())
+        Self::try_from_parts(dtype, num_views, metadata, children.into(), StatsSet::new())
     }
 
     fn view_slice(&self) -> &[BinaryView] {
@@ -358,7 +357,7 @@ impl<'a> FromIterator<Option<&'a str>> for VarBinViewArray {
 mod test {
     use vortex_scalar::Scalar;
 
-    use crate::array::varbinview::VarBinViewArray;
+    use crate::array::varbinview::{BinaryView, Inlined, Ref, VarBinViewArray, VIEW_SIZE};
     use crate::compute::slice;
     use crate::compute::unary::scalar_at;
     use crate::{Canonical, IntoArray, IntoCanonical};
@@ -404,4 +403,13 @@ mod test {
         assert_eq!(scalar_at(&var_bin, 0).unwrap(), Scalar::from("string1"));
         assert_eq!(scalar_at(&var_bin, 1).unwrap(), Scalar::from("string2"));
     }
+
+    #[test]
+    pub fn binary_view_size_and_alignment() {
+        assert_eq!(std::mem::size_of::<Inlined>(), 16);
+        assert_eq!(std::mem::size_of::<Ref>(), 16);
+        assert_eq!(std::mem::size_of::<BinaryView>(), VIEW_SIZE);
+        assert_eq!(std::mem::size_of::<BinaryView>(), 16);
+        assert_eq!(std::mem::align_of::<BinaryView>(), 8);
+    }
 }
diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs
index 5c7f4f3c79..5a6adb6195 100644
--- a/vortex-array/src/lib.rs
+++ b/vortex-array/src/lib.rs
@@ -8,7 +8,6 @@
 //! Every data type recognized by Vortex also has a canonical physical encoding format, which
 //! arrays can be [canonicalized](Canonical) into for ease of access in compute functions.
 //!
-
 use std::fmt::{Debug, Display, Formatter};
 use std::future::ready;
 
diff --git a/vortex-datafusion/src/lib.rs b/vortex-datafusion/src/lib.rs
index 4d1d5f5d25..af4cd1be7f 100644
--- a/vortex-datafusion/src/lib.rs
+++ b/vortex-datafusion/src/lib.rs
@@ -510,6 +510,7 @@ mod test {
     }
 
     #[tokio::test]
+    #[cfg_attr(miri, ignore)]
     async fn test_datafusion_pushdown() {
         let ctx = SessionContext::new();
 
@@ -538,6 +539,7 @@ mod test {
     }
 
     #[tokio::test]
+    #[cfg_attr(miri, ignore)]
     async fn test_datafusion_no_pushdown() {
         let ctx = SessionContext::new();
 
diff --git a/vortex-sampling-compressor/tests/smoketest.rs b/vortex-sampling-compressor/tests/smoketest.rs
index 4449be47ba..fe21044485 100644
--- a/vortex-sampling-compressor/tests/smoketest.rs
+++ b/vortex-sampling-compressor/tests/smoketest.rs
@@ -24,6 +24,7 @@ use vortex_sampling_compressor::compressors::CompressorRef;
 use vortex_sampling_compressor::{CompressConfig, SamplingCompressor};
 
 #[test]
+#[cfg_attr(miri, ignore)] // This test is too slow on miri
 pub fn smoketest_compressor() {
     let compressor = SamplingCompressor::new_with_options(
         HashSet::from([