diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 06e288a..7ce5b6c 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -45,9 +45,9 @@ jobs: - name: Build python packages run: | # arro3-core needs to be first - uv run maturin develop -m arro3-core/Cargo.toml - uv run maturin develop -m arro3-compute/Cargo.toml - uv run maturin develop -m arro3-io/Cargo.toml + uv run maturin dev -m arro3-core/Cargo.toml + uv run maturin dev -m arro3-compute/Cargo.toml + uv run maturin dev -m arro3-io/Cargo.toml - name: Deploy docs env: diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 88e730e..79efac6 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -62,9 +62,9 @@ jobs: - name: Build rust submodules run: | # Note: core module must be first, because it's depended on by others - uv run maturin develop -m arro3-core/Cargo.toml - uv run maturin develop -m arro3-compute/Cargo.toml - uv run maturin develop -m arro3-io/Cargo.toml + uv run maturin dev -m arro3-core/Cargo.toml + uv run maturin dev -m arro3-compute/Cargo.toml + uv run maturin dev -m arro3-io/Cargo.toml - name: Run python tests run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 23afd73..9d8cfa5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ This is the changelog for arro3. pyo3-arrow has a separate changelog. 
+## [0.4.4] - 2024-12-09 + +### Bug fixes :bug: + +- Raise IndexError and KeyError for invalid column access https://github.com/kylebarron/arro3/pull/272 + ## [0.4.3] - 2024-11-21 ### What's Changed diff --git a/Cargo.lock b/Cargo.lock index 9cfdd51..d11c098 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -72,7 +72,7 @@ dependencies = [ [[package]] name = "arro3-compute" -version = "0.4.3" +version = "0.4.4" dependencies = [ "arrow", "arrow-array", @@ -87,7 +87,7 @@ dependencies = [ [[package]] name = "arro3-core" -version = "0.4.3" +version = "0.4.4" dependencies = [ "arrow-array", "arrow-buffer", @@ -98,7 +98,7 @@ dependencies = [ [[package]] name = "arro3-io" -version = "0.4.3" +version = "0.4.4" dependencies = [ "arrow", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 29ce133..203034f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ resolver = "2" [workspace.package] # Package version for arro3-*, not for pyo3-arrow -version = "0.4.3" +version = "0.4.4" authors = ["Kyle Barron "] edition = "2021" homepage = "https://kylebarron.dev/arro3" diff --git a/DEVELOP.md b/DEVELOP.md index 60e22ab..0db3c85 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -4,9 +4,9 @@ rm -rf .venv uv sync # Note: need to install core first because others depend on core -uv run maturin develop -m arro3-core/Cargo.toml -uv run maturin develop -m arro3-compute/Cargo.toml -uv run maturin develop -m arro3-io/Cargo.toml +uv run maturin dev -m arro3-core/Cargo.toml +uv run maturin dev -m arro3-compute/Cargo.toml +uv run maturin dev -m arro3-io/Cargo.toml uv run mkdocs serve ``` diff --git a/pyo3-arrow/src/input.rs b/pyo3-arrow/src/input.rs index 1006dd7..f6de32f 100644 --- a/pyo3-arrow/src/input.rs +++ b/pyo3-arrow/src/input.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use arrow_array::{Datum, RecordBatchIterator, RecordBatchReader}; use arrow_schema::{ArrowError, Field, FieldRef, Fields, Schema, SchemaRef}; -use pyo3::exceptions::PyValueError; +use pyo3::exceptions::{PyIndexError, 
PyKeyError, PyValueError};
 use pyo3::prelude::*;
 
 use crate::array_reader::PyArrayReader;
@@ -162,10 +162,21 @@ pub(crate) enum FieldIndexInput {
 }
 
 impl FieldIndexInput {
-    pub fn into_position(self, schema: &Schema) -> PyArrowResult<usize> {
+    /// Resolve this input to a column position, validating it against the given schema.
+    ///
+    /// This will raise a KeyError if the provided name does not exist, or an IndexError if the
+    /// provided integer index is out of bounds.
+    pub fn into_position(self, schema: &Schema) -> PyResult<usize> {
         match self {
-            Self::Name(name) => Ok(schema.index_of(name.as_ref())?),
-            Self::Position(position) => Ok(position),
+            Self::Name(name) => schema
+                .index_of(name.as_ref())
+                .map_err(|err| PyKeyError::new_err(err.to_string())),
+            Self::Position(position) => {
+                if position >= schema.fields().len() {
+                    return Err(PyIndexError::new_err("Index out of range"));
+                }
+                Ok(position)
+            }
         }
     }
 }
diff --git a/tests/core/test_table.py b/tests/core/test_table.py
index 3d3dc90..bdaa678 100644
--- a/tests/core/test_table.py
+++ b/tests/core/test_table.py
@@ -10,11 +10,18 @@ def test_table_getitem():
     a = pa.chunked_array([[1, 2, 3, 4]])
     b = pa.chunked_array([["a", "b", "c", "d"]])
     table = Table.from_pydict({"a": a, "b": b})
+
     assert a == pa.chunked_array(table["a"])
     assert b == pa.chunked_array(table["b"])
     assert a == pa.chunked_array(table[0])
     assert b == pa.chunked_array(table[1])
 
+    with pytest.raises(KeyError):
+        table["foo"]
+
+    with pytest.raises(IndexError):
+        table[10]
+
 
 def test_table_from_arrays():
     a = pa.array([1, 2, 3, 4])