diff --git a/CHANGELOG.md b/CHANGELOG.md index 2afd8623b2..a5f2be6092 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ -## 0.9.1 (unreleased) +## 0.10.0 (unreleased) -- Updated Polars to 0.38.3 +- Updated Polars to 0.39.0 - Added support for writing JSON to string - Added support for writing Parquet to `StringIO` - Added support for cross joins diff --git a/Cargo.lock b/Cargo.lock index efd2419582..6aca1dfc32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1251,7 +1251,7 @@ dependencies = [ "jemallocator", "magnus", "mimalloc", - "polars 0.38.3", + "polars 0.39.0", "polars-core", "polars-parquet", "polars-utils", @@ -1261,9 +1261,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f01006048a264047d6cba081fed8e11adbd69c15956f9e53185a9ac4a541853c" +checksum = "2dbcfc1444984ea01a2109c042b6413bbef2e147bb2a8ecb324237973a41ca56" dependencies = [ "getrandom", "polars-arrow", @@ -1282,9 +1282,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25197f40d71f82b2f79bb394f03e555d3cc1ce4db1dd052c28318721c71e96ad" +checksum = "a446c4f871cc92b0671116bf2550eb6037b3087486a6d94b0d4a25a900505ddc" dependencies = [ "ahash", "atoi", @@ -1330,9 +1330,9 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c354515f73cdbbad03c2bf723fcd68e6825943b3ec503055abc8a8cb08ce46bb" +checksum = "f18711721605134e04cff8bacee231b96ee148e00c7dfef720840c0362692bbe" dependencies = [ "bytemuck", "either", @@ -1346,9 +1346,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f20d3c227186f74aa3c228c64ef72f5a15617322fed30b4323eaf53b25f8e7b" +checksum = "7c2564d8383217bdc6de5a684d9df75a2cbedf23906d0df227d1ef638b6e3f95" dependencies = [ "ahash", "bitflags 2.4.1", @@ -1380,9 +1380,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dd0ce51f8bd620eb8bd376502fe68a2b1a446d5433ecd2e75270b0755ce76" +checksum = "60fdc5165c4447e5afd7ca417b38dccc05ee189dcfabec11957a93e79016f5ab" dependencies = [ "avro-schema", "polars-arrow-format", @@ -1393,9 +1393,9 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b40bef2edcdc58394792c4d779465144283a09ff1836324e7b72df7978a6e992" +checksum = "5e9e26cf2c62e29c638449f2c4beaf79fae19ebb21aa021bad621eb9cf50df31" dependencies = [ "ahash", "async-trait", @@ -1433,9 +1433,9 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef86aca08f10ddc939fe95aabb44e1d2582dcb08b55d4dadb93353ce42adc248" +checksum = "03d4a6674e24d521e0419553748721b1beb8c84665a23dcf6f2551f8199e8597" dependencies = [ "ahash", "chrono", @@ -1454,9 +1454,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c27df26a19d3092298d31d47614ad84dc330c106e38aa8cd53727cd91c07cf56" +checksum = "459a2995976fe67b9a878398bd406b6fe8340d04591db1228a80cec8727a9b58" dependencies = [ "ahash", "bitflags 2.4.1", @@ -1478,9 +1478,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f8a51c3bdc9e7c34196ff6f5c3cb17da134e5aafb1756aaf24b76c7118e63dc" +checksum = "cd1358fa9f07d779bea1aba72c04e9809e8266a944d5297909391b47c7f4f174" dependencies = [ "ahash", "aho-corasick", @@ -1515,9 +1515,9 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8824ee00fbbe83d69553f2711014c50361238d210ed81a7a297695b7db97d42" +checksum = "5c436751e22b87cf301a99c4a7786a35eedf7436d2d40779e575fa16c9f8017c" dependencies = [ "ahash", "async-stream", @@ -1541,9 +1541,9 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c5e2c1f14e81d60cfa9afe4e611a9bad9631a2cb7cd19b7c0094d0dc32f0231" +checksum = "2a8f97a51fbe38f5c70e07fa3c585fc963674811b4ccddb6a13ae54b162c7943" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1566,14 +1566,15 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff48362bd1b078bbbec7e7ba9ec01fea58fee2887db22a8e3deaf78f322fa3c4" +checksum = "1b6e66f9fa6b5f1c15db671d0f4a3713e15e49bd0d141b8e441d800a0b3cecee" dependencies = [ "ahash", "bytemuck", "chrono", "chrono-tz", + "hashbrown 0.14.3", "once_cell", "percent-encoding", "polars-arrow", @@ -1585,6 +1586,7 @@ dependencies = [ "polars-time", "polars-utils", "rayon", + "recursive", "regex", "serde", "smartstring", @@ -1594,9 +1596,9 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63029da56ff6a720b190490bbc7b6263f9b72d1134311b1f381fc8d306d37770" +checksum = "788c92dd5933c2891c827feb6b6247b3fcece866c873cbab8ea3431e19a710d0" dependencies = [ "bytemuck", "polars-arrow", @@ -1606,9 +1608,9 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3652c362959f608d1297196b973d1e3acb508a9562b886ac39bf7606b841052b" +checksum = "aca24b4cbf8718a3579f82200c83f93bc75a00656bc267fc503e5232bcd81ead" dependencies = [ "hex", "polars-arrow", @@ -1624,9 +1626,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86eb74ea6ddfe675aa5c3f33c00dadbe2b85f0e8e3887b85db1fd5a3397267fd" +checksum = "582b0775bcc72d9b5cd0eab876728b75616614c04352653135a9a81dc935fb7f" dependencies = [ "atoi", "chrono", @@ -1645,9 +1647,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.38.3" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "694656a7d2b0cd8f07660dbc8d0fb7a81066ff57a452264907531d805c1e58c4" +checksum = "073417d96545a1f1ed0ed112bb2e99a0f016182b4fe112151f4b1b57abb52000" dependencies = [ "ahash", "bytemuck", @@ -1659,6 +1661,7 @@ dependencies = [ "raw-cpuid", "rayon", "smartstring", + "stacker", "sysinfo", "version_check", ] @@ -1678,6 +1681,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.35" @@ -1786,6 +1798,26 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb" +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.46", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -2012,6 +2044,19 @@ dependencies = [ "log", ] +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/ext/polars/Cargo.toml b/ext/polars/Cargo.toml index 1ae9b4e9ea..e1b24c05a8 100644 --- a/ext/polars/Cargo.toml +++ b/ext/polars/Cargo.toml @@ -15,14 +15,14 @@ ahash = "0.8" chrono = "0.4" either = "1.8" magnus = "0.6" -polars-core = "=0.38.3" -polars-parquet = "=0.38.3" -polars-utils = "=0.38.3" +polars-core = "=0.39.0" +polars-parquet = "=0.39.0" +polars-utils = "=0.39.0" serde_json = "1" smartstring = "1" [dependencies.polars] -version = "=0.38.3" +version = "=0.39.0" features = [ "abs", "approx_unique", diff --git a/ext/polars/src/conversion/anyvalue.rs b/ext/polars/src/conversion/anyvalue.rs index 5659208880..5e8782913e 100644 --- a/ext/polars/src/conversion/anyvalue.rs +++ b/ext/polars/src/conversion/anyvalue.rs @@ -3,8 +3,8 @@ use magnus::{ class, prelude::*, r_hash::ForEach, Float, Integer, IntoValue, RArray, RHash, RString, Ruby, TryConvert, Value, }; -use polars::frame::row::any_values_to_dtype; use polars::prelude::*; +use polars_core::utils::any_values_to_supertype_and_n_dtypes; use super::{struct_dict, ObjectValue, Wrap}; @@ -120,7 +120,8 @@ impl<'s> TryConvert for Wrap> { avs.push(Wrap::::try_convert(item?)?.0) } - let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?; + let (dtype, _n_types) = + any_values_to_supertype_and_n_dtypes(&avs).map_err(RbPolarsErr::from)?; // push the rest avs.reserve(list.len()); diff --git a/ext/polars/src/expr/array.rs b/ext/polars/src/expr/array.rs index 041d4becde..c59baf97b9 100644 --- a/ext/polars/src/expr/array.rs +++ b/ext/polars/src/expr/array.rs @@ -59,8 +59,12 @@ impl RbExpr { self.inner.clone().arr().arg_max().into() } - pub fn arr_get(&self, index: &RbExpr) -> Self { - self.inner.clone().arr().get(index.inner.clone()).into() + pub fn arr_get(&self, index: &RbExpr, null_on_oob: bool) -> Self { + self.inner + .clone() + .arr() + .get(index.inner.clone(), null_on_oob) + .into() } pub fn arr_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self { diff --git a/ext/polars/src/expr/general.rs b/ext/polars/src/expr/general.rs index 98f91c8d72..bbd876ba35 100644 --- a/ext/polars/src/expr/general.rs +++ b/ext/polars/src/expr/general.rs @@ -267,7 +267,7 @@ impl RbExpr { pub fn sort_with(&self, descending: bool, nulls_last: bool) -> Self { self.clone() .inner - .sort_with(SortOptions { + .sort(SortOptions { descending, nulls_last, multithreaded: true, @@ -323,9 +323,28 @@ impl RbExpr { self.clone().inner.gather(idx.inner.clone()).into() } - pub fn sort_by(&self, by: RArray, reverse: Vec) -> RbResult { + pub fn sort_by( + &self, + by: RArray, + descending: Vec, + nulls_last: bool, + multithreaded: bool, + maintain_order: bool, + ) -> RbResult { let by = rb_exprs_to_exprs(by)?; - Ok(self.clone().inner.sort_by(by, reverse).into()) + Ok(self + .clone() + .inner + .sort_by( + by, + SortMultipleOptions { + descending, + nulls_last, + multithreaded, + maintain_order, + }, + ) + .into()) } pub fn backward_fill(&self, limit: FillNullLimit) -> Self { diff --git a/ext/polars/src/expr/list.rs b/ext/polars/src/expr/list.rs index 7b458be7a4..55c8490023 100644 --- a/ext/polars/src/expr/list.rs +++ b/ext/polars/src/expr/list.rs @@ -51,8 +51,12 @@ impl RbExpr { .into() } - pub fn list_get(&self, index: &RbExpr) -> Self { - self.inner.clone().list().get(index.inner.clone()).into() + pub fn list_get(&self, index: &RbExpr, null_on_oob: bool) -> Self { + self.inner + .clone() + .list() + .get(index.inner.clone(), null_on_oob) + .into() } pub fn list_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self { diff --git a/ext/polars/src/expr/string.rs b/ext/polars/src/expr/string.rs index 1dd9745647..6240efa38d 100644 --- a/ext/polars/src/expr/string.rs +++ b/ext/polars/src/expr/string.rs @@ -244,12 +244,12 @@ impl RbExpr { .into() } - pub fn str_to_integer(&self, base: u32, strict: bool) -> Self { + pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self { self.inner .clone() .str() - .to_integer(base, strict) - .with_fmt("str.parse_int") + .to_integer(base.inner.clone(), strict) + .with_fmt("str.to_integer") .into() } diff --git a/ext/polars/src/functions/lazy.rs b/ext/polars/src/functions/lazy.rs index afa0abdcbe..97e34b87f8 100644 --- a/ext/polars/src/functions/lazy.rs +++ b/ext/polars/src/functions/lazy.rs @@ -55,9 +55,24 @@ pub fn rolling_cov( .into() } -pub fn arg_sort_by(by: RArray, descending: Vec) -> RbResult { +pub fn arg_sort_by( + by: RArray, + descending: Vec, + nulls_last: bool, + multithreaded: bool, + maintain_order: bool, +) -> RbResult { let by = rb_exprs_to_exprs(by)?; - Ok(dsl::arg_sort_by(by, &descending).into()) + Ok(dsl::arg_sort_by( + by, + SortMultipleOptions { + descending, + nulls_last, + multithreaded, + maintain_order, + }, + ) + .into()) } pub fn arg_where(condition: &RbExpr) -> RbExpr { diff --git a/ext/polars/src/lazyframe/mod.rs b/ext/polars/src/lazyframe/mod.rs index b53c4a743c..5e3c08e8ee 100644 --- a/ext/polars/src/lazyframe/mod.rs +++ b/ext/polars/src/lazyframe/mod.rs @@ -1,5 +1,5 @@ use magnus::{IntoValue, RArray, RHash, TryConvert, Value}; -use polars::io::RowIndex; +use polars::io::{HiveOptions, RowIndex}; use polars::lazy::frame::LazyFrame; use polars::prelude::*; use std::cell::RefCell; @@ -148,7 +148,8 @@ impl RbLazyFrame { #[allow(clippy::too_many_arguments)] pub fn new_from_parquet( - path: String, + path: Option, + paths: Vec, n_rows: Option, cache: bool, parallel: Wrap, @@ -157,21 +158,43 @@ impl RbLazyFrame { low_memory: bool, use_statistics: bool, hive_partitioning: bool, + hive_schema: Option>, ) -> RbResult { + let parallel = parallel.0; + let hive_schema = hive_schema.map(|s| Arc::new(s.0)); + + let first_path = if let Some(path) = &path { + path + } else { + paths + .first() + .ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))? + }; + let row_index = row_index.map(|(name, offset)| RowIndex { name, offset }); + let hive_options = HiveOptions { + enabled: hive_partitioning, + schema: hive_schema, + }; + let args = ScanArgsParquet { n_rows, cache, - parallel: parallel.0, + parallel, rechunk, row_index, low_memory, - // TODO support cloud options cloud_options: None, use_statistics, - hive_partitioning, + hive_options, }; - let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?; + + let lf = if path.is_some() { + LazyFrame::scan_parquet(first_path, args) + } else { + LazyFrame::scan_parquet_files(Arc::from(paths), args) + } + .map_err(RbPolarsErr::from)?; Ok(lf.into()) } @@ -189,7 +212,8 @@ impl RbLazyFrame { cache, rechunk, row_index, - memmap: memory_map, + memory_map, + cloud_options: None, }; let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?; Ok(lf.into()) @@ -246,17 +270,18 @@ impl RbLazyFrame { pub fn sort( &self, by_column: String, - reverse: bool, + descending: bool, nulls_last: bool, maintain_order: bool, + multithreaded: bool, ) -> Self { let ldf = self.ldf.clone(); ldf.sort( - &by_column, - SortOptions { - descending: reverse, + [&by_column], + SortMultipleOptions { + descending: vec![descending], nulls_last, - multithreaded: true, + multithreaded, maintain_order, }, ) @@ -265,15 +290,24 @@ impl RbLazyFrame { pub fn sort_by_exprs( &self, - by_column: RArray, - reverse: Vec, + by: RArray, + descending: Vec, nulls_last: bool, maintain_order: bool, + multithreaded: bool, ) -> RbResult { let ldf = self.ldf.clone(); - let exprs = rb_exprs_to_exprs(by_column)?; + let exprs = rb_exprs_to_exprs(by)?; Ok(ldf - .sort_by_exprs(exprs, reverse, nulls_last, maintain_order) + .sort_by_exprs( + exprs, + SortMultipleOptions { + descending, + nulls_last, + maintain_order, + multithreaded, + }, + ) .into()) } @@ -432,7 +466,7 @@ impl RbLazyFrame { let closed_window = closed.0; let ldf = self.ldf.clone(); let by = rb_exprs_to_exprs(by)?; - let lazy_gb = ldf.group_by_rolling( + let lazy_gb = ldf.rolling( index_column.inner.clone(), by, RollingGroupOptions { diff --git a/ext/polars/src/lib.rs b/ext/polars/src/lib.rs index 9954e0a978..6731459630 100644 --- a/ext/polars/src/lib.rs +++ b/ext/polars/src/lib.rs @@ -213,7 +213,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("arg_min", method!(RbExpr::arg_min, 0))?; class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?; class.define_method("gather", method!(RbExpr::gather, 1))?; - class.define_method("sort_by", method!(RbExpr::sort_by, 2))?; + class.define_method("sort_by", method!(RbExpr::sort_by, 5))?; class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?; class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?; class.define_method("shift", method!(RbExpr::shift, 2))?; @@ -312,7 +312,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("arr_reverse", method!(RbExpr::arr_reverse, 0))?; class.define_method("arr_arg_min", method!(RbExpr::arr_arg_min, 0))?; class.define_method("arr_arg_max", method!(RbExpr::arr_arg_max, 0))?; - class.define_method("arr_get", method!(RbExpr::arr_get, 1))?; + class.define_method("arr_get", method!(RbExpr::arr_get, 2))?; class.define_method("arr_join", method!(RbExpr::arr_join, 2))?; class.define_method("arr_contains", method!(RbExpr::arr_contains, 1))?; class.define_method("arr_count_matches", method!(RbExpr::arr_count_matches, 1))?; @@ -448,7 +448,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("list_sort", method!(RbExpr::list_sort, 1))?; class.define_method("list_reverse", method!(RbExpr::list_reverse, 0))?; class.define_method("list_unique", method!(RbExpr::list_unique, 1))?; - class.define_method("list_get", method!(RbExpr::list_get, 1))?; + class.define_method("list_get", method!(RbExpr::list_get, 2))?; class.define_method("list_join", method!(RbExpr::list_join, 2))?; class.define_method("list_arg_min", method!(RbExpr::list_arg_min, 0))?; class.define_method("list_arg_max", method!(RbExpr::list_arg_max, 0))?; @@ -554,7 +554,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_singleton_method("arctan2d", function!(functions::lazy::arctan2d, 2))?; class.define_singleton_method("rolling_corr", function!(functions::lazy::rolling_corr, 5))?; class.define_singleton_method("rolling_cov", function!(functions::lazy::rolling_cov, 5))?; - class.define_singleton_method("arg_sort_by", function!(functions::lazy::arg_sort_by, 2))?; + class.define_singleton_method("arg_sort_by", function!(functions::lazy::arg_sort_by, 5))?; class.define_singleton_method("when", function!(functions::whenthen::when, 1))?; class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?; class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?; @@ -689,7 +689,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?; class.define_singleton_method( "new_from_parquet", - function!(RbLazyFrame::new_from_parquet, 9), + function!(RbLazyFrame::new_from_parquet, 11), )?; class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?; class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?; @@ -702,8 +702,8 @@ fn init(ruby: &Ruby) -> RbResult<()> { "optimization_toggle", method!(RbLazyFrame::optimization_toggle, 9), )?; - class.define_method("sort", method!(RbLazyFrame::sort, 4))?; - class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 4))?; + class.define_method("sort", method!(RbLazyFrame::sort, 5))?; + class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?; class.define_method("cache", method!(RbLazyFrame::cache, 0))?; class.define_method("collect", method!(RbLazyFrame::collect, 0))?; class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?; @@ -835,7 +835,7 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("mul", method!(RbSeries::mul, 1))?; class.define_method("div", method!(RbSeries::div, 1))?; class.define_method("rem", method!(RbSeries::rem, 1))?; - class.define_method("sort", method!(RbSeries::sort, 2))?; + class.define_method("sort", method!(RbSeries::sort, 3))?; class.define_method("value_counts", method!(RbSeries::value_counts, 1))?; class.define_method("any", method!(RbSeries::any, 1))?; class.define_method("all", method!(RbSeries::all, 1))?; diff --git a/ext/polars/src/map/dataframe.rs b/ext/polars/src/map/dataframe.rs index 277482a40e..3bcffe312b 100644 --- a/ext/polars/src/map/dataframe.rs +++ b/ext/polars/src/map/dataframe.rs @@ -265,9 +265,9 @@ pub fn apply_lambda_with_rows_output<'a>( // to the row. Before we mutate the row buf again, the reference is dropped. // we only cannot prove it to the compiler. // we still do this because it saves a Vec allocation in a hot loop. - unsafe { &*ptr } + Ok(unsafe { &*ptr }) } - None => &null_row, + None => Ok(&null_row), } } Err(e) => panic!("ruby function failed {}", e), @@ -277,22 +277,30 @@ pub fn apply_lambda_with_rows_output<'a>( // first rows for schema inference let mut buf = Vec::with_capacity(inference_size); buf.push(first_value); - buf.extend((&mut row_iter).take(inference_size).cloned()); - let schema = rows_to_schema_first_non_null(&buf, Some(50)); + for v in (&mut row_iter).take(inference_size) { + buf.push(v?.clone()); + } + + let schema = rows_to_schema_first_non_null(&buf, Some(50))?; if init_null_count > 0 { // Safety: we know the iterators size let iter = unsafe { (0..init_null_count) - .map(|_| &null_row) - .chain(buf.iter()) + .map(|_| Ok(&null_row)) + .chain(buf.iter().map(Ok)) .chain(row_iter) .trust_my_length(df.height()) }; - DataFrame::from_rows_iter_and_schema(iter, &schema) + DataFrame::try_from_rows_iter_and_schema(iter, &schema) } else { // Safety: we know the iterators size - let iter = unsafe { buf.iter().chain(row_iter).trust_my_length(df.height()) }; - DataFrame::from_rows_iter_and_schema(iter, &schema) + let iter = unsafe { + buf.iter() + .map(Ok) + .chain(row_iter) + .trust_my_length(df.height()) + }; + DataFrame::try_from_rows_iter_and_schema(iter, &schema) } } diff --git a/ext/polars/src/series/mod.rs b/ext/polars/src/series/mod.rs index 3cb8082abc..36a8276018 100644 --- a/ext/polars/src/series/mod.rs +++ b/ext/polars/src/series/mod.rs @@ -233,8 +233,18 @@ impl RbSeries { } } - pub fn sort(&self, descending: bool, nulls_last: bool) -> Self { - (self.series.borrow_mut().sort(descending, nulls_last)).into() + pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult { + Ok(self + .series + .borrow_mut() + .sort( + SortOptions::default() + .with_order_descending(descending) + .with_nulls_last(nulls_last) + .with_multithreaded(multithreaded), + ) + .map_err(RbPolarsErr::from)? + .into()) } pub fn value_counts(&self, sorted: bool) -> RbResult { diff --git a/lib/polars/array_expr.rb b/lib/polars/array_expr.rb index fdbc07f332..dccca199c6 100644 --- a/lib/polars/array_expr.rb +++ b/lib/polars/array_expr.rb @@ -353,9 +353,9 @@ def arg_max # # │ [4, 5, 6] ┆ -2 ┆ 5 │ # # │ [7, 8, 9] ┆ 4 ┆ null │ # # └───────────────┴─────┴──────┘ - def get(index) + def get(index, null_on_oob: true) index = Utils.parse_as_expression(index) - Utils.wrap_expr(_rbexpr.arr_get(index)) + Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob)) end # Get the first value of the sub-arrays. diff --git a/lib/polars/data_frame.rb b/lib/polars/data_frame.rb index afd82dddeb..96cbdcecf1 100644 --- a/lib/polars/data_frame.rb +++ b/lib/polars/data_frame.rb @@ -2214,12 +2214,13 @@ def group_by_rolling( # closed: "right" # ).agg(Polars.col("A").alias("A_agg_list")) # # => - # # shape: (3, 4) + # # shape: (4, 4) # # ┌─────────────────┬─────────────────┬─────┬─────────────────┐ # # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │ # # │ --- ┆ --- ┆ --- ┆ --- │ # # │ i64 ┆ i64 ┆ i64 ┆ list[str] │ # # ╞═════════════════╪═════════════════╪═════╪═════════════════╡ + # # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │ # # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │ # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │ # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │ @@ -2674,26 +2675,26 @@ def apply(return_dtype: nil, inference_size: 256, &f) # # ┌─────┬─────┬───────────┐ # # │ a ┆ b ┆ b_squared │ # # │ --- ┆ --- ┆ --- │ - # # │ i64 ┆ i64 ┆ f64 │ + # # │ i64 ┆ i64 ┆ i64 │ # # ╞═════╪═════╪═══════════╡ - # # │ 1 ┆ 2 ┆ 4.0 │ - # # │ 3 ┆ 4 ┆ 16.0 │ - # # │ 5 ┆ 6 ┆ 36.0 │ + # # │ 1 ┆ 2 ┆ 4 │ + # # │ 3 ┆ 4 ┆ 16 │ + # # │ 5 ┆ 6 ┆ 36 │ # # └─────┴─────┴───────────┘ # # @example Replaced # df.with_column(Polars.col("a") ** 2) # # => # # shape: (3, 2) - # # ┌──────┬─────┐ - # # │ a ┆ b │ - # # │ --- ┆ --- │ - # # │ f64 ┆ i64 │ - # # ╞══════╪═════╡ - # # │ 1.0 ┆ 2 │ - # # │ 9.0 ┆ 4 │ - # # │ 25.0 ┆ 6 │ - # # └──────┴─────┘ + # # ┌─────┬─────┐ + # # │ a ┆ b │ + # # │ --- ┆ --- │ + # # │ i64 ┆ i64 │ + # # ╞═════╪═════╡ + # # │ 1 ┆ 2 │ + # # │ 9 ┆ 4 │ + # # │ 25 ┆ 6 │ + # # └─────┴─────┘ def with_column(column) lazy .with_column(column) @@ -3788,16 +3789,16 @@ def select(*exprs, **named_exprs) # df.with_columns((Polars.col("a") ** 2).alias("a^2")) # # => # # shape: (4, 4) - # # ┌─────┬──────┬───────┬──────┐ - # # │ a ┆ b ┆ c ┆ a^2 │ - # # │ --- ┆ --- ┆ --- ┆ --- │ - # # │ i64 ┆ f64 ┆ bool ┆ f64 │ - # # ╞═════╪══════╪═══════╪══════╡ - # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 │ - # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 │ - # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 │ - # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 │ - # # └─────┴──────┴───────┴──────┘ + # # ┌─────┬──────┬───────┬─────┐ + # # │ a ┆ b ┆ c ┆ a^2 │ + # # │ --- ┆ --- ┆ --- ┆ --- │ + # # │ i64 ┆ f64 ┆ bool ┆ i64 │ + # # ╞═════╪══════╪═══════╪═════╡ + # # │ 1 ┆ 0.5 ┆ true ┆ 1 │ + # # │ 2 ┆ 4.0 ┆ true ┆ 4 │ + # # │ 3 ┆ 10.0 ┆ false ┆ 9 │ + # # │ 4 ┆ 13.0 ┆ true ┆ 16 │ + # # └─────┴──────┴───────┴─────┘ # # @example Added columns will replace existing columns with the same name. # df.with_columns(Polars.col("a").cast(Polars::Float64)) @@ -3824,16 +3825,16 @@ def select(*exprs, **named_exprs) # ) # # => # # shape: (4, 6) - # # ┌─────┬──────┬───────┬──────┬──────┬───────┐ - # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │ - # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │ - # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡ - # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │ - # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │ - # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │ - # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │ - # # └─────┴──────┴───────┴──────┴──────┴───────┘ + # # ┌─────┬──────┬───────┬─────┬──────┬───────┐ + # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │ + # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + # # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │ + # # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡ + # # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │ + # # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │ + # # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │ + # # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │ + # # └─────┴──────┴───────┴─────┴──────┴───────┘ # # @example Multiple columns also can be added using positional arguments instead of a list. # df.with_columns( @@ -3843,16 +3844,16 @@ def select(*exprs, **named_exprs) # ) # # => # # shape: (4, 6) - # # ┌─────┬──────┬───────┬──────┬──────┬───────┐ - # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │ - # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │ - # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡ - # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │ - # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │ - # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │ - # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │ - # # └─────┴──────┴───────┴──────┴──────┴───────┘ + # # ┌─────┬──────┬───────┬─────┬──────┬───────┐ + # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │ + # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + # # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │ + # # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡ + # # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │ + # # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │ + # # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │ + # # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │ + # # └─────┴──────┴───────┴─────┴──────┴───────┘ # # @example Use keyword arguments to easily name your expression inputs. # df.with_columns( diff --git a/lib/polars/expr.rb b/lib/polars/expr.rb index bcd9caafec..d25095534a 100644 --- a/lib/polars/expr.rb +++ b/lib/polars/expr.rb @@ -1544,16 +1544,14 @@ def search_sorted(element, side: "any") # # │ one │ # # │ two │ # # └───────┘ - def sort_by(by, reverse: false) - if !by.is_a?(::Array) - by = [by] - end + def sort_by(by, *more_by, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) + by = Utils.parse_as_list_of_expressions(by, *more_by) if !reverse.is_a?(::Array) reverse = [reverse] + elsif by.length != reverse.length + raise ArgumentError, "the length of `reverse` (#{reverse.length}) does not match the length of `by` (#{by.length})" end - by = Utils.selection_to_rbexpr_list(by) - - _from_rbexpr(_rbexpr.sort_by(by, reverse)) + _from_rbexpr(_rbexpr.sort_by(by, reverse, nulls_last, multithreaded, maintain_order)) end # Take values by index. @@ -3515,20 +3513,23 @@ def truediv(other) # @return [Expr] # # @example - # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]}) - # df.select(Polars.col("foo").pow(3)) + # df = Polars::DataFrame.new({"x" => [1, 2, 4, 8]}) + # df.with_columns( + # Polars.col("x").pow(3).alias("cube"), + # Polars.col("x").pow(Polars.col("x").log(2)).alias("x ** xlog2") + # ) # # => - # # shape: (4, 1) - # # ┌──────┐ - # # │ foo │ - # # │ --- │ - # # │ f64 │ - # # ╞══════╡ - # # │ 1.0 │ - # # │ 8.0 │ - # # │ 27.0 │ - # # │ 64.0 │ - # # └──────┘ + # # shape: (4, 3) + # # ┌─────┬──────┬────────────┐ + # # │ x ┆ cube ┆ x ** xlog2 │ + # # │ --- ┆ --- ┆ --- │ + # # │ i64 ┆ i64 ┆ f64 │ + # # ╞═════╪══════╪════════════╡ + # # │ 1 ┆ 1 ┆ 1.0 │ + # # │ 2 ┆ 8 ┆ 2.0 │ + # # │ 4 ┆ 64 ┆ 16.0 │ + # # │ 8 ┆ 512 ┆ 512.0 │ + # # └─────┴──────┴────────────┘ def pow(exponent) self**exponent end @@ -5711,13 +5712,13 @@ def entropy(base: 2, normalize: true) # # ┌────────┐ # # │ values │ # # │ --- │ - # # │ f64 │ + # # │ i64 │ # # ╞════════╡ - # # │ 0.0 │ - # # │ -3.0 │ - # # │ -8.0 │ - # # │ -15.0 │ - # # │ -24.0 │ + # # │ 0 │ + # # │ -3 │ + # # │ -8 │ + # # │ -15 │ + # # │ -24 │ # # └────────┘ def cumulative_eval(expr, min_periods: 1, parallel: false) _from_rbexpr( diff --git a/lib/polars/functions/lazy.rb b/lib/polars/functions/lazy.rb index 14d7325d75..f5a6bf4477 100644 --- a/lib/polars/functions/lazy.rb +++ b/lib/polars/functions/lazy.rb @@ -1264,10 +1264,10 @@ def from_epoch(column, unit: "s", eager: false) # # ┌─────┬─────┬───────┐ # # │ a ┆ a_a ┆ a_txt │ # # │ --- ┆ --- ┆ --- │ - # # │ i64 ┆ f64 ┆ str │ + # # │ i64 ┆ i64 ┆ str │ # # ╞═════╪═════╪═══════╡ - # # │ 2 ┆ 4.0 ┆ 2 │ - # # │ 1 ┆ 1.0 ┆ 1 │ + # # │ 2 ┆ 4 ┆ 2 │ + # # │ 1 ┆ 1 ┆ 1 │ # # └─────┴─────┴───────┘ def sql_expr(sql) if sql.is_a?(::String) diff --git a/lib/polars/lazy_frame.rb b/lib/polars/lazy_frame.rb index 678e0128d5..86c7936037 100644 --- a/lib/polars/lazy_frame.rb +++ b/lib/polars/lazy_frame.rb @@ -105,6 +105,7 @@ def self._scan_parquet( _from_rbldf( RbLazyFrame.new_from_parquet( file, + [], n_rows, cache, parallel, @@ -112,7 +113,8 @@ def self._scan_parquet( Utils._prepare_row_count_args(row_count_name, row_count_offset), low_memory, use_statistics, - hive_partitioning + hive_partitioning, + nil ) ) end @@ -402,16 +404,16 @@ def describe_optimized_plan( # # │ 2 ┆ 7.0 ┆ b │ # # │ 1 ┆ 6.0 ┆ a │ # # └─────┴─────┴─────┘ - def sort(by, reverse: false, nulls_last: false, maintain_order: false) + def sort(by, reverse: false, nulls_last: false, maintain_order: false, multithreaded: true) if by.is_a?(::String) - return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order)) + return _from_rbldf(_ldf.sort(by, reverse, nulls_last, maintain_order, multithreaded)) end if Utils.bool?(reverse) reverse = [reverse] end by = Utils.selection_to_rbexpr_list(by) - _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order)) + _from_rbldf(_ldf.sort_by_exprs(by, reverse, nulls_last, maintain_order, multithreaded)) end # def profile @@ -1525,12 +1527,13 @@ def rolling( # closed: "right" # ).agg(Polars.col("A").alias("A_agg_list")) # # => - # # shape: (3, 4) + # # shape: (4, 4) # # ┌─────────────────┬─────────────────┬─────┬─────────────────┐ # # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │ # # │ --- ┆ --- ┆ --- ┆ --- │ # # │ i64 ┆ i64 ┆ i64 ┆ list[str] │ # # ╞═════════════════╪═════════════════╪═════╪═════════════════╡ + # # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │ # # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │ # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │ # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │ @@ -1893,16 +1896,16 @@ def join( # ).collect # # => # # shape: (4, 6) - # # ┌─────┬──────┬───────┬──────┬──────┬───────┐ - # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │ - # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - # # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │ - # # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡ - # # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │ - # # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │ - # # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │ - # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │ - # # └─────┴──────┴───────┴──────┴──────┴───────┘ + # # ┌─────┬──────┬───────┬─────┬──────┬───────┐ + # # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │ + # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + # # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │ + # # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡ + # # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │ + # # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │ + # # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │ + # # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │ + # # └─────┴──────┴───────┴─────┴──────┴───────┘ def with_columns(*exprs, **named_exprs) structify = ENV.fetch("POLARS_AUTO_STRUCTIFY", "0") != "0" rbexprs = Utils.parse_as_list_of_expressions(*exprs, **named_exprs, __structify: structify) @@ -1967,26 +1970,26 @@ def with_context(other) # # ┌─────┬─────┬───────────┐ # # │ a ┆ b ┆ b_squared │ # # │ --- ┆ --- ┆ --- │ - # # │ i64 ┆ i64 ┆ f64 │ + # # │ i64 ┆ i64 ┆ i64 │ # # ╞═════╪═════╪═══════════╡ - # # │ 1 ┆ 2 ┆ 4.0 │ - # # │ 3 ┆ 4 ┆ 16.0 │ - # # │ 5 ┆ 6 ┆ 36.0 │ + # # │ 1 ┆ 2 ┆ 4 │ + # # │ 3 ┆ 4 ┆ 16 │ + # # │ 5 ┆ 6 ┆ 36 │ # # └─────┴─────┴───────────┘ # # @example # df.with_column(Polars.col("a") ** 2).collect # # => # # shape: (3, 2) - # # ┌──────┬─────┐ - # # │ a ┆ b │ - # # │ --- ┆ --- │ - # # │ f64 ┆ i64 │ - # # ╞══════╪═════╡ - # # │ 1.0 ┆ 2 │ - # # │ 9.0 ┆ 4 │ - # # │ 25.0 ┆ 6 │ - # # └──────┴─────┘ + # # ┌─────┬─────┐ + # # │ a ┆ b │ + # # │ --- ┆ --- │ + # # │ i64 ┆ i64 │ + # # ╞═════╪═════╡ + # # │ 1 ┆ 2 │ + # # │ 9 ┆ 4 │ + # # │ 25 ┆ 6 │ + # # └─────┴─────┘ def with_column(column) with_columns([column]) end diff --git a/lib/polars/list_expr.rb b/lib/polars/list_expr.rb index c9501f8870..80924459e9 100644 --- a/lib/polars/list_expr.rb +++ b/lib/polars/list_expr.rb @@ -365,6 +365,10 @@ def concat(other) # # @param index [Integer] # Index to return per sublist + # @param null_on_oob [Boolean] + # Behavior if an index is out of bounds: + # true -> set as null + # false -> raise an error # # @return [Expr] # @@ -382,9 +386,9 @@ def concat(other) # # │ null │ # # │ 1 │ # # └──────┘ - def get(index) + def get(index, null_on_oob: true) index = Utils.parse_as_expression(index) - Utils.wrap_expr(_rbexpr.list_get(index)) + Utils.wrap_expr(_rbexpr.list_get(index, null_on_oob)) end # Get the value by index in the sublists. diff --git a/lib/polars/series.rb b/lib/polars/series.rb index 8e74f59603..114ab2cce7 100644 --- a/lib/polars/series.rb +++ b/lib/polars/series.rb @@ -1155,13 +1155,13 @@ def entropy(base: Math::E, normalize: false) # s.cumulative_eval(Polars.element.first - Polars.element.last ** 2) # # => # # shape: (5,) - # # Series: 'values' [f64] + # # Series: 'values' [i64] # # [ - # # 0.0 - # # -3.0 - # # -8.0 - # # -15.0 - # # -24.0 + # # 0 + # # -3 + # # -8 + # # -15 + # # -24 # # ] def cumulative_eval(expr, min_periods: 1, parallel: false) super @@ -1567,12 +1567,12 @@ def take_every(n) # # 2 # # 1 # # ] - def sort(reverse: false, nulls_last: false, in_place: false) + def sort(reverse: false, nulls_last: false, multithreaded: true, in_place: false) if in_place - self._s = _s.sort(reverse, nulls_last) + self._s = _s.sort(reverse, nulls_last, multithreaded) self else - Utils.wrap_s(_s.sort(reverse, nulls_last)) + Utils.wrap_s(_s.sort(reverse, nulls_last, multithreaded)) end end diff --git a/lib/polars/string_expr.rb b/lib/polars/string_expr.rb index ed9f0e213a..709a1cc355 100644 --- a/lib/polars/string_expr.rb +++ b/lib/polars/string_expr.rb @@ -1354,6 +1354,7 @@ def explode # # │ null ┆ null │ # # └──────┴────────┘ def to_integer(base: 10, strict: true) + base = Utils.parse_as_expression(base, str_as_lit: false) Utils.wrap_expr(_rbexpr.str_to_integer(base, strict)) end diff --git a/test/lazy_frame_test.rb b/test/lazy_frame_test.rb index ba329182fa..e68a6592e9 100644 --- a/test/lazy_frame_test.rb +++ b/test/lazy_frame_test.rb @@ -68,7 +68,7 @@ def test_pearson_corr def test_describe_optimized_plan df = Polars::DataFrame.new({"a" => [1, 2, 3]}).lazy - assert_match "FAST_PROJECT", df.select("a").describe_optimized_plan + assert_match "PROJECT", df.select("a").describe_optimized_plan end def test_concat