diff --git a/ext/polars/src/lib.rs b/ext/polars/src/lib.rs index c2f5194376..774e0738c8 100644 --- a/ext/polars/src/lib.rs +++ b/ext/polars/src/lib.rs @@ -48,125 +48,64 @@ type RbResult = Result; #[magnus::init] fn init(ruby: &Ruby) -> RbResult<()> { let module = define_module("Polars")?; - module.define_singleton_method( - "_dtype_cols", - function!(crate::functions::lazy::dtype_cols2, 1), - )?; module.define_singleton_method( "_concat_lf_diagonal", - function!(crate::functions::lazy::concat_lf_diagonal, 4), - )?; - module.define_singleton_method( - "_rb_duration", - function!(crate::functions::lazy::duration, 9), - )?; - module.define_singleton_method( - "_concat_df", - function!(crate::functions::eager::concat_df, 1), - )?; - module.define_singleton_method( - "_concat_lf", - function!(crate::functions::lazy::concat_lf, 4), + function!(functions::lazy::concat_lf_diagonal, 4), )?; + module.define_singleton_method("_rb_duration", function!(functions::lazy::duration, 9))?; + module.define_singleton_method("_concat_df", function!(functions::eager::concat_df, 1))?; + module.define_singleton_method("_concat_lf", function!(functions::lazy::concat_lf, 4))?; module.define_singleton_method( "_concat_df_diagonal", - function!(crate::functions::eager::concat_df_diagonal, 1), + function!(functions::eager::concat_df_diagonal, 1), )?; module.define_singleton_method( "_concat_df_horizontal", - function!(crate::functions::eager::concat_df_horizontal, 1), + function!(functions::eager::concat_df_horizontal, 1), )?; module.define_singleton_method( "_concat_series", - function!(crate::functions::eager::concat_series, 1), - )?; - module.define_singleton_method( - "_ipc_schema", - function!(crate::functions::io::read_ipc_schema, 1), + function!(functions::eager::concat_series, 1), )?; + module.define_singleton_method("_ipc_schema", function!(functions::io::read_ipc_schema, 1))?; module.define_singleton_method( "_parquet_schema", - function!(crate::functions::io::read_parquet_schema, 1), - )?; - module.define_singleton_method( - "_collect_all", - function!(crate::functions::lazy::collect_all, 1), - )?; - module.define_singleton_method( - "_rb_date_range", - function!(crate::functions::range::date_range, 6), - )?; - module.define_singleton_method( - "_coalesce_exprs", - function!(crate::functions::lazy::coalesce, 1), - )?; - module.define_singleton_method( - "_all_horizontal", - function!(crate::functions::aggregation::all_horizontal, 1), - )?; - module.define_singleton_method( - "_any_horizontal", - function!(crate::functions::aggregation::any_horizontal, 1), - )?; - module.define_singleton_method( - "_max_horizontal", - function!(crate::functions::aggregation::max_horizontal, 1), - )?; - module.define_singleton_method( - "_min_horizontal", - function!(crate::functions::aggregation::min_horizontal, 1), - )?; - module.define_singleton_method( - "_sum_horizontal", - function!(crate::functions::aggregation::sum_horizontal, 1), - )?; - module.define_singleton_method( - "_mean_horizontal", - function!(crate::functions::aggregation::mean_horizontal, 1), + function!(functions::io::read_parquet_schema, 1), )?; + module.define_singleton_method("_collect_all", function!(functions::lazy::collect_all, 1))?; + module.define_singleton_method("_rb_date_range", function!(functions::range::date_range, 6))?; module.define_singleton_method( "_dtype_str_repr", - function!(crate::functions::misc::dtype_str_repr, 1), - )?; - module.define_singleton_method( - "_as_struct", - function!(crate::functions::lazy::as_struct, 1), - )?; - module.define_singleton_method( - "_arg_where", - function!(crate::functions::lazy::arg_where, 1), - )?; - module.define_singleton_method( - "_get_idx_type", - function!(crate::functions::meta::get_idx_type, 0), + function!(functions::misc::dtype_str_repr, 1), )?; + module.define_singleton_method("_get_idx_type", function!(functions::meta::get_idx_type, 0))?; module.define_singleton_method( "_threadpool_size", - function!(crate::functions::meta::threadpool_size, 0), + function!(functions::meta::threadpool_size, 0), )?; module.define_singleton_method( "_enable_string_cache", - function!(crate::functions::string_cache::enable_string_cache, 0), + function!(functions::string_cache::enable_string_cache, 0), )?; module.define_singleton_method( "_disable_string_cache", - function!(crate::functions::string_cache::disable_string_cache, 0), + function!(functions::string_cache::disable_string_cache, 0), )?; module.define_singleton_method( "_using_string_cache", - function!(crate::functions::string_cache::using_string_cache, 0), + function!(functions::string_cache::using_string_cache, 0), )?; module.define_singleton_method( "_set_float_fmt", - function!(crate::functions::meta::set_float_fmt, 1), + function!(functions::meta::set_float_fmt, 1), )?; module.define_singleton_method( "_get_float_fmt", - function!(crate::functions::meta::get_float_fmt, 0), + function!(functions::meta::get_float_fmt, 0), )?; module.define_singleton_method( "_set_random_seed", - function!(crate::functions::random::set_random_seed, 1), + function!(functions::random::set_random_seed, 1), )?; let class = module.define_class("RbBatchedCsv", ruby.class_object())?; @@ -649,56 +588,61 @@ fn init(ruby: &Ruby) -> RbResult<()> { class.define_method("name_to_uppercase", method!(RbExpr::name_to_uppercase, 0))?; // maybe add to different class - class.define_singleton_method("col", function!(crate::functions::lazy::col, 1))?; - class.define_singleton_method("len", function!(crate::functions::lazy::len, 0))?; - class.define_singleton_method("first", function!(crate::functions::lazy::first, 0))?; - class.define_singleton_method("last", function!(crate::functions::lazy::last, 0))?; - class.define_singleton_method("cols", function!(crate::functions::lazy::cols, 1))?; - class.define_singleton_method("fold", function!(crate::functions::lazy::fold, 3))?; - class.define_singleton_method("cum_fold", function!(crate::functions::lazy::cum_fold, 4))?; - class.define_singleton_method("lit", function!(crate::functions::lazy::lit, 2))?; - class.define_singleton_method( - "int_range", - function!(crate::functions::range::int_range, 4), - )?; - class.define_singleton_method( - "int_ranges", - function!(crate::functions::range::int_ranges, 4), - )?; - class.define_singleton_method("repeat", function!(crate::functions::lazy::repeat, 3))?; - class.define_singleton_method( - "pearson_corr", - function!(crate::functions::lazy::pearson_corr, 3), - )?; + let class = module.define_module("Plr")?; + class.define_singleton_method("dtype_cols", function!(functions::lazy::dtype_cols2, 1))?; + class.define_singleton_method("col", function!(functions::lazy::col, 1))?; + class.define_singleton_method("len", function!(functions::lazy::len, 0))?; + class.define_singleton_method("first", function!(functions::lazy::first, 0))?; + class.define_singleton_method("last", function!(functions::lazy::last, 0))?; + class.define_singleton_method("cols", function!(functions::lazy::cols, 1))?; + class.define_singleton_method("fold", function!(functions::lazy::fold, 3))?; + class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 4))?; + class.define_singleton_method("lit", function!(functions::lazy::lit, 2))?; + class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?; + class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?; + class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?; + class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 3))?; class.define_singleton_method( "spearman_rank_corr", - function!(crate::functions::lazy::spearman_rank_corr, 4), + function!(functions::lazy::spearman_rank_corr, 4), + )?; + class.define_singleton_method("sql_expr", function!(functions::lazy::sql_expr, 1))?; + class.define_singleton_method("cov", function!(functions::lazy::cov, 3))?; + class.define_singleton_method("arctan2", function!(functions::lazy::arctan2, 2))?; + class.define_singleton_method("arctan2d", function!(functions::lazy::arctan2d, 2))?; + class.define_singleton_method("rolling_corr", function!(functions::lazy::rolling_corr, 5))?; + class.define_singleton_method("rolling_cov", function!(functions::lazy::rolling_cov, 5))?; + class.define_singleton_method("arg_sort_by", function!(functions::lazy::arg_sort_by, 2))?; + class.define_singleton_method("when", function!(functions::whenthen::when, 1))?; + class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?; + class.define_singleton_method("concat_lst", function!(functions::lazy::concat_list, 1))?; + class.define_singleton_method( + "all_horizontal", + function!(functions::aggregation::all_horizontal, 1), )?; - class.define_singleton_method("sql_expr", function!(crate::functions::lazy::sql_expr, 1))?; - class.define_singleton_method("cov", function!(crate::functions::lazy::cov, 3))?; - class.define_singleton_method("arctan2", function!(crate::functions::lazy::arctan2, 2))?; - class.define_singleton_method("arctan2d", function!(crate::functions::lazy::arctan2d, 2))?; class.define_singleton_method( - "rolling_corr", - function!(crate::functions::lazy::rolling_corr, 5), + "any_horizontal", + function!(functions::aggregation::any_horizontal, 1), )?; class.define_singleton_method( - "rolling_cov", - function!(crate::functions::lazy::rolling_cov, 5), + "max_horizontal", + function!(functions::aggregation::max_horizontal, 1), )?; class.define_singleton_method( - "arg_sort_by", - function!(crate::functions::lazy::arg_sort_by, 2), + "min_horizontal", + function!(functions::aggregation::min_horizontal, 1), )?; - class.define_singleton_method("when", function!(crate::functions::whenthen::when, 1))?; class.define_singleton_method( - "concat_str", - function!(crate::functions::lazy::concat_str, 3), + "sum_horizontal", + function!(functions::aggregation::sum_horizontal, 1), )?; class.define_singleton_method( - "concat_lst", - function!(crate::functions::lazy::concat_list, 1), + "mean_horizontal", + function!(functions::aggregation::mean_horizontal, 1), )?; + class.define_singleton_method("as_struct", function!(functions::lazy::as_struct, 1))?; + class.define_singleton_method("coalesce_exprs", function!(functions::lazy::coalesce, 1))?; + class.define_singleton_method("arg_where", function!(functions::lazy::arg_where, 1))?; let class = module.define_class("RbLazyFrame", ruby.class_object())?; class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?; diff --git a/lib/polars/lazy_functions.rb b/lib/polars/lazy_functions.rb index 356ff0b583..209caaefc9 100644 --- a/lib/polars/lazy_functions.rb +++ b/lib/polars/lazy_functions.rb @@ -8,11 +8,11 @@ def col(name, *more_names) if Utils.strlike?(name) names_str = [name] names_str.concat(more_names) - return Utils.wrap_expr(RbExpr.cols(names_str.map(&:to_s))) + return Utils.wrap_expr(Plr.cols(names_str.map(&:to_s))) elsif Utils.is_polars_dtype(name) dtypes = [name] dtypes.concat(more_names) - return Utils.wrap_expr(_dtype_cols(dtypes)) + return Utils.wrap_expr(Plr.dtype_cols(dtypes)) else msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}." raise TypeError, msg @@ -20,20 +20,20 @@ def col(name, *more_names) end if Utils.strlike?(name) - Utils.wrap_expr(RbExpr.col(name.to_s)) + Utils.wrap_expr(Plr.col(name.to_s)) elsif Utils.is_polars_dtype(name) - Utils.wrap_expr(_dtype_cols([name])) + Utils.wrap_expr(Plr.dtype_cols([name])) elsif name.is_a?(::Array) names = Array(name) if names.empty? - return Utils.wrap_expr(RbExpr.cols(names)) + return Utils.wrap_expr(Plr.cols(names)) end item = names[0] if Utils.strlike?(item) - Utils.wrap_expr(RbExpr.cols(names.map(&:to_s))) + Utils.wrap_expr(Plr.cols(names.map(&:to_s))) elsif Utils.is_polars_dtype(item) - Utils.wrap_expr(_dtype_cols(names)) + Utils.wrap_expr(Plr.dtype_cols(names)) else msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}." raise TypeError, msg @@ -114,7 +114,7 @@ def element def count(*columns) if columns.empty? warn "`Polars.count` is deprecated. Use `Polars.length` instead." - return Utils.wrap_expr(RbExpr.len._alias("count")) + return Utils.wrap_expr(Plr.len._alias("count")) end col(*columns).count @@ -194,7 +194,7 @@ def cum_count(*columns, reverse: false) # # │ 2 ┆ null ┆ null ┆ foo │ # # └───────┴──────┴──────┴─────┘ def len - Utils.wrap_expr(RbExpr.len) + Utils.wrap_expr(Plr.len) end alias_method :length, :len @@ -719,7 +719,7 @@ def approx_n_unique(*columns) # # └─────┴─────┘ def first(*columns) if columns.empty? - return Utils.wrap_expr(RbExpr.first) + return Utils.wrap_expr(Plr.first) end col(*columns).first @@ -779,7 +779,7 @@ def first(*columns) # # └─────┴─────┘ def last(*columns) if columns.empty? - return Utils.wrap_expr(RbExpr.last) + return Utils.wrap_expr(Plr.last) end col(*columns).last @@ -899,7 +899,7 @@ def lit(value, dtype: nil, allow_object: nil) elsif value.is_a?(Polars::Series) name = value.name value = value._s - e = Utils.wrap_expr(RbExpr.lit(value, allow_object)) + e = Utils.wrap_expr(Plr.lit(value, allow_object)) if name == "" return e end @@ -907,10 +907,10 @@ def lit(value, dtype: nil, allow_object: nil) elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array) return lit(Series.new("", value)) elsif dtype - return Utils.wrap_expr(RbExpr.lit(value, allow_object)).cast(dtype) + return Utils.wrap_expr(Plr.lit(value, allow_object)).cast(dtype) end - Utils.wrap_expr(RbExpr.lit(value, allow_object)) + Utils.wrap_expr(Plr.lit(value, allow_object)) end # Cumulatively sum all values. @@ -1015,9 +1015,9 @@ def corr( b = Utils.parse_as_expression(b) if method == "pearson" - Utils.wrap_expr(RbExpr.pearson_corr(a, b, ddof)) + Utils.wrap_expr(Plr.pearson_corr(a, b, ddof)) elsif method == "spearman" - Utils.wrap_expr(RbExpr.spearman_rank_corr(a, b, ddof, propagate_nans)) + Utils.wrap_expr(Plr.spearman_rank_corr(a, b, ddof, propagate_nans)) else msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}" raise ArgumentError, msg @@ -1092,7 +1092,7 @@ def pearson_corr(a, b, ddof: 1) def cov(a, b, ddof: 1) a = Utils.parse_as_expression(a) b = Utils.parse_as_expression(b) - Utils.wrap_expr(RbExpr.cov(a, b, ddof)) + Utils.wrap_expr(Plr.cov(a, b, ddof)) end # def map @@ -1111,7 +1111,7 @@ def fold(acc, f, exprs) end exprs = Utils.selection_to_rbexpr_list(exprs) - Utils.wrap_expr(RbExpr.fold(acc._rbexpr, f, exprs)) + Utils.wrap_expr(Plr.fold(acc._rbexpr, f, exprs)) end # def reduce @@ -1144,7 +1144,7 @@ def cum_fold(acc, f, exprs, include_init: false) end exprs = Utils.selection_to_rbexpr_list(exprs) - Utils.wrap_expr(RbExpr.cum_fold(acc._rbexpr, f, exprs, include_init)) + Utils.wrap_expr(Plr.cum_fold(acc._rbexpr, f, exprs, include_init)) end alias_method :cumfold, :cum_fold @@ -1354,7 +1354,7 @@ def int_range(start, stop = nil, step: 1, eager: false, dtype: nil) stop = Utils.parse_as_expression(stop) dtype ||= Int64 dtype = dtype.to_s if dtype.is_a?(Symbol) - result = Utils.wrap_expr(RbExpr.int_range(start, stop, step, dtype)).alias("arange") + result = Utils.wrap_expr(Plr.int_range(start, stop, step, dtype)).alias("arange") if eager return select(result).to_series @@ -1384,7 +1384,7 @@ def arg_sort_by(exprs, reverse: false) reverse = [reverse] * exprs.length end exprs = Utils.selection_to_rbexpr_list(exprs) - Utils.wrap_expr(RbExpr.arg_sort_by(exprs, reverse)) + Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse)) end alias_method :argsort_by, :arg_sort_by @@ -1515,7 +1515,7 @@ def duration( # # └─────┴──────┴──────┴───────────────┘ def concat_str(exprs, sep: "", ignore_nulls: false) exprs = Utils.selection_to_rbexpr_list(exprs) - return Utils.wrap_expr(RbExpr.concat_str(exprs, sep, ignore_nulls)) + return Utils.wrap_expr(Plr.concat_str(exprs, sep, ignore_nulls)) end # Format expressions as a string. @@ -1576,7 +1576,7 @@ def format(fstring, *args) # @return [Expr] def concat_list(exprs) exprs = Utils.selection_to_rbexpr_list(exprs) - Utils.wrap_expr(RbExpr.concat_lst(exprs)) + Utils.wrap_expr(Plr.concat_lst(exprs)) end # Collect multiple LazyFrames at the same time. @@ -1706,7 +1706,7 @@ def struct(exprs, eager: false) Polars.select(struct(exprs, eager: false)).to_series end exprs = Utils.selection_to_rbexpr_list(exprs) - Utils.wrap_expr(_as_struct(exprs)) + Utils.wrap_expr(Plr.as_struct(exprs)) end # Repeat a single value n times. @@ -1731,7 +1731,7 @@ def repeat(value, n, dtype: nil, eager: false, name: nil) end value = Utils.parse_as_expression(value, str_as_lit: true) - expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr, dtype)) + expr = Utils.wrap_expr(Plr.repeat(value, n._rbexpr, dtype)) if !name.nil? expr = expr.alias(name) end @@ -1772,7 +1772,7 @@ def arg_where(condition, eager: false) condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series else condition = Utils.expr_to_lit_or_expr(condition, str_to_lit: true) - Utils.wrap_expr(_arg_where(condition._rbexpr)) + Utils.wrap_expr(Plr.arg_where(condition._rbexpr)) end end @@ -1811,7 +1811,7 @@ def coalesce(exprs, *more_exprs) if more_exprs.any? exprs.concat(Utils.selection_to_rbexpr_list(more_exprs)) end - Utils.wrap_expr(_coalesce_exprs(exprs)) + Utils.wrap_expr(Plr.coalesce_exprs(exprs)) end # Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time). @@ -1893,7 +1893,7 @@ def from_epoch(column, unit: "s", eager: false) # # └─────┴─────┴─────────┘ def when(expr) expr = Utils.expr_to_lit_or_expr(expr) - pw = RbExpr.when(expr._rbexpr) + pw = Plr.when(expr._rbexpr) When.new(pw) end @@ -1930,7 +1930,7 @@ def when(expr) # # └───────┴───────┴─────┴───────┘ def all_horizontal(*exprs) rbexprs = Utils.parse_as_list_of_expressions(*exprs) - Utils.wrap_expr(_all_horizontal(rbexprs)) + Utils.wrap_expr(Plr.all_horizontal(rbexprs)) end # Compute the bitwise OR horizontally across columns. @@ -1966,7 +1966,7 @@ def all_horizontal(*exprs) # # └───────┴───────┴─────┴───────┘ def any_horizontal(*exprs) rbexprs = Utils.parse_as_list_of_expressions(*exprs) - Utils.wrap_expr(_any_horizontal(rbexprs)) + Utils.wrap_expr(Plr.any_horizontal(rbexprs)) end # Get the maximum value horizontally across columns. @@ -1999,7 +1999,7 @@ def any_horizontal(*exprs) # # └─────┴──────┴─────┴─────┘ def max_horizontal(*exprs) rbexprs = Utils.parse_as_list_of_expressions(*exprs) - Utils.wrap_expr(_max_horizontal(rbexprs)) + Utils.wrap_expr(Plr.max_horizontal(rbexprs)) end # Get the minimum value horizontally across columns. @@ -2032,7 +2032,7 @@ def max_horizontal(*exprs) # # └─────┴──────┴─────┴─────┘ def min_horizontal(*exprs) rbexprs = Utils.parse_as_list_of_expressions(*exprs) - Utils.wrap_expr(_min_horizontal(rbexprs)) + Utils.wrap_expr(Plr.min_horizontal(rbexprs)) end # Sum all values horizontally across columns. @@ -2065,7 +2065,7 @@ def min_horizontal(*exprs) # # └─────┴──────┴─────┴─────┘ def sum_horizontal(*exprs) rbexprs = Utils.parse_as_list_of_expressions(*exprs) - Utils.wrap_expr(_sum_horizontal(rbexprs)) + Utils.wrap_expr(Plr.sum_horizontal(rbexprs)) end # Compute the mean of all values horizontally across columns. @@ -2098,7 +2098,7 @@ def sum_horizontal(*exprs) # # └─────┴──────┴─────┴──────┘ def mean_horizontal(*exprs) rbexprs = Utils.parse_as_list_of_expressions(*exprs) - Utils.wrap_expr(_mean_horizontal(rbexprs)) + Utils.wrap_expr(Plr.mean_horizontal(rbexprs)) end # Cumulatively sum all values horizontally across columns. @@ -2177,9 +2177,9 @@ def cum_sum_horizontal(*exprs) # # └─────┴─────┴───────┘ def sql_expr(sql) if sql.is_a?(::String) - Utils.wrap_expr(RbExpr.sql_expr(sql)) + Utils.wrap_expr(Plr.sql_expr(sql)) else - sql.map { |q| Utils.wrap_expr(RbExpr.sql_expr(q)) } + sql.map { |q| Utils.wrap_expr(Plr.sql_expr(q)) } end end end