Added data_page_size option to write_parquet method
ankane committed Mar 18, 2024
1 parent b70d33f commit 5ac38fd
Showing 4 changed files with 12 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -3,6 +3,7 @@
- Updated Polars to 0.38.2
- Added support for writing JSON to string
- Added support for writing Parquet to `StringIO`
+- Added `data_page_size` option to `write_parquet` method

## 0.9.0 (2024-03-03)

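For context, a minimal usage sketch of the new option from Ruby (the frame contents and output path are illustrative, not part of this commit):

```ruby
require "polars-df"

df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["x", "y", "z"]})

# Pass the new option; 1 MiB matches the documented default data page size.
df.write_parquet("example.parquet", data_page_size: 1024 * 1024)
```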
3 changes: 3 additions & 0 deletions ext/polars/src/dataframe.rs
@@ -591,6 +591,7 @@ impl RbDataFrame {
compression_level: Option<i32>,
statistics: bool,
row_group_size: Option<usize>,
+data_page_size: Option<usize>,
) -> RbResult<()> {
let compression = parse_parquet_compression(&compression, compression_level)?;

@@ -600,6 +601,7 @@
.with_compression(compression)
.with_statistics(statistics)
.with_row_group_size(row_group_size)
+.with_data_page_size(data_page_size)
.finish(&mut self.df.borrow_mut())
.map_err(RbPolarsErr::from)?;
} else {
@@ -608,6 +610,7 @@
.with_compression(compression)
.with_statistics(statistics)
.with_row_group_size(row_group_size)
+.with_data_page_size(data_page_size)
.finish(&mut self.df.borrow_mut())
.map_err(RbPolarsErr::from)?;
}
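Both write paths above now forward the option to the underlying Parquet writer, so from Ruby it should apply whether the target is a file path or an IO-like object such as `StringIO` (per the changelog entry above). A hedged sketch of the in-memory case; the buffer-encoding handling here is an assumption, not taken from this commit:

```ruby
require "polars-df"
require "stringio"

df = Polars::DataFrame.new({"n" => (1..1000).to_a})

# Assumed: a binary-encoded StringIO avoids transcoding issues when Parquet bytes are written.
io = StringIO.new
io.set_encoding(Encoding::BINARY)
df.write_parquet(io, data_page_size: 256 * 1024)

puts io.string.bytesize
```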
2 changes: 1 addition & 1 deletion ext/polars/src/lib.rs
@@ -74,7 +74,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
class.define_method("add", method!(RbDataFrame::add, 1))?;
class.define_method("sub", method!(RbDataFrame::sub, 1))?;
class.define_method("div", method!(RbDataFrame::div, 1))?;
14 changes: 7 additions & 7 deletions lib/polars/data_frame.rb
@@ -1060,7 +1060,7 @@ def write_ipc(file, compression: "uncompressed")

# Write to Apache Parquet file.
#
-# @param file [String]
+# @param file [String, StringIO]
# File path to which the file should be written.
# @param compression ["lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"]
# Choose "zstd" for good compression performance.
@@ -1077,18 +1077,18 @@ def write_ipc(file, compression: "uncompressed")
# @param statistics [Boolean]
# Write statistics to the parquet headers. This requires extra compute.
# @param row_group_size [Integer, nil]
-#   Size of the row groups in number of rows.
-#   If `nil` (default), the chunks of the DataFrame are
-#   used. Writing in smaller chunks may reduce memory pressure and improve
-#   writing speeds.
+#   Size of the row groups in number of rows. Defaults to 512^2 rows.
+# @param data_page_size [Integer, nil]
+#   Size of the data page in bytes. Defaults to 1024^2 bytes.
#
# @return [nil]
def write_parquet(
file,
compression: "zstd",
compression_level: nil,
statistics: false,
-row_group_size: nil
+row_group_size: nil,
+data_page_size: nil
)
if compression.nil?
compression = "uncompressed"
@@ -1098,7 +1098,7 @@ def write_parquet(
end

_df.write_parquet(
-file, compression, compression_level, statistics, row_group_size
+file, compression, compression_level, statistics, row_group_size, data_page_size
)
end

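Putting the documented options together, a sketch that exercises the updated signature (file name and sizes are illustrative; the stated defaults are 512^2 rows per row group and 1024^2 bytes per data page):

```ruby
require "polars-df"

df = Polars::DataFrame.new({"id" => (1..100_000).to_a})

# Smaller-than-default row groups and data pages, with column statistics enabled.
df.write_parquet(
  "example.parquet",
  compression: "zstd",
  statistics: true,
  row_group_size: 10_000,
  data_page_size: 64 * 1024
)

# Read the file back to confirm the round trip.
p Polars.read_parquet("example.parquet").shape
```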
