Skip to content

Commit

Permalink
Added support for writing JSON to string - closes #51
Browse files Browse the repository at this point in the history
  • Loading branch information
ankane committed Mar 5, 2024
1 parent cde4c54 commit 280393a
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 20 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.9.1 (unreleased)

- Added support for writing JSON to string

## 0.9.0 (2024-03-03)

See the [upgrade guide](https://docs.pola.rs/releases/upgrade/0.20/)
Expand Down
171 changes: 160 additions & 11 deletions ext/polars/src/file.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,169 @@
use magnus::{exception, prelude::*, Error, RString, Value};
use polars::io::mmap::MmapBytesReader;
use std::fs::File;
use std::io::Cursor;
use std::io;
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
use std::path::PathBuf;

use magnus::{exception, prelude::*, Error, RString, Value};
use polars::io::mmap::MmapBytesReader;

use crate::error::RbPolarsErr;
use crate::prelude::resolve_homedir;
use crate::RbResult;

pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
let str_slice = PathBuf::try_convert(f)?;
let f = if truncate {
File::create(str_slice)
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
/// Handle to a Ruby object that is used as a file-like object.
///
/// The wrapped `Value` is stored as-is. The `Read`, `Write` and `Seek`
/// implementations in this file forward to the Ruby object's `read`,
/// `write`/`flush` and `seek` methods.
#[derive(Clone)]
pub struct RbFileLikeObject {
// The underlying Ruby object that receives the forwarded method calls.
inner: Value,
}

/// Wraps a `Value`, and implements read, seek, and write for it.
impl RbFileLikeObject {
/// Creates an instance of a `RbFileLikeObject` from a `Value`.
/// To assert the object has the required methods methods,
/// instantiate it with `RbFileLikeObject::require`
pub fn new(object: Value) -> Self {
RbFileLikeObject { inner: object }
}

#[allow(dead_code)]
pub fn as_buffer(&self) -> std::io::Cursor<Vec<u8>> {
let data = self.as_file_buffer().into_inner();
std::io::Cursor::new(data)
}

#[allow(dead_code)]
pub fn as_file_buffer(&self) -> Cursor<Vec<u8>> {
let bytes = self
.inner
.funcall::<_, _, RString>("read", ())
.expect("no read method found");

let buf = unsafe { bytes.as_slice() }.to_vec();

Cursor::new(buf)
}

/// Same as `RbFileLikeObject::new`, but validates that the underlying
/// ruby object has a `read`, `write`, and `seek` methods in respect to parameters.
/// Will return a `TypeError` if object does not have `read`, `seek`, and `write` methods.
pub fn with_requirements(object: Value, read: bool, write: bool, seek: bool) -> RbResult<Self> {
if read && !object.respond_to("read", false)? {
return Err(Error::new(
exception::type_error(),
"Object does not have a .read() method.",
));
}

if seek && !object.respond_to("seek", false)? {
return Err(Error::new(
exception::type_error(),
"Object does not have a .seek() method.",
));
}

if write && !object.respond_to("write", false)? {
return Err(Error::new(
exception::type_error(),
"Object does not have a .write() method.",
));
}

Ok(RbFileLikeObject::new(object))
}
}

/// Converts a Ruby-side [`Error`] into an [`io::Error`] so it can be returned
/// from Rust's I/O traits.
///
/// The result has kind [`io::ErrorKind::Other`] and carries the original
/// error's string representation as its message.
fn rberr_to_io_err(e: Error) -> io::Error {
    let message = e.to_string();
    io::Error::new(io::ErrorKind::Other, message)
}

/// Forwards Rust reads to the Ruby object's `read(length)` method.
impl Read for RbFileLikeObject {
    /// Requests up to `buf.len()` bytes from the Ruby object and copies them
    /// into the front of `buf`.
    ///
    /// Returns the number of bytes copied. `Ok(0)` signals end of stream:
    /// Ruby's `read(length)` returns `nil` at EOF for a positive length, so
    /// `nil` is mapped to a zero-length read instead of a conversion error.
    ///
    /// # Errors
    ///
    /// Returns an error if the Ruby call raises, if the result is neither
    /// `nil` nor a string, or if the object returns more bytes than requested.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, io::Error> {
        let chunk = self
            .inner
            .funcall::<_, _, Option<RString>>("read", (buf.len(),))
            .map_err(rberr_to_io_err)?;

        let bytes = match chunk {
            Some(bytes) => bytes,
            // `nil` means the stream is exhausted.
            None => return Ok(0),
        };

        // SAFETY: the borrowed bytes are copied into `buf` immediately, with
        // no call back into Ruby while the borrow is alive.
        let data = unsafe { bytes.as_slice() };

        // A well-behaved `read` never returns more than it was asked for;
        // refuse to overrun the caller's buffer if it does.
        let dest = buf.get_mut(..data.len()).ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                "read returned more bytes than requested",
            )
        })?;
        dest.copy_from_slice(data);

        Ok(data.len())
    }
}

impl Write for RbFileLikeObject {
fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
let rbbytes = RString::from_slice(buf);

let number_bytes_written = self
.inner
.funcall::<_, _, usize>("write", (rbbytes,))
.map_err(rberr_to_io_err)?;

Ok(number_bytes_written)
}

fn flush(&mut self) -> Result<(), io::Error> {
self.inner
.funcall::<_, _, Value>("flush", ())
.map_err(rberr_to_io_err)?;

Ok(())
}
}

/// Forwards Rust seeks to the Ruby object's `seek(offset, whence)` method.
impl Seek for RbFileLikeObject {
    /// Moves the Ruby object's position and returns the new absolute offset.
    ///
    /// `whence` is passed as 0, 1 or 2 for start-, current- and end-relative
    /// seeks respectively.
    ///
    /// Ruby's `IO#seek` and `StringIO#seek` return `0` rather than the new
    /// position, so the position is read back with `tell` when the object
    /// responds to it. Objects without `tell` fall back to the value returned
    /// by `seek` itself.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` if a start-relative offset does not fit in an
    /// `i64`, and propagates any error raised by the Ruby calls.
    fn seek(&mut self, pos: SeekFrom) -> Result<u64, io::Error> {
        let (whence, offset) = match pos {
            SeekFrom::Start(i) => {
                // Checked conversion: a plain `as` cast would silently wrap
                // large offsets into negative values.
                let offset = i64::try_from(i)
                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
                (0, offset)
            }
            SeekFrom::Current(i) => (1, i),
            SeekFrom::End(i) => (2, i),
        };

        let seek_result: u64 = self
            .inner
            .funcall("seek", (offset, whence))
            .map_err(rberr_to_io_err)?;

        let has_tell = self
            .inner
            .respond_to("tell", false)
            .map_err(rberr_to_io_err)?;

        if has_tell {
            self.inner
                .funcall::<_, _, u64>("tell", ())
                .map_err(rberr_to_io_err)
        } else {
            Ok(seek_result)
        }
    }
}

/// Marker trait for values that can be read from, written to and seeked.
///
/// It has no methods of its own; it exists so that both backends can be
/// returned behind a single `Box<dyn FileLike>`.
pub trait FileLike: Read + Write + Seek {}

// A native file handle.
impl FileLike for File {}
// A Ruby object wrapped by `RbFileLikeObject`.
impl FileLike for RbFileLikeObject {}

/// A file source or sink that is backed either by a Ruby object or by a
/// native file opened from Rust.
pub enum EitherRustRubyFile {
// A Ruby object wrapped in `RbFileLikeObject`.
Rb(RbFileLikeObject),
// A native file wrapped in a `BufReader`.
Rust(BufReader<File>),
}

/// Turns a Ruby value into either a native file or a wrapped Ruby object.
///
/// If `rb_f` converts to a Ruby string it is treated as a file path: the path
/// is passed through `resolve_homedir`, the file is opened, and the handle is
/// returned wrapped in a `BufReader`. Any other value is wrapped in an
/// `RbFileLikeObject` after checking that it has the required methods.
///
/// # Arguments
/// * `rb_f` - a Ruby string holding a path, or a Ruby file-like object.
/// * `truncate` - when `true`, a path is opened with `File::create` and a
///   Ruby object must respond to `write`; when `false`, a path is opened with
///   `polars_utils::open_file` and a Ruby object must respond to `read` and
///   `seek`.
///
/// # Errors
/// Returns an error if the string cannot be borrowed as `&str`, if opening
/// or creating the file fails, or if the Ruby object lacks a required method.
pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFile> {
if let Ok(rstring) = RString::try_convert(rb_f) {
let s = unsafe { rstring.as_str() }?;
let file_path = std::path::Path::new(&s);
// presumably expands a leading `~` in the path; verify against `resolve_homedir`
let file_path = resolve_homedir(file_path);
let f = if truncate {
File::create(file_path).map_err(RbPolarsErr::io)?
} else {
polars_utils::open_file(&file_path).map_err(RbPolarsErr::from)?
};
let reader = BufReader::new(f);
Ok(EitherRustRubyFile::Rust(reader))
} else {
// NOTE(review): the next three lines use `str_slice`, which is not defined
// in this function. They look like lines from the previous implementation
// left in by the diff view; confirm before relying on them.
File::open(str_slice).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
};
Ok(f)
// Require `write` when truncating; otherwise require `read` and `seek`.
let f = RbFileLikeObject::with_requirements(rb_f, !truncate, truncate, !truncate)?;
Ok(EitherRustRubyFile::Rb(f))
}
}

pub fn get_file_like(f: Value, truncate: bool) -> RbResult<Box<dyn FileLike>> {
use EitherRustRubyFile::*;
match get_either_file(f, truncate)? {
Rb(f) => Ok(Box::new(f)),
Rust(f) => Ok(Box::new(f.into_inner())),
}
}

pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
Expand Down
68 changes: 59 additions & 9 deletions lib/polars/data_frame.rb
Original file line number Diff line number Diff line change
Expand Up @@ -814,26 +814,52 @@ def to_series(index = 0)

# Serialize to JSON representation.
#
# When `file` is `nil`, the JSON is returned as a UTF-8 string. When `file`
# is a `StringIO`, the JSON is written to it. Any other value is handed to
# the native writer; path-like values are first passed through
# `Utils.normalise_filepath`.
#
# @param file [String, StringIO, nil]
#   File path to which the result should be written, a `StringIO` to write
#   into, or `nil` to return the JSON as a string.
# @param pretty [Boolean]
#   Pretty serialize json.
# @param row_oriented [Boolean]
#   Write to row oriented json. This is slower, but more common.
#
# @see #write_ndjson
# @return [String, nil]
#   The JSON string when `file` is `nil`; otherwise `nil`.
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "foo" => [1, 2, 3],
#       "bar" => [6, 7, 8]
#     }
#   )
#   df.write_json
#   # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
#
# @example
#   df.write_json(row_oriented: true)
#   # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
def write_json(
file,
file = nil,
pretty: false,
row_oriented: false
)
if Utils.pathlike?(file)
file = Utils.normalise_filepath(file)
end

# NOTE(review): this unconditional call and the bare `file,` parameter above
# look like pre-change lines left in by the diff view; confirm.
_df.write_json(file, pretty, row_oriented)
to_string_io = !file.nil? && file.is_a?(StringIO)
if file.nil? || to_string_io
# Serialize into an in-memory binary buffer first.
buf = StringIO.new
buf.set_encoding(Encoding::BINARY)
_df.write_json(buf, pretty, row_oriented)
json_bytes = buf.string

# Re-tag the collected bytes as UTF-8 (no transcoding takes place).
json_str = json_bytes.force_encoding(Encoding::UTF_8)
if to_string_io
# Caller supplied a StringIO: hand the result over to it.
file.write(json_str)
else
# No target given: return the JSON itself.
return json_str
end
else
# Any other target is passed straight to the native writer.
_df.write_json(file, pretty, row_oriented)
end
nil
end

Expand All @@ -843,12 +869,36 @@ def write_json(
# File path to which the result should be written.
#
# @return [String, nil]
#   The NDJSON string when `file` is `nil`; otherwise `nil`.
# NOTE(review): the `def write_ndjson(file)` line below and the unconditional
# `_df.write_ndjson(file)` call further down look like pre-change lines left
# in by the diff view; confirm.
def write_ndjson(file)
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "foo" => [1, 2, 3],
#       "bar" => [6, 7, 8]
#     }
#   )
#   df.write_ndjson()
#   # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
def write_ndjson(file = nil)
if Utils.pathlike?(file)
file = Utils.normalise_filepath(file)
end

_df.write_ndjson(file)
to_string_io = !file.nil? && file.is_a?(StringIO)
if file.nil? || to_string_io
# Serialize into an in-memory binary buffer first.
buf = StringIO.new
buf.set_encoding(Encoding::BINARY)
_df.write_ndjson(buf)
json_bytes = buf.string

# Re-tag the collected bytes as UTF-8 (no transcoding takes place).
json_str = json_bytes.force_encoding(Encoding::UTF_8)
if to_string_io
# Caller supplied a StringIO: hand the result over to it.
file.write(json_str)
else
# No target given: return the NDJSON itself.
return json_str
end
else
# Any other target is passed straight to the native writer.
_df.write_ndjson(file)
end
nil
end

Expand Down

0 comments on commit 280393a

Please sign in to comment.