diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index e6fc9d81df67..05a60664201f 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -23,6 +23,7 @@
 use std::cmp::min;
 use std::collections::HashMap;
 use std::io::{BufWriter, Write};
+use std::mem::size_of;
 use std::sync::Arc;
 
 use flatbuffers::FlatBufferBuilder;
@@ -1430,7 +1431,13 @@ fn reencode_offsets<O: OffsetSizeTrait>(
     let end_offset = offset_slice.last().unwrap();
 
     let offsets = match start_offset.as_usize() {
-        0 => offsets.clone(),
+        0 => {
+            // Already zero-based: reuse the buffer, trimmed to this array's window.
+            // `slice_with_length` takes a byte offset and a byte length (not an end
+            // position), so the length must not include `data.offset()`.
+            let size = size_of::<O>();
+            offsets.slice_with_length(data.offset() * size, (data.len() + 1) * size)
+        }
         _ => offset_slice.iter().map(|x| *x - *start_offset).collect(),
     };
 
@@ -2517,6 +2524,39 @@ mod tests {
         ls.finish()
     }
 
+    /// Builds a `List<List<UInt32>>` array whose first 999 outer lists each hold one
+    /// empty inner list, followed by 9,001 outer lists of ten four-element inner lists.
+    /// The inner offsets therefore stay at zero across the first 999 entries.
+    fn generate_nested_list_data_starting_at_zero<O: OffsetSizeTrait>() -> GenericListArray<O> {
+        let mut ls =
+            GenericListBuilder::<O, _>::new(GenericListBuilder::<O, _>::new(UInt32Builder::new()));
+
+        for _i in 0..999 {
+            ls.values().append(true);
+            ls.append(true);
+        }
+
+        for j in 0..10 {
+            for value in [j, j, j, j] {
+                ls.values().values().append_value(value);
+            }
+            ls.values().append(true)
+        }
+        ls.append(true);
+
+        for i in 0..9_000 {
+            for j in 0..10 {
+                for value in [i + j, i + j, i + j, i + j] {
+                    ls.values().values().append_value(value);
+                }
+                ls.values().append(true)
+            }
+            ls.append(true);
+        }
+
+        ls.finish()
+    }
+
     fn generate_map_array_data() -> MapArray {
         let keys_builder = UInt32Builder::new();
         let values_builder = UInt32Builder::new();
@@ -2608,6 +2648,21 @@ mod tests {
         roundtrip_ensure_sliced_smaller(in_batch, 1000);
     }
 
+    /// Round-trips a nested list whose inner offsets begin at zero, via
+    /// `roundtrip_ensure_sliced_smaller` (defined elsewhere in this module).
+    #[test]
+    fn encode_nested_lists_starting_at_zero() {
+        let inner_int = Arc::new(Field::new("item", DataType::UInt32, true));
+        let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_int), true));
+        let list_field = Field::new("val", DataType::List(inner_list_field), true);
+        let schema = Arc::new(Schema::new(vec![list_field]));
+
+        let values = Arc::new(generate_nested_list_data_starting_at_zero::<i32>());
+
+        let in_batch = RecordBatch::try_new(schema, vec![values]).unwrap();
+        roundtrip_ensure_sliced_smaller(in_batch, 1);
+    }
+
     #[test]
     fn encode_map_array() {
         let keys = Arc::new(Field::new("keys", DataType::UInt32, false));