diff --git a/src/main.rs b/src/main.rs index 3266d62..ebd0ea5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -68,6 +68,10 @@ impl ParquetReader { fn info(&self) -> &ParquetInfo { &self.parquet_info } + + fn bytes(&self) -> &Bytes { + &self.bytes + } } impl AsyncFileReader for ParquetReader { @@ -448,7 +452,7 @@ fn App() -> impl IntoView { view! {
- +
diff --git a/src/metadata.rs b/src/metadata.rs index 2eba20f..fb7d4b0 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -1,7 +1,8 @@ use leptos::prelude::*; #[component] -pub fn MetadataSection(parquet_info: super::ParquetInfo) -> impl IntoView { +pub fn MetadataSection(parquet_reader: super::ParquetReader) -> impl IntoView { + let parquet_info = parquet_reader.info().clone(); let created_by = parquet_info .metadata .file_metadata() @@ -108,8 +109,8 @@ pub fn MetadataSection(parquet_info: super::ParquetInfo) -> impl IntoView { Some( view! {
- +
}, ) diff --git a/src/row_group_column.rs b/src/row_group_column.rs index 82d52d6..b729b0d 100644 --- a/src/row_group_column.rs +++ b/src/row_group_column.rs @@ -1,5 +1,7 @@ +use std::sync::Arc; + use leptos::prelude::*; -use parquet::file::statistics::Statistics; +use parquet::file::{reader::SerializedPageReader, statistics::Statistics}; use crate::format_rows; @@ -101,11 +103,11 @@ fn stats_to_string(stats: Option) -> String { } #[component] -pub fn RowGroupColumn(parquet_info: super::ParquetInfo) -> impl IntoView { +pub fn RowGroupColumn(parquet_reader: super::ParquetReader) -> impl IntoView { let (selected_row_group, set_selected_row_group) = signal(0); let (selected_column, set_selected_column) = signal(0); - let parquet_info_clone = parquet_info.clone(); + let parquet_info_clone = parquet_reader.info().clone(); let row_group_info = move || { let rg = parquet_info_clone .metadata @@ -117,12 +119,14 @@ pub fn RowGroupColumn(parquet_info: super::ParquetInfo) -> impl IntoView { (compressed_size, uncompressed_size, num_rows, compression) }; - let parquet_info_clone = parquet_info.clone(); + let parquet_info_clone = parquet_reader.info().clone(); + let parquet_bytes = parquet_reader.bytes().clone(); let column_info = move || { let rg = parquet_info_clone .metadata .row_group(selected_row_group.get()); let col = rg.column(selected_column.get()); + let row_count = rg.num_rows(); let compressed_size = col.compressed_size() as f64 / 1_048_576.0; let uncompressed_size = col.uncompressed_size() as f64 / 1_048_576.0; let compression = col.compression(); @@ -130,6 +134,20 @@ pub fn RowGroupColumn(parquet_info: super::ParquetInfo) -> impl IntoView { let has_bloom_filter = col.bloom_filter_offset().is_some(); let encodings = col.encodings().clone(); + let parquet_bytes = Arc::new(parquet_bytes.clone()); + let page_reader = + SerializedPageReader::new(parquet_bytes, col, row_count as usize, None).unwrap(); + + let mut page_info = Vec::new(); + for page in page_reader { + if let Ok(page) = page { + let page_type = page.page_type(); + let page_size = page.buffer().len() as f64 / 1024.0; + let num_values = page.num_values(); + page_info.push((page_type, page_size, num_values)); + } + } + ( compressed_size, uncompressed_size, @@ -137,6 +155,7 @@ pub fn RowGroupColumn(parquet_info: super::ParquetInfo) -> impl IntoView { statistics, has_bloom_filter, encodings, + page_info, ) }; @@ -156,7 +175,7 @@ pub fn RowGroupColumn(parquet_info: super::ParquetInfo) -> impl IntoView { .set(event_target_value(&ev).parse::().unwrap_or(0)) } > - {(0..parquet_info.row_group_count) + {(0..parquet_reader.info().row_group_count) .map(|i| { view! {
} }}