Skip to content

Commit

Permalink
build: update to df 43 (#646)
Browse files Browse the repository at this point in the history
  • Loading branch information
tshauck authored Dec 21, 2024
1 parent 70640a6 commit cb133bc
Show file tree
Hide file tree
Showing 28 changed files with 1,361 additions and 1,296 deletions.
2,507 changes: 1,231 additions & 1,276 deletions Cargo.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ members = [
resolver = "2"

[workspace.dependencies]
arrow = { version = "52.2.0" }
arrow = { version = "53.3.0" }
async-trait = "0.1.82"
datafusion = { version = "41", features = ["compression", "parquet"] }
datafusion = { version = "43", features = ["compression", "parquet"] }
futures = "0.3"
noodles = { version = "0.86" }
object_store = { version = "0.10.2" }
object_store = { version = "0.11.0" }
tokio = { version = "1", features = ["io-util"] }
tokio-util = { version = "0.7.13", features = ["compat"] }
url = { version = "2.5.2" }
Expand Down
2 changes: 1 addition & 1 deletion exon/exon-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ version.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
datafusion-cli = { version = "41" }
datafusion-cli = { version = "43" }
clap = { version = "4", features = ["derive", "cargo"] }
datafusion = { workspace = true }
object_store = { workspace = true }
Expand Down
4 changes: 2 additions & 2 deletions exon/exon-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ impl CliSessionContext for ExonCLISession {

#[derive(Debug, Parser, PartialEq)]
struct Args {
#[arg(value_enum, long, default_value = "Table")]
#[arg(value_enum, long, default_value = "table")]
format: PrintFormat,

#[clap(
Expand All @@ -89,7 +89,7 @@ struct Args {

#[clap(
long,
help = "The max number of rows to display for 'Table' format\n[default: 40] [possible values: numbers(0/10/...), inf(no limit)]",
help = "The max number of rows to display for 'table' format\n[default: 40] [possible values: numbers(0/10/...), inf(no limit)]",
default_value = "40"
)]
maxrows: MaxRows,
Expand Down
2 changes: 1 addition & 1 deletion exon/exon-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ exon-fcs = { path = "../exon-fcs", version = "0.32.3", optional = true }
exon-genbank = { path = "../exon-genbank", version = "0.32.3", optional = true }
exon-gff = { path = "../exon-gff", version = "0.32.3" }
exon-gtf = { path = "../exon-gtf", version = "0.32.3" }
deltalake = { version = "0.19.0", features = [
deltalake = { version = "0.22.3", features = [
"datafusion",
"deltalake-aws",
"s3",
Expand Down
5 changes: 3 additions & 2 deletions exon/exon-core/benches/reverse_complement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ fn generate_dna_sequences(num_sequences: usize) -> Vec<String> {
fn bench_reverse_complement(c: &mut Criterion) {
let mut group = c.benchmark_group("reverse_complement");

let sequences = generate_dna_sequences(500);
let size = 500;
let sequences = generate_dna_sequences(size);
let sequence_array = Arc::new(StringArray::from(sequences));
let sequence_column = ColumnarValue::Array(sequence_array.clone());

Expand All @@ -51,7 +52,7 @@ fn bench_reverse_complement(c: &mut Criterion) {
&sequence_column,
|b, s| {
b.iter(|| {
rc.invoke(&[s.clone()]).unwrap();
rc.invoke_batch(&[s.clone()], size).unwrap();
});
},
);
Expand Down
14 changes: 13 additions & 1 deletion exon/exon-core/src/datasources/bam/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;
use std::{fmt::Debug, sync::Arc};

use crate::{
config::extract_config_from_state,
Expand All @@ -37,6 +37,12 @@ pub struct BAMScanFunction {
ctx: SessionContext,
}

impl Debug for BAMScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("BAMScanFunction").finish()
}
}

impl BAMScanFunction {
/// Create a new `BAMScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down Expand Up @@ -82,6 +88,12 @@ pub struct BAMIndexedScanFunction {
ctx: SessionContext,
}

impl Debug for BAMIndexedScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("BAMIndexedScanFunction").finish()
}
}

impl BAMIndexedScanFunction {
/// Create a new `BAMIndexedScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/bcf/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ pub struct BCFScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for BCFScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("BCFScanFunction").finish()
}
}

impl BCFScanFunction {
/// Create a new BCF scan function.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/bed/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ pub struct BEDScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for BEDScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("BEDScanFunction").finish()
}
}

impl BEDScanFunction {
/// Create a new `BEDScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/cram/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ pub struct CRAMScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for CRAMScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("CRAMScanFunction").finish()
}
}

impl CRAMScanFunction {
/// Create a new `CRAMScanFunction`.
pub fn _new(ctx: SessionContext) -> Self {
Expand Down
4 changes: 2 additions & 2 deletions exon/exon-core/src/datasources/exon_listing_table_options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::{str::FromStr, sync::Arc};
use std::{fmt::Debug, str::FromStr, sync::Arc};

use arrow::datatypes::Field;
use async_trait::async_trait;
Expand All @@ -36,7 +36,7 @@ use crate::{

#[async_trait]
/// Options for listing a table
pub trait ExonListingOptions: Default + Send + Sync {
pub trait ExonListingOptions: Default + Send + Sync + Debug {
/// The partition columns for the table
fn table_partition_cols(&self) -> &[Field];

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ pub struct FastaIndexedScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for FastaIndexedScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FastaIndexedScanFunction").finish()
}
}

impl FastaIndexedScanFunction {
/// Create a new `FastaIndexedScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/fasta/udtfs/fasta_scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ pub struct FastaScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for FastaScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FastaScanFunction").finish()
}
}

impl FastaScanFunction {
/// Create a new `FastaScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/fastq/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ pub struct FastqScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for FastqScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FastqScanFunction").finish()
}
}

impl FastqScanFunction {
/// Create a new `FastqScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/fcs/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ pub struct FCSScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for FCSScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FCSScanFunction").finish()
}
}

impl FCSScanFunction {
/// Create a new FCS scan function.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/genbank/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ pub struct GenbankScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for GenbankScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("GenbankScanFunction").finish()
}
}

impl GenbankScanFunction {
/// Create a new `GenbankScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
12 changes: 12 additions & 0 deletions exon/exon-core/src/datasources/gff/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ pub struct GFFScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for GFFScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("GFFScanFunction").finish()
}
}

impl GFFScanFunction {
/// Create a new `GFFScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down Expand Up @@ -78,6 +84,12 @@ pub struct GFFIndexedScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for GFFIndexedScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("GFFIndexedScanFunction").finish()
}
}

impl GFFIndexedScanFunction {
/// Create a new `GFFIndexedScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/gtf/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ pub struct GTFScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for GTFScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("GTFScanFunction").finish()
}
}

impl GTFScanFunction {
/// Create a new `GTFScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/mzml/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ pub struct MzMLScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for MzMLScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("MzMLScanFunction").finish()
}
}

impl MzMLScanFunction {
/// Create a new `MzMLScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/datasources/sam/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ pub struct SAMScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for SAMScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SAMScanFunction").finish()
}
}

impl SAMScanFunction {
/// Create a new `SAMScanFunction`.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
12 changes: 12 additions & 0 deletions exon/exon-core/src/datasources/vcf/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ pub struct VCFScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for VCFScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("VCFScanFunction").finish()
}
}

impl VCFScanFunction {
/// Create a new VCF scan function.
pub fn new(ctx: SessionContext) -> Self {
Expand Down Expand Up @@ -81,6 +87,12 @@ pub struct VCFIndexedScanFunction {
ctx: SessionContext,
}

impl std::fmt::Debug for VCFIndexedScanFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("VCFIndexedScanFunction").finish()
}
}

impl VCFIndexedScanFunction {
/// Create a new VCF scan function.
pub fn new(ctx: SessionContext) -> Self {
Expand Down
6 changes: 6 additions & 0 deletions exon/exon-core/src/logical_plan/exon_data_sink_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ impl From<ExonCopyToStatement> for ExonDataSinkLogicalPlanNode {
}
}

impl PartialOrd for ExonDataSinkLogicalPlanNode {
fn partial_cmp(&self, _other: &Self) -> Option<std::cmp::Ordering> {
None
}
}

impl DfExtensionNode for ExonDataSinkLogicalPlanNode {}

impl Hash for ExonDataSinkLogicalPlanNode {
Expand Down
4 changes: 2 additions & 2 deletions exon/exon-core/src/physical_plan/file_scan_config_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use arrow::datatypes::{Field, SchemaRef};
use datafusion::{
datasource::{listing::PartitionedFile, physical_plan::FileScanConfig},
execution::object_store::ObjectStoreUrl,
physical_expr::PhysicalSortExpr,
physical_expr::LexOrdering,
physical_plan::Statistics,
};

Expand All @@ -30,7 +30,7 @@ pub struct FileScanConfigBuilder {
statistics: Statistics,
projection: Option<Vec<usize>>,
limit: Option<usize>,
output_ordering: Vec<Vec<PhysicalSortExpr>>,
output_ordering: Vec<LexOrdering>,
table_partition_cols: Vec<Field>,
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use datafusion::{
physical_plan::FileSinkConfig, DefaultTableSource,
},
execution::context::SessionState,
logical_expr::{LogicalPlan, LogicalPlanBuilder, UserDefinedLogicalNode},
logical_expr::{dml::InsertOp, LogicalPlan, LogicalPlanBuilder, UserDefinedLogicalNode},
physical_plan::{insert::DataSinkExec, ExecutionPlan},
physical_planner::{ExtensionPlanner, PhysicalPlanner},
sql::{
Expand Down Expand Up @@ -165,7 +165,7 @@ impl ExtensionPlanner for ExomeExtensionPlanner {
table_paths: vec![],
output_schema: Arc::clone(&schema),
table_partition_cols: vec![],
overwrite: false,
insert_op: InsertOp::Append,
keep_partition_by_columns: false,
};

Expand Down
2 changes: 1 addition & 1 deletion exon/exon-core/src/session_context/exon_context_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,7 @@ mod tests {
let temp_path = temp_dir.join("test.fasta");

let sql = format!(
"COPY (SELECT * FROM test_fasta) TO '{}' STORED AS FASTA",
"COPY (SELECT id, description, sequence FROM test_fasta) TO '{}' STORED AS FASTA",
temp_path.display()
);
ctx.sql(&sql).await?.collect().await?;
Expand Down
3 changes: 2 additions & 1 deletion exon/exon-core/src/sinks/simple_record_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ mod tests {
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::physical_plan::FileSinkConfig;
use datafusion::execution::object_store::ObjectStoreUrl;
use datafusion::logical_expr::dml::InsertOp;
use datafusion::physical_plan::insert::DataSink;

#[tokio::test]
Expand Down Expand Up @@ -160,7 +161,7 @@ mod tests {
table_paths: vec![],
output_schema,
table_partition_cols: vec![],
overwrite: false,
insert_op: InsertOp::Append,
keep_partition_by_columns: false,
};

Expand Down
Loading

0 comments on commit cb133bc

Please sign in to comment.