Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/main' into feature/12788-binary-…
Browse files Browse the repository at this point in the history
…as-string-opt
  • Loading branch information
alamb committed Oct 16, 2024
2 parents abce0e9 + 589c830 commit 5c32574
Show file tree
Hide file tree
Showing 191 changed files with 10,794 additions and 5,594 deletions.
2 changes: 1 addition & 1 deletion benchmarks/src/bin/h2o.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use datafusion::datasource::listing::{
use datafusion::datasource::MemTable;
use datafusion::prelude::CsvReadOptions;
use datafusion::{arrow::util::pretty, error::Result, prelude::SessionContext};
use datafusion_benchmarks::BenchmarkRun;
use datafusion_benchmarks::util::BenchmarkRun;
use std::path::PathBuf;
use std::sync::Arc;
use structopt::StructOpt;
Expand Down
3 changes: 1 addition & 2 deletions benchmarks/src/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
use std::path::Path;
use std::path::PathBuf;

use crate::util::{BenchmarkRun, CommonOpt};
use datafusion::{
error::{DataFusionError, Result},
prelude::SessionContext,
Expand All @@ -26,8 +27,6 @@ use datafusion_common::exec_datafusion_err;
use datafusion_common::instant::Instant;
use structopt::StructOpt;

use crate::{BenchmarkRun, CommonOpt};

/// Run the clickbench benchmark
///
/// The ClickBench[1] benchmarks are widely cited in the industry and
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/src/imdb/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::path::PathBuf;
use std::sync::Arc;

use super::{get_imdb_table_schema, get_query_sql, IMDB_TABLES};
use crate::{BenchmarkRun, CommonOpt};
use crate::util::{BenchmarkRun, CommonOpt};

use arrow::record_batch::RecordBatch;
use arrow::util::pretty::{self, pretty_format_batches};
Expand Down Expand Up @@ -489,6 +489,7 @@ mod tests {

use super::*;

use crate::util::CommonOpt;
use datafusion::common::exec_err;
use datafusion::error::Result;
use datafusion_proto::bytes::{
Expand Down
3 changes: 1 addition & 2 deletions benchmarks/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,4 @@ pub mod imdb;
pub mod parquet_filter;
pub mod sort;
pub mod tpch;
mod util;
pub use util::*;
pub mod util;
2 changes: 1 addition & 1 deletion benchmarks/src/parquet_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::path::PathBuf;

use crate::{AccessLogOpt, BenchmarkRun, CommonOpt};
use crate::util::{AccessLogOpt, BenchmarkRun, CommonOpt};

use arrow::util::pretty;
use datafusion::common::Result;
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use std::path::PathBuf;
use std::sync::Arc;

use crate::{AccessLogOpt, BenchmarkRun, CommonOpt};
use crate::util::{AccessLogOpt, BenchmarkRun, CommonOpt};

use arrow::util::pretty;
use datafusion::common::Result;
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/src/tpch/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use std::sync::Arc;
use super::{
get_query_sql, get_tbl_tpch_table_schema, get_tpch_table_schema, TPCH_TABLES,
};
use crate::{BenchmarkRun, CommonOpt};
use crate::util::{BenchmarkRun, CommonOpt};

use arrow::record_batch::RecordBatch;
use arrow::util::pretty::{self, pretty_format_batches};
Expand Down
2 changes: 1 addition & 1 deletion datafusion-cli/src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ pub(crate) async fn register_object_store_and_config_extensions(
ctx.register_table_options_extension_from_scheme(scheme);

// Clone and modify the default table options based on the provided options
let mut table_options = ctx.session_state().default_table_options().clone();
let mut table_options = ctx.session_state().default_table_options();
if let Some(format) = format {
table_options.set_config_format(format);
}
Expand Down
10 changes: 5 additions & 5 deletions datafusion-cli/src/object_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ mod tests {

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
let mut table_options = ctx.state().default_table_options().clone();
let mut table_options = ctx.state().default_table_options();
table_options.alter_with_string_hash_map(&cmd.options)?;
let aws_options = table_options.extensions.get::<AwsOptions>().unwrap();
let builder =
Expand Down Expand Up @@ -540,7 +540,7 @@ mod tests {

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
let mut table_options = ctx.state().default_table_options().clone();
let mut table_options = ctx.state().default_table_options();
table_options.alter_with_string_hash_map(&cmd.options)?;
let aws_options = table_options.extensions.get::<AwsOptions>().unwrap();
let err = get_s3_object_store_builder(table_url.as_ref(), aws_options)
Expand All @@ -566,7 +566,7 @@ mod tests {

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
let mut table_options = ctx.state().default_table_options().clone();
let mut table_options = ctx.state().default_table_options();
table_options.alter_with_string_hash_map(&cmd.options)?;
let aws_options = table_options.extensions.get::<AwsOptions>().unwrap();
// ensure this isn't an error
Expand Down Expand Up @@ -594,7 +594,7 @@ mod tests {

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
let mut table_options = ctx.state().default_table_options().clone();
let mut table_options = ctx.state().default_table_options();
table_options.alter_with_string_hash_map(&cmd.options)?;
let aws_options = table_options.extensions.get::<AwsOptions>().unwrap();
let builder = get_oss_object_store_builder(table_url.as_ref(), aws_options)?;
Expand Down Expand Up @@ -631,7 +631,7 @@ mod tests {

if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
ctx.register_table_options_extension_from_scheme(scheme);
let mut table_options = ctx.state().default_table_options().clone();
let mut table_options = ctx.state().default_table_options();
table_options.alter_with_string_hash_map(&cmd.options)?;
let gcp_options = table_options.extensions.get::<GcpOptions>().unwrap();
let builder = get_gcs_object_store_builder(table_url.as_ref(), gcp_options)?;
Expand Down
4 changes: 2 additions & 2 deletions datafusion-examples/examples/custom_file_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ impl FileFormatFactory for TSVFileFactory {
&self,
state: &SessionState,
format_options: &std::collections::HashMap<String, String>,
) -> Result<std::sync::Arc<dyn FileFormat>> {
) -> Result<Arc<dyn FileFormat>> {
let mut new_options = format_options.clone();
new_options.insert("format.delimiter".to_string(), "\t".to_string());

Expand All @@ -164,7 +164,7 @@ impl FileFormatFactory for TSVFileFactory {
Ok(tsv_file_format)
}

fn default(&self) -> std::sync::Arc<dyn FileFormat> {
fn default(&self) -> Arc<dyn FileFormat> {
todo!()
}

Expand Down
2 changes: 1 addition & 1 deletion datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6603,7 +6603,7 @@ mod tests {
let not_nulls = not_nulls.finish();
let not_nulls = Some(NullBuffer::new(not_nulls));

let ar = unsafe { StructArray::new_unchecked(fields, arrays, not_nulls) };
let ar = StructArray::new(fields, arrays, not_nulls);
let s = ScalarValue::Struct(Arc::new(ar));

assert_eq!(s.to_string(), "{a:1,b:2}");
Expand Down
23 changes: 22 additions & 1 deletion datafusion/core/src/bin/print_functions_docs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,16 @@ use datafusion_expr::{
aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF,
DocSection, Documentation, ScalarUDF, WindowUDF,
};
use hashbrown::HashSet;
use itertools::Itertools;
use std::env::args;
use std::fmt::Write as _;

/// Print documentation for all functions of a given type to stdout
///
/// Usage: `cargo run --bin print_functions_docs -- <type>`
///
/// Called from `dev/update_function_docs.sh`
fn main() {
let args: Vec<String> = args().collect();

Expand Down Expand Up @@ -83,9 +89,12 @@ fn print_docs(
) -> String {
let mut docs = "".to_string();

// Ensure that all providers have documentation
let mut providers_with_no_docs = HashSet::new();

// doc sections only includes sections that have 'include' == true
for doc_section in doc_sections {
// make sure there is a function that is in this doc section
// make sure there is at least one function that is in this doc section
if !&providers.iter().any(|f| {
if let Some(documentation) = f.get_documentation() {
documentation.doc_section == doc_section
Expand All @@ -96,12 +105,14 @@ fn print_docs(
continue;
}

// filter out functions that are not in this doc section
let providers: Vec<&Box<dyn DocProvider>> = providers
.iter()
.filter(|&f| {
if let Some(documentation) = f.get_documentation() {
documentation.doc_section == doc_section
} else {
providers_with_no_docs.insert(f.get_name());
false
}
})
Expand Down Expand Up @@ -202,9 +213,19 @@ fn print_docs(
}
}

// If there are any functions that do not have documentation, print them out
// eventually make this an error: https://github.com/apache/datafusion/issues/12872
if !providers_with_no_docs.is_empty() {
eprintln!("INFO: The following functions do not have documentation:");
for f in providers_with_no_docs {
eprintln!(" - {f}");
}
}

docs
}

/// Trait for accessing name / aliases / documentation for differnet functions
trait DocProvider {
fn get_name(&self) -> String;
fn get_aliases(&self) -> Vec<String>;
Expand Down
1 change: 1 addition & 0 deletions datafusion/core/src/catalog_common/listing_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ impl ListingSchemaProvider {
file_type: self.format.clone(),
table_partition_cols: vec![],
if_not_exists: false,
temporary: false,
definition: None,
order_exprs: vec![],
unbounded: false,
Expand Down
2 changes: 2 additions & 0 deletions datafusion/core/src/datasource/listing_table_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ mod tests {
schema: Arc::new(DFSchema::empty()),
table_partition_cols: vec![],
if_not_exists: false,
temporary: false,
definition: None,
order_exprs: vec![],
unbounded: false,
Expand Down Expand Up @@ -236,6 +237,7 @@ mod tests {
schema: Arc::new(DFSchema::empty()),
table_partition_cols: vec![],
if_not_exists: false,
temporary: false,
definition: None,
order_exprs: vec![],
unbounded: false,
Expand Down
16 changes: 16 additions & 0 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,11 @@ impl SessionContext {
cmd: &CreateExternalTable,
) -> Result<DataFrame> {
let exist = self.table_exist(cmd.name.clone())?;

if cmd.temporary {
return not_impl_err!("Temporary tables not supported");
}

if exist {
match cmd.if_not_exists {
true => return self.return_empty_dataframe(),
Expand All @@ -761,10 +766,16 @@ impl SessionContext {
or_replace,
constraints,
column_defaults,
temporary,
} = cmd;

let input = Arc::unwrap_or_clone(input);
let input = self.state().optimize(&input)?;

if temporary {
return not_impl_err!("Temporary tables not supported");
}

let table = self.table(name.clone()).await;
match (if_not_exists, or_replace, table) {
(true, false, Ok(_)) => self.return_empty_dataframe(),
Expand Down Expand Up @@ -813,10 +824,15 @@ impl SessionContext {
input,
or_replace,
definition,
temporary,
} = cmd;

let view = self.table(name.clone()).await;

if temporary {
return not_impl_err!("Temporary views not supported");
}

match (or_replace, view) {
(true, Ok(_)) => {
self.deregister_table(name.clone())?;
Expand Down
Loading

0 comments on commit 5c32574

Please sign in to comment.