From 922aa5e00e6fde765c3a6a6d1b87b342a9e13332 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 29 Jun 2024 07:41:46 -0400 Subject: [PATCH] Optionally display schema in explain plan --- datafusion/common/src/config.rs | 3 + datafusion/common/src/display/mod.rs | 12 +++- datafusion/core/src/physical_planner.rs | 71 +++++++++++++------ datafusion/physical-plan/src/display.rs | 51 +++++++++++-- datafusion/proto/proto/datafusion.proto | 2 + datafusion/proto/src/generated/pbjson.rs | 26 +++++++ datafusion/proto/src/generated/prost.rs | 9 ++- .../proto/src/logical_plan/from_proto.rs | 5 ++ datafusion/proto/src/logical_plan/to_proto.rs | 12 +++- .../sqllogictest/test_files/explain.slt | 30 ++++++++ .../test_files/information_schema.slt | 2 + docs/source/user-guide/configs.md | 1 + 12 files changed, 193 insertions(+), 31 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 1ecdb0efd2c2..1d2a9589adfc 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -613,6 +613,9 @@ config_namespace! 
{ /// When set to true, the explain statement will print the partition sizes pub show_sizes: bool, default = true + + /// When set to true, the explain statement will print schema information + pub show_schema: bool, default = false } } diff --git a/datafusion/common/src/display/mod.rs b/datafusion/common/src/display/mod.rs index 4d1d48bf9fcc..732f6a73e1d2 100644 --- a/datafusion/common/src/display/mod.rs +++ b/datafusion/common/src/display/mod.rs @@ -49,6 +49,8 @@ pub enum PlanType { InitialPhysicalPlan, /// The initial physical plan with stats, prepared for execution InitialPhysicalPlanWithStats, + /// The initial physical plan with schema, prepared for execution + InitialPhysicalPlanWithSchema, /// The ExecutionPlan which results from applying an optimizer pass OptimizedPhysicalPlan { /// The name of the optimizer which produced this plan @@ -56,8 +58,10 @@ pub enum PlanType { }, /// The final, fully optimized physical which would be executed FinalPhysicalPlan, - /// The final with stats, fully optimized physical which would be executed + /// The final with stats, fully optimized physical plan which would be executed FinalPhysicalPlanWithStats, + /// The final with schema, fully optimized physical plan which would be executed + FinalPhysicalPlanWithSchema, } impl Display for PlanType { @@ -76,11 +80,17 @@ impl Display for PlanType { PlanType::InitialPhysicalPlanWithStats => { write!(f, "initial_physical_plan_with_stats") } + PlanType::InitialPhysicalPlanWithSchema => { + write!(f, "initial_physical_plan_with_schema") + } PlanType::OptimizedPhysicalPlan { optimizer_name } => { write!(f, "physical_plan after {optimizer_name}") } PlanType::FinalPhysicalPlan => write!(f, "physical_plan"), PlanType::FinalPhysicalPlanWithStats => write!(f, "physical_plan_with_stats"), + PlanType::FinalPhysicalPlanWithSchema => { + write!(f, "physical_plan_with_schema") + } } } } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index
a0390cf16c17..6aad4d575532 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1983,23 +1983,37 @@ impl DefaultPhysicalPlanner { .await { Ok(input) => { - // This plan will includes statistics if show_statistics is on + // Include statistics / schema if enabled stringified_plans.push( displayable(input.as_ref()) .set_show_statistics(config.show_statistics) + .set_show_schema(config.show_schema) .to_stringified(e.verbose, InitialPhysicalPlan), ); - // If the show_statisitcs is off, add another line to show statsitics in the case of explain verbose - if e.verbose && !config.show_statistics { - stringified_plans.push( - displayable(input.as_ref()) - .set_show_statistics(true) - .to_stringified( - e.verbose, - InitialPhysicalPlanWithStats, - ), - ); + // Show statistics + schema in verbose output even if not + // explicitly requested + if e.verbose { + if !config.show_statistics { + stringified_plans.push( + displayable(input.as_ref()) + .set_show_statistics(true) + .to_stringified( + e.verbose, + InitialPhysicalPlanWithStats, + ), + ); + } + if !config.show_schema { + stringified_plans.push( + displayable(input.as_ref()) + .set_show_schema(true) + .to_stringified( + e.verbose, + InitialPhysicalPlanWithSchema, + ), + ); + } } let optimized_plan = self.optimize_internal( @@ -2011,6 +2025,7 @@ impl DefaultPhysicalPlanner { stringified_plans.push( displayable(plan) .set_show_statistics(config.show_statistics) + .set_show_schema(config.show_schema) .to_stringified(e.verbose, plan_type), ); }, @@ -2021,19 +2036,33 @@ impl DefaultPhysicalPlanner { stringified_plans.push( displayable(input.as_ref()) .set_show_statistics(config.show_statistics) + .set_show_schema(config.show_schema) .to_stringified(e.verbose, FinalPhysicalPlan), ); - // If the show_statisitcs is off, add another line to show statsitics in the case of explain verbose - if e.verbose && !config.show_statistics { - stringified_plans.push( - 
displayable(input.as_ref()) - .set_show_statistics(true) - .to_stringified( - e.verbose, - FinalPhysicalPlanWithStats, - ), - ); + // Show statistics + schema in verbose output even if not + // explicitly requested + if e.verbose { + if !config.show_statistics { + stringified_plans.push( + displayable(input.as_ref()) + .set_show_statistics(true) + .to_stringified( + e.verbose, + FinalPhysicalPlanWithStats, + ), + ); + } + if !config.show_schema { + stringified_plans.push( + displayable(input.as_ref()) + .set_show_schema(true) + .to_stringified( + e.verbose, + FinalPhysicalPlanWithSchema, + ), + ); + } } } Err(DataFusionError::Context(optimizer_name, e)) => { diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index ed85c80251d6..7f4ae5797d97 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -21,12 +21,14 @@ use std::fmt; use std::fmt::Formatter; -use super::{accept, ExecutionPlan, ExecutionPlanVisitor}; - use arrow_schema::SchemaRef; + use datafusion_common::display::{GraphvizBuilder, PlanType, StringifiedPlan}; +use datafusion_expr::display_schema; use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; +use super::{accept, ExecutionPlan, ExecutionPlanVisitor}; + /// Options for controlling how each [`ExecutionPlan`] should format itself #[derive(Debug, Clone, Copy)] pub enum DisplayFormatType { @@ -37,12 +39,15 @@ pub enum DisplayFormatType { } /// Wraps an `ExecutionPlan` with various ways to display this plan +#[derive(Debug, Clone)] pub struct DisplayableExecutionPlan<'a> { inner: &'a dyn ExecutionPlan, /// How to show metrics show_metrics: ShowMetrics, /// If statistics should be displayed show_statistics: bool, + /// If schema should be displayed. 
See [`Self::set_show_schema`] + show_schema: bool, } impl<'a> DisplayableExecutionPlan<'a> { @@ -53,6 +58,7 @@ impl<'a> DisplayableExecutionPlan<'a> { inner, show_metrics: ShowMetrics::None, show_statistics: false, + show_schema: false, } } @@ -64,6 +70,7 @@ impl<'a> DisplayableExecutionPlan<'a> { inner, show_metrics: ShowMetrics::Aggregated, show_statistics: false, + show_schema: false, } } @@ -75,9 +82,19 @@ impl<'a> DisplayableExecutionPlan<'a> { inner, show_metrics: ShowMetrics::Full, show_statistics: false, + show_schema: false, } } + /// Enable display of schema + /// + /// If true, plans will be displayed with schema information at the end + /// of each line. The format is `schema=[a:Int32;N, b:Int32;N, c:Int32;N]` + pub fn set_show_schema(mut self, show_schema: bool) -> Self { + self.show_schema = show_schema; + self + } + /// Enable display of statistics pub fn set_show_statistics(mut self, show_statistics: bool) -> Self { self.show_statistics = show_statistics; @@ -105,6 +122,7 @@ impl<'a> DisplayableExecutionPlan<'a> { plan: &'a dyn ExecutionPlan, show_metrics: ShowMetrics, show_statistics: bool, + show_schema: bool, } impl<'a> fmt::Display for Wrapper<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -114,6 +132,7 @@ impl<'a> DisplayableExecutionPlan<'a> { indent: 0, show_metrics: self.show_metrics, show_statistics: self.show_statistics, + show_schema: self.show_schema, }; accept(self.plan, &mut visitor) } @@ -123,6 +142,7 @@ impl<'a> DisplayableExecutionPlan<'a> { plan: self.inner, show_metrics: self.show_metrics, show_statistics: self.show_statistics, + show_schema: self.show_schema, } } @@ -179,6 +199,7 @@ impl<'a> DisplayableExecutionPlan<'a> { plan: &'a dyn ExecutionPlan, show_metrics: ShowMetrics, show_statistics: bool, + show_schema: bool, } impl<'a> fmt::Display for Wrapper<'a> { @@ -189,6 +210,7 @@ impl<'a> DisplayableExecutionPlan<'a> { indent: 0, show_metrics: self.show_metrics, show_statistics: self.show_statistics, +
show_schema: self.show_schema, }; visitor.pre_visit(self.plan)?; Ok(()) @@ -199,6 +221,7 @@ impl<'a> DisplayableExecutionPlan<'a> { plan: self.inner, show_metrics: self.show_metrics, show_statistics: self.show_statistics, + show_schema: self.show_schema, } } @@ -221,6 +244,14 @@ enum ShowMetrics { } /// Formats plans with a single line per node. +/// +/// # Example +/// +/// ```text +/// ProjectionExec: expr=[column1@0 + 2 as column1 + Int64(2)] +/// FilterExec: column1@0 = 5 +/// ValuesExec +/// ``` struct IndentVisitor<'a, 'b> { /// How to format each node t: DisplayFormatType, @@ -232,6 +263,8 @@ struct IndentVisitor<'a, 'b> { show_metrics: ShowMetrics, /// If statistics should be displayed show_statistics: bool, + /// If schema should be displayed + show_schema: bool, } impl<'a, 'b> ExecutionPlanVisitor for IndentVisitor<'a, 'b> { @@ -265,6 +298,13 @@ impl<'a, 'b> ExecutionPlanVisitor for IndentVisitor<'a, 'b> { let stats = plan.statistics().map_err(|_e| fmt::Error)?; write!(self.f, ", statistics=[{}]", stats)?; } + if self.show_schema { + write!( + self.f, + ", schema={}", + display_schema(plan.schema().as_ref()) + )?; + } writeln!(self.f)?; self.indent += 1; Ok(true) @@ -465,12 +505,13 @@ mod tests { use std::fmt::Write; use std::sync::Arc; - use super::DisplayableExecutionPlan; - use crate::{DisplayAs, ExecutionPlan, PlanProperties}; - use datafusion_common::{DataFusionError, Result, Statistics}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + use crate::{DisplayAs, ExecutionPlan, PlanProperties}; + + use super::DisplayableExecutionPlan; + #[derive(Debug, Clone, Copy)] enum TestStatsExecPlan { Panic, diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 004d7320e21b..7f4d6b9d927e 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -669,9 +669,11 @@ message PlanType { datafusion_common.EmptyMessage FinalLogicalPlan = 3; 
datafusion_common.EmptyMessage InitialPhysicalPlan = 4; datafusion_common.EmptyMessage InitialPhysicalPlanWithStats = 9; + datafusion_common.EmptyMessage InitialPhysicalPlanWithSchema = 11; OptimizedPhysicalPlanType OptimizedPhysicalPlan = 5; datafusion_common.EmptyMessage FinalPhysicalPlan = 6; datafusion_common.EmptyMessage FinalPhysicalPlanWithStats = 10; + datafusion_common.EmptyMessage FinalPhysicalPlanWithSchema = 12; } } diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index ebfa783f8561..33cd634c4aad 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -16140,6 +16140,9 @@ impl serde::Serialize for PlanType { plan_type::PlanTypeEnum::InitialPhysicalPlanWithStats(v) => { struct_ser.serialize_field("InitialPhysicalPlanWithStats", v)?; } + plan_type::PlanTypeEnum::InitialPhysicalPlanWithSchema(v) => { + struct_ser.serialize_field("InitialPhysicalPlanWithSchema", v)?; + } plan_type::PlanTypeEnum::OptimizedPhysicalPlan(v) => { struct_ser.serialize_field("OptimizedPhysicalPlan", v)?; } @@ -16149,6 +16152,9 @@ impl serde::Serialize for PlanType { plan_type::PlanTypeEnum::FinalPhysicalPlanWithStats(v) => { struct_ser.serialize_field("FinalPhysicalPlanWithStats", v)?; } + plan_type::PlanTypeEnum::FinalPhysicalPlanWithSchema(v) => { + struct_ser.serialize_field("FinalPhysicalPlanWithSchema", v)?; + } } } struct_ser.end() @@ -16168,9 +16174,11 @@ impl<'de> serde::Deserialize<'de> for PlanType { "FinalLogicalPlan", "InitialPhysicalPlan", "InitialPhysicalPlanWithStats", + "InitialPhysicalPlanWithSchema", "OptimizedPhysicalPlan", "FinalPhysicalPlan", "FinalPhysicalPlanWithStats", + "FinalPhysicalPlanWithSchema", ]; #[allow(clippy::enum_variant_names)] @@ -16182,9 +16190,11 @@ impl<'de> serde::Deserialize<'de> for PlanType { FinalLogicalPlan, InitialPhysicalPlan, InitialPhysicalPlanWithStats, + InitialPhysicalPlanWithSchema, OptimizedPhysicalPlan, FinalPhysicalPlan, 
FinalPhysicalPlanWithStats, + FinalPhysicalPlanWithSchema, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -16213,9 +16223,11 @@ impl<'de> serde::Deserialize<'de> for PlanType { "FinalLogicalPlan" => Ok(GeneratedField::FinalLogicalPlan), "InitialPhysicalPlan" => Ok(GeneratedField::InitialPhysicalPlan), "InitialPhysicalPlanWithStats" => Ok(GeneratedField::InitialPhysicalPlanWithStats), + "InitialPhysicalPlanWithSchema" => Ok(GeneratedField::InitialPhysicalPlanWithSchema), "OptimizedPhysicalPlan" => Ok(GeneratedField::OptimizedPhysicalPlan), "FinalPhysicalPlan" => Ok(GeneratedField::FinalPhysicalPlan), "FinalPhysicalPlanWithStats" => Ok(GeneratedField::FinalPhysicalPlanWithStats), + "FinalPhysicalPlanWithSchema" => Ok(GeneratedField::FinalPhysicalPlanWithSchema), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -16285,6 +16297,13 @@ impl<'de> serde::Deserialize<'de> for PlanType { return Err(serde::de::Error::duplicate_field("InitialPhysicalPlanWithStats")); } plan_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(plan_type::PlanTypeEnum::InitialPhysicalPlanWithStats) +; + } + GeneratedField::InitialPhysicalPlanWithSchema => { + if plan_type_enum__.is_some() { + return Err(serde::de::Error::duplicate_field("InitialPhysicalPlanWithSchema")); + } + plan_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(plan_type::PlanTypeEnum::InitialPhysicalPlanWithSchema) ; } GeneratedField::OptimizedPhysicalPlan => { @@ -16306,6 +16325,13 @@ impl<'de> serde::Deserialize<'de> for PlanType { return Err(serde::de::Error::duplicate_field("FinalPhysicalPlanWithStats")); } plan_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(plan_type::PlanTypeEnum::FinalPhysicalPlanWithStats) +; + } + GeneratedField::FinalPhysicalPlanWithSchema => { + if plan_type_enum__.is_some() { + return Err(serde::de::Error::duplicate_field("FinalPhysicalPlanWithSchema")); + } + 
plan_type_enum__ = map_.next_value::<::std::option::Option<_>>()?.map(plan_type::PlanTypeEnum::FinalPhysicalPlanWithSchema) ; } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 1a3514dbd4f7..83b8b738c4f4 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -947,7 +947,10 @@ pub struct OptimizedPhysicalPlanType { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct PlanType { - #[prost(oneof = "plan_type::PlanTypeEnum", tags = "1, 7, 8, 2, 3, 4, 9, 5, 6, 10")] + #[prost( + oneof = "plan_type::PlanTypeEnum", + tags = "1, 7, 8, 2, 3, 4, 9, 11, 5, 6, 10, 12" + )] pub plan_type_enum: ::core::option::Option, } /// Nested message and enum types in `PlanType`. @@ -969,12 +972,16 @@ pub mod plan_type { InitialPhysicalPlan(super::super::datafusion_common::EmptyMessage), #[prost(message, tag = "9")] InitialPhysicalPlanWithStats(super::super::datafusion_common::EmptyMessage), + #[prost(message, tag = "11")] + InitialPhysicalPlanWithSchema(super::super::datafusion_common::EmptyMessage), #[prost(message, tag = "5")] OptimizedPhysicalPlan(super::OptimizedPhysicalPlanType), #[prost(message, tag = "6")] FinalPhysicalPlan(super::super::datafusion_common::EmptyMessage), #[prost(message, tag = "10")] FinalPhysicalPlanWithStats(super::super::datafusion_common::EmptyMessage), + #[prost(message, tag = "12")] + FinalPhysicalPlanWithSchema(super::super::datafusion_common::EmptyMessage), } } #[allow(clippy::derive_partial_eq_without_eq)] diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 21331a94c18c..609cbc1a286b 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -36,6 +36,9 @@ use datafusion_expr::{ }; use datafusion_proto_common::{from_proto::FromOptionalField, FromProtoError as Error}; +use 
crate::protobuf::plan_type::PlanTypeEnum::{ + FinalPhysicalPlanWithSchema, InitialPhysicalPlanWithSchema, +}; use crate::protobuf::{ self, plan_type::PlanTypeEnum::{ @@ -121,6 +124,7 @@ impl From<&protobuf::StringifiedPlan> for StringifiedPlan { FinalLogicalPlan(_) => PlanType::FinalLogicalPlan, InitialPhysicalPlan(_) => PlanType::InitialPhysicalPlan, InitialPhysicalPlanWithStats(_) => PlanType::InitialPhysicalPlanWithStats, + InitialPhysicalPlanWithSchema(_) => PlanType::InitialPhysicalPlanWithSchema, OptimizedPhysicalPlan(OptimizedPhysicalPlanType { optimizer_name }) => { PlanType::OptimizedPhysicalPlan { optimizer_name: optimizer_name.clone(), @@ -128,6 +132,7 @@ impl From<&protobuf::StringifiedPlan> for StringifiedPlan { } FinalPhysicalPlan(_) => PlanType::FinalPhysicalPlan, FinalPhysicalPlanWithStats(_) => PlanType::FinalPhysicalPlanWithStats, + FinalPhysicalPlanWithSchema(_) => PlanType::FinalPhysicalPlanWithSchema, }, plan: Arc::new(stringified_plan.plan.clone()), } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 3a1db1defdd9..ccc64119c8a1 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -34,9 +34,9 @@ use crate::protobuf::{ self, plan_type::PlanTypeEnum::{ AnalyzedLogicalPlan, FinalAnalyzedLogicalPlan, FinalLogicalPlan, - FinalPhysicalPlan, FinalPhysicalPlanWithStats, InitialLogicalPlan, - InitialPhysicalPlan, InitialPhysicalPlanWithStats, OptimizedLogicalPlan, - OptimizedPhysicalPlan, + FinalPhysicalPlan, FinalPhysicalPlanWithSchema, FinalPhysicalPlanWithStats, + InitialLogicalPlan, InitialPhysicalPlan, InitialPhysicalPlanWithSchema, + InitialPhysicalPlanWithStats, OptimizedLogicalPlan, OptimizedPhysicalPlan, }, AnalyzedLogicalPlanType, CubeNode, EmptyMessage, GroupingSetNode, LogicalExprList, OptimizedLogicalPlanType, OptimizedPhysicalPlanType, PlaceholderNode, RollupNode, @@ -96,9 +96,15 @@ impl From<&StringifiedPlan> for 
protobuf::StringifiedPlan { PlanType::InitialPhysicalPlanWithStats => Some(protobuf::PlanType { plan_type_enum: Some(InitialPhysicalPlanWithStats(EmptyMessage {})), }), + PlanType::InitialPhysicalPlanWithSchema => Some(protobuf::PlanType { + plan_type_enum: Some(InitialPhysicalPlanWithSchema(EmptyMessage {})), + }), PlanType::FinalPhysicalPlanWithStats => Some(protobuf::PlanType { plan_type_enum: Some(FinalPhysicalPlanWithStats(EmptyMessage {})), }), + PlanType::FinalPhysicalPlanWithSchema => Some(protobuf::PlanType { + plan_type_enum: Some(FinalPhysicalPlanWithSchema(EmptyMessage {})), + }), }, plan: stringified_plan.plan.to_string(), } diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 96e73a591678..b850760b8734 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -241,6 +241,7 @@ logical_plan after optimize_projections SAME TEXT AS ABOVE logical_plan TableScan: simple_explain_test projection=[a, b, c] initial_physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true initial_physical_plan_with_stats CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true, statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] +initial_physical_plan_with_schema CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true, schema=[a:Int32;N, b:Int32;N, c:Int32;N] physical_plan after OutputRequirements 01)OutputRequirementExec 02)--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true @@ -259,6 +260,23 @@ physical_plan after ProjectionPushdown SAME TEXT AS ABOVE physical_plan after PipelineChecker SAME TEXT AS ABOVE physical_plan CsvExec: 
file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true physical_plan_with_stats CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true, statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] +physical_plan_with_schema CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true, schema=[a:Int32;N, b:Int32;N, c:Int32;N] + +### tests for EXPLAIN with display schema enabled + +statement ok +set datafusion.explain.show_schema = true; + +# test EXPLAIN (non-verbose) with schema displayed +query TT +EXPLAIN SELECT a, b, c FROM simple_explain_test; +---- +logical_plan TableScan: simple_explain_test projection=[a, b, c] +physical_plan CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.csv]]}, projection=[a, b, c], has_header=true, schema=[a:Int32;N, b:Int32;N, c:Int32;N] + + +statement ok +set datafusion.explain.show_schema = false; ### tests for EXPLAIN with display statistics enabled @@ -297,6 +315,9 @@ EXPLAIN VERBOSE SELECT * FROM alltypes_plain limit 10; initial_physical_plan 01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] +initial_physical_plan_with_schema +01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N,
bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] physical_plan after OutputRequirements 01)OutputRequirementExec, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] @@ -319,6 +340,9 @@ physical_plan after PipelineChecker SAME TEXT AS ABOVE physical_plan 01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] +physical_plan_with_schema +01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, 
double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] statement ok @@ -334,6 +358,9 @@ initial_physical_plan initial_physical_plan_with_stats 01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] +initial_physical_plan_with_schema +01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, 
int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] physical_plan after OutputRequirements 01)OutputRequirementExec 02)--GlobalLimitExec: skip=0, fetch=10 @@ -359,6 +386,9 @@ physical_plan physical_plan_with_stats 01)GlobalLimitExec: skip=0, fetch=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] 02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, statistics=[Rows=Exact(8), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:),(Col[4]:),(Col[5]:),(Col[6]:),(Col[7]:),(Col[8]:),(Col[9]:),(Col[10]:)]] +physical_plan_with_schema +01)GlobalLimitExec: skip=0, fetch=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] +02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col], limit=10, schema=[id:Int32;N, bool_col:Boolean;N, tinyint_col:Int32;N, smallint_col:Int32;N, int_col:Int32;N, bigint_col:Int64;N, float_col:Float32;N, double_col:Float64;N, date_string_col:Binary;N, string_col:Binary;N, timestamp_col:Timestamp(Nanosecond, None);N] statement ok diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index ee64f772917c..acd465a0c021 100644 --- 
a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -213,6 +213,7 @@ datafusion.execution.target_partitions 7 datafusion.execution.time_zone +00:00 datafusion.explain.logical_plan_only false datafusion.explain.physical_plan_only false +datafusion.explain.show_schema false datafusion.explain.show_sizes true datafusion.explain.show_statistics false datafusion.optimizer.allow_symmetric_joins_without_pruning true @@ -296,6 +297,7 @@ datafusion.execution.target_partitions 7 Number of partitions for query executio datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour datafusion.explain.logical_plan_only false When set to true, the explain statement will only print logical plans datafusion.explain.physical_plan_only false When set to true, the explain statement will only print physical plans +datafusion.explain.show_schema false When set to true, the explain statement will print schema information datafusion.explain.show_sizes true When set to true, the explain statement will print the partition sizes datafusion.explain.show_statistics false When set to true, the explain statement will print operator statistics for physical plans datafusion.optimizer.allow_symmetric_joins_without_pruning true Should DataFusion allow symmetric hash joins for unbounded data sources even when its inputs do not have any ordering or filtering If the flag is not enabled, the SymmetricHashJoin operator will be unable to prune its internal buffers, resulting in certain join types - such as Full, Left, LeftAnti, LeftSemi, Right, RightAnti, and RightSemi - being produced only at the end of the execution. This is not typical in stream processing. Additionally, without proper design for long runner execution, all types of joins may encounter out-of-memory errors. 
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 0f0aa8460448..303caef57700 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -111,6 +111,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.explain.physical_plan_only | false | When set to true, the explain statement will only print physical plans | | datafusion.explain.show_statistics | false | When set to true, the explain statement will print operator statistics for physical plans | | datafusion.explain.show_sizes | true | When set to true, the explain statement will print the partition sizes | +| datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information | | datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type | | datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) | | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. |