Skip to content

Commit

Permalink
feat: add duckdb checks for unsupported column types (datafusion-contrib#164)
Browse files Browse the repository at this point in the history

* feat: Add DuckDB checks for unsupported column types

* deps: Update Cargo.toml

* fix: Make serde non-optional, let InvalidTypeAction be Copy

* fix: More features shenanigans
  • Loading branch information
zeroxaa committed Nov 27, 2024
1 parent 88f0dd4 commit bb8c27a
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 34 deletions.
51 changes: 30 additions & 21 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,8 @@ pem = { version = "3.0.4", optional = true }
tokio-rusqlite = { version = "0.6.0", optional = true }
tonic = { version = "0.12.2", optional = true }
datafusion-federation = "0.1"
<<<<<<< HEAD
datafusion-federation-sql = { git = "https://github.com/spiceai/datafusion-federation.git", rev = "5af0df83c2cd1d3f82f293b066b401a4dfd4064b" }
=======
datafusion-federation-sql = { git = "https://github.com/spiceai/datafusion-federation.git", rev = "914bd0836baa6990c5d03f977e5e87fe5eeaf4d6" }
>>>>>>> 36d7f1d (Include unnecessary columns pruning step during federated plan creation (#162))
#datafusion-federation-sql = { git = "https://github.com/spiceai/datafusion-federation.git", rev = "914bd0836baa6990c5d03f977e5e87fe5eeaf4d6" }
itertools = "0.13.0"
dyn-clone = { version = "1.0.17", optional = true }
geo-types = "0.7.13"
Expand All @@ -86,17 +83,30 @@ postgres = ["dep:tokio-postgres", "dep:uuid", "dep:postgres-native-tls", "dep:bb
sqlite = ["dep:rusqlite", "dep:tokio-rusqlite"]
duckdb = ["dep:duckdb", "dep:r2d2", "dep:uuid", "dep:dyn-clone", "dep:async-stream", "dep:arrow-schema"]
flight = [
# "dep:arrow-array",
# "dep:arrow-flight",
# "dep:arrow-schema",
# "dep:base64",
# "dep:bytes",
# "dep:datafusion-expr",
# "dep:datafusion-physical-expr",
# "dep:datafusion-physical-plan",
# "dep:datafusion-proto",
# "dep:prost",
# "dep:tonic",
<<<<<<< HEAD
# "dep:arrow-array",
# "dep:arrow-flight",
# "dep:arrow-schema",
# "dep:base64",
# "dep:bytes",
# "dep:datafusion-expr",
# "dep:datafusion-physical-expr",
# "dep:datafusion-physical-plan",
# "dep:datafusion-proto",
# "dep:prost",
# "dep:tonic",
# "dep:arrow-array",
# "dep:arrow-flight",
# "dep:arrow-schema",
# "dep:base64",
# "dep:bytes",
# "dep:datafusion-expr",
# "dep:datafusion-physical-expr",
# "dep:datafusion-physical-plan",
# "dep:datafusion-proto",
# "dep:prost",
# "dep:serde",
# "dep:tonic",
"dep:arrow-array",
"dep:arrow-flight",
"dep:arrow-schema",
Expand All @@ -107,7 +117,6 @@ flight = [
"dep:datafusion-physical-plan",
"dep:datafusion-proto",
"dep:prost",
"dep:serde",
"dep:tonic",
]
duckdb-federation = ["duckdb"]
Expand All @@ -118,11 +127,11 @@ postgres-federation = ["postgres"]
datafusion-federation = { git = "https://github.com/spiceai/datafusion-federation.git", rev = "5af0df83c2cd1d3f82f293b066b401a4dfd4064b" }
duckdb = { git = "https://github.com/spiceai/duckdb-rs.git", rev = "d6f8c3e0dc1ba073a86756eba4dacf044dfa892d" }

datafusion = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee"}
datafusion-expr = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee"}
datafusion-physical-expr = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee"}
datafusion-physical-plan = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee"}
datafusion-proto = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee"}
datafusion = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee" }
datafusion-expr = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee" }
datafusion-physical-expr = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee" }
datafusion-physical-plan = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee" }
datafusion-proto = { git = "https://github.com/spiceai/datafusion.git", rev = "aaa46be22cff778e0e618b5eaeee3c2830d4e7ee" }

# new
#datafusion-federation = { git = "https://github.com/spiceai/datafusion-federation.git", rev = "914bd0836baa6990c5d03f977e5e87fe5eeaf4d6" }
Expand Down
6 changes: 4 additions & 2 deletions src/sql/db_connection_pool/dbconnection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,16 @@ pub enum Error {
#[snafu(display("{source}"))]
UnableToGetSchema { source: GenericError },

#[snafu(display("The field '{field_name}' has an unsupported data type: {data_type}.\nReport a bug to request support for this data type: https://github.com/datafusion-contrib/datafusion-table-providers/issues"))]
// #[snafu(display("The field '{field_name}' has an unsupported data type: {data_type}.\nReport a bug to request support for this data type: https://github.com/datafusion-contrib/datafusion-table-providers/issues"))]
#[snafu(display("The field '{field_name}' has an unsupported data type: {data_type}"))]
#[cfg(feature = "duckdb")]
UnsupportedDataType {
data_type: DataType,
field_name: String,
},

#[snafu(display("Failed to execute query.\n{source}"))]
// #[snafu(display("Failed to execute query.\n{source}"))]
#[snafu(display("Unable to query arrow: {source}"))]
UnableToQueryArrow { source: GenericError },

#[snafu(display(
Expand Down
11 changes: 6 additions & 5 deletions src/sql/db_connection_pool/dbconnection/duckdbconn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,11 +299,12 @@ fn data_type_is_unsupported(data_type: &DataType) -> bool {
| DataType::LargeList(inner_field) => {
match inner_field.data_type() {
dt if dt.is_primitive() => false,
DataType::Utf8
| DataType::Binary
| DataType::Utf8View
| DataType::BinaryView
| DataType::Boolean => false,
// DataType::Utf8
// | DataType::Binary
// | DataType::Utf8View
// | DataType::BinaryView
// | DataType::Boolean => false,
DataType::Utf8 | DataType::Binary => false,
_ => true, // nested lists don't support anything else yet
}
}
Expand Down
14 changes: 8 additions & 6 deletions src/sql/db_connection_pool/duckdbpool.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
use async_trait::async_trait;
use duckdb::{vtab::arrow::ArrowVTab, AccessMode, DefaultNullOrder, DefaultOrder, DuckdbConnectionManager};
use snafu::{prelude::*, ResultExt};
use std::sync::Arc;
use r2d2::ManageConnection;
use super::{
dbconnection::duckdbconn::{DuckDBAttachments, DuckDBParameter},
DbConnectionPool, Mode, Result,
Expand All @@ -14,6 +9,13 @@ use crate::{
},
InvalidTypeAction,
};
use async_trait::async_trait;
use duckdb::{
vtab::arrow::ArrowVTab, AccessMode, DefaultNullOrder, DefaultOrder, DuckdbConnectionManager,
};
use r2d2::ManageConnection;
use snafu::{prelude::*, ResultExt};
use std::sync::Arc;

#[derive(Debug, Snafu)]
pub enum Error {
Expand Down Expand Up @@ -195,7 +197,7 @@ impl DuckDbConnectionPool {

#[async_trait]
impl DbConnectionPool<r2d2::PooledConnection<DuckdbConnectionManager>, DuckDBParameter>
for DuckDbConnectionPool
for DuckDbConnectionPool
{
async fn connect(
&self,
Expand Down

0 comments on commit bb8c27a

Please sign in to comment.