diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index 903d3e43b3d23..f44af2ad95a73 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -359,7 +359,13 @@ DROP TABLE listing_table; ## Create a table with a binary column query I -COPY (SELECT arrow_cast(string_col, 'Binary') as binary_col FROM src_table) +COPY ( + SELECT + arrow_cast(string_col, 'Binary') as binary_col, + arrow_cast(string_col, 'LargeBinary') as largebinary_col, + arrow_cast(string_col, 'BinaryView') as binaryview_col + FROM src_table + ) TO 'test_files/scratch/parquet/binary_as_string.parquet' STORED AS PARQUET; ---- @@ -371,37 +377,47 @@ CREATE EXTERNAL TABLE binary_as_string_default STORED AS PARQUET LOCATION 'test_files/scratch/parquet/binary_as_string.parquet' # NB the data is read and displayed as binary -query T? -select arrow_typeof(binary_col), binary_col from binary_as_string_default; +query T?T?T? +select + arrow_typeof(binary_col), binary_col, + arrow_typeof(largebinary_col), largebinary_col, + arrow_typeof(binaryview_col), binaryview_col + FROM binary_as_string_default; ---- -Binary 616161 -Binary 626262 -Binary 636363 -Binary 646464 -Binary 656565 -Binary 666666 -Binary 676767 -Binary 686868 -Binary 696969 - -# Run an explain plan to show the cast happens in the plan (a CAST is needed for the predicate) +Binary 616161 Binary 616161 Binary 616161 +Binary 626262 Binary 626262 Binary 626262 +Binary 636363 Binary 636363 Binary 636363 +Binary 646464 Binary 646464 Binary 646464 +Binary 656565 Binary 656565 Binary 656565 +Binary 666666 Binary 666666 Binary 666666 +Binary 676767 Binary 676767 Binary 676767 +Binary 686868 Binary 686868 Binary 686868 +Binary 696969 Binary 696969 Binary 696969 + +# Run an explain plan to show the cast happens in the plan (a CAST is needed for the predicates) query TT -EXPLAIN SELECT binary_col FROM binary_as_string_default WHERE binary_col LIKE '%a%'; +EXPLAIN + SELECT binary_col, largebinary_col, binaryview_col + FROM binary_as_string_default + WHERE + binary_col LIKE '%a%' AND + largebinary_col LIKE '%a%' AND + binaryview_col LIKE '%a%'; ---- logical_plan -01)Filter: CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%") -02)--TableScan: binary_as_string_default projection=[binary_col], partial_filters=[CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%")] +01)Filter: CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%") AND CAST(binary_as_string_default.largebinary_col AS Utf8) LIKE Utf8("%a%") AND CAST(binary_as_string_default.binaryview_col AS Utf8) LIKE Utf8("%a%") +02)--TableScan: binary_as_string_default projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[CAST(binary_as_string_default.binary_col AS Utf8) LIKE Utf8("%a%"), CAST(binary_as_string_default.largebinary_col AS Utf8) LIKE Utf8("%a%"), CAST(binary_as_string_default.binaryview_col AS Utf8) LIKE Utf8("%a%")] physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 -02)--FilterExec: CAST(binary_col@0 AS Utf8) LIKE %a% +02)--FilterExec: CAST(binary_col@0 AS Utf8) LIKE %a% AND CAST(largebinary_col@1 AS Utf8) LIKE %a% AND CAST(binaryview_col@2 AS Utf8) LIKE %a% 03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col], predicate=CAST(binary_col@0 AS Utf8) LIKE %a% +04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=CAST(binary_col@0 AS Utf8) LIKE %a% AND CAST(largebinary_col@1 AS Utf8) LIKE %a% AND CAST(binaryview_col@2 AS Utf8) LIKE %a% statement ok DROP TABLE binary_as_string_default; -## Test 2: Read table with binary_as_string option +## Test 2: Read table using the binary_as_string option statement ok CREATE EXTERNAL TABLE binary_as_string_option @@ -409,37 +425,47 @@ STORED AS PARQUET LOCATION 'test_files/scratch/parquet/binary_as_string.parquet' OPTIONS ('binary_as_string' 'true'); # NB the data is read and displayed as string -query TT -select arrow_typeof(binary_col), binary_col from binary_as_string_option; +query TTTTTT +select + arrow_typeof(binary_col), binary_col, + arrow_typeof(largebinary_col), largebinary_col, + arrow_typeof(binaryview_col), binaryview_col + FROM binary_as_string_option; ---- -Utf8 aaa -Utf8 bbb -Utf8 ccc -Utf8 ddd -Utf8 eee -Utf8 fff -Utf8 ggg -Utf8 hhh -Utf8 iii +Utf8 aaa Utf8 aaa Utf8 aaa +Utf8 bbb Utf8 bbb Utf8 bbb +Utf8 ccc Utf8 ccc Utf8 ccc +Utf8 ddd Utf8 ddd Utf8 ddd +Utf8 eee Utf8 eee Utf8 eee +Utf8 fff Utf8 fff Utf8 fff +Utf8 ggg Utf8 ggg Utf8 ggg +Utf8 hhh Utf8 hhh Utf8 hhh +Utf8 iii Utf8 iii Utf8 iii # Run an explain plan to show the cast happens in the plan (there should be no casts) query TT -EXPLAIN SELECT binary_col FROM binary_as_string_option WHERE binary_col LIKE '%a%'; +EXPLAIN + SELECT binary_col, largebinary_col, binaryview_col + FROM binary_as_string_option + WHERE + binary_col LIKE '%a%' AND + largebinary_col LIKE '%a%' AND + binaryview_col LIKE '%a%'; ---- logical_plan -01)Filter: binary_as_string_option.binary_col LIKE Utf8("%a%") -02)--TableScan: binary_as_string_option projection=[binary_col], partial_filters=[binary_as_string_option.binary_col LIKE Utf8("%a%")] +01)Filter: binary_as_string_option.binary_col LIKE Utf8("%a%") AND binary_as_string_option.largebinary_col LIKE Utf8("%a%") AND binary_as_string_option.binaryview_col LIKE Utf8("%a%") +02)--TableScan: binary_as_string_option projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[binary_as_string_option.binary_col LIKE Utf8("%a%"), binary_as_string_option.largebinary_col LIKE Utf8("%a%"), binary_as_string_option.binaryview_col LIKE Utf8("%a%")] physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 -02)--FilterExec: binary_col@0 LIKE %a% +02)--FilterExec: binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a% 03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col], predicate=binary_col@0 LIKE %a% +04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a% statement ok DROP TABLE binary_as_string_option; -## Test 3: Read table with binary_as_string option and schema_force_view_types +## Test 3: Read table with binary_as_string option AND schema_force_view_types statement ok CREATE EXTERNAL TABLE binary_as_string_both @@ -450,9 +476,13 @@ OPTIONS ( ); # NB the data is read and displayed a StringView -query TT -select arrow_typeof(binary_col), binary_col from binary_as_string_both; ----- +query error DataFusion error: SQL error: ParserError\("Expected: an SQL statement, found: Utf8View"\) +select + arrow_typeof(binary_col), binary_col, + arrow_typeof(largebinary_col), largebinary_col, + arrow_typeof(binaryview_col), binaryview_col + FROM binary_as_string_both; + ---- Utf8View aaa Utf8View bbb Utf8View ccc @@ -465,17 +495,26 @@ Utf8View iii # Run an explain plan to show the cast happens in the plan (there should be no casts) query TT -EXPLAIN SELECT binary_col FROM binary_as_string_both WHERE binary_col LIKE '%a%'; +EXPLAIN + SELECT binary_col, largebinary_col, binaryview_col + FROM binary_as_string_both + WHERE + binary_col LIKE '%a%' AND + largebinary_col LIKE '%a%' AND + binaryview_col LIKE '%a%'; ---- logical_plan -01)Filter: binary_as_string_both.binary_col LIKE Utf8View("%a%") -02)--TableScan: binary_as_string_both projection=[binary_col], partial_filters=[binary_as_string_both.binary_col LIKE Utf8View("%a%")] +01)Filter: binary_as_string_both.binary_col LIKE Utf8View("%a%") AND binary_as_string_both.largebinary_col LIKE Utf8View("%a%") AND binary_as_string_both.binaryview_col LIKE Utf8View("%a%") +02)--TableScan: binary_as_string_both projection=[binary_col, largebinary_col, binaryview_col], partial_filters=[binary_as_string_both.binary_col LIKE Utf8View("%a%"), binary_as_string_both.largebinary_col LIKE Utf8View("%a%"), binary_as_string_both.binaryview_col LIKE Utf8View("%a%")] physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 -02)--FilterExec: binary_col@0 LIKE %a% +02)--FilterExec: binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a% 03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col], predicate=binary_col@0 LIKE %a% +04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/binary_as_string.parquet]]}, projection=[binary_col, largebinary_col, binaryview_col], predicate=binary_col@0 LIKE %a% AND largebinary_col@1 LIKE %a% AND binaryview_col@2 LIKE %a% + +statement ok +drop table binary_as_string_both; # Read a parquet file with binary data in a FixedSizeBinary column