diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala index c95b78858322..6a473cc54f7e 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala @@ -764,5 +764,48 @@ class GlutenClickHouseMergeTreeWriteOnS3Suite } } } + + test("GLUTEN-6750: Optimize error if file metadata not exist") { + spark.sql(s""" + |DROP TABLE IF EXISTS lineitem_mergetree_bucket_s3; + |""".stripMargin) + + spark.sql(s""" + |CREATE TABLE IF NOT EXISTS lineitem_mergetree_bucket_s3 + |( + | l_orderkey bigint, + | l_partkey bigint, + | l_suppkey bigint, + | l_linenumber bigint, + | l_quantity double, + | l_extendedprice double, + | l_discount double, + | l_tax double, + | l_returnflag string, + | l_linestatus string, + | l_shipdate date, + | l_commitdate date, + | l_receiptdate date, + | l_shipinstruct string, + | l_shipmode string, + | l_comment string + |) + |USING clickhouse + |PARTITIONED BY (l_returnflag) + |CLUSTERED BY (l_orderkey) + |${if (sparkVersion.equals("3.2")) "" else "SORTED BY (l_partkey)"} INTO 4 BUCKETS + |LOCATION 's3a://$BUCKET_NAME/lineitem_mergetree_bucket_s3' + |TBLPROPERTIES (storage_policy='__s3_main') + |""".stripMargin) + + spark.sql(s""" + | insert into table lineitem_mergetree_bucket_s3 + | select /*+ REPARTITION(3) */ * from lineitem + |""".stripMargin) + + FileUtils.deleteDirectory(new File(S3_METADATA_PATH)) + spark.sql("optimize lineitem_mergetree_bucket_s3") + spark.sql("drop table lineitem_mergetree_bucket_s3") + } } // scalastyle:off line.size.limit diff --git a/cpp-ch/local-engine/local_engine_jni.cpp b/cpp-ch/local-engine/local_engine_jni.cpp index c4e8ec67b106..db0dd8b623b6 100644 --- 
a/cpp-ch/local-engine/local_engine_jni.cpp +++ b/cpp-ch/local-engine/local_engine_jni.cpp @@ -995,6 +995,8 @@ JNIEXPORT jstring Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn // each task using its own CustomStorageMergeTree, don't reuse auto temp_storage = local_engine::MergeTreeRelParser::copyToVirtualStorage(merge_tree_table, context); + // prefetch the metadata of all needed parts before the merge + local_engine::restoreMetaData(temp_storage, merge_tree_table, *context); local_engine::TempStorageFreer freer{temp_storage->getStorageID()}; // to release temp CustomStorageMergeTree with RAII std::vector selected_parts = local_engine::StorageMergeTreeFactory::instance().getDataPartsByNames(