Skip to content

Commit

Permalink
[GLUTEN-6750][CH] Fix optimize error if file mappings not loaded (#6753)
Browse files Browse the repository at this point in the history
  • Loading branch information
lwz9103 authored Aug 9, 2024
1 parent 061efb1 commit 4b7b922
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -764,5 +764,48 @@ class GlutenClickHouseMergeTreeWriteOnS3Suite
}
}
}

// Regression test for GLUTEN-6750: OPTIMIZE is run on an S3-backed MergeTree table
// after the directory at S3_METADATA_PATH has been deleted. The test makes no
// explicit assertion; it passes as long as none of the statements throws.
test("GLUTEN-6750: Optimize error if file metadata not exist") {
  // Make sure no table from a previous run is left behind.
  spark.sql("""
              |DROP TABLE IF EXISTS lineitem_mergetree_bucket_s3;
              |""".stripMargin)

  // The SORTED BY clause is left out when sparkVersion is "3.2".
  val sortedByClause =
    if (sparkVersion.equals("3.2")) "" else "SORTED BY (l_partkey)"

  // Partitioned, bucketed table stored under the S3 storage policy.
  spark.sql(s"""
               |CREATE TABLE IF NOT EXISTS lineitem_mergetree_bucket_s3
               |(
               | l_orderkey bigint,
               | l_partkey bigint,
               | l_suppkey bigint,
               | l_linenumber bigint,
               | l_quantity double,
               | l_extendedprice double,
               | l_discount double,
               | l_tax double,
               | l_returnflag string,
               | l_linestatus string,
               | l_shipdate date,
               | l_commitdate date,
               | l_receiptdate date,
               | l_shipinstruct string,
               | l_shipmode string,
               | l_comment string
               |)
               |USING clickhouse
               |PARTITIONED BY (l_returnflag)
               |CLUSTERED BY (l_orderkey)
               |$sortedByClause INTO 4 BUCKETS
               |LOCATION 's3a://$BUCKET_NAME/lineitem_mergetree_bucket_s3'
               |TBLPROPERTIES (storage_policy='__s3_main')
               |""".stripMargin)

  // Populate the table from lineitem, repartitioned into 3 partitions.
  spark.sql("""
              | insert into table lineitem_mergetree_bucket_s3
              | select /*+ REPARTITION(3) */ * from lineitem
              |""".stripMargin)

  // Remove the metadata directory so that OPTIMIZE runs without it being present.
  val metadataDir = new File(S3_METADATA_PATH)
  FileUtils.deleteDirectory(metadataDir)

  spark.sql("optimize lineitem_mergetree_bucket_s3")
  spark.sql("drop table lineitem_mergetree_bucket_s3")
}
}
// scalastyle:off line.size.limit
2 changes: 2 additions & 0 deletions cpp-ch/local-engine/local_engine_jni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,8 @@ JNIEXPORT jstring Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn
// each task using its own CustomStorageMergeTree, don't reuse
auto temp_storage
= local_engine::MergeTreeRelParser::copyToVirtualStorage(merge_tree_table, context);
// prefetch all needed parts metadata before merge
local_engine::restoreMetaData(temp_storage, merge_tree_table, *context);

local_engine::TempStorageFreer freer{temp_storage->getStorageID()}; // to release temp CustomStorageMergeTree with RAII
std::vector<DB::DataPartPtr> selected_parts = local_engine::StorageMergeTreeFactory::instance().getDataPartsByNames(
Expand Down

0 comments on commit 4b7b922

Please sign in to comment.