Skip to content

Commit

Permalink
[SPARK-51049][CORE] Increase S3A Vector IO threshold for range merge
Browse files · Browse the repository at this point in the history
  • Loading branch information
dongjoon-hyun committed Jan 31, 2025
1 parent ece1470 commit 9787c33
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,10 @@ class SparkContext(config: SparkConf) extends Logging {
if (!_conf.contains("spark.app.name")) {
throw new SparkException("An application name must be set in your configuration")
}
// HADOOP-19229 Vector IO on cloud storage: increase threshold for range merging
// We can remove this after Apache Hadoop 3.4.2 releases
conf.setIfMissing("spark.hadoop.fs.s3a.vectored.read.min.seek.size", "128K")
conf.setIfMissing("spark.hadoop.fs.s3a.vectored.read.max.merged.size", "2M")
// This should be set as early as possible.
SparkContext.fillMissingMagicCommitterConfsIfNeeded(_conf)

Expand Down

0 comments on commit 9787c33

Please sign in to comment.