From bcd9c3eb660e1e0ec03e027a35eba323b4c5d53d Mon Sep 17 00:00:00 2001 From: "wangguangxin.cn" Date: Wed, 12 Jun 2024 00:27:03 +0800 Subject: [PATCH] Revert "[GLUTEN-5414] [VL] Support arrow csv option and schema (#5850)" This reverts commit ad817ed51f381d037a9de250e639bb6514e94242. --- .github/workflows/velox_docker.yml | 71 +- .github/workflows/velox_docker_cache.yml | 8 +- .../datasource/ArrowCSVFileFormat.scala | 111 +-- .../datasource/ArrowCSVOptionConverter.scala | 62 -- .../datasource/ArrowConvertorRule.scala | 12 +- .../v2/ArrowCSVPartitionReaderFactory.scala | 79 +- .../gluten/datasource/v2/ArrowCSVTable.scala | 12 - .../extension/ArrowScanReplaceRule.scala | 3 +- .../datasource/csv/student_option.csv | 4 - .../datasource/csv/student_option_schema.csv | 4 - .../datasource/csv/student_option_str.csv | 4 - .../gluten/execution/TestOperator.scala | 235 +---- ep/build-velox/src/build_velox.sh | 3 +- ep/build-velox/src/get_velox.sh | 2 - .../modify_arrow_dataset_scan_option.patch | 883 ------------------ ep/build-velox/src/modify_velox.patch | 3 +- .../WholeStageTransformerSuite.scala | 20 - gluten-data/pom.xml | 4 +- .../org/apache/gluten/utils/ArrowUtil.scala | 147 ++- gluten-ut/spark32/pom.xml | 2 +- .../utils/velox/VeloxTestSettings.scala | 9 - gluten-ut/spark33/pom.xml | 2 +- .../utils/velox/VeloxTestSettings.scala | 9 - gluten-ut/spark34/pom.xml | 2 +- .../utils/velox/VeloxTestSettings.scala | 9 - gluten-ut/spark35/pom.xml | 2 +- .../utils/velox/VeloxTestSettings.scala | 11 - .../datasources/csv/GlutenCSVSuite.scala | 65 +- gluten-ut/test/pom.xml | 2 +- pom.xml | 1 - 30 files changed, 311 insertions(+), 1470 deletions(-) delete mode 100644 backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVOptionConverter.scala delete mode 100644 backends-velox/src/test/resources/datasource/csv/student_option.csv delete mode 100644 backends-velox/src/test/resources/datasource/csv/student_option_schema.csv delete mode 100644 backends-velox/src/test/resources/datasource/csv/student_option_str.csv delete mode 100644 ep/build-velox/src/modify_arrow_dataset_scan_option.patch diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml index 6c1be4344c71a..864a3e1db4d54 100644 --- a/.github/workflows/velox_docker.yml +++ b/.github/workflows/velox_docker.yml @@ -61,24 +61,16 @@ jobs: id: cache uses: actions/cache/restore@v3 with: - path: | - ./cpp/build/releases/ - ~/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases/ key: cache-velox-build-${{ hashFiles('./cache-key') }} - name: Build Gluten Velox third party if: ${{ steps.cache.outputs.cache-hit != 'true' }} run: | source dev/ci-velox-buildstatic.sh - - name: Upload Artifact Native - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v2 with: path: ./cpp/build/releases/ name: velox-native-lib-centos-7-${{github.sha}} - - name: Upload Artifact Arrow Jar - uses: actions/upload-artifact@v2 - with: - path: /root/.m2/repository/org/apache/arrow/ - name: velox-arrow-jar-centos-7-${{github.sha}} run-tpc-test-ubuntu: needs: build-native-lib-centos-7 @@ -102,16 +94,11 @@ jobs: container: ${{ matrix.os }} steps: - uses: actions/checkout@v2 - - name: Download All Native Artifacts + - name: Download All Artifacts uses: actions/download-artifact@v2 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ - - name: Download All Arrow Jar Artifacts - uses: actions/download-artifact@v2 - with: - name: velox-arrow-jar-centos-7-${{github.sha}} - path: 
/root/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases - name: Setup java and maven run: | if [ "${{ matrix.java }}" = "java-17" ]; then @@ -120,7 +107,6 @@ jobs: apt-get update && apt-get install -y openjdk-8-jdk maven fi apt remove openjdk-11* -y - ls -l /root/.m2/repository/org/apache/arrow/arrow-dataset/15.0.0-gluten/ - name: Build and run TPCH/DS run: | cd $GITHUB_WORKSPACE/ @@ -156,16 +142,11 @@ jobs: container: ${{ matrix.os }} steps: - uses: actions/checkout@v2 - - name: Download All Native Artifacts + - name: Download All Artifacts uses: actions/download-artifact@v2 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ - - name: Download All Arrow Jar Artifacts - uses: actions/download-artifact@v2 - with: - name: velox-arrow-jar-centos-7-${{github.sha}} - path: /root/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases - name: Update mirror list if: matrix.os == 'centos:8' run: | @@ -236,16 +217,11 @@ jobs: sudo docker image prune --all --force > /dev/null df -h - uses: actions/checkout@v2 - - name: Download All Native Artifacts + - name: Download All Artifacts uses: actions/download-artifact@v2 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ - - name: Download All Arrow Jar Artifacts - uses: actions/download-artifact@v2 - with: - name: velox-arrow-jar-centos-7-${{github.sha}} - path: /home/runner/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases - name: Setup java and maven run: | sudo apt-get update @@ -338,16 +314,11 @@ jobs: sudo docker image prune --all --force > /dev/null df -h - uses: actions/checkout@v2 - - name: Download All Native Artifacts + - name: Download All Artifacts uses: actions/download-artifact@v2 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ - - name: Download All Arrow Jar Artifacts - uses: actions/download-artifact@v2 - with: - name: velox-arrow-jar-centos-7-${{github.sha}} - path: /home/runner/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases - name: Setup java and maven run: | sudo apt-get update @@ -431,16 +402,11 @@ jobs: container: centos:8 steps: - uses: actions/checkout@v2 - - name: Download All Native Artifacts + - name: Download All Artifacts uses: actions/download-artifact@v2 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ - - name: Download All Arrow Jar Artifacts - uses: actions/download-artifact@v2 - with: - name: velox-arrow-jar-centos-7-${{github.sha}} - path: /root/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases - name: Update mirror list run: | sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true @@ -497,16 +463,11 @@ jobs: container: ubuntu:22.04 steps: - uses: actions/checkout@v2 - - name: Download All Native Artifacts + - name: Download All Artifacts uses: actions/download-artifact@v2 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ - - name: Download All Arrow Jar Artifacts - uses: actions/download-artifact@v2 - with: - name: velox-arrow-jar-centos-7-${{github.sha}} - path: /root/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases - name: Setup java and maven run: | apt-get update && apt-get install -y openjdk-8-jdk maven wget @@ -547,9 +508,7 @@ jobs: id: cache uses: actions/cache/restore@v3 with: - path: | - ./cpp/build/releases/ - /root/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases/ key: cache-velox-build-centos-8-${{ hashFiles('./cache-key') }} - name: 
Update mirror list run: | diff --git a/.github/workflows/velox_docker_cache.yml b/.github/workflows/velox_docker_cache.yml index 3c05acf7eca21..44271c4fc0d0e 100644 --- a/.github/workflows/velox_docker_cache.yml +++ b/.github/workflows/velox_docker_cache.yml @@ -38,9 +38,7 @@ jobs: uses: actions/cache/restore@v3 with: lookup-only: true - path: | - ./cpp/build/releases/ - /root/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases/ key: cache-velox-build-${{ hashFiles('./cache-key') }} - name: Build Gluten Velox third party if: steps.check-cache.outputs.cache-hit != 'true' @@ -51,9 +49,7 @@ jobs: id: cache uses: actions/cache/save@v3 with: - path: | - ./cpp/build/releases/ - /root/.m2/repository/org/apache/arrow/ + path: ./cpp/build/releases/ key: cache-velox-build-${{ hashFiles('./cache-key') }} # ccache-native-lib-ubuntu-velox-ut: diff --git a/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVFileFormat.scala b/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVFileFormat.scala index 7c3ca8fc8cde4..0f6813d8fc6a1 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVFileFormat.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVFileFormat.scala @@ -40,10 +40,8 @@ import org.apache.spark.sql.types.{StructField, StructType} import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.SerializableConfiguration -import org.apache.arrow.c.ArrowSchema import org.apache.arrow.dataset.file.FileSystemDatasetFactory import org.apache.arrow.dataset.scanner.ScanOptions -import org.apache.arrow.dataset.scanner.csv.CsvFragmentScanOptions import org.apache.arrow.memory.BufferAllocator import org.apache.arrow.vector.VectorUnloader import org.apache.arrow.vector.types.pojo.Schema @@ -53,17 +51,11 @@ import org.apache.hadoop.fs.{FileStatus, Path} import java.net.URLDecoder import java.util.Optional -import scala.collection.JavaConverters.{asJavaIterableConverter, asScalaBufferConverter} +import scala.collection.JavaConverters.asScalaBufferConverter -class ArrowCSVFileFormat(parsedOptions: CSVOptions) - extends FileFormat - with DataSourceRegister - with Logging - with Serializable { +class ArrowCSVFileFormat extends FileFormat with DataSourceRegister with Logging with Serializable { private val fileFormat = org.apache.arrow.dataset.file.FileFormat.CSV - private lazy val pool = ArrowNativeMemoryPool.arrowPool("FileSystem Read") - var fallback = false override def isSplitable( sparkSession: SparkSession, @@ -76,11 +68,9 @@ class ArrowCSVFileFormat(parsedOptions: CSVOptions) sparkSession: SparkSession, options: Map[String, String], files: Seq[FileStatus]): Option[StructType] = { - val arrowConfig = ArrowCSVOptionConverter.convert(parsedOptions) ArrowUtil.readSchema( files, fileFormat, - arrowConfig, ArrowBufferAllocators.contextInstance(), ArrowNativeMemoryPool.arrowPool("infer schema")) } @@ -99,74 +89,51 @@ class ArrowCSVFileFormat(parsedOptions: CSVOptions) val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) val batchSize = sqlConf.columnBatchSize + val caseSensitive = sqlConf.caseSensitiveAnalysis val columnPruning = sqlConf.csvColumnPruning && !requiredSchema.exists(_.name == sparkSession.sessionState.conf.columnNameOfCorruptRecord) + val parsedOptions = new CSVOptions( + options, + columnPruning, + sparkSession.sessionState.conf.sessionLocalTimeZone, + sparkSession.sessionState.conf.columnNameOfCorruptRecord) val actualFilters = 
filters.filterNot(_.references.contains(parsedOptions.columnNameOfCorruptRecord)) (file: PartitionedFile) => { - val actualDataSchema = StructType( - dataSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)) - val actualRequiredSchema = StructType( - requiredSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)) ArrowCSVFileFormat.checkHeader( file, - actualDataSchema, - actualRequiredSchema, + dataSchema, + requiredSchema, parsedOptions, actualFilters, broadcastedHadoopConf.value.value) - - val arrowConfig = ArrowCSVOptionConverter.convert(parsedOptions) - val allocator = ArrowBufferAllocators.contextInstance() - // todo predicate validation / pushdown - val fileNames = ArrowUtil - .readArrowFileColumnNames( + val factory = + ArrowUtil.makeArrowDiscovery( URLDecoder.decode(file.filePath.toString, "UTF-8"), fileFormat, - arrowConfig, ArrowBufferAllocators.contextInstance(), - pool) - val tokenIndexArr = - actualRequiredSchema - .map(f => java.lang.Integer.valueOf(actualDataSchema.indexOf(f))) - .toArray - val fileIndex = tokenIndexArr.filter(_ < fileNames.length) - val requestSchema = new StructType( - fileIndex - .map(index => StructField(fileNames(index), actualDataSchema(index).dataType))) - val missingIndex = tokenIndexArr.filter(_ >= fileNames.length) - val missingSchema = new StructType(missingIndex.map(actualDataSchema(_))) + ArrowNativeMemoryPool.arrowPool("FileSystemDatasetFactory") + ) + // todo predicate validation / pushdown + val fileFields = factory.inspect().getFields.asScala // TODO: support array/map/struct types in out-of-order schema reading. - val cSchema: ArrowSchema = ArrowSchema.allocateNew(allocator) - val cSchema2: ArrowSchema = ArrowSchema.allocateNew(allocator) try { - ArrowCSVOptionConverter.schema(requestSchema, cSchema, allocator, arrowConfig) - val factory = - ArrowUtil.makeArrowDiscovery( - URLDecoder.decode(file.filePath.toString, "UTF-8"), - fileFormat, - Optional.of(arrowConfig), - ArrowBufferAllocators.contextInstance(), - pool) - val fields = factory.inspect().getFields - val actualReadFields = new Schema( - fileIndex.map(index => fields.get(index)).toIterable.asJava) - ArrowCSVOptionConverter.schema(requestSchema, cSchema2, allocator, arrowConfig) + val actualReadFields = + ArrowUtil.getRequestedField(requiredSchema, fileFields, caseSensitive) ArrowCSVFileFormat .readArrow( ArrowBufferAllocators.contextInstance(), file, actualReadFields, - missingSchema, + caseSensitive, + requiredSchema, partitionSchema, factory, - batchSize, - arrowConfig) + batchSize) .asInstanceOf[Iterator[InternalRow]] } catch { case e: SchemaMismatchException => logWarning(e.getMessage) - fallback = true val iter = ArrowCSVFileFormat.fallbackReadVanilla( dataSchema, requiredSchema, @@ -181,10 +148,8 @@ class ArrowCSVFileFormat(parsedOptions: CSVOptions) .rowToColumn(schema, batchSize, rows) .asInstanceOf[Iterator[InternalRow]] case d: Exception => throw d - } finally { - cSchema.close() - cSchema2.close() } + } } @@ -219,23 +184,28 @@ object ArrowCSVFileFormat { allocator: BufferAllocator, file: PartitionedFile, actualReadFields: Schema, - missingSchema: StructType, + caseSensitive: Boolean, + requiredSchema: StructType, partitionSchema: StructType, factory: FileSystemDatasetFactory, - batchSize: Int, - arrowConfig: CsvFragmentScanOptions): Iterator[ColumnarBatch] = { + batchSize: Int): Iterator[ColumnarBatch] = { + val compare = ArrowUtil.compareStringFunc(caseSensitive) val actualReadFieldNames = 
actualReadFields.getFields.asScala.map(_.getName).toArray + val actualReadSchema = new StructType( + actualReadFieldNames.map(f => requiredSchema.find(field => compare(f, field.name)).get)) val dataset = factory.finish(actualReadFields) - val scanOptions = new ScanOptions.Builder(batchSize) - .columns(Optional.of(actualReadFieldNames)) - .fragmentScanOptions(arrowConfig) - .build() + + val hasMissingColumns = actualReadFields.getFields.size() != requiredSchema.size + + val scanOptions = new ScanOptions(batchSize, Optional.of(actualReadFieldNames)) val scanner = dataset.newScan(scanOptions) val partitionVectors = ArrowUtil.loadPartitionColumns(batchSize, partitionSchema, file.partitionValues) - val nullVectors = if (missingSchema.nonEmpty) { + val nullVectors = if (hasMissingColumns) { + val missingSchema = + new StructType(requiredSchema.filterNot(actualReadSchema.contains).toArray) ArrowUtil.loadMissingColumns(batchSize, missingSchema) } else { Array.empty[ArrowWritableColumnVector] @@ -255,7 +225,8 @@ object ArrowCSVFileFormat { val batch = ArrowUtil.loadBatch( allocator, unloader.getRecordBatch, - actualReadFields, + actualReadSchema, + requiredSchema, partitionVectors, nullVectors) batch @@ -275,8 +246,8 @@ object ArrowCSVFileFormat { def checkHeader( file: PartitionedFile, - actualDataSchema: StructType, - actualRequiredSchema: StructType, + dataSchema: StructType, + requiredSchema: StructType, parsedOptions: CSVOptions, actualFilters: Seq[Filter], conf: Configuration): Unit = { @@ -284,6 +255,10 @@ object ArrowCSVFileFormat { if (!isStartOfFile) { return } + val actualDataSchema = StructType( + dataSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)) + val actualRequiredSchema = StructType( + requiredSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)) val parser = new UnivocityParser(actualDataSchema, actualRequiredSchema, parsedOptions, actualFilters) val schema = if (parsedOptions.columnPruning) actualRequiredSchema else actualDataSchema diff --git a/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVOptionConverter.scala b/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVOptionConverter.scala deleted file mode 100644 index 7d6a54c2ac7a6..0000000000000 --- a/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVOptionConverter.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.gluten.datasource - -import org.apache.gluten.utils.ArrowAbiUtil - -import org.apache.spark.sql.catalyst.csv.CSVOptions -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.utils.SparkSchemaUtil - -import com.google.common.collect.ImmutableMap -import org.apache.arrow.c.ArrowSchema -import org.apache.arrow.dataset.scanner.csv.{CsvConvertOptions, CsvFragmentScanOptions} -import org.apache.arrow.memory.BufferAllocator - -import java.util - -object ArrowCSVOptionConverter { - def convert(option: CSVOptions): CsvFragmentScanOptions = { - val parseMap = new util.HashMap[String, String]() - val default = new CSVOptions( - CaseInsensitiveMap(Map()), - option.columnPruning, - SparkSchemaUtil.getLocalTimezoneID) - parseMap.put("strings_can_be_null", "true") - if (option.delimiter != default.delimiter) { - parseMap.put("delimiter", option.delimiter) - } - if (option.escapeQuotes != default.escapeQuotes) { - parseMap.put("quoting", (!option.escapeQuotes).toString) - } - - val convertOptions = new CsvConvertOptions(ImmutableMap.of()) - new CsvFragmentScanOptions(convertOptions, ImmutableMap.of(), parseMap) - } - - def schema( - requiredSchema: StructType, - cSchema: ArrowSchema, - allocator: BufferAllocator, - option: CsvFragmentScanOptions): Unit = { - val schema = SparkSchemaUtil.toArrowSchema(requiredSchema) - ArrowAbiUtil.exportSchema(allocator, schema, cSchema) - option.getConvertOptions.setArrowSchema(cSchema) - } - -} diff --git a/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowConvertorRule.scala b/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowConvertorRule.scala index 2778710155bf9..dab1ffd3b9e3f 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowConvertorRule.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowConvertorRule.scala @@ -19,7 +19,6 @@ package org.apache.gluten.datasource import org.apache.gluten.backendsapi.BackendsApiManager import org.apache.gluten.datasource.v2.ArrowCSVTable import org.apache.gluten.sql.shims.SparkShimLoader -import org.apache.gluten.utils.LogicalPlanSelector import org.apache.spark.annotation.Experimental import org.apache.spark.sql.SparkSession @@ -40,7 +39,7 @@ import scala.collection.convert.ImplicitConversions.`map AsScala` @Experimental case class ArrowConvertorRule(session: SparkSession) extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = LogicalPlanSelector.maybe(session, plan) { + override def apply(plan: LogicalPlan): LogicalPlan = { if (!BackendsApiManager.getSettings.enableNativeArrowReadFiles()) { return plan } @@ -50,11 +49,7 @@ case class ArrowConvertorRule(session: SparkSession) extends Rule[LogicalPlan] { _, _, _) if validate(session, dataSchema, options) => - val csvOptions = new CSVOptions( - options, - columnPruning = session.sessionState.conf.csvColumnPruning, - session.sessionState.conf.sessionLocalTimeZone) - l.copy(relation = r.copy(fileFormat = new ArrowCSVFileFormat(csvOptions))(session)) + l.copy(relation = r.copy(fileFormat = new ArrowCSVFileFormat())(session)) case d @ DataSourceV2Relation( t @ CSVTable( name, @@ -93,8 +88,7 @@ case class ArrowConvertorRule(session: SparkSession) extends Rule[LogicalPlan] { } private def checkCsvOptions(csvOptions: CSVOptions, timeZone: String): Boolean = { - csvOptions.headerFlag && !csvOptions.multiLine && - csvOptions.delimiter.length == 1 && + csvOptions.headerFlag && 
!csvOptions.multiLine && csvOptions.delimiter == "," && csvOptions.quote == '\"' && csvOptions.escape == '\\' && csvOptions.lineSeparator.isEmpty && diff --git a/backends-velox/src/main/scala/org/apache/gluten/datasource/v2/ArrowCSVPartitionReaderFactory.scala b/backends-velox/src/main/scala/org/apache/gluten/datasource/v2/ArrowCSVPartitionReaderFactory.scala index 4af5022a6252a..ddc7f797fb938 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/datasource/v2/ArrowCSVPartitionReaderFactory.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/datasource/v2/ArrowCSVPartitionReaderFactory.scala @@ -16,7 +16,7 @@ */ package org.apache.gluten.datasource.v2 -import org.apache.gluten.datasource.{ArrowCSVFileFormat, ArrowCSVOptionConverter} +import org.apache.gluten.datasource.ArrowCSVFileFormat import org.apache.gluten.exception.SchemaMismatchException import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators import org.apache.gluten.memory.arrow.pool.ArrowNativeMemoryPool @@ -31,17 +31,15 @@ import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.execution.datasources.v2.FilePartitionReaderFactory import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.{SerializableConfiguration, TaskResources} -import org.apache.arrow.c.ArrowSchema -import org.apache.arrow.vector.types.pojo.Schema +import org.apache.arrow.dataset.file.FileFormat import java.net.URLDecoder -import java.util.Optional -import scala.collection.JavaConverters.asJavaIterableConverter +import scala.collection.JavaConverters.asScalaBufferConverter case class ArrowCSVPartitionReaderFactory( sqlConf: SQLConf, @@ -55,9 +53,8 @@ case class ArrowCSVPartitionReaderFactory( with Logging { private val batchSize = sqlConf.parquetVectorizedReaderBatchSize + private val caseSensitive: Boolean = sqlConf.caseSensitiveAnalysis private val csvColumnPruning: Boolean = sqlConf.csvColumnPruning - private val fileFormat = org.apache.arrow.dataset.file.FileFormat.CSV - var fallback = false override def supportColumnarReads(partition: InputPartition): Boolean = true @@ -70,12 +67,12 @@ case class ArrowCSVPartitionReaderFactory( partitionedFile: PartitionedFile): PartitionReader[ColumnarBatch] = { val actualDataSchema = StructType( dataSchema.filterNot(_.name == options.columnNameOfCorruptRecord)) - val actualRequiredSchema = StructType( + val actualReadDataSchema = StructType( readDataSchema.filterNot(_.name == options.columnNameOfCorruptRecord)) ArrowCSVFileFormat.checkHeader( partitionedFile, actualDataSchema, - actualRequiredSchema, + actualReadDataSchema, options, filters, broadcastedConf.value.value) @@ -90,54 +87,29 @@ case class ArrowCSVPartitionReaderFactory( ArrowBufferAllocators.contextInstance(), ArrowNativeMemoryPool.arrowPool("FileSystemFactory")) } - val arrowConfig = ArrowCSVOptionConverter.convert(options) - val fileNames = ArrowUtil - .readArrowFileColumnNames( - URLDecoder.decode(partitionedFile.filePath.toString, "UTF-8"), - fileFormat, - arrowConfig, - ArrowBufferAllocators.contextInstance(), - pool) - val tokenIndexArr = - actualRequiredSchema.map(f => java.lang.Integer.valueOf(actualDataSchema.indexOf(f))).toArray - val fileIndex = tokenIndexArr.filter(_ < fileNames.length) - val requestSchema = new StructType( - fileIndex - .map(index => 
StructField(fileNames(index), actualDataSchema(index).dataType))) - val missingIndex = tokenIndexArr.filter(_ >= fileNames.length) - val missingSchema = new StructType(missingIndex.map(actualDataSchema(_))) - // TODO: support array/map/struct types in out-of-order schema reading. - val cSchema: ArrowSchema = ArrowSchema.allocateNew(allocator) - val cSchema2: ArrowSchema = ArrowSchema.allocateNew(allocator) + val factory = ArrowUtil.makeArrowDiscovery( + URLDecoder.decode(partitionedFile.filePath.toString(), "UTF-8"), + FileFormat.CSV, + allocator, + pool) + val parquetFileFields = factory.inspect().getFields.asScala // TODO: support array/map/struct types in out-of-order schema reading. val iter = try { - ArrowCSVOptionConverter.schema(requestSchema, cSchema, allocator, arrowConfig) - val factory = - ArrowUtil.makeArrowDiscovery( - URLDecoder.decode(partitionedFile.filePath.toString, "UTF-8"), - fileFormat, - Optional.of(arrowConfig), - ArrowBufferAllocators.contextInstance(), - pool) - val fields = factory.inspect().getFields - val actualReadFields = new Schema( - fileIndex.map(index => fields.get(index)).toIterable.asJava) - ArrowCSVOptionConverter.schema(requestSchema, cSchema2, allocator, arrowConfig) - ArrowCSVFileFormat - .readArrow( - ArrowBufferAllocators.contextInstance(), - partitionedFile, - actualReadFields, - missingSchema, - readPartitionSchema, - factory, - batchSize, - arrowConfig) + val actualReadFields = + ArrowUtil.getRequestedField(readDataSchema, parquetFileFields, caseSensitive) + ArrowCSVFileFormat.readArrow( + allocator, + partitionedFile, + actualReadFields, + caseSensitive, + readDataSchema, + readPartitionSchema, + factory, + batchSize) } catch { case e: SchemaMismatchException => logWarning(e.getMessage) - fallback = true val iter = ArrowCSVFileFormat.fallbackReadVanilla( dataSchema, readDataSchema, @@ -153,9 +125,6 @@ case class ArrowCSVPartitionReaderFactory( partitionedFile) ArrowCSVFileFormat.rowToColumn(schema, batchSize, rows) case d: Exception => throw d - } finally { - cSchema.close() - cSchema2.close() } new PartitionReader[ColumnarBatch] { diff --git a/backends-velox/src/main/scala/org/apache/gluten/datasource/v2/ArrowCSVTable.scala b/backends-velox/src/main/scala/org/apache/gluten/datasource/v2/ArrowCSVTable.scala index 02485975e7055..aa7f737f9cfcd 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/datasource/v2/ArrowCSVTable.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/datasource/v2/ArrowCSVTable.scala @@ -16,13 +16,11 @@ */ package org.apache.gluten.datasource.v2 -import org.apache.gluten.datasource.ArrowCSVOptionConverter import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators import org.apache.gluten.memory.arrow.pool.ArrowNativeMemoryPool import org.apache.gluten.utils.ArrowUtil import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.csv.CSVOptions import org.apache.spark.sql.connector.read.ScanBuilder import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} import org.apache.spark.sql.execution.datasources.FileFormat @@ -33,8 +31,6 @@ import org.apache.spark.util.TaskResources import org.apache.hadoop.fs.FileStatus -import scala.collection.JavaConverters.mapAsScalaMapConverter - case class ArrowCSVTable( name: String, sparkSession: SparkSession, @@ -52,17 +48,9 @@ case class ArrowCSVTable( } else { (ArrowBufferAllocators.contextInstance(), ArrowNativeMemoryPool.arrowPool("inferSchema")) } - val parsedOptions: CSVOptions = new CSVOptions( - options.asScala.toMap, - 
columnPruning = sparkSession.sessionState.conf.csvColumnPruning, - sparkSession.sessionState.conf.sessionLocalTimeZone, - sparkSession.sessionState.conf.columnNameOfCorruptRecord - ) - val arrowConfig = ArrowCSVOptionConverter.convert(parsedOptions) ArrowUtil.readSchema( files.head, org.apache.arrow.dataset.file.FileFormat.CSV, - arrowConfig, allocator, pool ) diff --git a/backends-velox/src/main/scala/org/apache/gluten/extension/ArrowScanReplaceRule.scala b/backends-velox/src/main/scala/org/apache/gluten/extension/ArrowScanReplaceRule.scala index dba8df5cf1a1f..adfc6ca742c93 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/extension/ArrowScanReplaceRule.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/extension/ArrowScanReplaceRule.scala @@ -19,7 +19,6 @@ package org.apache.gluten.extension import org.apache.gluten.datasource.ArrowCSVFileFormat import org.apache.gluten.datasource.v2.ArrowCSVScan import org.apache.gluten.execution.datasource.v2.ArrowBatchScanExec -import org.apache.gluten.utils.PhysicalPlanSelector import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.rules.Rule @@ -27,7 +26,7 @@ import org.apache.spark.sql.execution.{ArrowFileSourceScanExec, FileSourceScanEx import org.apache.spark.sql.execution.datasources.v2.BatchScanExec case class ArrowScanReplaceRule(spark: SparkSession) extends Rule[SparkPlan] { - override def apply(plan: SparkPlan): SparkPlan = PhysicalPlanSelector.maybe(spark, plan) { + override def apply(plan: SparkPlan): SparkPlan = { plan.transformUp { case plan: FileSourceScanExec if plan.relation.fileFormat.isInstanceOf[ArrowCSVFileFormat] => ArrowFileSourceScanExec(plan) diff --git a/backends-velox/src/test/resources/datasource/csv/student_option.csv b/backends-velox/src/test/resources/datasource/csv/student_option.csv deleted file mode 100644 index 919b7387b53c0..0000000000000 --- a/backends-velox/src/test/resources/datasource/csv/student_option.csv +++ /dev/null @@ -1,4 +0,0 @@ -Name;Language -Juno;Java -Peter;Python -Celin;C++ diff --git a/backends-velox/src/test/resources/datasource/csv/student_option_schema.csv b/backends-velox/src/test/resources/datasource/csv/student_option_schema.csv deleted file mode 100644 index be8459a217393..0000000000000 --- a/backends-velox/src/test/resources/datasource/csv/student_option_schema.csv +++ /dev/null @@ -1,4 +0,0 @@ -id,name,language -1,Juno,Java -2,Peter,Python -3,Celin,C++ diff --git a/backends-velox/src/test/resources/datasource/csv/student_option_str.csv b/backends-velox/src/test/resources/datasource/csv/student_option_str.csv deleted file mode 100644 index b4214b390caea..0000000000000 --- a/backends-velox/src/test/resources/datasource/csv/student_option_str.csv +++ /dev/null @@ -1,4 +0,0 @@ -Name,Language -Juno,Java -Peter,Python -,C++ diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index a892b6f313a4e..166529f193cc0 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{DecimalType, IntegerType, StringType, StructField, StructType} +import 
org.apache.spark.sql.types.{DecimalType, StringType, StructField, StructType} import java.util.concurrent.TimeUnit @@ -588,153 +588,42 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla } test("csv scan") { - val df = runAndCompare("select * from student") { - val filePath = rootPath + "/datasource/csv/student.csv" - val df = spark.read - .format("csv") - .option("header", "true") - .load(filePath) - df.createOrReplaceTempView("student") - } - val plan = df.queryExecution.executedPlan - assert(plan.find(s => s.isInstanceOf[ColumnarToRowExec]).isDefined) - assert(plan.find(_.isInstanceOf[ArrowFileSourceScanExec]).isDefined) - val scan = plan.find(_.isInstanceOf[ArrowFileSourceScanExec]).toList.head - assert( - scan - .asInstanceOf[ArrowFileSourceScanExec] - .relation - .fileFormat - .isInstanceOf[ArrowCSVFileFormat]) - } - - test("csv scan with option string as null") { - val df = runAndCompare("select * from student") { - val filePath = rootPath + "/datasource/csv/student_option_str.csv" - // test strings as null - val df = spark.read - .format("csv") - .option("header", "true") - .load(filePath) - df.createOrReplaceTempView("student") - } - val plan = df.queryExecution.executedPlan - assert(plan.find(_.isInstanceOf[ColumnarToRowExec]).isDefined) - assert(plan.find(_.isInstanceOf[ArrowFileSourceScanExec]).isDefined) - } - - test("csv scan with option delimiter") { - val df = runAndCompare("select * from student") { - val filePath = rootPath + "/datasource/csv/student_option.csv" - val df = spark.read - .format("csv") - .option("header", "true") - .option("delimiter", ";") - .load(filePath) - df.createOrReplaceTempView("student") - } - val plan = df.queryExecution.executedPlan - assert(plan.find(s => s.isInstanceOf[ColumnarToRowExec]).isDefined) - assert(plan.find(_.isInstanceOf[ArrowFileSourceScanExec]).isDefined) - } - - test("csv scan with schema") { - val df = runAndCompare("select * from student") { - val filePath = rootPath + "/datasource/csv/student_option_schema.csv" - val schema = new StructType() - .add("id", StringType) - .add("name", StringType) - .add("language", StringType) - val df = spark.read - .schema(schema) - .format("csv") - .option("header", "true") - .load(filePath) - df.createOrReplaceTempView("student") - } - val plan = df.queryExecution.executedPlan - assert(plan.find(s => s.isInstanceOf[ColumnarToRowExec]).isDefined) - val scan = plan.find(_.isInstanceOf[ArrowFileSourceScanExec]) - assert(scan.isDefined) - assert( - !scan.get - .asInstanceOf[ArrowFileSourceScanExec] - .original - .relation - .fileFormat - .asInstanceOf[ArrowCSVFileFormat] - .fallback) - } - - test("csv scan with missing columns") { - val df = runAndCompare("select languagemissing, language, id_new_col from student") { - val filePath = rootPath + "/datasource/csv/student_option_schema.csv" - val schema = new StructType() - .add("id_new_col", IntegerType) - .add("name", StringType) - .add("language", StringType) - .add("languagemissing", StringType) - val df = spark.read - .schema(schema) - .format("csv") - .option("header", "true") - .load(filePath) - df.createOrReplaceTempView("student") - } - val plan = df.queryExecution.executedPlan - assert(plan.find(s => s.isInstanceOf[VeloxColumnarToRowExec]).isDefined) - assert(plan.find(_.isInstanceOf[ArrowFileSourceScanExec]).isDefined) - } - - test("csv scan with different name") { - val df = runAndCompare("select * from student") { - val filePath = rootPath + "/datasource/csv/student_option_schema.csv" - val schema = new 
StructType() - .add("id_new_col", StringType) - .add("name", StringType) - .add("language", StringType) - val df = spark.read - .schema(schema) - .format("csv") - .option("header", "true") - .load(filePath) - df.createOrReplaceTempView("student") - } - val plan = df.queryExecution.executedPlan - assert(plan.find(s => s.isInstanceOf[ColumnarToRowExec]).isDefined) - assert(plan.find(_.isInstanceOf[ArrowFileSourceScanExec]).isDefined) - - val df2 = runAndCompare("select * from student_schema") { - val filePath = rootPath + "/datasource/csv/student_option_schema.csv" - val schema = new StructType() - .add("name", StringType) - .add("language", StringType) - val df = spark.read - .schema(schema) - .format("csv") - .option("header", "true") - .load(filePath) - df.createOrReplaceTempView("student_schema") + val filePath = rootPath + "/datasource/csv/student.csv" + val df = spark.read + .format("csv") + .option("header", "true") + .load(filePath) + df.createOrReplaceTempView("student") + runQueryAndCompare("select * from student") { + df => + val plan = df.queryExecution.executedPlan + assert(plan.find(s => s.isInstanceOf[ColumnarToRowExec]).isDefined) + assert(plan.find(_.isInstanceOf[ArrowFileSourceScanExec]).isDefined) + val scan = plan.find(_.isInstanceOf[ArrowFileSourceScanExec]).toList.head + assert( + scan + .asInstanceOf[ArrowFileSourceScanExec] + .relation + .fileFormat + .isInstanceOf[ArrowCSVFileFormat]) } - val plan2 = df2.queryExecution.executedPlan - assert(plan2.find(s => s.isInstanceOf[ColumnarToRowExec]).isDefined) - assert(plan2.find(_.isInstanceOf[ArrowFileSourceScanExec]).isDefined) } test("csv scan with filter") { - val df = runAndCompare("select * from student where Name = 'Peter'") { - val filePath = rootPath + "/datasource/csv/student.csv" - val df = spark.read - .format("csv") - .option("header", "true") - .load(filePath) - df.createOrReplaceTempView("student") + val filePath = rootPath + "/datasource/csv/student.csv" + val df = spark.read + .format("csv") + .option("header", "true") + .load(filePath) + df.createOrReplaceTempView("student") + runQueryAndCompare("select * from student where Name = 'Peter'") { + df => + assert(df.queryExecution.executedPlan.find(s => s.isInstanceOf[ColumnarToRowExec]).isEmpty) + assert( + df.queryExecution.executedPlan + .find(s => s.isInstanceOf[ArrowFileSourceScanExec]) + .isDefined) } - assert(df.queryExecution.executedPlan.find(s => s.isInstanceOf[ColumnarToRowExec]).isEmpty) - assert( - df.queryExecution.executedPlan - .find(s => s.isInstanceOf[ArrowFileSourceScanExec]) - .isDefined) } test("insert into select from csv") { @@ -756,55 +645,21 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla test("csv scan datasource v2") { withSQLConf("spark.sql.sources.useV1SourceList" -> "") { - val df = runAndCompare("select * from student") { - val filePath = rootPath + "/datasource/csv/student.csv" - val df = spark.read - .format("csv") - .option("header", "true") - .load(filePath) - df.createOrReplaceTempView("student") - } - val plan = df.queryExecution.executedPlan - assert(plan.find(s => s.isInstanceOf[ColumnarToRowExec]).isDefined) - assert(plan.find(s => s.isInstanceOf[ArrowBatchScanExec]).isDefined) - } - } - - test("csv scan datasource v2 with filter") { - withSQLConf("spark.sql.sources.useV1SourceList" -> "") { - val df = runAndCompare("select * from student where Name = 'Peter'") { - val filePath = rootPath + "/datasource/csv/student.csv" - val df = spark.read - .format("csv") - .option("header", "true") 
- .load(filePath) - df.createOrReplaceTempView("student") + val filePath = rootPath + "/datasource/csv/student.csv" + val df = spark.read + .format("csv") + .option("header", "true") + .load(filePath) + df.createOrReplaceTempView("student") + runQueryAndCompare("select * from student") { + checkGlutenOperatorMatch[ArrowBatchScanExec] } - - val plan = df.queryExecution.executedPlan - assert(plan.find(s => s.isInstanceOf[ColumnarToRowExec]).isEmpty) - assert(plan.find(s => s.isInstanceOf[ArrowBatchScanExec]).isDefined) - } - } - - test("csv scan with schema datasource v2") { - withSQLConf("spark.sql.sources.useV1SourceList" -> "") { - val df = runAndCompare("select * from student") { - val filePath = rootPath + "/datasource/csv/student_option_schema.csv" - val schema = new StructType() - .add("id", StringType) - .add("name", StringType) - .add("language", StringType) - val df = spark.read - .schema(schema) - .format("csv") - .option("header", "true") - .load(filePath) - df.createOrReplaceTempView("student") + runQueryAndCompare("select * from student where Name = 'Peter'") { + df => + val plan = df.queryExecution.executedPlan + assert(plan.find(s => s.isInstanceOf[ColumnarToRowExec]).isEmpty) + assert(plan.find(s => s.isInstanceOf[ArrowBatchScanExec]).isDefined) } - val plan = df.queryExecution.executedPlan - assert(plan.find(s => s.isInstanceOf[ColumnarToRowExec]).isDefined) - assert(plan.find(_.isInstanceOf[ArrowBatchScanExec]).isDefined) } } diff --git a/ep/build-velox/src/build_velox.sh b/ep/build-velox/src/build_velox.sh index 0224e95468611..03795d43c5fb0 100755 --- a/ep/build-velox/src/build_velox.sh +++ b/ep/build-velox/src/build_velox.sh @@ -282,6 +282,7 @@ function compile_arrow_java_module() { ARROW_INSTALL_DIR="${ARROW_HOME}/../../install" pushd $ARROW_HOME/java +<<<<<<< HEAD # Because arrow-bom module need the -DprocessAllModules mvn versions:set -DnewVersion=15.0.0-gluten -DprocessAllModules @@ -299,7 +300,7 @@ function compile_arrow_java_module() { -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -N # Arrow Java libraries - mvn install -Parrow-jni -P arrow-c-data -pl c,dataset -am \ + mvn clean install -Parrow-jni -P arrow-c-data -pl dataset,c -am \ -Darrow.c.jni.dist.dir=$ARROW_INSTALL_DIR/lib -Darrow.dataset.jni.dist.dir=$ARROW_INSTALL_DIR/lib -Darrow.cpp.build.dir=$ARROW_INSTALL_DIR/lib \ -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -Dassembly.skipAssembly popd diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 49654d8a8cd55..f9c3cd576a469 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -256,10 +256,8 @@ function apply_compilation_fixes { velox_home=$2 sudo cp ${current_dir}/modify_velox.patch ${velox_home}/ sudo cp ${current_dir}/modify_arrow.patch ${velox_home}/third_party/ - sudo cp ${current_dir}/modify_arrow_dataset_scan_option.patch ${velox_home}/third_party/ git add ${velox_home}/modify_velox.patch # to avoid the file from being deleted by git clean -dffx :/ git add ${velox_home}/third_party/modify_arrow.patch # to avoid the file from being deleted by git clean -dffx :/ - git add ${velox_home}/third_party/modify_arrow_dataset_scan_option.patch # to avoid the file from being deleted by git clean -dffx :/ cd ${velox_home} echo "Applying patch to Velox source code..." 
git apply modify_velox.patch diff --git a/ep/build-velox/src/modify_arrow_dataset_scan_option.patch b/ep/build-velox/src/modify_arrow_dataset_scan_option.patch deleted file mode 100644 index 4af78c030c006..0000000000000 --- a/ep/build-velox/src/modify_arrow_dataset_scan_option.patch +++ /dev/null @@ -1,883 +0,0 @@ -diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc -index 09ab77572..f09377cf9 100644 ---- a/cpp/src/arrow/dataset/file_csv.cc -+++ b/cpp/src/arrow/dataset/file_csv.cc -@@ -24,6 +24,7 @@ - #include - #include - -+#include "arrow/c/bridge.h" - #include "arrow/csv/options.h" - #include "arrow/csv/parser.h" - #include "arrow/csv/reader.h" -@@ -52,6 +53,9 @@ using internal::Executor; - using internal::SerialExecutor; - - namespace dataset { -+namespace { -+inline bool parseBool(const std::string& value) { return value == "true" ? true : false; } -+} // namespace - - struct CsvInspectedFragment : public InspectedFragment { - CsvInspectedFragment(std::vector column_names, -@@ -503,5 +507,33 @@ Future<> CsvFileWriter::FinishInternal() { - return Status::OK(); - } - -+Result> CsvFragmentScanOptions::from( -+ const std::unordered_map& configs) { -+ std::shared_ptr options = -+ std::make_shared(); -+ for (auto const& it : configs) { -+ auto& key = it.first; -+ auto& value = it.second; -+ if (key == "delimiter") { -+ options->parse_options.delimiter = value.data()[0]; -+ } else if (key == "quoting") { -+ options->parse_options.quoting = parseBool(value); -+ } else if (key == "column_types") { -+ int64_t schema_address = std::stol(value); -+ ArrowSchema* cSchema = reinterpret_cast(schema_address); -+ ARROW_ASSIGN_OR_RAISE(auto schema, arrow::ImportSchema(cSchema)); -+ auto& column_types = options->convert_options.column_types; -+ for (auto field : schema->fields()) { -+ column_types[field->name()] = field->type(); -+ } -+ } else if (key == "strings_can_be_null") { -+ options->convert_options.strings_can_be_null = parseBool(value); -+ } else { -+ return Status::Invalid("Config " + it.first + "is not supported."); -+ } -+ } -+ return options; -+} -+ - } // namespace dataset - } // namespace arrow -diff --git a/cpp/src/arrow/dataset/file_csv.h b/cpp/src/arrow/dataset/file_csv.h -index 42e3fd724..4d2825183 100644 ---- a/cpp/src/arrow/dataset/file_csv.h -+++ b/cpp/src/arrow/dataset/file_csv.h -@@ -85,6 +85,9 @@ class ARROW_DS_EXPORT CsvFileFormat : public FileFormat { - struct ARROW_DS_EXPORT CsvFragmentScanOptions : public FragmentScanOptions { - std::string type_name() const override { return kCsvTypeName; } - -+ static Result> from( -+ const std::unordered_map& configs); -+ - using StreamWrapFunc = std::function>( - std::shared_ptr)>; - -diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc -index 5d892af9a..0f8b0448b 100644 ---- a/cpp/src/arrow/engine/substrait/expression_internal.cc -+++ b/cpp/src/arrow/engine/substrait/expression_internal.cc -@@ -1337,5 +1337,17 @@ Result> ToProto( - return std::move(out); - } - -+Status FromProto(const substrait::Expression::Literal& literal, -+ std::unordered_map& out) { -+ ARROW_RETURN_IF(!literal.has_map(), Status::Invalid("Literal does not have a map.")); -+ auto literalMap = literal.map(); -+ auto size = literalMap.key_values_size(); -+ for (auto i = 0; i < size; i++) { -+ substrait::Expression_Literal_Map_KeyValue keyValue = literalMap.key_values(i); -+ out.emplace(keyValue.key().string(), keyValue.value().string()); -+ } -+ return Status::OK(); -+} 
-+ - } // namespace engine - } // namespace arrow -diff --git a/cpp/src/arrow/engine/substrait/expression_internal.h b/cpp/src/arrow/engine/substrait/expression_internal.h -index 2ce2ee76a..9be81b7ab 100644 ---- a/cpp/src/arrow/engine/substrait/expression_internal.h -+++ b/cpp/src/arrow/engine/substrait/expression_internal.h -@@ -61,5 +61,9 @@ ARROW_ENGINE_EXPORT - Result FromProto(const substrait::AggregateFunction&, bool is_hash, - const ExtensionSet&, const ConversionOptions&); - -+ARROW_ENGINE_EXPORT -+Status FromProto(const substrait::Expression::Literal& literal, -+ std::unordered_map& out); -+ - } // namespace engine - } // namespace arrow -diff --git a/cpp/src/arrow/engine/substrait/serde.cc b/cpp/src/arrow/engine/substrait/serde.cc -index 9e670f121..02e5c7171 100644 ---- a/cpp/src/arrow/engine/substrait/serde.cc -+++ b/cpp/src/arrow/engine/substrait/serde.cc -@@ -247,6 +247,16 @@ Result DeserializeExpressions( - return FromProto(extended_expression, ext_set_out, conversion_options, registry); - } - -+Status DeserializeMap(const Buffer& buf, -+ std::unordered_map& out) { -+ // ARROW_ASSIGN_OR_RAISE(auto advanced_extension, -+ // ParseFromBuffer(buf)); -+ // return FromProto(advanced_extension, out); -+ ARROW_ASSIGN_OR_RAISE(auto literal, -+ ParseFromBuffer(buf)); -+ return FromProto(literal, out); -+} -+ - namespace { - - Result> MakeSingleDeclarationPlan( -diff --git a/cpp/src/arrow/engine/substrait/serde.h b/cpp/src/arrow/engine/substrait/serde.h -index ab749f4a6..6312ec239 100644 ---- a/cpp/src/arrow/engine/substrait/serde.h -+++ b/cpp/src/arrow/engine/substrait/serde.h -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - #include - - #include "arrow/compute/type_fwd.h" -@@ -183,6 +184,9 @@ ARROW_ENGINE_EXPORT Result DeserializeExpressions( - const ConversionOptions& conversion_options = {}, - ExtensionSet* ext_set_out = NULLPTR); - -+ARROW_ENGINE_EXPORT Status -+DeserializeMap(const Buffer& buf, std::unordered_map& out); -+ - /// \brief Deserializes a Substrait Type message to the corresponding Arrow type - /// - /// \param[in] buf a buffer containing the protobuf serialization of a Substrait Type -diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml -index d4d3e2c0f..ce72eaa1f 100644 ---- a/java/dataset/pom.xml -+++ b/java/dataset/pom.xml -@@ -25,9 +25,10 @@ - jar - - ../../../cpp/release-build/ -- 2.5.0 - 1.11.0 - 1.11.3 -+ 0.31.0 -+ 3.25.3 - - - -@@ -47,6 +48,18 @@ - arrow-c-data - compile - -+ -+ io.substrait -+ core -+ ${substrait.version} -+ provided -+ -+ -+ com.google.protobuf -+ protobuf-java -+ ${protobuf.version} -+ provided -+ - - org.apache.arrow - arrow-memory-netty -diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc -index 8d7dafd84..89cdc39fe 100644 ---- a/java/dataset/src/main/cpp/jni_wrapper.cc -+++ b/java/dataset/src/main/cpp/jni_wrapper.cc -@@ -25,6 +25,7 @@ - #include "arrow/c/helpers.h" - #include "arrow/dataset/api.h" - #include "arrow/dataset/file_base.h" -+#include "arrow/dataset/file_csv.h" - #include "arrow/filesystem/localfs.h" - #include "arrow/filesystem/path_util.h" - #ifdef ARROW_S3 -@@ -122,6 +123,19 @@ arrow::Result> GetFileFormat( - } - } - -+arrow::Result> -+GetFragmentScanOptions(jint file_format_id, -+ const std::unordered_map& configs) { -+ switch (file_format_id) { -+#ifdef ARROW_CSV -+ case 3: -+ return arrow::dataset::CsvFragmentScanOptions::from(configs); -+#endif -+ default: -+ return arrow::Status::Invalid("Illegal file format id: " ,file_format_id); -+ } -+} -+ - class 
ReserveFromJava : public arrow::dataset::jni::ReservationListener { - public: - ReserveFromJava(JavaVM* vm, jobject java_reservation_listener) -@@ -460,12 +474,13 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_closeDataset - /* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: createScanner -- * Signature: (J[Ljava/lang/String;Ljava/nio/ByteBuffer;Ljava/nio/ByteBuffer;JJ)J -+ * Signature: -+ * (J[Ljava/lang/String;Ljava/nio/ByteBuffer;Ljava/nio/ByteBuffer;JJ;Ljava/nio/ByteBuffer;J)J - */ - JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_createScanner( - JNIEnv* env, jobject, jlong dataset_id, jobjectArray columns, -- jobject substrait_projection, jobject substrait_filter, -- jlong batch_size, jlong memory_pool_id) { -+ jobject substrait_projection, jobject substrait_filter, jlong batch_size, -+ jlong file_format_id, jobject options, jlong memory_pool_id) { - JNI_METHOD_START - arrow::MemoryPool* pool = reinterpret_cast(memory_pool_id); - if (pool == nullptr) { -@@ -514,6 +529,14 @@ JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_createScann - } - JniAssertOkOrThrow(scanner_builder->Filter(*filter_expr)); - } -+ if (file_format_id != -1 && options != nullptr) { -+ std::unordered_map option_map; -+ std::shared_ptr buffer = LoadArrowBufferFromByteBuffer(env, options); -+ JniAssertOkOrThrow(arrow::engine::DeserializeMap(*buffer, option_map)); -+ std::shared_ptr scan_options = -+ JniGetOrThrow(GetFragmentScanOptions(file_format_id, option_map)); -+ JniAssertOkOrThrow(scanner_builder->FragmentScanOptions(scan_options)); -+ } - JniAssertOkOrThrow(scanner_builder->BatchSize(batch_size)); - - auto scanner = JniGetOrThrow(scanner_builder->Finish()); -@@ -627,14 +650,31 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Fina - /* - * Class: org_apache_arrow_dataset_file_JniWrapper - * Method: makeFileSystemDatasetFactory -- * Signature: (Ljava/lang/String;II)J -+ * Signature: (Ljava/lang/String;IILjava/lang/String;Ljava/nio/ByteBuffer)J - */ - JNIEXPORT jlong JNICALL --Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory__Ljava_lang_String_2I( -- JNIEnv* env, jobject, jstring uri, jint file_format_id) { -+Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory( -+ JNIEnv* env, jobject, jstring uri, jint file_format_id, jobject options) { - JNI_METHOD_START - std::shared_ptr file_format = - JniGetOrThrow(GetFileFormat(file_format_id)); -+ if (options != nullptr) { -+ std::unordered_map option_map; -+ std::shared_ptr buffer = LoadArrowBufferFromByteBuffer(env, options); -+ JniAssertOkOrThrow(arrow::engine::DeserializeMap(*buffer, option_map)); -+ std::shared_ptr scan_options = -+ JniGetOrThrow(GetFragmentScanOptions(file_format_id, option_map)); -+ file_format->default_fragment_scan_options = scan_options; -+#ifdef ARROW_CSV -+ if (file_format_id == 3) { -+ std::shared_ptr csv_file_format = -+ std::dynamic_pointer_cast(file_format); -+ csv_file_format->parse_options = -+ std::dynamic_pointer_cast(scan_options) -+ ->parse_options; -+ } -+#endif -+ } - arrow::dataset::FileSystemFactoryOptions options; - std::shared_ptr d = - JniGetOrThrow(arrow::dataset::FileSystemDatasetFactory::Make( -@@ -645,16 +685,33 @@ Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory__Ljav - - /* - * Class: org_apache_arrow_dataset_file_JniWrapper -- * Method: makeFileSystemDatasetFactory -- * Signature: ([Ljava/lang/String;II)J -+ * Method: 
makeFileSystemDatasetFactoryWithFiles -+ * Signature: ([Ljava/lang/String;IIJ;Ljava/nio/ByteBuffer)J - */ - JNIEXPORT jlong JNICALL --Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory___3Ljava_lang_String_2I( -- JNIEnv* env, jobject, jobjectArray uris, jint file_format_id) { -+Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactoryWithFiles( -+ JNIEnv* env, jobject, jobjectArray uris, jint file_format_id, jobject options) { - JNI_METHOD_START - - std::shared_ptr file_format = - JniGetOrThrow(GetFileFormat(file_format_id)); -+ if (options != nullptr) { -+ std::unordered_map option_map; -+ std::shared_ptr buffer = LoadArrowBufferFromByteBuffer(env, options); -+ JniAssertOkOrThrow(arrow::engine::DeserializeMap(*buffer, option_map)); -+ std::shared_ptr scan_options = -+ JniGetOrThrow(GetFragmentScanOptions(file_format_id, option_map)); -+ file_format->default_fragment_scan_options = scan_options; -+#ifdef ARROW_CSV -+ if (file_format_id == 3) { -+ std::shared_ptr csv_file_format = -+ std::dynamic_pointer_cast(file_format); -+ csv_file_format->parse_options = -+ std::dynamic_pointer_cast(scan_options) -+ ->parse_options; -+ } -+#endif -+ } - arrow::dataset::FileSystemFactoryOptions options; - - std::vector uri_vec = ToStringVector(env, uris); -diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java b/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java -index aa3156905..a0b6fb168 100644 ---- a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java -+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java -@@ -17,8 +17,11 @@ - - package org.apache.arrow.dataset.file; - -+import java.util.Optional; -+ - import org.apache.arrow.dataset.jni.NativeDatasetFactory; - import org.apache.arrow.dataset.jni.NativeMemoryPool; -+import org.apache.arrow.dataset.scanner.FragmentScanOptions; - import org.apache.arrow.memory.BufferAllocator; - - /** -@@ -27,21 +30,34 @@ import org.apache.arrow.memory.BufferAllocator; - public class FileSystemDatasetFactory extends NativeDatasetFactory { - - public FileSystemDatasetFactory(BufferAllocator allocator, NativeMemoryPool memoryPool, FileFormat format, -- String uri) { -- super(allocator, memoryPool, createNative(format, uri)); -+ String uri, Optional fragmentScanOptions) { -+ super(allocator, memoryPool, createNative(format, uri, fragmentScanOptions)); -+ } -+ -+ public FileSystemDatasetFactory(BufferAllocator allocator, NativeMemoryPool memoryPool, FileFormat format, -+ String uri) { -+ super(allocator, memoryPool, createNative(format, uri, Optional.empty())); -+ } -+ -+ public FileSystemDatasetFactory(BufferAllocator allocator, NativeMemoryPool memoryPool, FileFormat format, -+ String[] uris, Optional fragmentScanOptions) { -+ super(allocator, memoryPool, createNative(format, uris, fragmentScanOptions)); - } - - public FileSystemDatasetFactory(BufferAllocator allocator, NativeMemoryPool memoryPool, FileFormat format, - String[] uris) { -- super(allocator, memoryPool, createNative(format, uris)); -+ super(allocator, memoryPool, createNative(format, uris, Optional.empty())); - } - -- private static long createNative(FileFormat format, String uri) { -- return JniWrapper.get().makeFileSystemDatasetFactory(uri, format.id()); -+ private static long createNative(FileFormat format, String uri, Optional fragmentScanOptions) { -+ return JniWrapper.get().makeFileSystemDatasetFactory(uri, 
format.id(), -+ fragmentScanOptions.map(FragmentScanOptions::serialize).orElse(null)); - } - -- private static long createNative(FileFormat format, String[] uris) { -- return JniWrapper.get().makeFileSystemDatasetFactory(uris, format.id()); -+ private static long createNative(FileFormat format, String[] uris, -+ Optional fragmentScanOptions) { -+ return JniWrapper.get().makeFileSystemDatasetFactoryWithFiles(uris, format.id(), -+ fragmentScanOptions.map(FragmentScanOptions::serialize).orElse(null)); - } - - } -diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java b/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java -index c3a1a4e58..c3f8e12b3 100644 ---- a/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java -+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java -@@ -17,6 +17,8 @@ - - package org.apache.arrow.dataset.file; - -+import java.nio.ByteBuffer; -+ - import org.apache.arrow.dataset.jni.JniLoader; - - /** -@@ -43,7 +45,8 @@ public class JniWrapper { - * @return the native pointer of the arrow::dataset::FileSystemDatasetFactory instance. - * @see FileFormat - */ -- public native long makeFileSystemDatasetFactory(String uri, int fileFormat); -+ public native long makeFileSystemDatasetFactory(String uri, int fileFormat, -+ ByteBuffer serializedFragmentScanOptions); - - /** - * Create FileSystemDatasetFactory and return its native pointer. The pointer is pointing to a -@@ -54,7 +57,8 @@ public class JniWrapper { - * @return the native pointer of the arrow::dataset::FileSystemDatasetFactory instance. - * @see FileFormat - */ -- public native long makeFileSystemDatasetFactory(String[] uris, int fileFormat); -+ public native long makeFileSystemDatasetFactoryWithFiles(String[] uris, int fileFormat, -+ ByteBuffer serializedFragmentScanOptions); - - /** - * Write the content in a {@link org.apache.arrow.c.ArrowArrayStream} into files. This internally -diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java -index 637a3e8f2..6d6309140 100644 ---- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java -+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java -@@ -80,7 +80,8 @@ public class JniWrapper { - * @return the native pointer of the arrow::dataset::Scanner instance. - */ - public native long createScanner(long datasetId, String[] columns, ByteBuffer substraitProjection, -- ByteBuffer substraitFilter, long batchSize, long memoryPool); -+ ByteBuffer substraitFilter, long batchSize, long fileFormat, -+ ByteBuffer serializedFragmentScanOptions, long memoryPool); - - /** - * Get a serialized schema from native instance of a Scanner. 
-diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java -index d9abad997..3a96fe768 100644 ---- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java -+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java -@@ -17,6 +17,9 @@ - - package org.apache.arrow.dataset.jni; - -+import java.nio.ByteBuffer; -+ -+import org.apache.arrow.dataset.scanner.FragmentScanOptions; - import org.apache.arrow.dataset.scanner.ScanOptions; - import org.apache.arrow.dataset.source.Dataset; - -@@ -40,11 +43,18 @@ public class NativeDataset implements Dataset { - if (closed) { - throw new NativeInstanceReleasedException(); - } -- -+ int fileFormat = -1; -+ ByteBuffer serialized = null; -+ if (options.getFragmentScanOptions().isPresent()) { -+ FragmentScanOptions fragmentScanOptions = options.getFragmentScanOptions().get(); -+ fileFormat = fragmentScanOptions.fileFormatId(); -+ serialized = fragmentScanOptions.serialize(); -+ } - long scannerId = JniWrapper.get().createScanner(datasetId, options.getColumns().orElse(null), - options.getSubstraitProjection().orElse(null), - options.getSubstraitFilter().orElse(null), -- options.getBatchSize(), context.getMemoryPool().getNativeInstanceId()); -+ options.getBatchSize(), fileFormat, serialized, -+ context.getMemoryPool().getNativeInstanceId()); - - return new NativeScanner(context, scannerId); - } -diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/FragmentScanOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/FragmentScanOptions.java -new file mode 100644 -index 000000000..8acb2b2d4 ---- /dev/null -+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/FragmentScanOptions.java -@@ -0,0 +1,50 @@ -+/* -+ * Licensed to the Apache Software Foundation (ASF) under one or more -+ * contributor license agreements. See the NOTICE file distributed with -+ * this work for additional information regarding copyright ownership. -+ * The ASF licenses this file to You under the Apache License, Version 2.0 -+ * (the "License"); you may not use this file except in compliance with -+ * the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+package org.apache.arrow.dataset.scanner; -+ -+import java.nio.ByteBuffer; -+import java.util.Map; -+ -+import org.apache.arrow.dataset.substrait.util.ConvertUtil; -+ -+import io.substrait.proto.Expression; -+ -+public interface FragmentScanOptions { -+ String typeName(); -+ -+ int fileFormatId(); -+ -+ ByteBuffer serialize(); -+ -+ /** -+ * serialize the map. 
-+ * -+ * @param config config map -+ * @return bufer to jni call argument, should be DirectByteBuffer -+ */ -+ default ByteBuffer serializeMap(Map config) { -+ if (config.isEmpty()) { -+ return null; -+ } -+ -+ Expression.Literal literal = ConvertUtil.mapToExpressionLiteral(config); -+ ByteBuffer buf = ByteBuffer.allocateDirect(literal.getSerializedSize()); -+ buf.put(literal.toByteArray()); -+ return buf; -+ } -+} -diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java -index 995d05ac3..aad71930c 100644 ---- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java -+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java -@@ -31,6 +31,8 @@ public class ScanOptions { - private final Optional substraitProjection; - private final Optional substraitFilter; - -+ private final Optional fragmentScanOptions; -+ - /** - * Constructor. - * @param columns Projected columns. Empty for scanning all columns. -@@ -61,6 +63,7 @@ public class ScanOptions { - this.columns = columns; - this.substraitProjection = Optional.empty(); - this.substraitFilter = Optional.empty(); -+ this.fragmentScanOptions = Optional.empty(); - } - - public ScanOptions(long batchSize) { -@@ -83,6 +86,10 @@ public class ScanOptions { - return substraitFilter; - } - -+ public Optional getFragmentScanOptions() { -+ return fragmentScanOptions; -+ } -+ - /** - * Builder for Options used during scanning. - */ -@@ -91,6 +98,7 @@ public class ScanOptions { - private Optional columns; - private ByteBuffer substraitProjection; - private ByteBuffer substraitFilter; -+ private FragmentScanOptions fragmentScanOptions; - - /** - * Constructor. -@@ -136,6 +144,18 @@ public class ScanOptions { - return this; - } - -+ /** -+ * Set the FragmentScanOptions. -+ * -+ * @param fragmentScanOptions scan options -+ * @return the ScanOptions configured. -+ */ -+ public Builder fragmentScanOptions(FragmentScanOptions fragmentScanOptions) { -+ Preconditions.checkNotNull(fragmentScanOptions); -+ this.fragmentScanOptions = fragmentScanOptions; -+ return this; -+ } -+ - public ScanOptions build() { - return new ScanOptions(this); - } -@@ -146,5 +166,6 @@ public class ScanOptions { - columns = builder.columns; - substraitProjection = Optional.ofNullable(builder.substraitProjection); - substraitFilter = Optional.ofNullable(builder.substraitFilter); -+ fragmentScanOptions = Optional.ofNullable(builder.fragmentScanOptions); - } - } -diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvConvertOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvConvertOptions.java -new file mode 100644 -index 000000000..08e35ede2 ---- /dev/null -+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvConvertOptions.java -@@ -0,0 +1,51 @@ -+/* -+ * Licensed to the Apache Software Foundation (ASF) under one or more -+ * contributor license agreements. See the NOTICE file distributed with -+ * this work for additional information regarding copyright ownership. -+ * The ASF licenses this file to You under the Apache License, Version 2.0 -+ * (the "License"); you may not use this file except in compliance with -+ * the License. 
You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+package org.apache.arrow.dataset.scanner.csv; -+ -+import java.util.Map; -+import java.util.Optional; -+ -+import org.apache.arrow.c.ArrowSchema; -+ -+public class CsvConvertOptions { -+ -+ private final Map configs; -+ -+ private Optional cSchema = Optional.empty(); -+ -+ public CsvConvertOptions(Map configs) { -+ this.configs = configs; -+ } -+ -+ public Optional getArrowSchema() { -+ return cSchema; -+ } -+ -+ public Map getConfigs() { -+ return configs; -+ } -+ -+ public void set(String key, String value) { -+ configs.put(key, value); -+ } -+ -+ public void setArrowSchema(ArrowSchema cSchema) { -+ this.cSchema = Optional.of(cSchema); -+ } -+ -+} -diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java -new file mode 100644 -index 000000000..88973f0a0 ---- /dev/null -+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java -@@ -0,0 +1,97 @@ -+/* -+ * Licensed to the Apache Software Foundation (ASF) under one or more -+ * contributor license agreements. See the NOTICE file distributed with -+ * this work for additional information regarding copyright ownership. -+ * The ASF licenses this file to You under the Apache License, Version 2.0 -+ * (the "License"); you may not use this file except in compliance with -+ * the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+package org.apache.arrow.dataset.scanner.csv; -+ -+import java.io.Serializable; -+import java.nio.ByteBuffer; -+import java.util.Locale; -+import java.util.Map; -+import java.util.stream.Collectors; -+import java.util.stream.Stream; -+ -+import org.apache.arrow.dataset.file.FileFormat; -+import org.apache.arrow.dataset.scanner.FragmentScanOptions; -+ -+public class CsvFragmentScanOptions implements Serializable, FragmentScanOptions { -+ private final CsvConvertOptions convertOptions; -+ private final Map readOptions; -+ private final Map parseOptions; -+ -+ -+ /** -+ * csv scan options, map to CPP struct CsvFragmentScanOptions. -+ * -+ * @param convertOptions same struct in CPP -+ * @param readOptions same struct in CPP -+ * @param parseOptions same struct in CPP -+ */ -+ public CsvFragmentScanOptions(CsvConvertOptions convertOptions, -+ Map readOptions, -+ Map parseOptions) { -+ this.convertOptions = convertOptions; -+ this.readOptions = readOptions; -+ this.parseOptions = parseOptions; -+ } -+ -+ public String typeName() { -+ return FileFormat.CSV.name().toLowerCase(Locale.ROOT); -+ } -+ -+ /** -+ * File format id. 
-+ * -+ * @return id -+ */ -+ public int fileFormatId() { -+ return FileFormat.CSV.id(); -+ } -+ -+ /** -+ * Serialize this class to ByteBuffer and then called by jni call. -+ * -+ * @return DirectByteBuffer -+ */ -+ public ByteBuffer serialize() { -+ Map options = Stream.concat(Stream.concat(readOptions.entrySet().stream(), -+ parseOptions.entrySet().stream()), -+ convertOptions.getConfigs().entrySet().stream()).collect( -+ Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); -+ -+ if (convertOptions.getArrowSchema().isPresent()) { -+ options.put("column_types", Long.toString(convertOptions.getArrowSchema().get().memoryAddress())); -+ } -+ return serializeMap(options); -+ } -+ -+ public static CsvFragmentScanOptions deserialize(String serialized) { -+ throw new UnsupportedOperationException("Not implemented now"); -+ } -+ -+ public CsvConvertOptions getConvertOptions() { -+ return convertOptions; -+ } -+ -+ public Map getReadOptions() { -+ return readOptions; -+ } -+ -+ public Map getParseOptions() { -+ return parseOptions; -+ } -+ -+} -diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/util/ConvertUtil.java b/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/util/ConvertUtil.java -new file mode 100644 -index 000000000..31a4023af ---- /dev/null -+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/util/ConvertUtil.java -@@ -0,0 +1,46 @@ -+/* -+ * Licensed to the Apache Software Foundation (ASF) under one or more -+ * contributor license agreements. See the NOTICE file distributed with -+ * this work for additional information regarding copyright ownership. -+ * The ASF licenses this file to You under the Apache License, Version 2.0 -+ * (the "License"); you may not use this file except in compliance with -+ * the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+package org.apache.arrow.dataset.substrait.util; -+ -+import java.util.Map; -+ -+import io.substrait.proto.Expression; -+ -+public class ConvertUtil { -+ -+ /** -+ * Convert map to substrait Expression. 
-+ * -+ * @return Substrait Expression -+ */ -+ public static Expression.Literal mapToExpressionLiteral(Map values) { -+ Expression.Literal.Builder literalBuilder = Expression.Literal.newBuilder(); -+ Expression.Literal.Map.KeyValue.Builder keyValueBuilder = -+ Expression.Literal.Map.KeyValue.newBuilder(); -+ Expression.Literal.Map.Builder mapBuilder = Expression.Literal.Map.newBuilder(); -+ for (Map.Entry entry : values.entrySet()) { -+ literalBuilder.setString(entry.getKey()); -+ keyValueBuilder.setKey(literalBuilder.build()); -+ literalBuilder.setString(entry.getValue()); -+ keyValueBuilder.setValue(literalBuilder.build()); -+ mapBuilder.addKeyValues(keyValueBuilder.build()); -+ } -+ literalBuilder.setMap(mapBuilder.build()); -+ return literalBuilder.build(); -+ } -+} -diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java b/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java -index 0fba72892..e7903b7a4 100644 ---- a/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java -+++ b/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java -@@ -31,6 +31,9 @@ import java.util.HashMap; - import java.util.Map; - import java.util.Optional; - -+import org.apache.arrow.c.ArrowSchema; -+import org.apache.arrow.c.CDataDictionaryProvider; -+import org.apache.arrow.c.Data; - import org.apache.arrow.dataset.ParquetWriteSupport; - import org.apache.arrow.dataset.TestDataset; - import org.apache.arrow.dataset.file.FileFormat; -@@ -38,8 +41,11 @@ import org.apache.arrow.dataset.file.FileSystemDatasetFactory; - import org.apache.arrow.dataset.jni.NativeMemoryPool; - import org.apache.arrow.dataset.scanner.ScanOptions; - import org.apache.arrow.dataset.scanner.Scanner; -+import org.apache.arrow.dataset.scanner.csv.CsvConvertOptions; -+import org.apache.arrow.dataset.scanner.csv.CsvFragmentScanOptions; - import org.apache.arrow.dataset.source.Dataset; - import org.apache.arrow.dataset.source.DatasetFactory; -+import org.apache.arrow.memory.BufferAllocator; - import org.apache.arrow.vector.ipc.ArrowReader; - import org.apache.arrow.vector.types.pojo.ArrowType; - import org.apache.arrow.vector.types.pojo.Field; -@@ -49,6 +55,8 @@ import org.junit.ClassRule; - import org.junit.Test; - import org.junit.rules.TemporaryFolder; - -+import com.google.common.collect.ImmutableMap; -+ - public class TestAceroSubstraitConsumer extends TestDataset { - - @ClassRule -@@ -457,4 +465,42 @@ public class TestAceroSubstraitConsumer extends TestDataset { - substraitExpression.put(decodedSubstrait); - return substraitExpression; - } -+ -+ @Test -+ public void testCsvConvertOptions() throws Exception { -+ final Schema schema = new Schema(Arrays.asList( -+ Field.nullable("Id", new ArrowType.Int(32, true)), -+ Field.nullable("Name", new ArrowType.Utf8()), -+ Field.nullable("Language", new ArrowType.Utf8()) -+ ), null); -+ String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv"; -+ BufferAllocator allocator = rootAllocator(); -+ try (ArrowSchema cSchema = ArrowSchema.allocateNew(allocator); -+ CDataDictionaryProvider provider = new CDataDictionaryProvider()) { -+ Data.exportSchema(allocator, schema, provider, cSchema); -+ CsvConvertOptions convertOptions = new CsvConvertOptions(ImmutableMap.of("delimiter", ";")); -+ convertOptions.setArrowSchema(cSchema); -+ CsvFragmentScanOptions fragmentScanOptions = new CsvFragmentScanOptions( -+ 
convertOptions, ImmutableMap.of(), ImmutableMap.of()); -+ ScanOptions options = new ScanOptions.Builder(/*batchSize*/ 32768) -+ .columns(Optional.empty()) -+ .fragmentScanOptions(fragmentScanOptions) -+ .build(); -+ try ( -+ DatasetFactory datasetFactory = new FileSystemDatasetFactory(allocator, NativeMemoryPool.getDefault(), -+ FileFormat.CSV, path); -+ Dataset dataset = datasetFactory.finish(); -+ Scanner scanner = dataset.newScan(options); -+ ArrowReader reader = scanner.scanBatches() -+ ) { -+ assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); -+ int rowCount = 0; -+ while (reader.loadNextBatch()) { -+ assertEquals("[1, 2, 3]", reader.getVectorSchemaRoot().getVector("Id").toString()); -+ rowCount += reader.getVectorSchemaRoot().getRowCount(); -+ } -+ assertEquals(3, rowCount); -+ } -+ } -+ } - } -diff --git a/java/dataset/src/test/resources/data/student.csv b/java/dataset/src/test/resources/data/student.csv -new file mode 100644 -index 000000000..329194609 ---- /dev/null -+++ b/java/dataset/src/test/resources/data/student.csv -@@ -0,0 +1,4 @@ -+Id;Name;Language -+1;Juno;Java -+2;Peter;Python -+3;Celin;C++ diff --git a/ep/build-velox/src/modify_velox.patch b/ep/build-velox/src/modify_velox.patch index 81560917d6209..9eecb7895081c 100644 --- a/ep/build-velox/src/modify_velox.patch +++ b/ep/build-velox/src/modify_velox.patch @@ -92,12 +92,11 @@ index ce4c24dbe..785a2acc6 100644 -DARROW_WITH_THRIFT=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON -@@ -69,6 +71,8 @@ if(VELOX_ENABLE_ARROW) +@@ -69,6 +71,7 @@ if(VELOX_ENABLE_ARROW) arrow_ep PREFIX ${ARROW_PREFIX} URL ${VELOX_ARROW_SOURCE_URL} + PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_SOURCE_DIR}/modify_arrow.patch -+ COMMAND patch -p1 < ${CMAKE_CURRENT_SOURCE_DIR}/modify_arrow_dataset_scan_option.patch URL_HASH ${VELOX_ARROW_BUILD_SHA256_CHECKSUM} SOURCE_SUBDIR cpp CMAKE_ARGS ${ARROW_CMAKE_ARGS} diff --git a/gluten-core/src/test/scala/org/apache/gluten/execution/WholeStageTransformerSuite.scala b/gluten-core/src/test/scala/org/apache/gluten/execution/WholeStageTransformerSuite.scala index 95391a2c42f56..7d2d48828fb32 100644 --- a/gluten-core/src/test/scala/org/apache/gluten/execution/WholeStageTransformerSuite.scala +++ b/gluten-core/src/test/scala/org/apache/gluten/execution/WholeStageTransformerSuite.scala @@ -319,26 +319,6 @@ abstract class WholeStageTransformerSuite df } - /** - * Some rule on LogicalPlan will not only apply in select query, the total df.load() should in - * spark environment with gluten disabled config. 
- * - * @param sql - * @param f - * @return - */ - protected def runAndCompare(sql: String)(f: => Unit): DataFrame = { - var expected: Seq[Row] = null - withSQLConf(vanillaSparkConfs(): _*) { - f - expected = spark.sql(sql).collect() - } - f - val df = spark.sql(sql) - checkAnswer(df, expected) - df - } - protected def runQueryAndCompare( sqlStr: String, compareResult: Boolean = true, diff --git a/gluten-data/pom.xml b/gluten-data/pom.xml index c28490d77faad..bb84a06b4125d 100644 --- a/gluten-data/pom.xml +++ b/gluten-data/pom.xml @@ -124,7 +124,7 @@ org.apache.arrow arrow-c-data - ${arrow-gluten.version} + ${arrow.version} org.apache.arrow @@ -140,7 +140,7 @@ org.apache.arrow arrow-dataset - ${arrow-gluten.version} + ${arrow.version} io.netty diff --git a/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala b/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala index a94f8f2e3d49c..99eb72c70ea3f 100644 --- a/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala +++ b/gluten-data/src/main/scala/org/apache/gluten/utils/ArrowUtil.scala @@ -16,12 +16,16 @@ */ package org.apache.gluten.utils +import org.apache.gluten.exception.SchemaMismatchException import org.apache.gluten.vectorized.ArrowWritableColumnVector import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.vectorized.ArrowColumnVectorUtils +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.utils.{SparkArrowUtil, SparkSchemaUtil} import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} @@ -29,15 +33,13 @@ import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} import org.apache.arrow.c.{ArrowSchema, CDataDictionaryProvider, Data} import org.apache.arrow.dataset.file.{FileFormat, FileSystemDatasetFactory} import org.apache.arrow.dataset.jni.NativeMemoryPool -import org.apache.arrow.dataset.scanner.FragmentScanOptions import org.apache.arrow.memory.BufferAllocator import org.apache.arrow.vector.ipc.message.ArrowRecordBatch import org.apache.arrow.vector.types.pojo.{ArrowType, Field, Schema} import org.apache.hadoop.fs.FileStatus -import java.net.{URI, URLDecoder} +import java.net.URI import java.util -import java.util.Optional import scala.collection.JavaConverters._ import scala.collection.mutable @@ -97,6 +99,26 @@ object ArrowUtil extends Logging { new Schema(fields) } + def getFormat(format: String): FileFormat = { + format match { + case "parquet" => FileFormat.PARQUET + case "orc" => FileFormat.ORC + case "csv" => FileFormat.CSV + case _ => throw new IllegalArgumentException("Unrecognizable format") + } + } + + def getFormat(format: org.apache.spark.sql.execution.datasources.FileFormat): FileFormat = { + format match { + case _: ParquetFileFormat => + FileFormat.PARQUET + case _: CSVFileFormat => + FileFormat.CSV + case _ => + throw new IllegalArgumentException("Unrecognizable format") + } + } + private def rewriteUri(encodeUri: String): String = { val decodedUri = encodeUri val uri = URI.create(decodedUri) @@ -120,49 +142,19 @@ object ArrowUtil extends Logging { def makeArrowDiscovery( encodedUri: String, format: FileFormat, - option: Optional[FragmentScanOptions], allocator: BufferAllocator, - pool: NativeMemoryPool - ): 
FileSystemDatasetFactory = { - val factory = - new FileSystemDatasetFactory(allocator, pool, format, rewriteUri(encodedUri), option) + pool: NativeMemoryPool): FileSystemDatasetFactory = { + val factory = new FileSystemDatasetFactory(allocator, pool, format, rewriteUri(encodedUri)) factory } - def readArrowSchema( - file: String, - format: FileFormat, - option: FragmentScanOptions, - allocator: BufferAllocator, - pool: NativeMemoryPool): Schema = { - val factory: FileSystemDatasetFactory = - makeArrowDiscovery(file, format, Optional.of(option), allocator, pool) - val schema = factory.inspect() - factory.close() - schema - } - - def readArrowFileColumnNames( - file: String, - format: FileFormat, - option: FragmentScanOptions, - allocator: BufferAllocator, - pool: NativeMemoryPool): Array[String] = { - val fileFields = ArrowUtil - .readArrowSchema(URLDecoder.decode(file, "UTF-8"), format, option, allocator, pool) - .getFields - .asScala - fileFields.map(_.getName).toArray - } - def readSchema( file: FileStatus, format: FileFormat, - option: FragmentScanOptions, allocator: BufferAllocator, pool: NativeMemoryPool): Option[StructType] = { val factory: FileSystemDatasetFactory = - makeArrowDiscovery(file.getPath.toString, format, Optional.of(option), allocator, pool) + makeArrowDiscovery(file.getPath.toString, format, allocator, pool) val schema = factory.inspect() try { Option(SparkSchemaUtil.fromArrowSchema(schema)) @@ -174,14 +166,67 @@ object ArrowUtil extends Logging { def readSchema( files: Seq[FileStatus], format: FileFormat, - option: FragmentScanOptions, allocator: BufferAllocator, pool: NativeMemoryPool): Option[StructType] = { if (files.isEmpty) { throw new IllegalArgumentException("No input file specified") } - readSchema(files.head, format, option, allocator, pool) + readSchema(files.head, format, allocator, pool) + } + + def compareStringFunc(caseSensitive: Boolean): (String, String) => Boolean = { + if (caseSensitive) { (str1: String, str2: String) => str1.equals(str2) } + else { (str1: String, str2: String) => str1.equalsIgnoreCase(str2) } + } + + // If user specify schema by .schema(newSchemaDifferentWithFile) + def checkSchema( + requiredField: DataType, + parquetFileFieldType: ArrowType, + parquetFileFields: mutable.Buffer[Field]): Unit = { + val requiredFieldType = + SparkArrowUtil.toArrowType(requiredField, SparkSchemaUtil.getLocalTimezoneID) + if (!requiredFieldType.equals(parquetFileFieldType)) { + val arrowFileSchema = parquetFileFields + .map(f => f.toString) + .reduceLeft((f1, f2) => f1 + "\n" + f2) + throw new SchemaMismatchException( + s"Not support specified schema is different with file schema\n$arrowFileSchema") + } + } + + def getRequestedField( + requiredSchema: StructType, + parquetFileFields: mutable.Buffer[Field], + caseSensitive: Boolean): Schema = { + val compareFunc = compareStringFunc(caseSensitive) + requiredSchema.foreach { + readField => + // TODO: check schema inside of complex type + val matchedFields = + parquetFileFields.filter(field => compareFunc(field.getName, readField.name)) + if (!caseSensitive && matchedFields.size > 1) { + // Need to fail if there is ambiguity, i.e. 
more than one field is matched + val fieldsString = matchedFields.map(_.getName).mkString("[", ", ", "]") + throw new RuntimeException( + s""" + |Found duplicate field(s) "${readField.name}": $fieldsString + + |in case-insensitive mode""".stripMargin.replaceAll("\n", " ")) + } + if (matchedFields.nonEmpty) { + checkSchema( + readField.dataType, + matchedFields.head.getFieldType.getType, + parquetFileFields) + } + } + + val requestColNames = requiredSchema.map(_.name) + new Schema(parquetFileFields.filter { + field => requestColNames.exists(col => compareFunc(col, field.getName)) + }.asJava) } def loadMissingColumns( @@ -217,14 +262,19 @@ object ArrowUtil extends Logging { def loadBatch( allocator: BufferAllocator, input: ArrowRecordBatch, - dataSchema: Schema, + dataSchema: StructType, + requiredSchema: StructType, partitionVectors: Array[ArrowWritableColumnVector] = Array.empty, nullVectors: Array[ArrowWritableColumnVector] = Array.empty): ColumnarBatch = { val rowCount: Int = input.getLength val vectors = try { - ArrowWritableColumnVector.loadColumns(rowCount, dataSchema, input, allocator) + ArrowWritableColumnVector.loadColumns( + rowCount, + SparkSchemaUtil.toArrowSchema(dataSchema), + input, + allocator) } finally { input.close() } @@ -232,8 +282,21 @@ object ArrowUtil extends Logging { val totalVectors = if (nullVectors.nonEmpty) { val finalVectors = mutable.ArrayBuffer[ArrowWritableColumnVector]() - finalVectors.appendAll(vectors) - finalVectors.appendAll(nullVectors) + val requiredIterator = requiredSchema.iterator + val compareFunc = compareStringFunc(SQLConf.get.caseSensitiveAnalysis) + while (requiredIterator.hasNext) { + val field = requiredIterator.next() + finalVectors.append(vectors + .find(vector => compareFunc(vector.getValueVector.getName, field.name)) + .getOrElse { + // The missing column need to be find in nullVectors + val nullVector = + nullVectors.find(vector => compareFunc(vector.getValueVector.getName, field.name)).get + nullVector.setValueCount(rowCount) + nullVector.retain() + nullVector + }) + } finalVectors.toArray } else { vectors diff --git a/gluten-ut/spark32/pom.xml b/gluten-ut/spark32/pom.xml index b0744589d1611..e026bb424129d 100644 --- a/gluten-ut/spark32/pom.xml +++ b/gluten-ut/spark32/pom.xml @@ -76,7 +76,7 @@ org.apache.arrow arrow-c-data - ${arrow-gluten.version} + ${arrow.version} test diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 5df53953e4cce..584ace94fbcc1 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -411,9 +411,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") // Arrow not support corrupt record .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord") enableSuite[GlutenCSVv2Suite] @@ -426,9 +423,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as 
uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") // Rule org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown in batch // Early Filter and Projection Push-Down generated an invalid plan .exclude("SPARK-26208: write and read empty data to csv file with headers") @@ -441,9 +435,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") enableSuite[GlutenJsonV1Suite] // FIXME: Array direct selection fails .exclude("Complex field and type inferring") diff --git a/gluten-ut/spark33/pom.xml b/gluten-ut/spark33/pom.xml index 5f9a28e2459d3..0f0a0703c3c50 100644 --- a/gluten-ut/spark33/pom.xml +++ b/gluten-ut/spark33/pom.xml @@ -83,7 +83,7 @@ org.apache.arrow arrow-c-data - ${arrow-gluten.version} + ${arrow.version} test diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 79357e9e220df..f08ebd76ffbb0 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -206,9 +206,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") // Arrow not support corrupt record .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord") enableSuite[GlutenCSVv2Suite] @@ -221,9 +218,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") // Rule org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown in batch // Early Filter and Projection Push-Down generated an invalid plan .exclude("SPARK-26208: write and read empty data to csv file with headers") @@ -235,9 +229,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") // Arrow not support corrupt record .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord") enableSuite[GlutenJsonV1Suite] diff --git a/gluten-ut/spark34/pom.xml b/gluten-ut/spark34/pom.xml index a8d24d5fd219f..d30f9644dbb4d 100644 --- a/gluten-ut/spark34/pom.xml +++ b/gluten-ut/spark34/pom.xml @@ -83,7 +83,7 @@ org.apache.arrow arrow-c-data - 
${arrow-gluten.version} + ${arrow.version} test diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 060f199d1e239..de103cdec850d 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -185,9 +185,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") // Arrow not support corrupt record .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord") enableSuite[GlutenCSVv2Suite] @@ -200,9 +197,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") // Rule org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown in batch // Early Filter and Projection Push-Down generated an invalid plan .exclude("SPARK-26208: write and read empty data to csv file with headers") @@ -214,9 +208,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") // Arrow not support corrupt record .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord") enableSuite[GlutenJsonV1Suite] diff --git a/gluten-ut/spark35/pom.xml b/gluten-ut/spark35/pom.xml index cf2129389a6ec..8f0a5605bdff9 100644 --- a/gluten-ut/spark35/pom.xml +++ b/gluten-ut/spark35/pom.xml @@ -95,7 +95,7 @@ org.apache.arrow arrow-c-data - ${arrow-gluten.version} + ${arrow.version} test diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 2911512f5512c..d32c9685f5f67 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -188,9 +188,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") // Arrow not support corrupt record .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord") enableSuite[GlutenCSVv2Suite] @@ -206,11 +203,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("save csv with empty fields with user defined 
empty values") .exclude("save csv with quote") .exclude("SPARK-13543 Write the output as uncompressed via option()") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") - // Arrow not support corrupt record - .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord") enableSuite[GlutenCSVLegacyTimeParserSuite] // file cars.csv include null string, Arrow not support to read .exclude("DDL test with schema") @@ -221,9 +213,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("SPARK-13543 Write the output as uncompressed via option()") // Arrow not support corrupt record .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord") - .exclude("DDL test with tab separated file") - .exclude("DDL test parsing decimal type") - .exclude("test with tab delimiter and double quote") enableSuite[GlutenJsonV1Suite] // FIXME: Array direct selection fails .exclude("Complex field and type inferring") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala index 8b75dad33c385..cb7ce87f97dad 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala @@ -20,14 +20,16 @@ import org.apache.gluten.GlutenConfig import org.apache.gluten.exception.GlutenException import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row} +import org.apache.spark.sql.{AnalysisException, GlutenSQLTestsBaseTrait, Row} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{DateType, IntegerType, StructType, TimestampType} +import org.apache.spark.sql.types.{DateType, IntegerType, StringType, StructType, TimestampType} import org.scalatest.exceptions.TestFailedException import java.sql.{Date, Timestamp} +import scala.collection.JavaConverters.seqAsJavaListConverter + class GlutenCSVSuite extends CSVSuite with GlutenSQLTestsBaseTrait { override def sparkConf: SparkConf = @@ -41,9 +43,68 @@ class GlutenCSVSuite extends CSVSuite with GlutenSQLTestsBaseTrait { } class GlutenCSVv1Suite extends GlutenCSVSuite { + import testImplicits._ override def sparkConf: SparkConf = super.sparkConf .set(SQLConf.USE_V1_SOURCE_LIST, "csv") + + testGluten("SPARK-23786: Ignore column name case if spark.sql.caseSensitive is false") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + withTempPath { + path => + val oschema = new StructType().add("A", StringType) + // change the row content 0 to string bbb in Gluten for test + val odf = spark.createDataFrame(List(Row("bbb")).asJava, oschema) + odf.write.option("header", true).csv(path.getCanonicalPath) + val ischema = new StructType().add("a", StringType) + val idf = spark.read + .schema(ischema) + .option("header", true) + .option("enforceSchema", false) + .csv(path.getCanonicalPath) + checkAnswer(idf, odf) + } + } + } + + testGluten("case sensitivity of filters references") { + Seq(true, false).foreach { + filterPushdown => + withSQLConf(SQLConf.CSV_FILTER_PUSHDOWN_ENABLED.key -> filterPushdown.toString) { + withTempPath { + path => + Seq("""aaa,BBB""", """0,1""", """2,3""") + .toDF() + .repartition(1) + .write + .text(path.getCanonicalPath) 
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + // change the schema to Arrow schema to support read in Gluten + val readback = spark.read + .schema("aaa long, BBB long") + .option("header", true) + .csv(path.getCanonicalPath) + checkAnswer(readback, Seq(Row(2, 3), Row(0, 1))) + checkAnswer(readback.filter($"AAA" === 2 && $"bbb" === 3), Seq(Row(2, 3))) + } + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val readback = spark.read + .schema("aaa long, BBB long") + .option("header", true) + .csv(path.getCanonicalPath) + checkAnswer(readback, Seq(Row(2, 3), Row(0, 1))) + checkError( + exception = intercept[AnalysisException] { + readback.filter($"AAA" === 2 && $"bbb" === 3).collect() + }, + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + parameters = Map("objectName" -> "`AAA`", "proposal" -> "`BBB`, `aaa`") + ) + } + } + } + } + } } class GlutenCSVv2Suite extends GlutenCSVSuite { diff --git a/gluten-ut/test/pom.xml b/gluten-ut/test/pom.xml index 25ec542deab2b..d55e6ca917e7b 100644 --- a/gluten-ut/test/pom.xml +++ b/gluten-ut/test/pom.xml @@ -90,7 +90,7 @@ org.apache.arrow arrow-c-data - ${arrow-gluten.version} + ${arrow.version} test diff --git a/pom.xml b/pom.xml index 88cbb724e0531..63c53f1093025 100644 --- a/pom.xml +++ b/pom.xml @@ -56,7 +56,6 @@ 0.3.2-incubating 0.8.0 15.0.0 - 15.0.0-gluten arrow-memory-unsafe 2.7.4 UTF-8
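For context, the Arrow Java CSV scan-option API removed by this revert (together with the 15.0.0-gluten Arrow artifacts) was driven roughly as in the following sketch. It is reconstructed from the deleted testCsvConvertOptions test shown in the patch above, not taken from this commit: the input path is hypothetical, and CsvConvertOptions, CsvFragmentScanOptions and ScanOptions.Builder#fragmentScanOptions exist only in the patched 15.0.0-gluten Arrow build, not in stock Arrow 15.0.0.

    import java.util.Optional;

    import org.apache.arrow.dataset.file.FileFormat;
    import org.apache.arrow.dataset.file.FileSystemDatasetFactory;
    import org.apache.arrow.dataset.jni.NativeMemoryPool;
    import org.apache.arrow.dataset.scanner.ScanOptions;
    import org.apache.arrow.dataset.scanner.Scanner;
    import org.apache.arrow.dataset.scanner.csv.CsvConvertOptions;
    import org.apache.arrow.dataset.scanner.csv.CsvFragmentScanOptions;
    import org.apache.arrow.dataset.source.Dataset;
    import org.apache.arrow.dataset.source.DatasetFactory;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.ipc.ArrowReader;

    import com.google.common.collect.ImmutableMap;

    public class CsvFragmentScanOptionsSketch {
      public static void main(String[] args) throws Exception {
        String path = "file:///tmp/student.csv"; // hypothetical ';'-delimited input
        try (BufferAllocator allocator = new RootAllocator()) {
          // Convert/read/parse options are plain string maps that the patched JNI layer
          // serializes and hands to the native CSV scanner.
          CsvConvertOptions convertOptions =
              new CsvConvertOptions(ImmutableMap.of("delimiter", ";"));
          CsvFragmentScanOptions csvOptions =
              new CsvFragmentScanOptions(convertOptions, ImmutableMap.of(), ImmutableMap.of());
          ScanOptions scanOptions = new ScanOptions.Builder(/*batchSize*/ 32768)
              .columns(Optional.empty())
              .fragmentScanOptions(csvOptions) // builder method removed by this revert
              .build();
          try (DatasetFactory factory = new FileSystemDatasetFactory(
                   allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path);
               Dataset dataset = factory.finish();
               Scanner scanner = dataset.newScan(scanOptions);
               ArrowReader reader = scanner.scanBatches()) {
            while (reader.loadNextBatch()) {
              System.out.println(reader.getVectorSchemaRoot().contentToTSVString());
            }
          }
        }
      }
    }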
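The JNI hand-off of those options is also visible in the deleted patch: the Java side merges the read/parse/convert maps, ConvertUtil.mapToExpressionLiteral packs them into a Substrait Expression.Literal map, serializeMap copies its bytes into a direct ByteBuffer, and the native makeFileSystemDatasetFactoryWithFiles entry point unpacks it with arrow::engine::DeserializeMap. A minimal standalone sketch of that serialization step, assuming the io.substrait protobuf classes used by the patch are on the classpath:

    import java.nio.ByteBuffer;
    import java.util.LinkedHashMap;
    import java.util.Map;

    import io.substrait.proto.Expression;

    public class OptionMapSerializationSketch {

      // Mirrors ConvertUtil.mapToExpressionLiteral from the reverted patch.
      static Expression.Literal toLiteral(Map<String, String> values) {
        Expression.Literal.Map.Builder mapBuilder = Expression.Literal.Map.newBuilder();
        for (Map.Entry<String, String> entry : values.entrySet()) {
          mapBuilder.addKeyValues(
              Expression.Literal.Map.KeyValue.newBuilder()
                  .setKey(Expression.Literal.newBuilder().setString(entry.getKey()).build())
                  .setValue(Expression.Literal.newBuilder().setString(entry.getValue()).build())
                  .build());
        }
        return Expression.Literal.newBuilder().setMap(mapBuilder.build()).build();
      }

      // Mirrors FragmentScanOptions.serializeMap: a direct ByteBuffer for the JNI call,
      // returned un-flipped, as in the patch.
      static ByteBuffer serialize(Map<String, String> options) {
        Expression.Literal literal = toLiteral(options);
        ByteBuffer buf = ByteBuffer.allocateDirect(literal.getSerializedSize());
        buf.put(literal.toByteArray());
        return buf;
      }

      public static void main(String[] args) {
        Map<String, String> options = new LinkedHashMap<>();
        options.put("delimiter", ";"); // key taken from the deleted test; other keys map to the C++ CsvFragmentScanOptions struct
        System.out.println("wrote " + serialize(options).position() + " bytes");
      }
    }

After the revert, none of this path remains: Gluten goes back to the stock FileSystemDatasetFactory(allocator, pool, format, uri) constructor and plain ScanOptions, as restored in ArrowUtil.makeArrowDiscovery above.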