Skip to content

Commit

Permalink
init
Browse files — browse the repository at this point in the history.
Branch information:
viirya committed Nov 19, 2024
1 parent 1cca8d6 commit 51dd628
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
1 change: 1 addition & 0 deletions native/proto/src/proto/operator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ message SparkPartitionedFile {
int64 start = 2;
int64 length = 3;
int64 file_size = 4;
repeated spark.spark_expression.Expr partition_values = 5;
}

// This name and the one above are not great, but they correspond to the (unfortunate) Spark names.
Expand Down
26 changes: 23 additions & 3 deletions spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2507,12 +2507,15 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim
partitions.foreach(p => {
val inputPartitions = p.asInstanceOf[DataSourceRDDPartition].inputPartitions
inputPartitions.foreach(partition => {
partition2Proto(partition.asInstanceOf[FilePartition], nativeScanBuilder)
partition2Proto(
partition.asInstanceOf[FilePartition],
nativeScanBuilder,
scan.relation.partitionSchema)
})
})
case rdd: FileScanRDD =>
rdd.filePartitions.foreach(partition => {
partition2Proto(partition, nativeScanBuilder)
partition2Proto(partition, nativeScanBuilder, scan.relation.partitionSchema)
})
case _ =>
}
Expand Down Expand Up @@ -3191,10 +3194,27 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim

private def partition2Proto(
partition: FilePartition,
nativeScanBuilder: OperatorOuterClass.NativeScan.Builder): Unit = {
nativeScanBuilder: OperatorOuterClass.NativeScan.Builder,
partitionSchema: StructType): Unit = {
val partitionBuilder = OperatorOuterClass.SparkFilePartition.newBuilder()
partition.files.foreach(file => {
// Process the partition values
val partitionValues = file.partitionValues
assert(partitionValues.numFields == partitionSchema.length)
val partitionVals =
partitionValues.toSeq(partitionSchema).zipWithIndex.map { case (value, i) =>
val attr = partitionSchema(i)
val valueProto = exprToProto(Literal(value, attr.dataType), Seq.empty)
// In `CometScanRule`, we have already checked that all partition values are
// supported. So, we can safely use `get` here.
assert(
valueProto.isDefined,
s"Unsupported partition value: $value, type: ${attr.dataType}")
valueProto.get
}

val fileBuilder = OperatorOuterClass.SparkPartitionedFile.newBuilder()
partitionVals.foreach(fileBuilder.addPartitionValues)
fileBuilder
.setFilePath(file.pathUri.toString)
.setStart(file.start)
Expand Down

0 comments on commit 51dd628

Please sign in to comment.