Commit d960fa6
wip
pengyu-hou committed Nov 8, 2023
1 parent c45a286 commit d960fa6
Showing 2 changed files with 25 additions and 2 deletions.
20 changes: 20 additions & 0 deletions spark/src/main/scala/ai/chronon/spark/Driver.scala
@@ -213,6 +213,10 @@ object Driver {
       opt[String](required = false,
                   descr =
                     "Start date to compute join backfill; this start date will override the start partition in conf.")
+    val parallelism: ScallopOption[Int] =
+      opt[Int](required = false,
+               descr = "Number of parallel jobs to run; default is 1",
+               default = Option(1))
     lazy val joinConf: api.Join = parseConf[api.Join](confPath())
     override def subcommandName() = s"join_${joinConf.metaData.name}"
   }
@@ -248,6 +252,14 @@ object Driver {
       opt[Int](required = false,
                descr = "Runs backfill in steps, step-days at a time. Default is 30 days",
                default = Option(30))
+    val startPartitionOverride: ScallopOption[String] =
+      opt[String](required = false,
+                  descr =
+                    "Start date to compute group-by backfill; this start date will override the backfill start date in conf.")
+    val parallelism: ScallopOption[Int] =
+      opt[Int](required = false,
+               descr = "Number of parallel jobs to run; default is 1",
+               default = Option(1))
     lazy val groupByConf: api.GroupBy = parseConf[api.GroupBy](confPath())
     override def subcommandName() = s"groupBy_${groupByConf.metaData.name}_backfill"
   }
@@ -361,6 +373,14 @@ object Driver {
       opt[Boolean](required = false,
                    descr = "Auto-expand the Hive table if new columns are added in the staging query",
                    default = Option(true))
+    val startPartitionOverride: ScallopOption[String] =
+      opt[String](required = false,
+                  descr =
+                    "Start date to compute staging query backfill; this start date will override the start partition in conf.")
+    val parallelism: ScallopOption[Int] =
+      opt[Int](required = false,
+               descr = "Number of parallel jobs to run; default is 1",
+               default = Option(1))
     lazy val stagingQueryConf: api.StagingQuery = parseConf[api.StagingQuery](confPath())
     override def subcommandName() = s"staging_query_${stagingQueryConf.metaData.name}_backfill"
   }
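
Note that this wip commit only defines the new --parallelism option on the three backfill subcommands; nothing consumes the value yet. Below is a minimal sketch, under stated assumptions, of how such a value might bound concurrent step execution; PartitionRange and runStep are hypothetical stand-ins for whatever the Driver will actually call.

import java.util.concurrent.Executors
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.Duration

object ParallelBackfillSketch {
  // Hypothetical stand-ins for the real Driver pieces.
  case class PartitionRange(start: String, end: String)
  def runStep(range: PartitionRange): Unit =
    println(s"backfilling ${range.start}..${range.end}")

  def run(stepRanges: Seq[PartitionRange], parallelism: Int): Unit = {
    // Bound concurrency at the --parallelism value instead of running steps serially.
    implicit val ec: ExecutionContext =
      ExecutionContext.fromExecutorService(Executors.newFixedThreadPool(parallelism))
    Await.result(Future.sequence(stepRanges.map(r => Future(runStep(r)))), Duration.Inf)
  }
}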
7 changes: 5 additions & 2 deletions spark/src/main/scala/ai/chronon/spark/TableUtils.scala
@@ -2,6 +2,7 @@ package ai.chronon.spark

 import ai.chronon.api.{Constants, PartitionSpec}
 import ai.chronon.api.Extensions._
+import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 import org.apache.spark.sql.catalyst.plans.logical.{Filter, Project}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.types._
@@ -226,19 +227,21 @@ case class TableUtils(sparkSession: SparkSession) {
       try {
         sql(creationSql)
       } catch {
+        case _: TableAlreadyExistsException =>
+          println(s"Table $tableName already exists, skipping creation")
         case e: Exception =>
           println(s"Failed to create table $tableName with error: ${e.getMessage}")
           throw e
       }
-    } else {
+    }
     if (tableProperties != null && tableProperties.nonEmpty) {
       sql(alterTablePropertiesSql(tableName, tableProperties))
     }

     if (autoExpand) {
       expandTable(tableName, dfRearranged.schema)
     }
-    }
+

     val finalizedDf = if (autoExpand) {
       // reselect the columns so that any deprecated columns will be selected as NULL before write
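
The new catch arm ties into the parallelism options above: when several step jobs run at once, two can both observe the output table as missing and race on CREATE TABLE; with this change the loser logs and continues instead of failing the whole backfill. A toy reproduction of that race, assuming a local SparkSession (the table name and pool size are illustrative):

import java.util.concurrent.Executors
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.Duration
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException

object CreateTableRaceSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("create-race").getOrCreate()
    implicit val ec: ExecutionContext =
      ExecutionContext.fromExecutorService(Executors.newFixedThreadPool(2))

    def createIfAbsent(): Unit =
      try spark.sql("CREATE TABLE race_demo (ds STRING) USING parquet")
      catch {
        case _: TableAlreadyExistsException =>
          // The losing attempt lands here and keeps going instead of aborting.
          println("Table race_demo already exists, skipping creation")
      }

    // Two concurrent attempts: one CREATE succeeds, the duplicate is skipped.
    Await.result(Future.sequence(Seq(Future(createIfAbsent()), Future(createIfAbsent()))), Duration.Inf)
    spark.stop()
  }
}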
