diff --git a/Dockerfile b/Dockerfile
index 4caefd00b..ba01b09ef 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,7 +3,7 @@ FROM openjdk:8-jre-slim
 
 # Set this manually before building the image, requires a local build of the jar
-ENV CHRONON_JAR_PATH=spark/target/scala-2.12/your_build.jar
+ENV CHRONON_JAR_PATH=spark/target-embedded/scala-2.12/your_build.jar
 
 # Update package lists and install necessary tools
 RUN apt-get update && apt-get install -y \
@@ -75,7 +75,7 @@ WORKDIR ${SPARK_HOME}
 
 # If doing a regular local spark box.
 WORKDIR /srv/chronon
-ENV DRIVER_JAR_PATH="/srv/spark/spark_uber.jar"
+ENV DRIVER_JAR_PATH="/srv/spark/spark_embedded.jar"
 
 COPY api/py/test/sample ./
 COPY quickstart/mongo-online-impl /srv/onlineImpl
diff --git a/build.sbt b/build.sbt
index 45afaa42c..ee6fa2428 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,11 +1,9 @@
-import sbt.Keys.*
+import sbt.Keys._
 import sbt.Test
 import scala.io.StdIn
-import scala.sys.process.*
-import complete.DefaultParsers.*
-
-import scala.language.postfixOps
+import scala.sys.process._
+import complete.DefaultParsers._
 
 lazy val scala211 = "2.11.12"
 lazy val scala212 = "2.12.12"
@@ -53,7 +51,7 @@ lazy val publishSettings = Seq(
 )
 
 // Release related configs
-import sbtrelease.ReleasePlugin.autoImport.ReleaseTransformations.*
+import sbtrelease.ReleasePlugin.autoImport.ReleaseTransformations._
 lazy val releaseSettings = Seq(
   releaseUseGlobalVersion := false,
   releaseVersionBump := sbtrelease.Version.Bump.Next,
@@ -82,13 +80,13 @@ enablePlugins(GitVersioning, GitBranchPrompt)
 lazy val supportedVersions = List(scala211, scala212, scala213)
 
 lazy val root = (project in file("."))
-  .aggregate(api, aggregator, online, spark_uber, flink)
+  .aggregate(api, aggregator, online, spark_uber, spark_embedded, flink)
   .settings(
     publish / skip := true,
     crossScalaVersions := Nil,
     name := "chronon"
   )
-  .settings(releaseSettings *)
+  .settings(releaseSettings: _*)
 
 // Git related config
 git.useGitDescribe := true
@@ -98,10 +96,10 @@ git.gitTagToVersionNumber := { tag: String =>
   val branchTag = git.gitCurrentBranch.value.replace("/", "-")
   if (branchTag == "main" || branchTag == "master") {
     // For main branches, we tag the packages as <package_name>-<version>
-    Some(versionStr)
+    Some(s"${versionStr}")
   } else {
     // For user branches, we tag the packages as <package_name>-<branch>-<version>
-    Some(s"$branchTag-$versionStr")
+    Some(s"${branchTag}-${versionStr}")
   }
 }
@@ -205,7 +203,7 @@ lazy val api = project
       val outputJava = (Compile / sourceManaged).value
       Thrift.gen(inputThrift.getPath, outputJava.getPath, "java")
     }.taskValue,
-    Compile / sourceGenerators += python_api_build.taskValue,
+    sourceGenerators in Compile += python_api_build.taskValue,
     crossScalaVersions := supportedVersions,
     libraryDependencies ++=
       fromMatrix(scalaVersion.value, "spark-sql/provided") ++
@@ -242,8 +240,8 @@ python_api := {
   val s: TaskStreams = streams.value
   val versionStr = (api / version).value
   val branchStr = git.gitCurrentBranch.value.replace("/", "-")
-  s.log.info(s"Building Python API version: ${versionStr}, branch: $branchStr, action: $action ...")
-  if ((s"api/py/python-api-build.sh $versionStr $branchStr $action" !) == 0) {
+  s.log.info(s"Building Python API version: ${versionStr}, branch: ${branchStr}, action: ${action} ...")
+  if ((s"api/py/python-api-build.sh ${versionStr} ${branchStr} ${action}" !) == 0) {
     s.log.success("Built Python API")
   } else {
     throw new IllegalStateException("Python API build failed!")
@@ -286,6 +284,21 @@ lazy val aggregator = project
   )
 
 lazy val online = project
+  .dependsOn(aggregator.%("compile->compile;test->test"))
+  .settings(
+    publishSettings,
+    crossScalaVersions := supportedVersions,
+    libraryDependencies ++= Seq(
+      "org.scala-lang.modules" %% "scala-java8-compat" % "0.9.0",
+      // statsd 3.0 has local aggregation - TODO: upgrade
+      "com.datadoghq" % "java-dogstatsd-client" % "2.7",
+      "org.rogach" %% "scallop" % "4.0.1",
+      "net.jodah" % "typetools" % "0.4.1"
+    ),
+    libraryDependencies ++= fromMatrix(scalaVersion.value, "spark-all", "scala-parallel-collections", "netty-buffer")
+  )
+
+lazy val online_unshaded = (project in file("online"))
   .dependsOn(aggregator.%("compile->compile;test->test"))
   .settings(
     target := target.value.toPath.resolveSibling("target-no-assembly").toFile,
@@ -312,14 +325,13 @@ def cleanSparkMeta(): Unit = {
     file(tmp_warehouse) / "metastore_db")
 }
 
-val sparkBaseSettings: Seq[Setting[?]] = Seq(
+val sparkBaseSettings: Seq[Setting[_]] = Seq(
   assembly / test := {},
   assembly / artifact := {
     val art = (assembly / artifact).value
     art.withClassifier(Some("assembly"))
   },
-  Compile / mainClass := Some("ai.chronon.spark.Driver"),
-  run / mainClass := Some("ai.chronon.spark.Driver"),
+  mainClass in (Compile, run) := Some("ai.chronon.spark.Driver"),
   cleanFiles ++= Seq(file(tmp_warehouse)),
   Test / testOptions += Tests.Setup(() => cleanSparkMeta()),
   // compatibility for m1 chip laptop
@@ -327,13 +339,23 @@ val sparkBaseSettings: Seq[Setting[?]] = Seq(
 ) ++ addArtifact(assembly / artifact, assembly) ++ publishSettings
 
 lazy val spark_uber = (project in file("spark"))
-  .dependsOn(aggregator.%("compile->compile;test->test"), online)
+  .dependsOn(aggregator.%("compile->compile;test->test"), online_unshaded)
   .settings(
     sparkBaseSettings,
     crossScalaVersions := supportedVersions,
     libraryDependencies ++= fromMatrix(scalaVersion.value, "jackson", "spark-all/provided")
   )
 
+lazy val spark_embedded = (project in file("spark"))
+  .dependsOn(aggregator.%("compile->compile;test->test"), online_unshaded)
+  .settings(
+    sparkBaseSettings,
+    crossScalaVersions := supportedVersions,
+    libraryDependencies ++= fromMatrix(scalaVersion.value, "spark-all"),
+    target := target.value.toPath.resolveSibling("target-embedded").toFile,
+    Test / test := {}
+  )
+
 lazy val flink = (project in file("flink"))
   .dependsOn(aggregator.%("compile->compile;test->test"), online)
   .settings(
@@ -363,10 +385,10 @@ sphinx := {
 ThisBuild / assemblyMergeStrategy := {
   case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard
-  case PathList("META-INF", _*) => MergeStrategy.filterDistinctLines
+  case PathList("META-INF", _ @_*) => MergeStrategy.filterDistinctLines
   case "plugin.xml" => MergeStrategy.last
-  case PathList("com", "fasterxml", _*) => MergeStrategy.last
-  case PathList("com", "google", _*) => MergeStrategy.last
+  case PathList("com", "fasterxml", _ @_*) => MergeStrategy.last
+  case PathList("com", "google", _ @_*) => MergeStrategy.last
   case _ => MergeStrategy.first
 }
 
 exportJars := true
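
Taken together, the build.sbt side of this diff splits the Spark build into two flavors that share one source tree: spark_uber keeps Spark dependencies "provided" (supplied by the cluster at runtime), while the new spark_embedded bundles spark-all into its assembly and writes to spark/target-embedded, which is why the Dockerfile's CHRONON_JAR_PATH and DRIVER_JAR_PATH now point at the embedded artifacts. A minimal standalone sketch of that pattern follows, with hypothetical project names and an assumed Spark version, neither taken from this diff:

// Two sbt projects compiled from the same directory; only the dependency
// scope and the output directory differ. (Project names and the Spark
// version "3.1.1" are illustrative assumptions.)
lazy val sparkProvided = (project in file("spark"))
  .settings(
    // Spark is supplied by the runtime cluster, so it stays out of the assembly jar
    libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.1.1" % Provided
  )

lazy val sparkEmbedded = (project in file("spark"))
  .settings(
    // Spark is compiled in, so the jar can run without an external cluster
    libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.1.1",
    // Redirect output so the two flavors do not clobber each other's class
    // files and jars (the diff's target-embedded serves the same purpose)
    target := target.value.toPath.resolveSibling("target-embedded").toFile,
    // Both flavors compile identical sources, so run the test suite only once,
    // in the provided flavor, and skip it here (as spark_embedded does above)
    Test / test := {}
  )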