From b334e8a0902e25f2ce49e238e3e19f275a3b968e Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 28 Aug 2023 23:17:00 -0700 Subject: [PATCH 01/19] Upgrade to Spark 2.4.8 and now the tests pass on JDK11 yay! --- build.sbt | 10 +++++++--- project/plugins.sbt | 1 + .../data_validator/validator/ColumnSumCheck.scala | 5 +++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/build.sbt b/build.sbt index a5248ef..f96a494 100644 --- a/build.sbt +++ b/build.sbt @@ -1,11 +1,11 @@ name := "data-validator" organization := "com.target" -scalaVersion := "2.11.12" +scalaVersion := "2.12.13" -val sparkVersion = "2.3.4" +val sparkVersion = "2.4.8" -val circeVersion = "0.11.2" +val circeVersion = "0.14.2" //addDependencyTreePlugin enablePlugins(GitVersioning) @@ -78,3 +78,7 @@ TaskKey[Unit]("generateTestData") := { libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion (Compile / runMain).toTask(" com.target.data_validator.GenTestData").value } + +scalafixDependencies in ThisBuild += + "com.holdenkarau" %% "spark-scalafix-rules-2.4.8" % "0.1.15" +semanticdbEnabled in ThisBuild := true diff --git a/project/plugins.sbt b/project/plugins.sbt index 40db52a..afd567f 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -3,3 +3,4 @@ addSbtPlugin("com.github.sbt" % "sbt-git" % "2.0.1") addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.11.0") addSbtPlugin("com.codecommit" % "sbt-github-packages" % "0.5.3") addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.0") +addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.10.4") diff --git a/src/main/scala/com/target/data_validator/validator/ColumnSumCheck.scala b/src/main/scala/com/target/data_validator/validator/ColumnSumCheck.scala index 6a01263..b3bc6ca 100644 --- a/src/main/scala/com/target/data_validator/validator/ColumnSumCheck.scala +++ b/src/main/scala/com/target/data_validator/validator/ColumnSumCheck.scala @@ -78,13 +78,14 @@ case class ColumnSumCheck( } def getData(pctError: String): 
ListMap[String, String] = { - ((minValue, maxValue) match { + val initial: ListMap[String, String] = ((minValue, maxValue) match { case (Some(x), Some(y)) => ListMap("lower_bound" -> x.asNumber.get.toString, "upper_bound" -> y.asNumber.get.toString) case (None, Some(y)) => ListMap("upper_bound" -> y.asNumber.get.toString) case (Some(x), None) => ListMap("lower_bound" -> x.asNumber.get.toString) case (None, None) => throw new RuntimeException("Must define at least one of minValue or maxValue.") - }) + ("inclusive" -> isInclusive.toString, "actual" -> r(idx).toString, "relative_error" -> pctError) + }) + initial ++ List("inclusive" -> isInclusive.toString, "actual" -> r(idx).toString, "relative_error" -> pctError) } val actualSum: Double = dataType match { From 38f2ff4352935fa62bac2dea8459c6ebde5fabbf Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 28 Aug 2023 23:58:00 -0700 Subject: [PATCH 02/19] Most of the way to working on Spark 3, still needs some tests to be updated since the SQL pretty printer has changed in Spark 3. 
--- build.sbt | 6 +----- .../data_validator/validator/ColumnBased.scala | 2 +- .../data_validator/validator/ValidatorBase.scala | 4 ++-- .../validator/StringLengthCheckSpec.scala | 14 +++++++------- .../validator/StringRegexCheckSpec.scala | 10 +++++----- 5 files changed, 16 insertions(+), 20 deletions(-) diff --git a/build.sbt b/build.sbt index f96a494..b232fed 100644 --- a/build.sbt +++ b/build.sbt @@ -3,7 +3,7 @@ organization := "com.target" scalaVersion := "2.12.13" -val sparkVersion = "2.4.8" +val sparkVersion = "3.4.1" val circeVersion = "0.14.2" @@ -78,7 +78,3 @@ TaskKey[Unit]("generateTestData") := { libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion (Compile / runMain).toTask(" com.target.data_validator.GenTestData").value } - -scalafixDependencies in ThisBuild += - "com.holdenkarau" %% "spark-scalafix-rules-2.4.8" % "0.1.15" -semanticdbEnabled in ThisBuild := true diff --git a/src/main/scala/com/target/data_validator/validator/ColumnBased.scala b/src/main/scala/com/target/data_validator/validator/ColumnBased.scala index a283ab2..21c43c7 100644 --- a/src/main/scala/com/target/data_validator/validator/ColumnBased.scala +++ b/src/main/scala/com/target/data_validator/validator/ColumnBased.scala @@ -86,7 +86,7 @@ case class MinNumRows(minNumRows: Json) extends ColumnBased("", ValidatorBase.L0 } case class ColumnMaxCheck(column: String, value: Json) - extends ColumnBased(column, Max(UnresolvedAttribute(column)).toAggregateExpression()) { + extends ColumnBased(column, Max(UnresolvedAttribute.quoted(column)).toAggregateExpression()) { override def substituteVariables(dict: VarSubstitution): ValidatorBase = { val ret = copy(column = getVarSub(column, "column", dict), value = getVarSubJson(value, "value", dict)) diff --git a/src/main/scala/com/target/data_validator/validator/ValidatorBase.scala b/src/main/scala/com/target/data_validator/validator/ValidatorBase.scala index 0134a3a..17d46af 100644 --- 
a/src/main/scala/com/target/data_validator/validator/ValidatorBase.scala +++ b/src/main/scala/com/target/data_validator/validator/ValidatorBase.scala @@ -141,8 +141,8 @@ object ValidatorBase extends LazyLogging { private val backtick = "`" val I0: Literal = Literal.create(0, IntegerType) val D0: Literal = Literal.create(0.0, DoubleType) - val L0: Literal = Literal.create(0, LongType) - val L1: Literal = Literal.create(1, LongType) + val L0: Literal = Literal.create(0L, LongType) + val L1: Literal = Literal.create(1L, LongType) def isValueColumn(v: String): Boolean = v.startsWith(backtick) diff --git a/src/test/scala/com/target/data_validator/validator/StringLengthCheckSpec.scala b/src/test/scala/com/target/data_validator/validator/StringLengthCheckSpec.scala index 3e0493f..0166030 100644 --- a/src/test/scala/com/target/data_validator/validator/StringLengthCheckSpec.scala +++ b/src/test/scala/com/target/data_validator/validator/StringLengthCheckSpec.scala @@ -327,13 +327,13 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe ValidatorQuickCheckError( ("item", "I") :: Nil, "I", - "StringLengthCheck failed! item = I and ((length('item) < 5) || (length('item) > 6))" + "StringLengthCheck failed! item = I and ((length('item) < 5) OR (length('item) > 6))" )) ^ (sut.getEvents contains ValidatorQuickCheckError( ("item", "") :: Nil, "", - "StringLengthCheck failed! item = and ((length('item) < 5) || (length('item) > 6))" + "StringLengthCheck failed! item = and ((length('item) < 5) OR (length('item) > 6))" )) ) } @@ -364,7 +364,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe ValidatorQuickCheckError( ("item", "I") :: Nil, "I", - "StringLengthCheck failed! item = I and ((length('item) < 5) || (length('item) > 6))" + "StringLengthCheck failed! 
item = I and ((length('item) < 5) OR (length('item) > 6))" ) ) @@ -373,7 +373,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe ValidatorQuickCheckError( ("item", "") :: Nil, "", - "StringLengthCheck failed! item = and ((length('item) < 5) || (length('item) > 6))" + "StringLengthCheck failed! item = and ((length('item) < 5) OR (length('item) > 6))" ) ) } @@ -404,7 +404,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe ValidatorQuickCheckError( ("item", "I") :: Nil, "I", - "StringLengthCheck failed! item = I and ((length('item) < 5) || (length('item) > 5))" + "StringLengthCheck failed! item = I and ((length('item) < 5) OR (length('item) > 5))" ) ) @@ -413,7 +413,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe ValidatorQuickCheckError( ("item", "") :: Nil, "", - "StringLengthCheck failed! item = and ((length('item) < 5) || (length('item) > 5))" + "StringLengthCheck failed! item = and ((length('item) < 5) OR (length('item) > 5))" ) ) @@ -422,7 +422,7 @@ class StringLengthCheckSpec extends AnyFunSpec with Matchers with TestingSparkSe ValidatorQuickCheckError( ("item", "Item23") :: Nil, "Item23", - "StringLengthCheck failed! item = Item23 and ((length('item) < 5) || (length('item) > 5))" + "StringLengthCheck failed! item = Item23 and ((length('item) < 5) OR (length('item) > 5))" ) ) } diff --git a/src/test/scala/com/target/data_validator/validator/StringRegexCheckSpec.scala b/src/test/scala/com/target/data_validator/validator/StringRegexCheckSpec.scala index 542049e..23b59c9 100644 --- a/src/test/scala/com/target/data_validator/validator/StringRegexCheckSpec.scala +++ b/src/test/scala/com/target/data_validator/validator/StringRegexCheckSpec.scala @@ -183,7 +183,7 @@ class StringRegexCheckSpec extends AnyFunSpec with Matchers with TestingSparkSes ValidatorQuickCheckError( ("item", "I") :: Nil, "I", - "StringRegexCheck failed! 
item = I and (NOT 'item RLIKE ^It && isnotnull('item))" + "StringRegexCheck failed! item = I and (NOT RLIKE('item, ^It) AND isnotnull('item))" ) ) } @@ -214,7 +214,7 @@ class StringRegexCheckSpec extends AnyFunSpec with Matchers with TestingSparkSes ValidatorQuickCheckError( ("item", "I") :: Nil, "I", - "StringRegexCheck failed! item = I and (NOT 'item RLIKE ^Item2 && isnotnull('item))" + "StringRegexCheck failed! item = I and (NOT RLIKE('item, ^Item2) AND isnotnull('item))" ) ) @@ -223,7 +223,7 @@ class StringRegexCheckSpec extends AnyFunSpec with Matchers with TestingSparkSes ValidatorQuickCheckError( ("item", "Item1") :: Nil, "Item1", - "StringRegexCheck failed! item = Item1 and (NOT 'item RLIKE ^Item2 && isnotnull('item))" + "StringRegexCheck failed! item = Item1 and (NOT RLIKE('item, ^Item2) AND isnotnull('item))" ) ) } @@ -254,13 +254,13 @@ class StringRegexCheckSpec extends AnyFunSpec with Matchers with TestingSparkSes ValidatorQuickCheckError( ("item", "I") :: Nil, "I", - "StringRegexCheck failed! item = I and (NOT 'item RLIKE ^Item2 && isnotnull('item))" + "StringRegexCheck failed! item = I and (NOT RLIKE('item, ^Item2) AND isnotnull('item))" )) ^ (sut.getEvents contains ValidatorQuickCheckError( ("item", "Item1") :: Nil, "Item1", - "StringRegexCheck failed! item = Item1 and (NOT 'item RLIKE ^Item2 && isnotnull('item))" + "StringRegexCheck failed! item = Item1 and (NOT RLIKE('item, ^Item2) AND isnotnull('item))" )) ) } From 7d8ab649415b5a38b33b0e4ed966f23692409c66 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 28 Aug 2023 23:59:18 -0700 Subject: [PATCH 03/19] Use same circe version for yaml as well. 
--- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index b232fed..4d8ab8b 100644 --- a/build.sbt +++ b/build.sbt @@ -35,7 +35,7 @@ libraryDependencies ++= Seq( "com.github.scopt" %% "scopt" % "4.1.0", "com.sun.mail" % "javax.mail" % "1.6.2", "com.lihaoyi" %% "scalatags" % "0.12.0", - "io.circe" %% "circe-yaml" % "0.10.1", + "io.circe" %% "circe-yaml" % circeVersion, "io.circe" %% "circe-core" % circeVersion, "io.circe" %% "circe-generic" % circeVersion, "io.circe" %% "circe-parser" % circeVersion, From 8c7ced016bd815f5de8cfe69fd49688bc571cd87 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 29 Aug 2023 00:04:55 -0700 Subject: [PATCH 04/19] Ok the tests pass now --- .../target/data_validator/validator/ColumnSumCheck.scala | 2 +- .../target/data_validator/validator/RangeCheckSpec.scala | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/target/data_validator/validator/ColumnSumCheck.scala b/src/main/scala/com/target/data_validator/validator/ColumnSumCheck.scala index b3bc6ca..d5196f9 100644 --- a/src/main/scala/com/target/data_validator/validator/ColumnSumCheck.scala +++ b/src/main/scala/com/target/data_validator/validator/ColumnSumCheck.scala @@ -16,7 +16,7 @@ case class ColumnSumCheck( minValue: Option[Json] = None, maxValue: Option[Json] = None, inclusive: Option[Json] = None -) extends ColumnBased(column, Sum(UnresolvedAttribute(column)).toAggregateExpression()) { +) extends ColumnBased(column, Sum(UnresolvedAttribute.quoted(column)).toAggregateExpression()) { private val minOrMax: Either[String, Unit] = if (minValue.isEmpty && maxValue.isEmpty) { Left("'minValue' or 'maxValue' or both must be defined") diff --git a/src/test/scala/com/target/data_validator/validator/RangeCheckSpec.scala b/src/test/scala/com/target/data_validator/validator/RangeCheckSpec.scala index 9a77175..37e2198 100644 --- a/src/test/scala/com/target/data_validator/validator/RangeCheckSpec.scala 
+++ b/src/test/scala/com/target/data_validator/validator/RangeCheckSpec.scala @@ -305,7 +305,7 @@ class RangeCheckSpec extends AnyFunSpec with Matchers with TestingSparkSession { ValidatorQuickCheckError( ("item", "Eggs") :: Nil, 5.99, - "RangeCheck failed! max = 5.99 and (('max <= 6.0) || ('max >= 10.0))" + "RangeCheck failed! max = 5.99 and (('max <= 6.0) OR ('max >= 10.0))" ) ) } @@ -341,7 +341,7 @@ class RangeCheckSpec extends AnyFunSpec with Matchers with TestingSparkSession { val dict = new VarSubstitution val df = mkDataFrame(spark, defData) assert(!sut.configCheck(df)) - assert(sut.colTest(df.schema, dict).sql == "((`avg` < `min`) OR (`avg` > `max`))") + assert(sut.colTest(df.schema, dict).sql == "((avg < min) OR (avg > max))") } it("bad minValue column") { @@ -395,7 +395,7 @@ class RangeCheckSpec extends AnyFunSpec with Matchers with TestingSparkSession { ValidatorQuickCheckError( ("item", "Bread") :: Nil, 0.99, - "RangeCheck failed! avg = 0.99 and (('avg <= 'min) || ('avg >= 'max))" + "RangeCheck failed! 
avg = 0.99 and (('avg <= 'min) OR ('avg >= 'max))" ) ) } From 92e137ba82d18a43e4e1c3eedec70c4120c274e3 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 2 Sep 2023 16:15:18 -0700 Subject: [PATCH 05/19] A bit of work towards cross-building --- .github/workflows/ci.yaml | 6 +++-- .github/workflows/release.yaml | 2 ++ build.sbt | 49 +++++++++++++++++++++++++++------- 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 638c75f..3f2a26f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,5 +23,7 @@ jobs: - uses: coursier/setup-action@v1 with: jvm: adopt:1.8 - - name: Build, test, and package project - run: bin/sbt clean compile test package makePom + - name: Build, test, and package project on Spark 3.3 + run: bin/sbt clean compile test package makePom -DsparkVersion=3.3.1 + - name: Build and package project on "legacy" Spark + run: bin/sbt clean compile package makePom diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 18dbf7a..f73f0ff 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -25,3 +25,5 @@ jobs: # uses sbt-github-packages, see build.sbt - name: Publish with SBT run: bin/sbt publish + - name: Publish with SBT + run: bin/sbt publish -DsparkVersion=3.3.1 diff --git a/build.sbt b/build.sbt index 4d8ab8b..b1f6ef6 100644 --- a/build.sbt +++ b/build.sbt @@ -1,11 +1,42 @@ name := "data-validator" organization := "com.target" -scalaVersion := "2.12.13" +val sparkVersion = settingKey[String]("Spark version") -val sparkVersion = "3.4.1" +sparkVersion := System.getProperty("sparkVersion", "2.3.4") -val circeVersion = "0.14.2" +scalaVersion := { + if (sparkVersion.value > "3.0") { + "2.12.13" + } else { + "2.11.12" + } +} + +val sparkValidationVersion = settingKey[String]("Version of package") + +sparkValidationVersion := "0.15.0" + +version := sparkVersion.value + "_" + sparkValidationVersion.value + +val 
circeVersion = settingKey[String]("Circe version") val circeYamlVersion = settingKey[String]("Circe YAML version") circeVersion := { if (sparkVersion.value > "3.0") { "0.14.2" } else { "0.11.2" } } circeYamlVersion := { if (sparkVersion.value > "3.0") { "0.14.2" } else { "0.10.1" } } //addDependencyTreePlugin enablePlugins(GitVersioning) @@ -35,11 +66,11 @@ libraryDependencies ++= Seq( "com.github.scopt" %% "scopt" % "4.1.0", "com.sun.mail" % "javax.mail" % "1.6.2", "com.lihaoyi" %% "scalatags" % "0.12.0", - "io.circe" %% "circe-yaml" % circeVersion, - "io.circe" %% "circe-core" % circeVersion, - "io.circe" %% "circe-generic" % circeVersion, - "io.circe" %% "circe-parser" % circeVersion, - "org.apache.spark" %% "spark-sql" % sparkVersion % Provided, + "io.circe" %% "circe-yaml" % circeYamlVersion.value, + "io.circe" %% "circe-core" % circeVersion.value, + "io.circe" %% "circe-generic" % circeVersion.value, + "io.circe" %% "circe-parser" % circeVersion.value, + "org.apache.spark" %% "spark-sql" % sparkVersion.value % Provided, "junit" % "junit" % "4.13.2" % Test, "org.scalatest" %% "scalatest" % "3.2.15" % Test, "com.github.sbt" % "junit-interface" % "0.13.3" % Test exclude ("junit", "junit-dep") @@ -75,6 +106,6 @@ compileScalastyle := (Compile / scalastyle).toTask("").value (Compile / runMain) := Defaults.runMainTask(Compile / fullClasspath, Compile / run / runner).evaluated TaskKey[Unit]("generateTestData") := { - libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion + libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion.value (Compile / runMain).toTask(" com.target.data_validator.GenTestData").value } From d00d2ef7ba414dcbf76d93c088feb8fd39a9c75a Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 4 Sep 2023 17:02:54 -0700 Subject: [PATCH 06/19] Fix JDK11 test failures by adding the required JVM options (--add-opens and netty reflection access).
--- build.sbt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index b1f6ef6..79d10fd 100644 --- a/build.sbt +++ b/build.sbt @@ -77,7 +77,15 @@ libraryDependencies ++= Seq( ) Test / fork := true -javaOptions ++= Seq("-Xms512M", "-Xmx2048M", "-XX:+CMSClassUnloadingEnabled") +javaOptions ++= (if (sparkVersion.value > "3.0") { + Seq("-Xms4048M", "-Xmx4048M", + "-Dio.netty.tryReflectionSetAccessible=true", + "--add-opens=java.base/java.lang=ALL-UNNAMED", + "--add-opens=java.base/java.io=ALL-UNNAMED", + "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED") +} else { + Seq("-Xms4048M", "-Xmx4048M") +}) Test / parallelExecution := false // required for unit tests, but not set in some environments Test / envVars ++= Map( From 782767e030dd90c1e43321d40e2a23f7197fb642 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 7 Sep 2023 20:46:29 -0700 Subject: [PATCH 07/19] In Spark 3 we need to shade shapeless/cats (or users need to specify user classpath first) and also pick a better Scala version. --- build.sbt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 79d10fd..cf5d84b 100644 --- a/build.sbt +++ b/build.sbt @@ -7,7 +7,7 @@ sparkVersion := System.getProperty("sparkVersion", "2.3.4") scalaVersion := { if (sparkVersion.value > "3.0") { - "2.12.13" + "2.12.17" } else { "2.11.12" } @@ -96,6 +96,11 @@ Test / envVars ++= Map( assembly / mainClass := Some("com.target.data_validator.Main") +assembly / assemblyShadeRules := Seq( + ShadeRule.rename("shapeless.**" -> "new_shapeless.@1").inAll, + ShadeRule.rename("cats.kernel.**" -> s"new_cats.kernel.@1").inAll + ) + // Enforces scalastyle checks val compileScalastyle = TaskKey[Unit]("compileScalastyle") scalastyleFailOnWarning := true From 0e5a2ab803270c5d98cbd4966bc0ab4e65d7e97c Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 18 Sep 2023 17:37:32 -0700 Subject: [PATCH 08/19] Only set these params for modern JDK. 
--- README.md | 2 ++ build.sbt | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c4bbe6d..c0ef6a3 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,8 @@ Usage: data-validator [options] --help Show this help message and exit. ``` +If you want to build with a modern version of java set the "MODERN_JAVA" enviornment variable. + ## Example Run With the JAR directly: diff --git a/build.sbt b/build.sbt index cf5d84b..1d0e888 100644 --- a/build.sbt +++ b/build.sbt @@ -77,7 +77,8 @@ libraryDependencies ++= Seq( ) Test / fork := true -javaOptions ++= (if (sparkVersion.value > "3.0") { +javaOptions ++= (if (sparkVersion.value > "3.0" && System.getenv("MODERN_JAVA") == "TRUE") { + // For modern Java we need to open up a lot of config options. Seq("-Xms4048M", "-Xmx4048M", "-Dio.netty.tryReflectionSetAccessible=true", "--add-opens=java.base/java.lang=ALL-UNNAMED", From e0f6a1535bce93d6d0bfa5aed633c6cc5fdb1fd2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 15 Feb 2024 09:05:37 +0000 Subject: [PATCH 09/19] Update scalatest from 3.2.17 to 3.2.18 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 61e2c89..b4e611b 100644 --- a/build.sbt +++ b/build.sbt @@ -41,7 +41,7 @@ libraryDependencies ++= Seq( "io.circe" %% "circe-parser" % circeVersion, "org.apache.spark" %% "spark-sql" % sparkVersion % Provided, "junit" % "junit" % "4.13.2" % Test, - "org.scalatest" %% "scalatest" % "3.2.17" % Test, + "org.scalatest" %% "scalatest" % "3.2.18" % Test, "com.github.sbt" % "junit-interface" % "0.13.3" % Test exclude ("junit", "junit-dep") ) From 28f033b74f5e6bb5dd6556751cbcc7a40e72e47f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 1 Mar 2024 09:05:39 +0000 Subject: [PATCH 10/19] Update sbt from 1.9.8 to 1.9.9 --- project/build.properties | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index 0aa5c39..49214c4 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version = 1.9.8 +sbt.version = 1.9.9 From 17b81952e5e674f7f204a056e64b14e1ccd49713 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 15 Mar 2024 09:04:55 +0000 Subject: [PATCH 11/19] Update sbt-assembly from 2.1.5 to 2.2.0 --- project/assembly.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/assembly.sbt b/project/assembly.sbt index d83c883..e5ab6cc 100644 --- a/project/assembly.sbt +++ b/project/assembly.sbt @@ -1 +1 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.5") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.2.0") From 7f78124f04d11f1a9620f9834703e29a67720ddf Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Wed, 10 Apr 2024 17:24:58 -0400 Subject: [PATCH 12/19] Use Spark 3.5.1 in CI --- .github/workflows/ci.yaml | 2 +- .github/workflows/release.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3a90660..e216e30 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -24,6 +24,6 @@ jobs: with: jvm: adopt:1.8 - name: Build, test, and package project on Spark 3.3 - run: bin/sbt clean compile test package makePom -DsparkVersion=3.3.1 + run: bin/sbt clean compile test package makePom -DsparkVersion=3.5.1 - name: Build and package project on "legacy" Spark run: bin/sbt clean compile package makePom diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 63046e8..4c6c21c 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -26,4 +26,4 @@ jobs: - name: Publish with SBT run: bin/sbt publish - name: Publish with SBT - run: bin/sbt publish -DsparkVersion=3.3.1 + run: bin/sbt publish 
-DsparkVersion=3.5.1 From 028ea777fc73a170da620afa6de3ab365789a322 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Wed, 10 Apr 2024 17:28:05 -0400 Subject: [PATCH 13/19] Adds nudge to set MODERN_JAVA for JDK11+ --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c0ef6a3..c7aee7f 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,8 @@ Usage: data-validator [options] --help Show this help message and exit. ``` -If you want to build with a modern version of java set the "MODERN_JAVA" enviornment variable. +If you want to build with Java 11 or newer, set the "MODERN_JAVA" environment variable. +This may become the default in the future. ## Example Run From 459ac542b1900c7825ab6fb7e9c5f1514334a202 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Wed, 10 Apr 2024 17:28:47 -0400 Subject: [PATCH 14/19] Fixes CI step name to match Spark version therein --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e216e30..7e1d68e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,7 +23,7 @@ jobs: - uses: coursier/setup-action@v1 with: jvm: adopt:1.8 - - name: Build, test, and package project on Spark 3.3 + - name: Build, test, and package project on Spark 3.5 run: bin/sbt clean compile test package makePom -DsparkVersion=3.5.1 - name: Build and package project on "legacy" Spark run: bin/sbt clean compile package makePom From 81d9d9d8da69adbd94d7e426f5aa8897eb3b65a6 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Wed, 10 Apr 2024 17:29:03 -0400 Subject: [PATCH 15/19] Use latest Scala 2.12 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index d438698..c90cf88 100644 --- a/build.sbt +++ b/build.sbt @@ -7,7 +7,7 @@ sparkVersion := System.getProperty("sparkVersion", "2.3.4") scalaVersion := { if (sparkVersion.value > "3.0") { - "2.12.17" + "2.12.19" 
} else { "2.11.12" } From f363f3842f362d0436fbf292b45efbe6294d7ca7 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Wed, 10 Apr 2024 17:29:36 -0400 Subject: [PATCH 16/19] Use latest circe 0.14.6 --- build.sbt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sbt b/build.sbt index c90cf88..1282b3b 100644 --- a/build.sbt +++ b/build.sbt @@ -24,7 +24,7 @@ val circeYamlVersion = settingKey[String]("Circe YAML version") circeVersion := { if (sparkVersion.value > "3.0") { - "0.14.2" + "0.14.6" } else { "0.11.2" } @@ -32,7 +32,7 @@ circeVersion := { circeYamlVersion := { if (sparkVersion.value > "3.0") { - "0.14.2" + "0.14.6" } else { "0.10.1" } From aacdf7f83323300ea7c61c518898eef735afb95f Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Wed, 10 Apr 2024 17:30:27 -0400 Subject: [PATCH 17/19] Adds a note about new MODERN_JAVA options --- build.sbt | 1 + 1 file changed, 1 insertion(+) diff --git a/build.sbt b/build.sbt index 1282b3b..3d4c96b 100644 --- a/build.sbt +++ b/build.sbt @@ -80,6 +80,7 @@ Test / fork := true javaOptions ++= (if (sparkVersion.value > "3.0" && System.getenv("MODERN_JAVA") == "TRUE") { // For modern Java we need to open up a lot of config options. Seq("-Xms4048M", "-Xmx4048M", + // these were added in JDK 11 and newer, apparently. 
"-Dio.netty.tryReflectionSetAccessible=true", "--add-opens=java.base/java.lang=ALL-UNNAMED", "--add-opens=java.base/java.io=ALL-UNNAMED", From faa81abfe0100d42e68b73c2aa7b096f44b3da18 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Wed, 10 Apr 2024 17:36:21 -0400 Subject: [PATCH 18/19] Use circe-yaml 0.15.1 There was no 0.14.6 for circe-yaml --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 3d4c96b..b9e3431 100644 --- a/build.sbt +++ b/build.sbt @@ -32,7 +32,7 @@ circeVersion := { circeYamlVersion := { if (sparkVersion.value > "3.0") { - "0.14.6" + "0.15.1" } else { "0.10.1" } From bb95cc996a018c63c8570dbb33ea4261ba766718 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Wed, 10 Apr 2024 17:49:35 -0400 Subject: [PATCH 19/19] Updates to latest sbtx with `make refresh-sbt`. --- bin/sbt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/sbt b/bin/sbt index 19d8917..8ecfd33 100755 --- a/bin/sbt +++ b/bin/sbt @@ -34,11 +34,11 @@ set -o pipefail -declare -r sbt_release_version="1.6.2" -declare -r sbt_unreleased_version="1.7.0-M2" +declare -r sbt_release_version="1.9.9" +declare -r sbt_unreleased_version="1.9.9" -declare -r latest_213="2.13.8" -declare -r latest_212="2.12.15" +declare -r latest_213="2.13.13" +declare -r latest_212="2.12.19" declare -r latest_211="2.11.12" declare -r latest_210="2.10.7" declare -r latest_29="2.9.3"