diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c8330c9..41b735a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
       - uses: actions/setup-java@v4
         with:
           distribution: 'zulu'
-          java-version: '21'
+          java-version: '11'
           cache: 'sbt'
       - name: 👌 Run "pre-push" tasks (compile and style-check)
         run: sbt prep
diff --git a/src/main/g8/default.properties b/src/main/g8/default.properties
index ea4cc27..44ea901 100644
--- a/src/main/g8/default.properties
+++ b/src/main/g8/default.properties
@@ -11,5 +11,8 @@ scalafmt_version = maven(org.scalameta, scalafmt-core_2.13, stable)
 scalatest_version = maven(org.scalatest, scalatest_2.13, stable)
 scalamock_version = maven(org.scalamock, scalamock_2.13, stable)
 nscala-time_version = maven(com.github.nscala-time, nscala-time_2.13, stable)
+spark_version = maven(org.apache.spark, spark-core_2.13, stable)
+spark_sql_version = maven(org.apache.spark, spark-sql_2.13, stable)
+spark_streaming_version = maven(org.apache.spark, spark-streaming_2.13, stable)
 pprint_version = maven(com.lihaoyi, pprint_2.13, stable)
 verbatim = install-hooks.sh pre-push
diff --git a/src/main/g8/project/Dependencies.scala b/src/main/g8/project/Dependencies.scala
index 7f6b922..239ff62 100644
--- a/src/main/g8/project/Dependencies.scala
+++ b/src/main/g8/project/Dependencies.scala
@@ -2,10 +2,12 @@ import sbt._
 
 object Dependencies {
   private val prod = Seq(
-    "com.github.nscala-time" %% "nscala-time" % "$nscala-time_version$",
-    "com.lihaoyi"            %% "pprint"      % "$pprint_version$"
+    "com.github.nscala-time" %% "nscala-time"     % "$nscala-time_version$",
+    "com.lihaoyi"            %% "pprint"          % "$pprint_version$",
+    "org.apache.spark"       %% "spark-core"      % "$spark_version$" % Provided,
+    "org.apache.spark"       %% "spark-sql"       % "$spark_version$" % Provided,
+    "org.apache.spark"       %% "spark-streaming" % "$spark_version$" % Provided
   )
-
   private val test = Seq(
     "org.scalatest" %% "scalatest" % "$scalatest_version$",
     "org.scalamock" %% "scalamock" % "$scalamock_version$"
diff --git a/src/main/g8/src/test/$package$/$name__Camel$Test.scala b/src/main/g8/src/test/$package$/$name__Camel$Test.scala
index c0853f4..f0f79aa 100644
--- a/src/main/g8/src/test/$package$/$name__Camel$Test.scala
+++ b/src/main/g8/src/test/$package$/$name__Camel$Test.scala
@@ -1,9 +1,8 @@
 package $package$
 
-import org.scalatest.wordspec.AnyWordSpec
-import org.scalatest.matchers.should._
+import org.apache.spark.sql.Row
 
-final class $name;format="Camel"$Test extends AnyWordSpec with Matchers {
+final class $name;format="Camel"$Test extends SparkTestHelper {
   "$name;format="Camel"$" should {
     "greet" in {
       val $name;format="camel"$ = new $name;format="Camel"$
@@ -11,7 +10,8 @@ final class $name;format="Camel"$Test extends AnyWordSpec with Matchers {
       val nameToGreet = "Codely"
 
       val greeting = $name;format="camel"$.greet(nameToGreet)
-      greeting shouldBe "Hello " + nameToGreet
+      import testSQLImplicits._
+      Seq(greeting).toDF("greeting").collect() shouldBe Array(Row("Hello Codely"))
     }
   }
 }
diff --git a/src/main/g8/src/test/$package$/SparkTestHelper.scala b/src/main/g8/src/test/$package$/SparkTestHelper.scala
new file mode 100644
index 0000000..5423c56
--- /dev/null
+++ b/src/main/g8/src/test/$package$/SparkTestHelper.scala
@@ -0,0 +1,81 @@
+package $package$
+
+import org.apache.commons.io.FileUtils
+import org.apache.spark.sql.{SQLContext, SQLImplicits, SparkSession}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+
+import java.io.File
+import java.nio.file.Files
+import scala.reflect.io.Directory
+
+trait SparkTestHelper
+    extends AnyWordSpec
+    with BeforeAndAfterEach
+    with BeforeAndAfterAll
+    with Matchers {
+
+  private val sparkSession = SparkSession
+    .builder()
+    .master("local[*]")
+    .appName("test-spark-session")
+    .config(sparkConfiguration)
+    // .enableHiveSupport() uncomment this if you want to use Hive
+    .getOrCreate()
+
+  protected var tempDir: String = _
+
+  protected implicit def spark: SparkSession = sparkSession
+
+  protected def sc: SparkContext = sparkSession.sparkContext
+
+  protected def sparkConfiguration: SparkConf =
+    new SparkConf()
+  /* Uncomment this if you want to use Delta Lake
+
+      .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
+      .set(
+        "spark.sql.catalog.spark_catalog",
+        "org.apache.spark.sql.delta.catalog.DeltaCatalog"
+      )
+   */
+
+  override protected def beforeAll(): Unit = {
+    super.beforeAll()
+    clearTemporaryDirectories()
+  }
+
+  override protected def beforeEach(): Unit = {
+    super.beforeEach()
+    tempDir = Files.createTempDirectory(this.getClass.toString).toString
+  }
+
+  override protected def afterAll(): Unit = {
+    super.afterAll()
+    sparkSession.stop()
+    SparkSession.clearActiveSession()
+    SparkSession.clearDefaultSession()
+    clearTemporaryDirectories()
+  }
+
+  override protected def afterEach(): Unit = {
+    super.afterEach()
+    new Directory(new File(tempDir)).deleteRecursively()
+    spark.sharedState.cacheManager.clearCache()
+    spark.sessionState.catalog.reset()
+  }
+
+  protected object testSQLImplicits extends SQLImplicits {
+    protected override def _sqlContext: SQLContext = sparkSession.sqlContext
+  }
+
+  private def clearTemporaryDirectories(): Unit = {
+    val warehousePath = new File("spark-warehouse").getAbsolutePath
+    FileUtils.deleteDirectory(new File(warehousePath))
+
+    val metastoreDbPath = new File("metastore_db").getAbsolutePath
+    FileUtils.deleteDirectory(new File(metastoreDbPath))
+  }
+}
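
For reference, a test in a project generated from this template could use the new helper roughly as in the sketch below. `WordCountTest`, the `com.example` package, and the sample data are made up for illustration; `SparkTestHelper`, `testSQLImplicits`, the implicit `spark` session, and `tempDir` are the pieces introduced in this diff.

```scala
package com.example // hypothetical package; g8 substitutes $package$ in generated projects

import org.apache.spark.sql.Row

final class WordCountTest extends SparkTestHelper {

  "A generated project" should {

    "build and query DataFrames through testSQLImplicits" in {
      import testSQLImplicits._ // brings toDF and the encoders into scope

      val counts = Seq("hello", "hello", "codely")
        .toDF("word")
        .groupBy("word")
        .count()
        .collect()
        .toSet

      counts shouldBe Set(Row("hello", 2L), Row("codely", 1L))
    }

    "read and write through the per-test temporary directory" in {
      import testSQLImplicits._

      // tempDir is created in beforeEach and deleted recursively in afterEach
      val path = tempDir + "/people"

      Seq(("Alice", 29), ("Bob", 31)).toDF("name", "age").write.parquet(path)

      spark.read.parquet(path).count() shouldBe 2L
    }
  }
}
```

Since the Spark artifacts are added with the `Provided` scope, they stay on the compile and test classpaths (so the local session above works in sbt) but are not packaged with the generated project, which is the usual setup when the application is later submitted to a cluster that already ships Spark.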