diff --git a/.gitignore b/.gitignore index e9d8dc314..3a68abd95 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ sbt/*.jar .settings .cache +.mima-excludes /build/ work/ out/ diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 0624117f4..5f5439141 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -58,6 +58,7 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes" + CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes" CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes" diff --git a/bin/spark-class b/bin/spark-class index 229ae2ceb..a3efa2ff9 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -137,8 +137,7 @@ fi # Compute classpath using external script CLASSPATH=`$FWDIR/bin/compute-classpath.sh` - -if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then +if [[ "$1" =~ org.apache.spark.tools.* ]]; then CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" fi diff --git a/dev/run-tests b/dev/run-tests index 6e78cad99..6f115d2ab 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -59,3 +59,10 @@ if [ -z "$PYSPARK_PYTHON" ]; then export PYSPARK_PYTHON=/usr/local/bin/python2.7 fi ./python/run-tests + +echo "=========================================================================" +echo "Detecting binary incompatibilites with MiMa" +echo "=========================================================================" +./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore +sbt/sbt mima-report-binary-issues + diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala new file mode 100644 index 000000000..e7c9c47c9 --- /dev/null +++ b/project/MimaBuild.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} +import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings +import sbt._ + +object MimaBuild { + + def ignoredABIProblems(base: File) = { + import com.typesafe.tools.mima.core._ + import com.typesafe.tools.mima.core.ProblemFilters._ + + // Excludes placed here will be used for all Spark versions + val defaultExcludes = Seq() + + // Read package-private excludes from file + val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes") + val excludeFile = file(excludeFilePath) + val packagePrivateList: Seq[String] = + if (!excludeFile.exists()) { + Seq() + } else { + IO.read(excludeFile).split("\n") + } + + def excludeClass(className: String) = { + Seq( + excludePackage(className), + ProblemFilters.exclude[MissingClassProblem](className), + ProblemFilters.exclude[MissingTypesProblem](className), + excludePackage(className + "$"), + ProblemFilters.exclude[MissingClassProblem](className + "$"), + ProblemFilters.exclude[MissingTypesProblem](className + "$") + ) + } + def excludeSparkClass(className: String) = excludeClass("org.apache.spark." + className) + + val packagePrivateExcludes = packagePrivateList.flatMap(excludeClass) + + /* Excludes specific to a given version of Spark. When comparing the given version against + its immediate predecessor, the excludes listed here will be applied. */ + val versionExcludes = + SparkBuild.SPARK_VERSION match { + case v if v.startsWith("1.0") => + Seq( + excludePackage("org.apache.spark.api.java"), + excludePackage("org.apache.spark.streaming.api.java"), + excludePackage("org.apache.spark.mllib") + ) ++ + excludeSparkClass("rdd.ClassTags") ++ + excludeSparkClass("util.XORShiftRandom") ++ + excludeSparkClass("mllib.recommendation.MFDataGenerator") ++ + excludeSparkClass("mllib.optimization.SquaredGradient") ++ + excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++ + excludeSparkClass("mllib.regression.LassoWithSGD") ++ + excludeSparkClass("mllib.regression.LinearRegressionWithSGD") + case _ => Seq() + } + + defaultExcludes ++ packagePrivateExcludes ++ versionExcludes + } + + def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq( + previousArtifact := None, + binaryIssueFilters ++= ignoredABIProblems(sparkHome) + ) + +} diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 1969486f7..21d2779d8 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -22,6 +22,7 @@ import sbtassembly.Plugin._ import AssemblyKeys._ import scala.util.Properties import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings} +import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact import scala.collection.JavaConversions._ @@ -29,6 +30,8 @@ import scala.collection.JavaConversions._ //import com.jsuereth.pgp.sbtplugin.PgpKeys._ object SparkBuild extends Build { + val SPARK_VERSION = "1.0.0-SNAPSHOT" + // Hadoop version to build against. For example, "1.0.4" for Apache releases, or // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN. @@ -146,9 +149,9 @@ object SparkBuild extends Build { lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj, hive) ++ maybeJava8Tests - def sharedSettings = Defaults.defaultSettings ++ Seq( + def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq( organization := "org.apache.spark", - version := "1.0.0-SNAPSHOT", + version := SPARK_VERSION, scalaVersion := "2.10.3", scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation", "-target:" + SCALAC_JVM_VERSION), @@ -284,9 +287,14 @@ object SparkBuild extends Build { val excludeSLF4J = ExclusionRule(organization = "org.slf4j") val excludeScalap = ExclusionRule(organization = "org.scala-lang", artifact = "scalap") + def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark", + version: String = "0.9.0-incubating", crossVersion: String = "2.10"): Option[sbt.ModuleID] = { + val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion + Some(organization % fullId % version) // the artifact to compare binary compatibility with + } + def coreSettings = sharedSettings ++ Seq( name := "spark-core", - libraryDependencies ++= Seq( "com.google.guava" % "guava" % "14.0.1", "com.google.code.findbugs" % "jsr305" % "1.3.9", @@ -325,7 +333,7 @@ object SparkBuild extends Build { publish := {} ) - def replSettings = sharedSettings ++ Seq( + def replSettings = sharedSettings ++ Seq( name := "spark-repl", libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v ), libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "jline" % v ), @@ -354,17 +362,20 @@ object SparkBuild extends Build { def graphxSettings = sharedSettings ++ Seq( name := "spark-graphx", + previousArtifact := sparkPreviousArtifact("spark-graphx"), libraryDependencies ++= Seq( "org.jblas" % "jblas" % "1.2.3" ) ) def bagelSettings = sharedSettings ++ Seq( - name := "spark-bagel" + name := "spark-bagel", + previousArtifact := sparkPreviousArtifact("spark-bagel") ) def mllibSettings = sharedSettings ++ Seq( name := "spark-mllib", + previousArtifact := sparkPreviousArtifact("spark-mllib"), libraryDependencies ++= Seq( "org.jblas" % "jblas" % "1.2.3", "org.scalanlp" %% "breeze" % "0.7" @@ -428,6 +439,7 @@ object SparkBuild extends Build { def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", + previousArtifact := sparkPreviousArtifact("spark-streaming"), libraryDependencies ++= Seq( "commons-io" % "commons-io" % "2.4" ) @@ -503,6 +515,7 @@ object SparkBuild extends Build { def twitterSettings() = sharedSettings ++ Seq( name := "spark-streaming-twitter", + previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"), libraryDependencies ++= Seq( "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty) ) @@ -510,6 +523,7 @@ object SparkBuild extends Build { def kafkaSettings() = sharedSettings ++ Seq( name := "spark-streaming-kafka", + previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"), libraryDependencies ++= Seq( "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty), "org.apache.kafka" %% "kafka" % "0.8.0" @@ -522,6 +536,7 @@ object SparkBuild extends Build { def flumeSettings() = sharedSettings ++ Seq( name := "spark-streaming-flume", + previousArtifact := sparkPreviousArtifact("spark-streaming-flume"), libraryDependencies ++= Seq( "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty) ) @@ -529,6 +544,7 @@ object SparkBuild extends Build { def zeromqSettings() = sharedSettings ++ Seq( name := "spark-streaming-zeromq", + previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"), libraryDependencies ++= Seq( "org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty) ) @@ -536,6 +552,7 @@ object SparkBuild extends Build { def mqttSettings() = streamingSettings ++ Seq( name := "spark-streaming-mqtt", + previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"), libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0") ) } diff --git a/project/plugins.sbt b/project/plugins.sbt index 32bc044a9..4ff6f67af 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -19,4 +19,6 @@ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4") addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.4.0") +addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6") + addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0") diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala new file mode 100644 index 000000000..5547e9fe5 --- /dev/null +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.tools + +import java.io.File +import java.util.jar.JarFile + +import scala.collection.mutable +import scala.collection.JavaConversions._ +import scala.reflect.runtime.universe.runtimeMirror + +/** + * A tool for generating classes to be excluded during binary checking with MIMA. It is expected + * that this tool is run with ./spark-class. + * + * MIMA itself only supports JVM-level visibility and doesn't account for package-private classes. + * This tool looks at all currently package-private classes and generates exclusions for them. Note + * that this approach is not sound. It can lead to false positives if we move or rename a previously + * package-private class. It can lead to false negatives if someone explicitly makes a class + * package-private that wasn't before. This exists only to help catch certain classes of changes + * which might be difficult to catch during review. + */ +object GenerateMIMAIgnore { + private val classLoader = Thread.currentThread().getContextClassLoader + private val mirror = runtimeMirror(classLoader) + + private def classesPrivateWithin(packageName: String): Set[String] = { + + val classes = getClasses(packageName, classLoader) + val privateClasses = mutable.HashSet[String]() + + def isPackagePrivate(className: String) = { + try { + /* Couldn't figure out if it's possible to determine a-priori whether a given symbol + is a module or class. */ + + val privateAsClass = mirror + .staticClass(className) + .privateWithin + .fullName + .startsWith(packageName) + + val privateAsModule = mirror + .staticModule(className) + .privateWithin + .fullName + .startsWith(packageName) + + privateAsClass || privateAsModule + } catch { + case _: Throwable => { + println("Error determining visibility: " + className) + false + } + } + } + + for (className <- classes) { + val directlyPrivateSpark = isPackagePrivate(className) + + /* Inner classes defined within a private[spark] class or object are effectively + invisible, so we account for them as package private. */ + val indirectlyPrivateSpark = { + val maybeOuter = className.toString.takeWhile(_ != '$') + if (maybeOuter != className) { + isPackagePrivate(maybeOuter) + } else { + false + } + } + if (directlyPrivateSpark || indirectlyPrivateSpark) privateClasses += className + } + privateClasses.flatMap(c => Seq(c, c.replace("$", "#"))).toSet + } + + def main(args: Array[String]) { + scala.tools.nsc.io.File(".mima-excludes"). + writeAll(classesPrivateWithin("org.apache.spark").mkString("\n")) + println("Created : .mima-excludes in current directory.") + } + + + private def shouldExclude(name: String) = { + // Heuristic to remove JVM classes that do not correspond to user-facing classes in Scala + name.contains("anon") || + name.endsWith("$class") || + name.contains("$sp") + } + + /** + * Scans all classes accessible from the context class loader which belong to the given package + * and subpackages both from directories and jars present on the classpath. + */ + private def getClasses(packageName: String, + classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = { + val path = packageName.replace('.', '/') + val resources = classLoader.getResources(path) + + val jars = resources.filter(x => x.getProtocol == "jar") + .map(_.getFile.split(":")(1).split("!")(0)).toSeq + + jars.flatMap(getClassesFromJar(_, path)) + .map(_.getName) + .filterNot(shouldExclude).toSet + } + + /** + * Get all classes in a package from a jar file. + */ + private def getClassesFromJar(jarPath: String, packageName: String) = { + val jar = new JarFile(new File(jarPath)) + val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) + val classes = for (entry <- enums if entry.endsWith(".class")) + yield Class.forName(entry.replace('/', '.').stripSuffix(".class")) + classes + } +}