Skip to content

Commit

Permalink
SPARK-1094 Support MiMa for reporting binary compatibility accross ve…
Browse files Browse the repository at this point in the history
…rsions.

This adds some changes on top of the initial work by @ScrapCodes in amplab#20:

The goal here is to do automated checking of Spark commits to determine whether they break binary compatibility.

1. Special case for inner classes of package-private objects.
2. Made tools classes accessible when running `spark-class`.
3. Made some declared types in MLLib more general.
4. Various other improvements to exclude-generation script.
5. In-code documentation.

Author: Patrick Wendell <[email protected]>
Author: Prashant Sharma <[email protected]>
Author: Prashant Sharma <[email protected]>

Closes #207 from pwendell/mima and squashes the following commits:

22ae267 [Patrick Wendell] New binary changes after upmerge
6c2030d [Patrick Wendell] Merge remote-tracking branch 'apache/master' into mima
3666cf1 [Patrick Wendell] Minor style change
0e0f570 [Patrick Wendell] Small fix and removing directory listings
647c547 [Patrick Wendell] Reveiw feedback.
c39f3b5 [Patrick Wendell] Some enhancements to binary checking.
4c771e0 [Prashant Sharma] Added a tool to generate mima excludes and also adapted build to pick automatically.
b551519 [Prashant Sharma] adding a new exclude after rebasing with master
651844c [Prashant Sharma] Support MiMa for reporting binary compatibility accross versions.
  • Loading branch information
pwendell committed Mar 25, 2014
1 parent 8043b7b commit dc126f2
Show file tree
Hide file tree
Showing 8 changed files with 249 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
sbt/*.jar
.settings
.cache
.mima-excludes
/build/
work/
out/
Expand Down
1 change: 1 addition & 0 deletions bin/compute-classpath.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
Expand Down
3 changes: 1 addition & 2 deletions bin/spark-class
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,7 @@ fi

# Compute classpath using external script
CLASSPATH=`$FWDIR/bin/compute-classpath.sh`

if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
if [[ "$1" =~ org.apache.spark.tools.* ]]; then
CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
fi

Expand Down
7 changes: 7 additions & 0 deletions dev/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,10 @@ if [ -z "$PYSPARK_PYTHON" ]; then
export PYSPARK_PYTHON=/usr/local/bin/python2.7
fi
./python/run-tests

echo "========================================================================="
echo "Detecting binary incompatibilites with MiMa"
echo "========================================================================="
./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
sbt/sbt mima-report-binary-issues

83 changes: 83 additions & 0 deletions project/MimaBuild.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact}
import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
import sbt._

object MimaBuild {

def ignoredABIProblems(base: File) = {
import com.typesafe.tools.mima.core._
import com.typesafe.tools.mima.core.ProblemFilters._

// Excludes placed here will be used for all Spark versions
val defaultExcludes = Seq()

// Read package-private excludes from file
val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes")
val excludeFile = file(excludeFilePath)
val packagePrivateList: Seq[String] =
if (!excludeFile.exists()) {
Seq()
} else {
IO.read(excludeFile).split("\n")
}

def excludeClass(className: String) = {
Seq(
excludePackage(className),
ProblemFilters.exclude[MissingClassProblem](className),
ProblemFilters.exclude[MissingTypesProblem](className),
excludePackage(className + "$"),
ProblemFilters.exclude[MissingClassProblem](className + "$"),
ProblemFilters.exclude[MissingTypesProblem](className + "$")
)
}
def excludeSparkClass(className: String) = excludeClass("org.apache.spark." + className)

val packagePrivateExcludes = packagePrivateList.flatMap(excludeClass)

/* Excludes specific to a given version of Spark. When comparing the given version against
its immediate predecessor, the excludes listed here will be applied. */
val versionExcludes =
SparkBuild.SPARK_VERSION match {
case v if v.startsWith("1.0") =>
Seq(
excludePackage("org.apache.spark.api.java"),
excludePackage("org.apache.spark.streaming.api.java"),
excludePackage("org.apache.spark.mllib")
) ++
excludeSparkClass("rdd.ClassTags") ++
excludeSparkClass("util.XORShiftRandom") ++
excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
excludeSparkClass("mllib.optimization.SquaredGradient") ++
excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
excludeSparkClass("mllib.regression.LassoWithSGD") ++
excludeSparkClass("mllib.regression.LinearRegressionWithSGD")
case _ => Seq()
}

defaultExcludes ++ packagePrivateExcludes ++ versionExcludes
}

def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq(
previousArtifact := None,
binaryIssueFilters ++= ignoredABIProblems(sparkHome)
)

}
27 changes: 22 additions & 5 deletions project/SparkBuild.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@ import sbtassembly.Plugin._
import AssemblyKeys._
import scala.util.Properties
import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact

import scala.collection.JavaConversions._

// For Sonatype publishing
//import com.jsuereth.pgp.sbtplugin.PgpKeys._

object SparkBuild extends Build {
val SPARK_VERSION = "1.0.0-SNAPSHOT"

// Hadoop version to build against. For example, "1.0.4" for Apache releases, or
// "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
// through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
Expand Down Expand Up @@ -146,9 +149,9 @@ object SparkBuild extends Build {
lazy val allProjects = packageProjects ++ allExternalRefs ++
Seq[ProjectReference](examples, tools, assemblyProj, hive) ++ maybeJava8Tests

def sharedSettings = Defaults.defaultSettings ++ Seq(
def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq(
organization := "org.apache.spark",
version := "1.0.0-SNAPSHOT",
version := SPARK_VERSION,
scalaVersion := "2.10.3",
scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation",
"-target:" + SCALAC_JVM_VERSION),
Expand Down Expand Up @@ -284,9 +287,14 @@ object SparkBuild extends Build {
val excludeSLF4J = ExclusionRule(organization = "org.slf4j")
val excludeScalap = ExclusionRule(organization = "org.scala-lang", artifact = "scalap")

def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark",
version: String = "0.9.0-incubating", crossVersion: String = "2.10"): Option[sbt.ModuleID] = {
val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion
Some(organization % fullId % version) // the artifact to compare binary compatibility with
}

def coreSettings = sharedSettings ++ Seq(
name := "spark-core",

libraryDependencies ++= Seq(
"com.google.guava" % "guava" % "14.0.1",
"com.google.code.findbugs" % "jsr305" % "1.3.9",
Expand Down Expand Up @@ -325,7 +333,7 @@ object SparkBuild extends Build {
publish := {}
)

def replSettings = sharedSettings ++ Seq(
def replSettings = sharedSettings ++ Seq(
name := "spark-repl",
libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v ),
libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "jline" % v ),
Expand Down Expand Up @@ -354,17 +362,20 @@ object SparkBuild extends Build {

def graphxSettings = sharedSettings ++ Seq(
name := "spark-graphx",
previousArtifact := sparkPreviousArtifact("spark-graphx"),
libraryDependencies ++= Seq(
"org.jblas" % "jblas" % "1.2.3"
)
)

def bagelSettings = sharedSettings ++ Seq(
name := "spark-bagel"
name := "spark-bagel",
previousArtifact := sparkPreviousArtifact("spark-bagel")
)

def mllibSettings = sharedSettings ++ Seq(
name := "spark-mllib",
previousArtifact := sparkPreviousArtifact("spark-mllib"),
libraryDependencies ++= Seq(
"org.jblas" % "jblas" % "1.2.3",
"org.scalanlp" %% "breeze" % "0.7"
Expand Down Expand Up @@ -428,6 +439,7 @@ object SparkBuild extends Build {

def streamingSettings = sharedSettings ++ Seq(
name := "spark-streaming",
previousArtifact := sparkPreviousArtifact("spark-streaming"),
libraryDependencies ++= Seq(
"commons-io" % "commons-io" % "2.4"
)
Expand Down Expand Up @@ -503,13 +515,15 @@ object SparkBuild extends Build {

def twitterSettings() = sharedSettings ++ Seq(
name := "spark-streaming-twitter",
previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"),
libraryDependencies ++= Seq(
"org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty)
)
)

def kafkaSettings() = sharedSettings ++ Seq(
name := "spark-streaming-kafka",
previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"),
libraryDependencies ++= Seq(
"com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty),
"org.apache.kafka" %% "kafka" % "0.8.0"
Expand All @@ -522,20 +536,23 @@ object SparkBuild extends Build {

def flumeSettings() = sharedSettings ++ Seq(
name := "spark-streaming-flume",
previousArtifact := sparkPreviousArtifact("spark-streaming-flume"),
libraryDependencies ++= Seq(
"org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty)
)
)

def zeromqSettings() = sharedSettings ++ Seq(
name := "spark-streaming-zeromq",
previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"),
libraryDependencies ++= Seq(
"org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty)
)
)

def mqttSettings() = streamingSettings ++ Seq(
name := "spark-streaming-mqtt",
previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"),
libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0")
)
}
2 changes: 2 additions & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4")

addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.4.0")

addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6")

addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0")
132 changes: 132 additions & 0 deletions tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.tools

import java.io.File
import java.util.jar.JarFile

import scala.collection.mutable
import scala.collection.JavaConversions._
import scala.reflect.runtime.universe.runtimeMirror

/**
* A tool for generating classes to be excluded during binary checking with MIMA. It is expected
* that this tool is run with ./spark-class.
*
* MIMA itself only supports JVM-level visibility and doesn't account for package-private classes.
* This tool looks at all currently package-private classes and generates exclusions for them. Note
* that this approach is not sound. It can lead to false positives if we move or rename a previously
* package-private class. It can lead to false negatives if someone explicitly makes a class
* package-private that wasn't before. This exists only to help catch certain classes of changes
* which might be difficult to catch during review.
*/
object GenerateMIMAIgnore {
private val classLoader = Thread.currentThread().getContextClassLoader
private val mirror = runtimeMirror(classLoader)

private def classesPrivateWithin(packageName: String): Set[String] = {

val classes = getClasses(packageName, classLoader)
val privateClasses = mutable.HashSet[String]()

def isPackagePrivate(className: String) = {
try {
/* Couldn't figure out if it's possible to determine a-priori whether a given symbol
is a module or class. */

val privateAsClass = mirror
.staticClass(className)
.privateWithin
.fullName
.startsWith(packageName)

val privateAsModule = mirror
.staticModule(className)
.privateWithin
.fullName
.startsWith(packageName)

privateAsClass || privateAsModule
} catch {
case _: Throwable => {
println("Error determining visibility: " + className)
false
}
}
}

for (className <- classes) {
val directlyPrivateSpark = isPackagePrivate(className)

/* Inner classes defined within a private[spark] class or object are effectively
invisible, so we account for them as package private. */
val indirectlyPrivateSpark = {
val maybeOuter = className.toString.takeWhile(_ != '$')
if (maybeOuter != className) {
isPackagePrivate(maybeOuter)
} else {
false
}
}
if (directlyPrivateSpark || indirectlyPrivateSpark) privateClasses += className
}
privateClasses.flatMap(c => Seq(c, c.replace("$", "#"))).toSet
}

def main(args: Array[String]) {
scala.tools.nsc.io.File(".mima-excludes").
writeAll(classesPrivateWithin("org.apache.spark").mkString("\n"))
println("Created : .mima-excludes in current directory.")
}


private def shouldExclude(name: String) = {
// Heuristic to remove JVM classes that do not correspond to user-facing classes in Scala
name.contains("anon") ||
name.endsWith("$class") ||
name.contains("$sp")
}

/**
* Scans all classes accessible from the context class loader which belong to the given package
* and subpackages both from directories and jars present on the classpath.
*/
private def getClasses(packageName: String,
classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = {
val path = packageName.replace('.', '/')
val resources = classLoader.getResources(path)

val jars = resources.filter(x => x.getProtocol == "jar")
.map(_.getFile.split(":")(1).split("!")(0)).toSeq

jars.flatMap(getClassesFromJar(_, path))
.map(_.getName)
.filterNot(shouldExclude).toSet
}

/**
* Get all classes in a package from a jar file.
*/
private def getClassesFromJar(jarPath: String, packageName: String) = {
val jar = new JarFile(new File(jarPath))
val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName))
val classes = for (entry <- enums if entry.endsWith(".class"))
yield Class.forName(entry.replace('/', '.').stripSuffix(".class"))
classes
}
}

0 comments on commit dc126f2

Please sign in to comment.