Merge remote-tracking branch 'upstream/master' into isLargerBetter
wbo4958 committed Jan 24, 2025
2 parents a948963 + 7c316f7 commit a28d7f1
Showing 28 changed files with 666 additions and 129 deletions.
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
@@ -82,6 +82,12 @@
     ],
     "sqlState" : "22003"
   },
+  "ARTIFACT_ALREADY_EXISTS" : {
+    "message" : [
+      "The artifact <normalizedRemoteRelativePath> already exists. Please choose a different name for the new artifact because it cannot be overwritten."
+    ],
+    "sqlState" : "42713"
+  },
   "ASSIGNMENT_ARITY_MISMATCH" : {
     "message" : [
       "The number of columns or variables assigned or aliased: <numTarget> does not match the number of source expressions: <numExpr>."
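The new condition is raised through Spark's error-class framework. A minimal sketch of how it might surface, assuming the standard SparkRuntimeException constructor (the actual raise site is not among the hunks captured here, and "jars/my-udf.jar" is a made-up artifact path):

    import org.apache.spark.SparkRuntimeException

    // Illustrative only: raise the new condition with its message parameter.
    throw new SparkRuntimeException(
      errorClass = "ARTIFACT_ALREADY_EXISTS",
      messageParameters = Map("normalizedRemoteRelativePath" -> "jars/my-udf.jar"))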
SchemaConverters.scala (file path not captured)
@@ -22,12 +22,10 @@
 import com.google.protobuf.Descriptors.{Descriptor, FieldDescriptor}
 import com.google.protobuf.WireFormat
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.types._
 
-@DeveloperApi
 object SchemaConverters extends Logging {
 
   /**
@@ -42,13 +40,13 @@ object SchemaConverters extends Logging {
    *
    * @since 3.4.0
    */
-  def toSqlType(
+  private[protobuf] def toSqlType(
       descriptor: Descriptor,
       protobufOptions: ProtobufOptions = ProtobufOptions(Map.empty)): SchemaType = {
     toSqlTypeHelper(descriptor, protobufOptions)
   }
 
-  def toSqlTypeHelper(
+  private[protobuf] def toSqlTypeHelper(
       descriptor: Descriptor,
       protobufOptions: ProtobufOptions): SchemaType = {
     val fields = descriptor.getFields.asScala.flatMap(
@@ -65,7 +63,7 @@ object SchemaConverters extends Logging {
   // exceed the maximum recursive depth specified by the recursiveFieldMaxDepth option.
   // A return of None implies the field has reached the maximum allowed recursive depth and
   // should be dropped.
-  def structFieldFor(
+  private def structFieldFor(
       fd: FieldDescriptor,
       existingRecordNames: Map[String, Int],
       protobufOptions: ProtobufOptions): Option[StructField] = {
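With toSqlType and toSqlTypeHelper now private[protobuf], schema derivation is reachable only through the connector's public functions. A rough sketch of the public entry point, assuming an existing DataFrame df with a binary payload column ("/tmp/events.desc" is a hypothetical descriptor-set file):

    import org.apache.spark.sql.protobuf.functions.from_protobuf

    // Derive the Catalyst schema internally and parse the binary column.
    val parsed = df.select(
      from_protobuf(df("payload"), "MyMessage", "/tmp/events.desc").alias("event"))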
1 change: 1 addition & 0 deletions dev/sparktestsupport/modules.py
@@ -1126,6 +1126,7 @@ def __hash__(self):
         "pyspark.ml.tests.connect.test_parity_clustering",
         "pyspark.ml.tests.connect.test_parity_evaluation",
         "pyspark.ml.tests.connect.test_parity_feature",
+        "pyspark.ml.tests.connect.test_parity_pipeline",
     ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
Vectors.scala (file path not captured)
@@ -240,7 +240,7 @@ sealed trait Vector extends Serializable {
 @Since("2.0.0")
 object Vectors {
 
-  private[ml] val empty: Vector = zeros(0)
+  private[ml] val empty: DenseVector = new DenseVector(Array.emptyDoubleArray)
 
   /**
    * Creates a dense vector from its values.
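Typing the val as DenseVector removes casts at call sites (see the PCAModel hunk below). A small sketch of the equivalent expression, since Vectors.empty itself is private[ml]:

    import org.apache.spark.ml.linalg.DenseVector

    // Same value as the new definition: a zero-length dense vector,
    // statically typed as DenseVector rather than the Vector supertype.
    val empty: DenseVector = new DenseVector(Array.emptyDoubleArray)
    assert(empty.size == 0)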
Spark Connect ML estimator registration file (file path not captured)
@@ -19,6 +19,7 @@
 # So register the supported estimator here if you're trying to add a new one.
 
 # classification
+org.apache.spark.ml.classification.LinearSVC
 org.apache.spark.ml.classification.LogisticRegression
 org.apache.spark.ml.classification.DecisionTreeClassifier
 org.apache.spark.ml.classification.RandomForestClassifier
@@ -52,3 +53,4 @@ org.apache.spark.ml.feature.MinMaxScaler
 org.apache.spark.ml.feature.RobustScaler
 org.apache.spark.ml.feature.StringIndexer
 org.apache.spark.ml.feature.PCA
+org.apache.spark.ml.feature.Word2Vec
Spark Connect ML transformer/model registration file (file path not captured)
@@ -18,10 +18,16 @@
 # Spark Connect ML uses ServiceLoader to find out the supported Spark Ml non-model transformer.
 # So register the supported transformer here if you're trying to add a new one.
 ########### Transformers
+org.apache.spark.ml.feature.DCT
 org.apache.spark.ml.feature.VectorAssembler
+org.apache.spark.ml.feature.Tokenizer
+org.apache.spark.ml.feature.RegexTokenizer
+org.apache.spark.ml.feature.SQLTransformer
+org.apache.spark.ml.feature.StopWordsRemover
 
 ########### Model for loading
 # classification
+org.apache.spark.ml.classification.LinearSVCModel
 org.apache.spark.ml.classification.LogisticRegressionModel
 org.apache.spark.ml.classification.DecisionTreeClassificationModel
 org.apache.spark.ml.classification.RandomForestClassificationModel
@@ -50,3 +56,4 @@ org.apache.spark.ml.feature.MinMaxScalerModel
 org.apache.spark.ml.feature.RobustScalerModel
 org.apache.spark.ml.feature.StringIndexerModel
 org.apache.spark.ml.feature.PCAModel
+org.apache.spark.ml.feature.Word2VecModel
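As the header comment notes, Spark Connect ML discovers these operators through ServiceLoader. A minimal sketch of the underlying JDK mechanism, assuming the registration files follow the standard META-INF/services/<interface-name> layout (Spark's own loading helper may differ in detail):

    import java.util.ServiceLoader
    import scala.jdk.CollectionConverters._

    // List provider class names registered for an interface without
    // instantiating them (ServiceLoader.stream() is JDK 9+).
    def registeredProviders[T](iface: Class[T]): Seq[String] =
      ServiceLoader.load(iface).stream().iterator().asScala
        .map(_.`type`().getName).toSeq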
LinearSVC.scala (file path not captured)
@@ -365,6 +365,8 @@ class LinearSVCModel private[classification] (
     extends ClassificationModel[Vector, LinearSVCModel]
     with LinearSVCParams with MLWritable with HasTrainingSummary[LinearSVCTrainingSummary] {
 
+  private[ml] def this() = this(Identifiable.randomUID("linearsvc"), Vectors.empty, 0.0)
+
   @Since("2.2.0")
   override val numClasses: Int = 2
2 changes: 1 addition & 1 deletion mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -130,7 +130,7 @@ class PCAModel private[ml] (
   // For ml connect only
   @Since("4.0.0")
   private[ml] def this() = this(Identifiable.randomUID("pca"),
-    DenseMatrix.zeros(1, 1), Vectors.empty.asInstanceOf[DenseVector])
+    DenseMatrix.zeros(1, 1), Vectors.empty)
 
   /** @group setParam */
   @Since("1.5.0")
Word2Vec.scala (file path not captured)
@@ -211,6 +211,8 @@ class Word2VecModel private[ml] (
 
   import Word2VecModel._
 
+  private[ml] def this() = this(Identifiable.randomUID("w2v"), null)
+
   /**
    * Returns a dataframe with two fields, "word" and "vector", with "word" being a String and
    * and the vector the DenseVector that it is mapped to.
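This gives LinearSVCModel and Word2VecModel the same "for ml connect only" pattern PCAModel already uses: a package-private zero-argument constructor with placeholder state. A hypothetical sketch of why a loader wants such a constructor (the actual Spark Connect load path is not shown in this commit):

    import org.apache.spark.ml.classification.LinearSVCModel

    // Reflectively build an empty placeholder model; the loading machinery
    // would fill in real coefficients afterwards.
    val ctor = classOf[LinearSVCModel].getDeclaredConstructor()
    ctor.setAccessible(true) // the constructor is private[ml], not public
    val placeholder: LinearSVCModel = ctor.newInstance()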
2 changes: 1 addition & 1 deletion python/pyspark/ml/classification.py
@@ -62,7 +62,6 @@
     HasSolver,
     HasParallelism,
 )
-from pyspark.ml.util import try_remote_attribute_relation
 from pyspark.ml.tree import (
     _DecisionTreeModel,
     _DecisionTreeParams,
@@ -86,6 +85,7 @@
     MLWriter,
     MLWritable,
     HasTrainingSummary,
+    try_remote_attribute_relation,
 )
 from pyspark.ml.wrapper import JavaParams, JavaPredictor, JavaPredictionModel, JavaWrapper
 from pyspark.ml.common import inherit_doc
(Diff not loaded for the remaining changed files.)
