Skip to content

Commit

Permalink
improve docstr of preprocessors (#1227)
Browse files Browse the repository at this point in the history
* improve docstr of preprocessors

* Update SynapseML version

* Fix test

---------

Co-authored-by: Li Jiang <[email protected]>
  • Loading branch information
sonichi and thinkall authored Sep 29, 2023
1 parent 830ec45 commit fda9fa0
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
4 changes: 2 additions & 2 deletions flaml/automl/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,12 +476,12 @@ def save_best_config(self, filename):

@property
def feature_transformer(self):
"""Returns AutoML Transformer"""
"""Returns feature transformer which is used to preprocess data before applying training or inference."""
return getattr(self, "_transformer", None)

@property
def label_transformer(self):
"""Returns AutoML label transformer"""
"""Returns label transformer which is used to preprocess labels before scoring, and inverse transform labels after inference."""
return getattr(self, "_label_transformer", None)

@property
Expand Down
18 changes: 10 additions & 8 deletions test/spark/test_0sparkml.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
from pyspark.ml.feature import VectorAssembler
from flaml.automl.spark.utils import to_pandas_on_spark

postfix_version = "-spark3.3," if pyspark.__version__ > "3.2" else ","
spark = (
pyspark.sql.SparkSession.builder.appName("MyApp")
.master("local[2]")
.config(
"spark.jars.packages",
(
"com.microsoft.azure:synapseml_2.12:0.10.2,"
f"com.microsoft.azure:synapseml_2.12:0.11.3{postfix_version}"
"org.apache.hadoop:hadoop-azure:3.3.5,"
"com.microsoft.azure:azure-storage:8.6.6,"
f"org.mlflow:mlflow-spark:{mlflow.__version__}"
Expand Down Expand Up @@ -172,15 +173,16 @@ def test_spark_input_df():
try:
model = automl.model.estimator
predictions = model.transform(test_data)
predictions.show()

from synapse.ml.train import ComputeModelStatistics
# from synapse.ml.train import ComputeModelStatistics

metrics = ComputeModelStatistics(
evaluationMetric="classification",
labelCol="Bankrupt?",
scoredLabelsCol="prediction",
).transform(predictions)
metrics.show()
# metrics = ComputeModelStatistics(
# evaluationMetric="classification",
# labelCol="Bankrupt?",
# scoredLabelsCol="prediction",
# ).transform(predictions)
# metrics.show()
except AttributeError:
print("No fitted model because of too short training time.")

Expand Down

0 comments on commit fda9fa0

Please sign in to comment.