-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-15940 Added Kolmogorov-Smirnov statistic method to H2OBinomialModelMetrics #16353
base: master
Are you sure you want to change the base?
Changes from all commits
c4b9bf0
b98bbfc
ba451f5
bd4dfb3
25923a5
e051e6f
969fab5
19167a5
d9f5935
714c6e9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -976,3 +976,26 @@ def thresholds_and_metric_scores(self): | |||||
if 'thresholds_and_metric_scores' in self._metric_json: | ||||||
return self._metric_json['thresholds_and_metric_scores'] | ||||||
return None | ||||||
|
||||||
def kolmogorov_smirnov(self, thresholds=None):
    """
    Retrieve the Kolmogorov-Smirnov (KS) statistic for this set of metrics.

    The value is looked up through ``self.metric("ks", ...)``; this method only
    validates the ``thresholds`` argument and delegates.

    :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``).
        If None, then the threshold maximizing the KS statistic will be used.
    :returns: The Kolmogorov-Smirnov statistic for this set of metrics and thresholds.
    :raises ValueError: if ``thresholds`` is neither None nor a list.

    :examples:

    >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
    >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
    >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
    >>> predictors = ["displacement","power","weight","acceleration","year"]
    >>> response = "economy_20mpg"
    >>> train, valid = cars.split_frame(ratios = [.8], seed = 1234)
    >>> cars_gbm = H2OGradientBoostingEstimator(seed = 1234)
    >>> cars_gbm.train(x = predictors,
    ...                y = response,
    ...                training_frame = train,
    ...                validation_frame = valid)
    >>> cars_gbm.kolmogorov_smirnov()
    """
    # Fail fast with the documented exception type instead of letting a
    # malformed argument (e.g. a string) travel into the metric lookup.
    if thresholds is not None and not isinstance(thresholds, list):
        raise ValueError(
            "thresholds must be a list of numbers or None, got %s"
            % type(thresholds).__name__
        )
    return self.metric("ks", thresholds=thresholds)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The goal is something like this:
Suggested change
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -10,7 +10,8 @@ def kolmogorov_smirnov(): | |||
model = H2OGradientBoostingEstimator(ntrees=1, gainslift_bins=20) | ||||
model.train(x=["Origin", "Distance"], y="IsDepDelayed", training_frame=airlines) | ||||
verify_ks(model, airlines) | ||||
|
||||
|
||||
# Test without Thresholds | ||||
model = H2OGradientBoostingEstimator(ntrees=1, gainslift_bins=5) | ||||
model.train(x=["Origin", "Distance"], y="IsDepDelayed", training_frame=airlines) | ||||
ks = model.kolmogorov_smirnov() | ||||
|
@@ -19,6 +20,21 @@ def kolmogorov_smirnov(): | |||
print(ks_verification) | ||||
assert round(ks, 5) != round(ks_verification, 5) | ||||
|
||||
# Test with specific thresholds | ||||
model = H2OGradientBoostingEstimator(ntrees=1, gainslift_bins=5) | ||||
model.train(x=["Origin", "Distance"], y="IsDepDelayed", training_frame=airlines) | ||||
ks = model.kolmogorov_smirnov(thresholds=[0.01, 0.5, 0.99]) | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Have you tried to run the test? This is not working even if you add the method to binomial.py. The goal is not change and test
Suggested change
|
||||
print("KS with thresholds [0.01, 0.5, 0.99]:", ks) | ||||
ks_verification = ks_metric(model, airlines) | ||||
print("KS verification:", ks_verification) | ||||
assert round(ks, 5) != round(ks_verification, 5) | ||||
|
||||
# Test with invalid Thresholds | ||||
try: | ||||
ks= model.kolmogorov_smirnov(thresholds= "invalid") | ||||
except ValueError as e: | ||||
print("Caught excepted exception for invalid thresholds:",e) | ||||
|
||||
model = H2OXGBoostEstimator(gainslift_bins=10) | ||||
model.train(x=["Origin", "Distance"], y="IsDepDelayed", training_frame=airlines) | ||||
print(model.gains_lift()) | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.