From 2ddaaf393463864a9e0702e53f5b30716c43db70 Mon Sep 17 00:00:00 2001 From: Alireza Zolanvari <36195808+alirezazolanvari@users.noreply.github.com> Date: Thu, 19 Sep 2024 07:57:44 +0200 Subject: [PATCH] Messages unification (#552) * update error and warning messages * Add the modification of error and warning messages * minor edits on messages * update messages * edit message parameter name * update warning messages * modify messages * modify messages * modify changelog * modify INVALID_CLASS_NAME_ERROR message * Add quotemarks to parameters in messages * Apply changes in the messages * Apply changes in the messages * unify some of the messages * Apply the respective changes in the error messages * minor edit * correct an error message * minor edit in error messages --------- Co-authored-by: A. Zolanvari --- CHANGELOG.md | 1 + Test/compare_test.py | 14 ++-- Test/error_test.py | 148 ++++++++++++++++++------------------- Test/plot_error_test.py | 6 +- Test/warning_test.py | 8 +- pycm/pycm_handler.py | 2 +- pycm/pycm_param.py | 157 +++++++++++++++++++--------------------- 7 files changed, 164 insertions(+), 172 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8f3dace..f10143a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - `README.md` modified - Test system modified - `Python 3.12` added to `test.yml` +- Warning and error messages updated ## [4.0] - 2023-06-07 ### Added - `pycmMultiLabelError` class diff --git a/Test/compare_test.py b/Test/compare_test.py index 2dae06cd..7902de90 100644 --- a/Test/compare_test.py +++ b/Test/compare_test.py @@ -54,7 +54,7 @@ pycm.ConfusionMatrix(classes: [0, 1, 2]) >>> cp.best_name 'model2' ->>> with warns(RuntimeWarning, match='Confusion matrices are too close'): +>>> with warns(RuntimeWarning, match='Confusion matrices are too similar to identify a clear best option.'): ... 
cp2 = Compare({"model1": cm_comp1, "model2": cm_comp1}) >>> cp2.scores == {'model2': {'class': 0.50278, 'overall': 0.58095}, 'model1': {'class': 0.50278, 'overall': 0.58095}} True @@ -102,7 +102,7 @@ 1 cm2 0.93333 0.9 2 cm1 0.73333 0.71667 ->>> with warns(RuntimeWarning, match='Confusion matrices are too close'): +>>> with warns(RuntimeWarning, match='Confusion matrices are too similar to identify a clear best option.'): ... cp7 = Compare({"cm1": cm1, "cm2": cm2}, class_weight={0: 200, 1: 1, 2: 1}) >>> cp7.class_weight == {0: 200, 1: 1, 2: 1} True @@ -120,16 +120,16 @@ >>> cm = ConfusionMatrix(y_actu, y_pred) >>> cm.relabel({0: "L1", 1: "L2", 2: "L3"}) >>> cm_null = ConfusionMatrix(matrix={0: {0: 0, 1: 0, 2: 0}, 1: {0: 0, 1: 0, 2: 0}, 2: {0: 0, 1: 0, 2: 0}}) ->>> with warns(RuntimeWarning, match='Confusion matrices are too close'): +>>> with warns(RuntimeWarning, match='Confusion matrices are too similar to identify a clear best option.'): ... cp8 = Compare({"cm1": cm, "cm2": cm}, class_weight={'L3': 6, 'L1': 3, 'L2': 3}) ->>> with warns(RuntimeWarning, match='The class_weight format is wrong, the result is for unweighted mode.'): +>>> with warns(RuntimeWarning, match='Invalid `class_weight` format; the result is for unweighted mode.'): ... cp9 = Compare({"cm1": cm1, "cm2": cm2}, class_weight={0: 0, 1: 0, 2: 0}) >>> class_benchmark_weight = {"PLRI": 0, "NLRI": 0, "DPI": 0, "AUCI": 0, "MCCI": 0, "QI": 0} ->>> with warns(RuntimeWarning, match='The class_benchmark_weight format is wrong, the result is for unweighted mode.'): +>>> with warns(RuntimeWarning, match='Invalid `class_benchmark_weight` format; the result is for unweighted mode.'): ... 
cp10 = Compare({"cm1": cm1, "cm2": cm2}, class_benchmark_weight=class_benchmark_weight) >>> overall_benchmark_weight = {"SOA1": 0, "SOA2": 0, "SOA3": 0, "SOA4": 0, "SOA5": 0, "SOA6": 0, "SOA7": 0, "SOA8": 0, "SOA9": 0, "SOA10": 0} ->>> with warns(RuntimeWarning, match='The overall_benchmark_weight format is wrong, the result is for unweighted mode.'): +>>> with warns(RuntimeWarning, match='Invalid `overall_benchmark_weight` format; the result is for unweighted mode.'): ... cp11 = Compare({"cm1": cm1, "cm2": cm2}, overall_benchmark_weight=overall_benchmark_weight) ->>> with warns(RuntimeWarning, match='Confusion matrices are too close'): +>>> with warns(RuntimeWarning, match='Confusion matrices are too similar to identify a clear best option.'): ... cp12 = Compare({"cm1": cm_null, "cm2": cm_null}) """ diff --git a/Test/error_test.py b/Test/error_test.py index eeb25f74..3f976dbe 100644 --- a/Test/error_test.py +++ b/Test/error_test.py @@ -8,121 +8,121 @@ >>> cm_2 = ConfusionMatrix(y_actu, 2) Traceback (most recent call last): ... -pycm.pycm_obj.pycmVectorError: The type of input vectors is assumed to be a list or a NumPy array +pycm.pycm_obj.pycmVectorError: Input vectors must be provided as a list or a NumPy array. >>> cm_3 = ConfusionMatrix(y_actu, [1, 2]) Traceback (most recent call last): ... -pycm.pycm_obj.pycmVectorError: Input vectors must have same length +pycm.pycm_obj.pycmVectorError: Input vectors must have the same length. >>> cm_4 = ConfusionMatrix([], []) Traceback (most recent call last): ... -pycm.pycm_obj.pycmVectorError: Input vectors are empty +pycm.pycm_obj.pycmVectorError: Input vectors must not be empty. >>> cm_5 = ConfusionMatrix([1, 1, 1, ], [1, 1, 1, 1]) Traceback (most recent call last): ... -pycm.pycm_obj.pycmVectorError: Input vectors must have same length +pycm.pycm_obj.pycmVectorError: Input vectors must have the same length. 
>>> cm_6 = ConfusionMatrix(matrix={0: {0: 2, 1: 50, 2: 6}, 1: {0: 5, 1: 50, 2: 3}, 2: {0: 1, 1: 7, 2: 50}}) >>> cm_6.position() Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: Input vectors are empty +pycm.pycm_error.pycmVectorError: This option is only available in vector mode. >>> cm = ConfusionMatrix([1, 2, 3, 4], [1, 2, 3, 4], classes=[1, 2, 2, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: The classes list isn't unique. It contains duplicated labels. +pycm.pycm_error.pycmVectorError: `classes` must contain unique labels with no duplicates. >>> cm3=ConfusionMatrix(matrix={}) Traceback (most recent call last): ... -pycm.pycm_obj.pycmMatrixError: Input confusion matrix format error +pycm.pycm_obj.pycmMatrixError: Invalid input confusion matrix format. >>> cm_4=ConfusionMatrix(matrix={1: {1: 2, "1": 2}, "1": {1: 2, "1": 3}}) Traceback (most recent call last): ... -pycm.pycm_obj.pycmMatrixError: Type of the input matrix classes is assumed be the same +pycm.pycm_obj.pycmMatrixError: All input matrix classes must be of the same type. >>> cm_5=ConfusionMatrix(matrix={1: {1: 2}}) Traceback (most recent call last): ... -pycm.pycm_obj.pycmMatrixError: Number of the classes is lower than 2 +pycm.pycm_obj.pycmMatrixError: The number of classes must be at least 2. >>> cm = ConfusionMatrix([1, 2, 3, 4], [1, 2, 3, 4], classes=[1]) Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: Number of the classes is lower than 2 +pycm.pycm_error.pycmMatrixError: The number of classes must be at least 2. >>> cm = ConfusionMatrix([1, 1, 1, 1], [1, 2, 1, 1], classes=[]) Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: Number of the classes is lower than 2 +pycm.pycm_error.pycmMatrixError: The number of classes must be at least 2. 
>>> y_actu = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2] >>> y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2] >>> cm = ConfusionMatrix(y_actu, y_pred) >>> cm.distance(metric = 2) Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: The metric type must be DistanceType +pycm.pycm_error.pycmMatrixError: `metric` type must be DistanceType. >>> cm.relabel([1, 2, 3]) Traceback (most recent call last): ... -pycm.pycm_obj.pycmMatrixError: Mapping format error +pycm.pycm_obj.pycmMatrixError: Invalid mapping format. >>> cm.relabel({1: "L1", 2: "L2"}) Traceback (most recent call last): ... -pycm.pycm_obj.pycmMatrixError: Mapping class names error +pycm.pycm_obj.pycmMatrixError: Invalid mapping class names. >>> cm.relabel({0: "L1", 1: "L2", 2: "L2"}) Traceback (most recent call last): ... -pycm.pycm_obj.pycmMatrixError: Mapping class names error +pycm.pycm_obj.pycmMatrixError: Invalid mapping class names. >>> cp = Compare([cm, cm]) Traceback (most recent call last): ... -pycm.pycm_obj.pycmCompareError: The input type is supposed to be dictionary but it's not! +pycm.pycm_obj.pycmCompareError: Input must be provided as a dictionary. >>> cp = Compare({"cm1": cm}) Traceback (most recent call last): ... -pycm.pycm_obj.pycmCompareError: Lower than two confusion matrices is given for comparing. The minimum number of confusion matrix for comparing is 2. +pycm.pycm_obj.pycmCompareError: At least 2 confusion matrices are required for comparison. >>> cp = Compare({"cm1": cm, "cm2": []}) Traceback (most recent call last): ... -pycm.pycm_obj.pycmCompareError: The input is supposed to consist of pycm.ConfusionMatrix object but it's not! +pycm.pycm_obj.pycmCompareError: Input must be a dictionary containing pycm.ConfusionMatrix objects. 
>>> cm2 = ConfusionMatrix(matrix={"Class1": {"Class1": 9, "Class2": 3, "Class3": 0}, "Class2": {"Class1": 3, "Class2": 5, "Class3": 1}, "Class3": {"Class1": 1, "Class2": 1, "Class3": 4}}) >>> cp = Compare({"cm1": cm, "cm2": cm2}) Traceback (most recent call last): ... -pycm.pycm_obj.pycmCompareError: The domain of all ConfusionMatrix objects must be same! The sample size or the number of classes are different. +pycm.pycm_obj.pycmCompareError: All ConfusionMatrix objects must have the same domain (same sample size and number of classes). >>> cm = ConfusionMatrix(matrix={1: {1: 9, 2: 3}, 2: {1: 3, 2: 5}}, classes=[1, 2, 3]) Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: Used classes is not a subset of matrix's classes. +pycm.pycm_error.pycmMatrixError: The specified classes are not a subset of the matrix's classes. >>> y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 1] >>> cm3 = ConfusionMatrix(y_actu, y_pred) >>> cp = Compare({"cm1": cm, "cm2": cm3}, by_class=True, class_weight={1: 1, 2: 1}) Traceback (most recent call last): ... -pycm.pycm_obj.pycmCompareError: The class_weight type must be dictionary and also must be specified for all of the classes. +pycm.pycm_obj.pycmCompareError: `class_weight` must be a dictionary and specified for all classes. >>> cp = Compare({"cm1": cm, "cm2": cm3}, by_class=True, class_weight=[]) Traceback (most recent call last): ... -pycm.pycm_obj.pycmCompareError: The class_weight type must be dictionary and also must be specified for all of the classes. +pycm.pycm_obj.pycmCompareError: `class_weight` must be a dictionary and specified for all classes. >>> cp = Compare({"cm1": cm, "cm2": cm3}, by_class=True, class_benchmark_weight=[]) Traceback (most recent call last): ... -pycm.pycm_obj.pycmCompareError: The class_benchmark_weight type must be dictionary and also must be specified for all of the class benchmarks. 
+pycm.pycm_obj.pycmCompareError: `class_benchmark_weight` must be a dictionary and specified for all class benchmarks. >>> cp = Compare({"cm1": cm, "cm2": cm3}, by_class=True, overall_benchmark_weight=[]) Traceback (most recent call last): ... -pycm.pycm_obj.pycmCompareError: The overall_benchmark_weight type must be dictionary and also must be specified for all of the overall benchmarks. +pycm.pycm_obj.pycmCompareError: `overall_benchmark_weight` must be a dictionary and specified for all overall benchmarks. >>> cm1 = ConfusionMatrix([1, 1, 1, 0], [1, 0, 1, 1], metrics_off=True) >>> cm2 = ConfusionMatrix([1, 1, 1, 0], [1, 0, 1, 1], metrics_off=False) >>> cp = Compare({"cm1":cm1, "cm2":cm2}) Traceback (most recent call last): ... -pycm.pycm_error.pycmCompareError: Compare cannot be executed while in either of matrices 'metrics_off=True'. +pycm.pycm_error.pycmCompareError: Comparison cannot be performed when `metrics_off=True` in any matrix. >>> cm.CI("MCC") Traceback (most recent call last): ... -pycm.pycm_obj.pycmCIError: CI calculation for this parameter is no supported on this version of pycm. -Supported parameters : TPR, TNR, PPV, NPV, ACC, PLR, NLR, FPR, FNR, AUC, PRE, Kappa, Overall ACC +pycm.pycm_obj.pycmCIError: Confidence interval calculation for this parameter is not supported in this version of pycm. + Supported parameters are: TPR, TNR, PPV, NPV, ACC, PLR, NLR, FPR, FNR, AUC, PRE, Kappa, Overall ACC >>> cm.CI(2) Traceback (most recent call last): ... -pycm.pycm_obj.pycmCIError: The input type is supposed to be string but it's not! +pycm.pycm_obj.pycmCIError: Input must be provided as a string. >>> cm.average("AUCC") Traceback (most recent call last): ... @@ -134,192 +134,192 @@ >>> cm.weighted_average("AUC", weight=2) Traceback (most recent call last): ... -pycm.pycm_error.pycmAverageError: The weight type must be dictionary and also must be specified for all of the classes. 
+pycm.pycm_error.pycmAverageError: `weight` must be a dictionary and specified for all classes. >>> cm.weighted_average("AUC", weight={1: 23}) Traceback (most recent call last): ... -pycm.pycm_error.pycmAverageError: The weight type must be dictionary and also must be specified for all of the classes. +pycm.pycm_error.pycmAverageError: `weight` must be a dictionary and specified for all classes. >>> cm.combine(1) Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: The input type is supposed to be pycm.ConfusionMatrix object but it's not! +pycm.pycm_error.pycmMatrixError: Input must be an instance of pycm.ConfusionMatrix. >>> cm = ConfusionMatrix([1, 0, 2, 0], [1, 1, 2, 1]) >>> cm.brier_score() Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: This option only works in binary probability mode +pycm.pycm_error.pycmVectorError: This option is only available in binary probability mode. >>> cm.log_loss() Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: This option only works in binary probability mode +pycm.pycm_error.pycmVectorError: This option is only available in binary probability mode. >>> cm = ConfusionMatrix(["ham", "spam", "ham", "ham"], [0.1, 0.4, 0.25, 1], threshold=lambda x : "ham") >>> cm.brier_score() Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: Actual vector contains string so pos_class should be explicitly specified +pycm.pycm_error.pycmVectorError: Actual vector contains strings; `pos_class` must be explicitly specified. >>> cm.log_loss() Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: Actual vector contains string so pos_class should be explicitly specified +pycm.pycm_error.pycmVectorError: Actual vector contains strings; `pos_class` must be explicitly specified. >>> matrix = [[1, 2, 3], [4, 6, 1], [1, 2, 3]] >>> cm = ConfusionMatrix(matrix=matrix, classes=["L1", "L1", "L3", "L2"]) Traceback (most recent call last): ... 
-pycm.pycm_error.pycmMatrixError: The classes list isn't unique. It contains duplicated labels. +pycm.pycm_error.pycmMatrixError: `classes` must contain unique labels with no duplicates. >>> cm = ConfusionMatrix(matrix=matrix, classes=[1, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: Classes length is not equal to the array length. +pycm.pycm_error.pycmMatrixError: The length of the classes does not match the length of the array. >>> crv = Curve([1, 2, 2, 1], {1, 2, 2, 1}, classes=[1, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: The type of input vectors is assumed to be a list or a NumPy array +pycm.pycm_error.pycmCurveError: Input vectors must be provided as a list or a NumPy array. >>> crv = Curve({1, 2, 2, 1}, [1, 2, 2, 1], classes=[1, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: The type of input vectors is assumed to be a list or a NumPy array +pycm.pycm_error.pycmCurveError: Input vectors must be provided as a list or a NumPy array. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9]], classes=[1, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: Input vectors must have same length +pycm.pycm_error.pycmCurveError: Input vectors must have the same length. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.2, 0.9]], classes=[1, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: The sum of probability values must be one +pycm.pycm_error.pycmCurveError: The sum of the probability values must equal 1. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.1, 0.9]], classes={1, 2}) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: The type of classes is assumed to be list +pycm.pycm_error.pycmCurveError: `classes` must be provided as a list. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.1, 0.9]], classes=[1, 2, 3]) Traceback (most recent call last): ... 
-pycm.pycm_error.pycmCurveError: The classes don't match to actual_vector +pycm.pycm_error.pycmCurveError: `classes` does not match the actual vector. >>> crv = Curve([1, 1, 1, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.1, 0.9]], classes=[1]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: Number of the classes is lower than 2 +pycm.pycm_error.pycmCurveError: The number of classes must be at least 2. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.2, "salam"]], classes=[1, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: The elements of the probability vector can only contain numeric values +pycm.pycm_error.pycmCurveError: Probability vector elements must be numeric. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.2, 0.8]], classes=[1, 2], thresholds={1, 2}) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: The type of thresholds is assumed to be list or NumPy array +pycm.pycm_error.pycmCurveError: `thresholds` must be provided as a list or a NumPy array. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.2, 0.8]], classes=[1, 2], thresholds=[0.1]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: Number of the thresholds is lower than 2 +pycm.pycm_error.pycmCurveError: The number of thresholds must be at least 2. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.2, 0.8]], classes=[1, 2], thresholds=[0.1, "q"]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: The thresholds can only contain numeric values +pycm.pycm_error.pycmCurveError: `thresholds` must contain only numeric values. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.2, 0.8]], classes=[1, 1, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: The classes list isn't unique. It contains duplicated labels. 
+pycm.pycm_error.pycmCurveError: `classes` must contain unique labels with no duplicates. >>> crv = Curve([1, 2, 2, 1], [[0.1, 0.9], [0.1, 0.9], [0.1, 0.8, 0.1], [0.2, 0.8]], classes=[1, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: Probability vector elements must have same length and equal to classes +pycm.pycm_error.pycmCurveError: All elements of the probability vector must have the same length and match the number of classes. >>> crv = Curve([1, 2, 2, 1], [[1], [1], [1], [1]], classes=[1, 2]) Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: Probability vector elements must have same length and equal to classes +pycm.pycm_error.pycmCurveError: All elements of the probability vector must have the same length and match the number of classes. >>> crv = Curve(actual_vector = np.array([1, 1, 2, 2]), probs = np.array([[0.1, 0.9], [0.4, 0.6], [0.35, 0.65], [0.8, 0.2]]), classes=[2, 1]) >>> crv.area(method="trpz") Traceback (most recent call last): ... -pycm.pycm_error.pycmCurveError: The numeric integral method can only be selected between 'trapezoidal' and 'midpoint'! +pycm.pycm_error.pycmCurveError: The integral method must be either 'trapezoidal' or 'midpoint'. >>> crv.plot(colors=['c']) Traceback (most recent call last): ... -pycm.pycm_error.pycmPlotError: Given colors and classes have not the same length. +pycm.pycm_error.pycmPlotError: The number of colors does not match the number of classes. >>> crv.plot(markers=['*']) Traceback (most recent call last): ... -pycm.pycm_error.pycmPlotError: Given markers and classes have not the same length. +pycm.pycm_error.pycmPlotError: The number of markers does not match the number of classes. >>> cm = ConfusionMatrix(y_actu, y_pred, metrics_off=True) >>> cm.stat() Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. 
+pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> cm.sensitivity_index() Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. +pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> cm.IBA_alpha(0.2) Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. +pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> cm.NB() Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. +pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> cm.CI("Kappa") Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. +pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> cm.average("ACC") Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. +pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> cm.weighted_average("ACC") Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. +pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> cm.weighted_kappa() Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. +pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> cm.weighted_alpha() Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. 
+pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> cm.aickin_alpha() Traceback (most recent call last): ... -pycm.pycm_error.pycmMatrixError: This method cannot be executed while 'metrics_off=True'. +pycm.pycm_error.pycmMatrixError: This method cannot be executed when `metrics_off=True`. >>> mlcm = MultiLabelCM([[0, 1], [1, 1]], [[1, 0], [1, 0]]) Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: Class extraction from input failed. Input vectors should be a list of sets with unified types. +pycm.pycm_error.pycmVectorError: Failed to extract classes from input. Input vectors should be a list of sets with unified types. >>> mlcm = MultiLabelCM([{'dog'}, {'cat', 'dog'}], ['cat', {'cat'}]) Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: Class extraction from input failed. Input vectors should be a list of sets with unified types. +pycm.pycm_error.pycmVectorError: Failed to extract classes from input. Input vectors should be a list of sets with unified types. >>> mlcm = MultiLabelCM(['dog', {'cat', 'dog'}], [{'cat'}, {'cat'}]) Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: Class extraction from input failed. Input vectors should be a list of sets with unified types. +pycm.pycm_error.pycmVectorError: Failed to extract classes from input. Input vectors should be a list of sets with unified types. >>> mlcm = MultiLabelCM([{'dog'}, {'cat', 'dog'}], [{'cat'}, {'cat'}]) >>> mlcm.get_cm_by_class(1) Traceback (most recent call last): ... -pycm.pycm_error.pycmMultiLabelError: Given class name is not among problem's classes. +pycm.pycm_error.pycmMultiLabelError: The specified class name is not among the problem's classes. >>> mlcm.get_cm_by_sample(2) Traceback (most recent call last): ... -pycm.pycm_error.pycmMultiLabelError: Given index is out of vector's range. +pycm.pycm_error.pycmMultiLabelError: Index is out of range for the given vector. 
>>> mlcm = MultiLabelCM([{'dog'}, {'cat', 'dog'}], [{'cat'}, {'cat', 'bird'}], classes=['dog', 'cat']) >>> mlcm.get_cm_by_class('bird') Traceback (most recent call last): ... -pycm.pycm_error.pycmMultiLabelError: Given class name is not among problem's classes. +pycm.pycm_error.pycmMultiLabelError: The specified class name is not among the problem's classes. >>> mlcm = MultiLabelCM(2, [[1, 0], [1, 0]]) Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: The type of input vectors is assumed to be a list or a NumPy array +pycm.pycm_error.pycmVectorError: Input vectors must be provided as a list or a NumPy array. >>> mlcm = MultiLabelCM([{1, 0}, {1, 0}, {1,1}], [{1, 0}, {1, 0}]) Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: Input vectors must have same length +pycm.pycm_error.pycmVectorError: Input vectors must have the same length. >>> mlcm = MultiLabelCM([], []) Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: Input vectors are empty +pycm.pycm_error.pycmVectorError: Input vectors must not be empty. >>> mlcm = MultiLabelCM([{1, 0}, {1, 0}], [{1, 0}, {1, 0}], classes=[1,0,1]) Traceback (most recent call last): ... -pycm.pycm_error.pycmVectorError: The classes list isn't unique. It contains duplicated labels. +pycm.pycm_error.pycmVectorError: `classes` must contain unique labels with no duplicates. """ diff --git a/Test/plot_error_test.py b/Test/plot_error_test.py index 590d7d94..ff536354 100644 --- a/Test/plot_error_test.py +++ b/Test/plot_error_test.py @@ -18,14 +18,14 @@ >>> ax = cm.plot() Traceback (most recent call last): ... -pycm.pycm_error.pycmPlotError: Error in importing matplotlib module. Please install it using this command: pip install matplotlib +pycm.pycm_error.pycmPlotError: Failed to import matplotlib module. Please install it using: `pip install matplotlib`. >>> ax = cm.plot(plot_lib='seaborn') Traceback (most recent call last): ... 
-pycm.pycm_error.pycmPlotError: Error in importing seaborn module. Please install it using this command: pip install seaborn +pycm.pycm_error.pycmPlotError: Failed to import seaborn module. Please install it using: `pip install seaborn`. >>> crv = Curve(actual_vector = np.array([1, 1, 2, 2]), probs = np.array([[0.1, 0.9], [0.4, 0.6], [0.35, 0.65], [0.8, 0.2]]), classes=[2, 1]) >>> ax = crv.plot(classes=[1]) Traceback (most recent call last): ... -pycm.pycm_error.pycmPlotError: Error in importing matplotlib module. Please install it using this command: pip install matplotlib +pycm.pycm_error.pycmPlotError: Failed to import matplotlib module. Please install it using: `pip install matplotlib`. """ diff --git a/Test/warning_test.py b/Test/warning_test.py index b34c2d91..619bd164 100644 --- a/Test/warning_test.py +++ b/Test/warning_test.py @@ -4,7 +4,7 @@ >>> from pycm.pycm_util import deprecated >>> from pytest import warns >>> large_cm = ConfusionMatrix(list(range(10)) + [2, 3, 5], list(range(10)) + [1, 7, 2]) ->>> with warns(RuntimeWarning, match='The confusion matrix is a high dimension matrix'): +>>> with warns(RuntimeWarning, match='Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization.'): ... large_cm.print_matrix() Predict 0 1 2 3 4 5 6 7 8 9 Actual @@ -28,7 +28,7 @@ 9 0 0 0 0 0 0 0 0 0 1 ->>> with warns(RuntimeWarning, match='The confusion matrix is a high dimension matrix'): +>>> with warns(RuntimeWarning, match='Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization.'): ... 
large_cm.print_normalized_matrix() Predict 0 1 2 3 4 5 6 7 8 9 Actual @@ -52,7 +52,7 @@ 9 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ->>> with warns(RuntimeWarning, match='The confusion matrix is a high dimension matrix'): +>>> with warns(RuntimeWarning, match='Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization.'): ... large_cm.stat() Overall Statistics : @@ -193,7 +193,7 @@ dInd(Distance index) 0.0 0.08333 0.5082 0.5 0.0 0.5 0.0 0.08333 0.0 0.0 sInd(Similarity index) 1.0 0.94107 0.64065 0.64645 1.0 0.64645 1.0 0.94107 1.0 1.0 ->>> with warns(RuntimeWarning, match='The confusion matrix is a high dimension matrix'): +>>> with warns(RuntimeWarning, match='Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization.'): ... print(large_cm) Predict 0 1 2 3 4 5 6 7 8 9 Actual diff --git a/pycm/pycm_handler.py b/pycm/pycm_handler.py index 5cd7dfe1..ffbcd167 100644 --- a/pycm/pycm_handler.py +++ b/pycm/pycm_handler.py @@ -186,7 +186,7 @@ def __obj_array_handler__(array, classes, transpose): if classes is None: classes = list(range(len(array))) if len(classes) != len(array): - raise pycmMatrixError(CLASSES_LENGHT_ERROR) + raise pycmMatrixError(CLASSES_LENGTH_ERROR) matrix = {} for i in range(len(array)): matrix[classes[i]] = {classes[j]: x for j, x in enumerate(array[i])} diff --git a/pycm/pycm_param.py b/pycm/pycm_param.py index 87be837e..5f50d763 100644 --- a/pycm/pycm_param.py +++ b/pycm/pycm_param.py @@ -46,87 +46,76 @@ ''' -PROBABILITY_SIZE_ERROR = "Probability vector elements must have same length and equal to classes" -PROBABILITY_TYPE_ERROR = "The elements of the probability vector can only contain numeric values" -PROBABILITY_SUM_ERROR = "The sum of probability values must be one" -THRESHOLDS_NUMBER_ERROR = "Number of the thresholds is 
lower than 2" -THRESHOLDS_TYPE_ERROR = "The type of thresholds is assumed to be list or NumPy array" -THRESHOLDS_NUMERIC_ERROR = "The thresholds can only contain numeric values" -CLASSES_TYPE_ERROR = "The type of classes is assumed to be list" -CLASSES_MATCH_ERROR = "The classes don't match to actual_vector" -MATRIX_CLASS_TYPE_ERROR = "Type of the input matrix classes is assumed be the same" -MATRIX_FORMAT_ERROR = "Input confusion matrix format error" -MAPPING_FORMAT_ERROR = "Mapping format error" -MAPPING_CLASS_NAME_ERROR = "Mapping class names error" -SEABORN_PLOT_LIBRARY_ERROR = "Error in importing seaborn module. Please install it using this command: pip install seaborn" -MATPLOTLIB_PLOT_LIBRARY_ERROR = "Error in importing matplotlib module. Please install it using this command: pip install matplotlib" -PLOT_COLORS_CLASS_MISMATCH_ERROR = "Given colors and classes have not the same length." -PLOT_MARKERS_CLASS_MISMATCH_ERROR = "Given markers and classes have not the same length." -VECTOR_TYPE_ERROR = "The type of input vectors is assumed to be a list or a NumPy array" -VECTOR_SIZE_ERROR = "Input vectors must have same length" -VECTOR_EMPTY_ERROR = "Input vectors are empty" -VECTOR_ONLY_ERROR = "This option only works in vector mode" -VECTOR_UNIQUE_CLASS_ERROR = "The classes list isn't unique. It contains duplicated labels." -NOT_ALL_SET_VECTOR_ERROR = "Class extraction from input failed. Input vectors should be a list of sets with unified types." -CLASS_NUMBER_ERROR = "Number of the classes is lower than 2" -METRICS_OFF_ERROR = "This method cannot be executed while 'metrics_off=True'." -CLASSES_ERROR = "Used classes is not a subset of matrix's classes." -COMPARE_FORMAT_ERROR = "The input type is supposed to be dictionary but it's not!" -CLASSES_LENGHT_ERROR = "Classes length is not equal to the array length." -AREA_METHOD_ERROR = "The numeric integral method can only be selected between 'trapezoidal' and 'midpoint'!" 
- -VECTOR_INDEX_ERROR = "Given index is out of vector's range." -INVALID_CLASS_NAME_ERROR = "Given class name is not among problem's classes." - -COMPARE_TYPE_ERROR = "The input is supposed to consist of pycm.ConfusionMatrix object but it's not!" -COMPARE_DOMAIN_ERROR = "The domain of all ConfusionMatrix objects must be same! The sample size or the number " \ - "of classes are different." -COMPARE_NUMBER_ERROR = "Lower than two confusion matrices is given for comparing. The minimum number of " \ - "confusion matrix for comparing is 2." - -COMPARE_CLASS_WEIGHT_ERROR = "The class_weight type must be dictionary and also must be specified for all of the classes." - -COMPARE_CLASS_BENCHMARK_WEIGHT_ERROR = "The class_benchmark_weight type must be dictionary and also must be specified for all of the class benchmarks." - -COMPARE_OVERALL_BENCHMARK_WEIGHT_ERROR = "The overall_benchmark_weight type must be dictionary and also must be specified for all of the overall benchmarks." - -COMPARE_CLASS_WEIGHT_WARNING = "The class_weight format is wrong, the result is for unweighted mode." - -COMPARE_CLASS_BENCHMARK_WEIGHT_WARNING = "The class_benchmark_weight format is wrong, the result is for unweighted mode." - -COMPARE_OVERALL_BENCHMARK_WEIGHT_WARNING = "The overall_benchmark_weight format is wrong, the result is for unweighted mode." - -COMPARE_METRICS_OFF_ERROR = "Compare cannot be executed while in either of matrices 'metrics_off=True'." - -COMBINE_TYPE_ERROR = "The input type is supposed to be pycm.ConfusionMatrix object but it's not!" - -COMPARE_RESULT_WARNING = "Confusion matrices are too close and the best one can not be recognized." - -WEIGHTED_KAPPA_WARNING = "The weight format is wrong, the result is for unweighted kappa." - -WEIGHTED_ALPHA_WARNING = "The weight format is wrong, the result is for unweighted alpha." - -AVERAGE_WEIGHT_ERROR = "The weight type must be dictionary and also must be specified for all of the classes." 
- +PROBABILITY_SIZE_ERROR = "All elements of the probability vector must have the same length and match the number of classes." +PROBABILITY_TYPE_ERROR = "Probability vector elements must be numeric." +PROBABILITY_SUM_ERROR = "The sum of the probability values must equal 1." +THRESHOLDS_NUMBER_ERROR = "The number of thresholds must be at least 2." +THRESHOLDS_TYPE_ERROR = "`thresholds` must be provided as a list or a NumPy array." +THRESHOLDS_NUMERIC_ERROR = "`thresholds` must contain only numeric values." +CLASSES_TYPE_ERROR = "`classes` must be provided as a list." +CLASSES_MATCH_ERROR = "`classes` does not match the actual vector." +MATRIX_CLASS_TYPE_ERROR = "All input matrix classes must be of the same type." +MATRIX_FORMAT_ERROR = "Invalid input confusion matrix format." +MAPPING_FORMAT_ERROR = "Invalid mapping format." +MAPPING_CLASS_NAME_ERROR = "Invalid mapping class names." +SEABORN_PLOT_LIBRARY_ERROR = "Failed to import seaborn module. Please install it using: `pip install seaborn`." +MATPLOTLIB_PLOT_LIBRARY_ERROR = "Failed to import matplotlib module. Please install it using: `pip install matplotlib`." +PLOT_COLORS_CLASS_MISMATCH_ERROR = "The number of colors does not match the number of classes." +PLOT_MARKERS_CLASS_MISMATCH_ERROR = "The number of markers does not match the number of classes." +VECTOR_TYPE_ERROR = "Input vectors must be provided as a list or a NumPy array." +VECTOR_SIZE_ERROR = "Input vectors must have the same length." +VECTOR_EMPTY_ERROR = "Input vectors must not be empty." +VECTOR_ONLY_ERROR = "This option is only available in vector mode." +VECTOR_UNIQUE_CLASS_ERROR = "`classes` must contain unique labels with no duplicates." +NOT_ALL_SET_VECTOR_ERROR = "Failed to extract classes from input. Input vectors should be a list of sets with unified types." +CLASS_NUMBER_ERROR = "The number of classes must be at least 2." +METRICS_OFF_ERROR = "This method cannot be executed when `metrics_off=True`." 
+CLASSES_ERROR = "The specified classes are not a subset of the matrix's classes." +COMPARE_FORMAT_ERROR = "Input must be provided as a dictionary." +CLASSES_LENGTH_ERROR = "The length of the classes does not match the length of the array." +AREA_METHOD_ERROR = "The integral method must be either 'trapezoidal' or 'midpoint'." + +VECTOR_INDEX_ERROR = "Index is out of range for the given vector." +INVALID_CLASS_NAME_ERROR = "The specified class name is not among the confusion matrix's classes." + +COMPARE_TYPE_ERROR = "Input must be a dictionary containing pycm.ConfusionMatrix objects." +COMPARE_DOMAIN_ERROR = "All ConfusionMatrix objects must have the same domain (same sample size and number of classes)." +COMPARE_NUMBER_ERROR = "At least 2 confusion matrices are required for comparison." + +COMPARE_CLASS_WEIGHT_ERROR = "`class_weight` must be a dictionary and specified for all classes." +COMPARE_CLASS_BENCHMARK_WEIGHT_ERROR = "`class_benchmark_weight` must be a dictionary and specified for all class benchmarks." +COMPARE_OVERALL_BENCHMARK_WEIGHT_ERROR = "`overall_benchmark_weight` must be a dictionary and specified for all overall benchmarks." + +COMPARE_CLASS_WEIGHT_WARNING = "Invalid `class_weight` format; the result is for unweighted mode." +COMPARE_CLASS_BENCHMARK_WEIGHT_WARNING = "Invalid `class_benchmark_weight` format; the result is for unweighted mode." +COMPARE_OVERALL_BENCHMARK_WEIGHT_WARNING = "Invalid `overall_benchmark_weight` format; the result is for unweighted mode." + +COMPARE_METRICS_OFF_ERROR = "Comparison cannot be performed when `metrics_off=True` in any matrix." + +COMBINE_TYPE_ERROR = "Input must be an instance of pycm.ConfusionMatrix." + +COMPARE_RESULT_WARNING = "Confusion matrices are too similar to identify a clear best option." + +WEIGHTED_KAPPA_WARNING = "Invalid weight format; the result is for unweighted kappa." +WEIGHTED_ALPHA_WARNING = "Invalid weight format; the result is for unweighted alpha." 
+ +AVERAGE_WEIGHT_ERROR = "`weight` must be a dictionary and specified for all classes." AVERAGE_INVALID_ERROR = "Invalid parameter!" -BRIER_LOG_LOSS_CLASS_ERROR = "Actual vector contains string so pos_class should be explicitly specified" +BRIER_LOG_LOSS_CLASS_ERROR = "Actual vector contains strings; `pos_class` must be explicitly specified." +BRIER_LOG_LOSS_PROB_ERROR = "This option is only available in binary probability mode." -BRIER_LOG_LOSS_PROB_ERROR = "This option only works in binary probability mode" +CLASS_NUMBER_WARNING = "Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization." -CLASS_NUMBER_WARNING = "The confusion matrix is a high dimension matrix and won't be demonstrated properly.\n" \ - "If confusion matrix has too many zeros (sparse matrix) you can set `sparse` flag to True in printing functions "\ - "otherwise by using save_csv method to save the confusion matrix in csv format you'll have better demonstration." +CLASSES_WARNING = "Specified classes are not a subset of the classes in the actual and predicted vectors." +CLASSES_TYPE_WARNING = "Classes is neither a list nor None, so it will be ignored." -CLASSES_WARNING = "Used classes is not a subset of classes in actual and predict vectors." - -CLASSES_TYPE_WARNING = "The classes is neither a list nor None so it'll be ignored." - -CURVE_NONE_WARNING = "The curve axes contain non-numerical value(s)." +CURVE_NONE_WARNING = "The curve contains non-numerical value(s)." DEPRECATION_WARNING = "`{}` is deprecated and may be removed in future releases." -DISTANCE_METRIC_TYPE_ERROR = "The metric type must be DistanceType" +DISTANCE_METRIC_TYPE_ERROR = "`metric` type must be DistanceType." 
+ CLASS_NUMBER_THRESHOLD = 10 @@ -336,16 +325,17 @@ 0.001: 3.09, 0.0005: 3.29} -CI_ALPHA_TWO_SIDE_WARNING = "The alpha value is invalid, automatically set to 0.05.\nSupported values (two-sided) : " + ",".join( - map(str, sorted(ALPHA_TWO_SIDE_TABLE))) +CI_ALPHA_TWO_SIDE_WARNING = ("Invalid alpha value; automatically set to 0.05. Supported two-sided values are: " + + ", ".join(map(str, sorted(ALPHA_TWO_SIDE_TABLE)))) -CI_ALPHA_ONE_SIDE_WARNING = "The alpha value is invalid, automatically set to 0.05.\nSupported values (one-sided) : " + ",".join( - map(str, sorted(ALPHA_ONE_SIDE_TABLE))) +CI_ALPHA_ONE_SIDE_WARNING = ("Invalid alpha value; automatically set to 0.05. Supported one-sided values are: " + + ", ".join(map(str, sorted(ALPHA_ONE_SIDE_TABLE)))) -CI_FORMAT_ERROR = "The input type is supposed to be string but it's not!" +CI_FORMAT_ERROR = "Input must be provided as a string." + +CI_SUPPORT_ERROR = ("Confidence interval calculation for this parameter is not supported in this version of pycm.\n" + " Supported parameters are: ") + ", ".join(CI_CLASS_LIST) + ", " + ", ".join(CI_OVERALL_LIST) -CI_SUPPORT_ERROR = "CI calculation for this parameter is not supported on this version of pycm.\nSupported parameters : " + \ - ",".join(CI_CLASS_LIST) + "," + ",".join(CI_OVERALL_LIST) MULTICLASS_RECOMMEND = [ @@ -530,9 +520,10 @@ DEFAULT_BACKGROUND_COLOR = "transparent" RECOMMEND_HTML_MESSAGE = 'Note 1 : Recommended statistics for this type of classification highlighted in {0}'.format( RECOMMEND_BACKGROUND_COLOR) -RECOMMEND_WARNING = "The recommender system assumes that the input is the result of classification over the whole data" \ - " rather than just a part of it.\nIf the confusion matrix is the result of test data classification" \ - ", the recommendation is not valid." +RECOMMEND_WARNING = ("The recommendation system assumes the input is the result of classification over the entire" + " dataset, not just a subset. 
If the confusion matrix is based on test data classification," + " the recommendation may not be valid.") + RECOMMEND_HTML_MESSAGE2 = 'Note 2 : {0}'.format( RECOMMEND_WARNING)