Messages unification (#552)

* update error and warning messages * Add the modification of error and warning messages * minor edits on messages * update messages * edit message parameter name * update warning messages * modify messages * modify messages * modify changelog * modify INVALID_CLASS_NAME_ERROR message * Add quote marks to parameters in messages * Apply changes in the messages * Apply changes in the messages * unify some of the messages * Apply the respective changes in the error messages * minor edit * correct an error message * minor edit in error messages --------- Co-authored-by: A. Zolanvari <[email protected]>
sepandhaghighi · Sep 19, 2024 · 2ddaaf3 · 2ddaaf3
1 parent fe59e9c
commit 2ddaaf3
Show file tree

Hide file tree

Showing 7 changed files with 164 additions and 172 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - `README.md` modified
 - Test system modified
 - `Python 3.12` added to `test.yml`
+- Warning and error messages updated
 ## [4.0] - 2023-06-07
 ### Added
 - `pycmMultiLabelError` class

diff --git a/Test/compare_test.py b/Test/compare_test.py
@@ -54,7 +54,7 @@
 pycm.ConfusionMatrix(classes: [0, 1, 2])
 >>> cp.best_name
 'model2'
->>> with warns(RuntimeWarning, match='Confusion matrices are too close'):
+>>> with warns(RuntimeWarning, match='Confusion matrices are too similar to identify a clear best option.'):
 ...     cp2 = Compare({"model1": cm_comp1, "model2": cm_comp1})
 >>> cp2.scores == {'model2': {'class': 0.50278, 'overall': 0.58095}, 'model1': {'class': 0.50278, 'overall': 0.58095}}
 True
@@ -102,7 +102,7 @@
 1     cm2    0.93333           0.9
 2     cm1    0.73333           0.71667
 <BLANKLINE>
->>> with warns(RuntimeWarning, match='Confusion matrices are too close'):
+>>> with warns(RuntimeWarning, match='Confusion matrices are too similar to identify a clear best option.'):
 ...     cp7 = Compare({"cm1": cm1, "cm2": cm2}, class_weight={0: 200, 1: 1, 2: 1})
 >>> cp7.class_weight == {0: 200, 1: 1, 2: 1}
 True
@@ -120,16 +120,16 @@
 >>> cm = ConfusionMatrix(y_actu, y_pred)
 >>> cm.relabel({0: "L1", 1: "L2", 2: "L3"})
 >>> cm_null = ConfusionMatrix(matrix={0: {0: 0, 1: 0, 2: 0}, 1: {0: 0, 1: 0, 2: 0}, 2: {0: 0, 1: 0, 2: 0}})
->>> with warns(RuntimeWarning, match='Confusion matrices are too close'):
+>>> with warns(RuntimeWarning, match='Confusion matrices are too similar to identify a clear best option.'):
 ...     cp8 = Compare({"cm1": cm, "cm2": cm}, class_weight={'L3': 6, 'L1': 3, 'L2': 3})
->>> with warns(RuntimeWarning, match='The class_weight format is wrong, the result is for unweighted mode.'):
+>>> with warns(RuntimeWarning, match='Invalid `class_weight` format; the result is for unweighted mode.'):
 ...     cp9 = Compare({"cm1": cm1, "cm2": cm2}, class_weight={0: 0, 1: 0, 2: 0})
 >>> class_benchmark_weight = {"PLRI": 0, "NLRI": 0, "DPI": 0, "AUCI": 0, "MCCI": 0, "QI": 0}
->>> with warns(RuntimeWarning, match='The class_benchmark_weight format is wrong, the result is for unweighted mode.'):
+>>> with warns(RuntimeWarning, match='Invalid `class_benchmark_weight` format; the result is for unweighted mode.'):
 ...     cp10 = Compare({"cm1": cm1, "cm2": cm2}, class_benchmark_weight=class_benchmark_weight)
 >>> overall_benchmark_weight = {"SOA1": 0, "SOA2": 0, "SOA3": 0, "SOA4": 0, "SOA5": 0, "SOA6": 0, "SOA7": 0, "SOA8": 0, "SOA9": 0, "SOA10": 0}
->>> with warns(RuntimeWarning, match='The overall_benchmark_weight format is wrong, the result is for unweighted mode.'):
+>>> with warns(RuntimeWarning, match='Invalid `overall_benchmark_weight` format; the result is for unweighted mode.'):
 ...     cp11 = Compare({"cm1": cm1, "cm2": cm2}, overall_benchmark_weight=overall_benchmark_weight)
->>> with warns(RuntimeWarning, match='Confusion matrices are too close'):
+>>> with warns(RuntimeWarning, match='Confusion matrices are too similar to identify a clear best option.'):
 ...     cp12 = Compare({"cm1": cm_null, "cm2": cm_null})
 """
diff --git a/Test/error_test.py b/Test/error_test.py
diff --git a/Test/plot_error_test.py b/Test/plot_error_test.py
@@ -18,14 +18,14 @@
 >>> ax = cm.plot()
 Traceback (most recent call last):
     ...
-pycm.pycm_error.pycmPlotError: Error in importing matplotlib module. Please install it using this command: pip install matplotlib
+pycm.pycm_error.pycmPlotError: Failed to import matplotlib module. Please install it using: `pip install matplotlib`.
 >>> ax = cm.plot(plot_lib='seaborn')
 Traceback (most recent call last):
     ...
-pycm.pycm_error.pycmPlotError: Error in importing seaborn module. Please install it using this command: pip install seaborn
+pycm.pycm_error.pycmPlotError: Failed to import seaborn module. Please install it using: `pip install seaborn`.
 >>> crv = Curve(actual_vector = np.array([1, 1, 2, 2]), probs = np.array([[0.1, 0.9], [0.4, 0.6], [0.35, 0.65], [0.8, 0.2]]), classes=[2, 1])
 >>> ax = crv.plot(classes=[1])
 Traceback (most recent call last):
     ...
-pycm.pycm_error.pycmPlotError: Error in importing matplotlib module. Please install it using this command: pip install matplotlib
+pycm.pycm_error.pycmPlotError: Failed to import matplotlib module. Please install it using: `pip install matplotlib`.
 """
diff --git a/Test/warning_test.py b/Test/warning_test.py
@@ -4,7 +4,7 @@
 >>> from pycm.pycm_util import deprecated
 >>> from pytest import warns
 >>> large_cm = ConfusionMatrix(list(range(10)) + [2, 3, 5], list(range(10)) + [1, 7, 2])
->>> with warns(RuntimeWarning, match='The confusion matrix is a high dimension matrix'):
+>>> with warns(RuntimeWarning, match='Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization.'):
 ...     large_cm.print_matrix()
 Predict 0       1       2       3       4       5       6       7       8       9
 Actual
@@ -28,7 +28,7 @@
 <BLANKLINE>
 9       0       0       0       0       0       0       0       0       0       1
 <BLANKLINE>
->>> with warns(RuntimeWarning, match='The confusion matrix is a high dimension matrix'):
+>>> with warns(RuntimeWarning, match='Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization.'):
 ...     large_cm.print_normalized_matrix()
 Predict   0         1         2         3         4         5         6         7         8         9
 Actual
@@ -52,7 +52,7 @@
 <BLANKLINE>
 9         0.0       0.0       0.0       0.0       0.0       0.0       0.0       0.0       0.0       1.0
 <BLANKLINE>
->>> with warns(RuntimeWarning, match='The confusion matrix is a high dimension matrix'):
+>>> with warns(RuntimeWarning, match='Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization.'):
 ...     large_cm.stat()
 Overall Statistics :
 <BLANKLINE>
@@ -193,7 +193,7 @@
 dInd(Distance index)                                              0.0           0.08333       0.5082        0.5           0.0           0.5           0.0           0.08333       0.0           0.0
 sInd(Similarity index)                                            1.0           0.94107       0.64065       0.64645       1.0           0.64645       1.0           0.94107       1.0           1.0
 <BLANKLINE>
->>> with warns(RuntimeWarning, match='The confusion matrix is a high dimension matrix'):
+>>> with warns(RuntimeWarning, match='Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization.'):
 ...     print(large_cm)
 Predict 0       1       2       3       4       5       6       7       8       9
 Actual

diff --git a/pycm/pycm_handler.py b/pycm/pycm_handler.py
@@ -186,7 +186,7 @@ def __obj_array_handler__(array, classes, transpose):
     if classes is None:
         classes = list(range(len(array)))
     if len(classes) != len(array):
-        raise pycmMatrixError(CLASSES_LENGHT_ERROR)
+        raise pycmMatrixError(CLASSES_LENGTH_ERROR)
     matrix = {}
     for i in range(len(array)):
         matrix[classes[i]] = {classes[j]: x for j, x in enumerate(array[i])}

diff --git a/pycm/pycm_param.py b/pycm/pycm_param.py
@@ -46,87 +46,76 @@
 </html>
 '''
 
-PROBABILITY_SIZE_ERROR = "Probability vector elements must have same length and equal to classes"
-PROBABILITY_TYPE_ERROR = "The elements of the probability vector can only contain numeric values"
-PROBABILITY_SUM_ERROR = "The sum of probability values must be one"
-THRESHOLDS_NUMBER_ERROR = "Number of the thresholds is lower than 2"
-THRESHOLDS_TYPE_ERROR = "The type of thresholds is assumed to be list or NumPy array"
-THRESHOLDS_NUMERIC_ERROR = "The thresholds can only contain numeric values"
-CLASSES_TYPE_ERROR = "The type of classes is assumed to be list"
-CLASSES_MATCH_ERROR = "The classes don't match to actual_vector"
-MATRIX_CLASS_TYPE_ERROR = "Type of the input matrix classes is assumed  be the same"
-MATRIX_FORMAT_ERROR = "Input confusion matrix format error"
-MAPPING_FORMAT_ERROR = "Mapping format error"
-MAPPING_CLASS_NAME_ERROR = "Mapping class names error"
-SEABORN_PLOT_LIBRARY_ERROR = "Error in importing seaborn module. Please install it using this command: pip install seaborn"
-MATPLOTLIB_PLOT_LIBRARY_ERROR = "Error in importing matplotlib module. Please install it using this command: pip install matplotlib"
-PLOT_COLORS_CLASS_MISMATCH_ERROR = "Given colors and classes have not the same length."
-PLOT_MARKERS_CLASS_MISMATCH_ERROR = "Given markers and classes have not the same length."
-VECTOR_TYPE_ERROR = "The type of input vectors is assumed to be a list or a NumPy array"
-VECTOR_SIZE_ERROR = "Input vectors must have same length"
-VECTOR_EMPTY_ERROR = "Input vectors are empty"
-VECTOR_ONLY_ERROR = "This option only works in vector mode"
-VECTOR_UNIQUE_CLASS_ERROR = "The classes list isn't unique. It contains duplicated labels."
-NOT_ALL_SET_VECTOR_ERROR = "Class extraction from input failed. Input vectors should be a list of sets with unified types."
-CLASS_NUMBER_ERROR = "Number of the classes is lower than 2"
-METRICS_OFF_ERROR = "This method cannot be executed while 'metrics_off=True'."
-CLASSES_ERROR = "Used classes is not a subset of matrix's classes."
-COMPARE_FORMAT_ERROR = "The input type is supposed to be dictionary but it's not!"
-CLASSES_LENGHT_ERROR = "Classes length is not equal to the array length."
-AREA_METHOD_ERROR = "The numeric integral method can only be selected between 'trapezoidal' and 'midpoint'!"
-
-VECTOR_INDEX_ERROR = "Given index is out of vector's range."
-INVALID_CLASS_NAME_ERROR = "Given class name is not among problem's classes."
-
-COMPARE_TYPE_ERROR = "The input is supposed to consist of pycm.ConfusionMatrix object but it's not!"
-COMPARE_DOMAIN_ERROR = "The domain of all ConfusionMatrix objects must be same! The sample size or the number " \
-                       "of classes are different."
-COMPARE_NUMBER_ERROR = "Lower than two confusion matrices is given for comparing. The minimum number of " \
-                       "confusion matrix for comparing is 2."
-
-COMPARE_CLASS_WEIGHT_ERROR = "The class_weight type must be dictionary and also must be specified for all of the classes."
-
-COMPARE_CLASS_BENCHMARK_WEIGHT_ERROR = "The class_benchmark_weight type must be dictionary and also must be specified for all of the class benchmarks."
-
-COMPARE_OVERALL_BENCHMARK_WEIGHT_ERROR = "The overall_benchmark_weight type must be dictionary and also must be specified for all of the overall benchmarks."
-
-COMPARE_CLASS_WEIGHT_WARNING = "The class_weight format is wrong, the result is for unweighted mode."
-
-COMPARE_CLASS_BENCHMARK_WEIGHT_WARNING = "The class_benchmark_weight format is wrong, the result is for unweighted mode."
-
-COMPARE_OVERALL_BENCHMARK_WEIGHT_WARNING = "The overall_benchmark_weight format is wrong, the result is for unweighted mode."
-
-COMPARE_METRICS_OFF_ERROR = "Compare cannot be executed while in either of matrices 'metrics_off=True'."
-
-COMBINE_TYPE_ERROR = "The input type is supposed to be pycm.ConfusionMatrix object but it's not!"
-
-COMPARE_RESULT_WARNING = "Confusion matrices are too close and the best one can not be recognized."
-
-WEIGHTED_KAPPA_WARNING = "The weight format is wrong, the result is for unweighted kappa."
-
-WEIGHTED_ALPHA_WARNING = "The weight format is wrong, the result is for unweighted alpha."
-
-AVERAGE_WEIGHT_ERROR = "The weight type must be dictionary and also must be specified for all of the classes."
-
+PROBABILITY_SIZE_ERROR = "All elements of the probability vector must have the same length and match the number of classes."
+PROBABILITY_TYPE_ERROR = "Probability vector elements must be numeric."
+PROBABILITY_SUM_ERROR = "The sum of the probability values must equal 1."
+THRESHOLDS_NUMBER_ERROR = "The number of thresholds must be at least 2."
+THRESHOLDS_TYPE_ERROR = "`thresholds` must be provided as a list or a NumPy array."
+THRESHOLDS_NUMERIC_ERROR = "`thresholds` must contain only numeric values."
+CLASSES_TYPE_ERROR = "`classes` must be provided as a list."
+CLASSES_MATCH_ERROR = "`classes` does not match the actual vector."
+MATRIX_CLASS_TYPE_ERROR = "All input matrix classes must be of the same type."
+MATRIX_FORMAT_ERROR = "Invalid input confusion matrix format."
+MAPPING_FORMAT_ERROR = "Invalid mapping format."
+MAPPING_CLASS_NAME_ERROR = "Invalid mapping class names."
+SEABORN_PLOT_LIBRARY_ERROR = "Failed to import seaborn module. Please install it using: `pip install seaborn`."
+MATPLOTLIB_PLOT_LIBRARY_ERROR = "Failed to import matplotlib module. Please install it using: `pip install matplotlib`."
+PLOT_COLORS_CLASS_MISMATCH_ERROR = "The number of colors does not match the number of classes."
+PLOT_MARKERS_CLASS_MISMATCH_ERROR = "The number of markers does not match the number of classes."
+VECTOR_TYPE_ERROR = "Input vectors must be provided as a list or a NumPy array."
+VECTOR_SIZE_ERROR = "Input vectors must have the same length."
+VECTOR_EMPTY_ERROR = "Input vectors must not be empty."
+VECTOR_ONLY_ERROR = "This option is only available in vector mode."
+VECTOR_UNIQUE_CLASS_ERROR = "`classes` must contain unique labels with no duplicates."
+NOT_ALL_SET_VECTOR_ERROR = "Failed to extract classes from input. Input vectors should be a list of sets with unified types."
+CLASS_NUMBER_ERROR = "The number of classes must be at least 2."
+METRICS_OFF_ERROR = "This method cannot be executed when `metrics_off=True`."
+CLASSES_ERROR = "The specified classes are not a subset of the matrix's classes."
+COMPARE_FORMAT_ERROR = "Input must be provided as a dictionary."
+CLASSES_LENGTH_ERROR = "The length of the classes does not match the length of the array."
+AREA_METHOD_ERROR = "The integral method must be either 'trapezoidal' or 'midpoint'."
+
+VECTOR_INDEX_ERROR = "Index is out of range for the given vector."
+INVALID_CLASS_NAME_ERROR = "The specified class name is not among the confusion matrix's classes."
+
+COMPARE_TYPE_ERROR = "Input must be a dictionary containing pycm.ConfusionMatrix objects."
+COMPARE_DOMAIN_ERROR = "All ConfusionMatrix objects must have the same domain (same sample size and number of classes)."
+COMPARE_NUMBER_ERROR = "At least 2 confusion matrices are required for comparison."
+
+COMPARE_CLASS_WEIGHT_ERROR = "`class_weight` must be a dictionary and specified for all classes."
+COMPARE_CLASS_BENCHMARK_WEIGHT_ERROR = "`class_benchmark_weight` must be a dictionary and specified for all class benchmarks."
+COMPARE_OVERALL_BENCHMARK_WEIGHT_ERROR = "`overall_benchmark_weight` must be a dictionary and specified for all overall benchmarks."
+
+COMPARE_CLASS_WEIGHT_WARNING = "Invalid `class_weight` format; the result is for unweighted mode."
+COMPARE_CLASS_BENCHMARK_WEIGHT_WARNING = "Invalid `class_benchmark_weight` format; the result is for unweighted mode."
+COMPARE_OVERALL_BENCHMARK_WEIGHT_WARNING = "Invalid `overall_benchmark_weight` format; the result is for unweighted mode."
+
+COMPARE_METRICS_OFF_ERROR = "Comparison cannot be performed when `metrics_off=True` in any matrix."
+
+COMBINE_TYPE_ERROR = "Input must be an instance of pycm.ConfusionMatrix."
+
+COMPARE_RESULT_WARNING = "Confusion matrices are too similar to identify a clear best option."
+
+WEIGHTED_KAPPA_WARNING = "Invalid weight format; the result is for unweighted kappa."
+WEIGHTED_ALPHA_WARNING = "Invalid weight format; the result is for unweighted alpha."
+
+AVERAGE_WEIGHT_ERROR = "`weight` must be a dictionary and specified for all classes."
 AVERAGE_INVALID_ERROR = "Invalid parameter!"
 
-BRIER_LOG_LOSS_CLASS_ERROR = "Actual vector contains string so pos_class should be explicitly specified"
+BRIER_LOG_LOSS_CLASS_ERROR = "Actual vector contains strings; `pos_class` must be explicitly specified."
+BRIER_LOG_LOSS_PROB_ERROR = "This option is only available in binary probability mode."
 
-BRIER_LOG_LOSS_PROB_ERROR = "This option only works in binary probability mode"
+CLASS_NUMBER_WARNING = "Confusion matrix is high-dimensional and may not display properly. Consider using the `sparse` flag in printing functions, or save it as a CSV file for better visualization."
 
-CLASS_NUMBER_WARNING = "The confusion matrix is a high dimension matrix and won't be demonstrated properly.\n" \
-                       "If confusion matrix has too many zeros (sparse matrix) you can set `sparse` flag to True in printing functions "\
-                       "otherwise by using save_csv method to save the confusion matrix in csv format you'll have better demonstration."
+CLASSES_WARNING = "Specified classes are not a subset of the classes in the actual and predicted vectors."
+CLASSES_TYPE_WARNING = "Classes is neither a list nor None, so it will be ignored."
 
-CLASSES_WARNING = "Used classes is not a subset of classes in actual and predict vectors."
-
-CLASSES_TYPE_WARNING = "The classes is neither a list nor None so it'll be ignored."
-
-CURVE_NONE_WARNING = "The curve axes contain non-numerical value(s)."
+CURVE_NONE_WARNING = "The curve contains non-numerical value(s)."
 
 DEPRECATION_WARNING = "`{}` is deprecated and may be removed in future releases."
 
-DISTANCE_METRIC_TYPE_ERROR = "The metric type must be DistanceType"
+DISTANCE_METRIC_TYPE_ERROR = "`metric` type must be DistanceType."
+
 
 CLASS_NUMBER_THRESHOLD = 10
 
@@ -336,16 +325,17 @@
     0.001: 3.09,
     0.0005: 3.29}
 
-CI_ALPHA_TWO_SIDE_WARNING = "The alpha value is invalid, automatically set to 0.05.\nSupported values (two-sided) : " + ",".join(
-    map(str, sorted(ALPHA_TWO_SIDE_TABLE)))
+CI_ALPHA_TWO_SIDE_WARNING = ("Invalid alpha value; automatically set to 0.05. Supported two-sided values are: "
+                             + ", ".join(map(str, sorted(ALPHA_TWO_SIDE_TABLE))))
 
-CI_ALPHA_ONE_SIDE_WARNING = "The alpha value is invalid, automatically set to 0.05.\nSupported values (one-sided) : " + ",".join(
-    map(str, sorted(ALPHA_ONE_SIDE_TABLE)))
+CI_ALPHA_ONE_SIDE_WARNING = ("Invalid alpha value; automatically set to 0.05. Supported one-sided values are: "
+                             + ", ".join(map(str, sorted(ALPHA_ONE_SIDE_TABLE))))
 
-CI_FORMAT_ERROR = "The input type is supposed to be string but it's not!"
+CI_FORMAT_ERROR = "Input must be provided as a string."
+
+CI_SUPPORT_ERROR = ("Confidence interval calculation for this parameter is not supported in this version of pycm.\n"
+                    " Supported parameters are: ") + ", ".join(CI_CLASS_LIST) + ", " + ", ".join(CI_OVERALL_LIST)
 
-CI_SUPPORT_ERROR = "CI calculation for this parameter is not supported on this version of pycm.\nSupported parameters : " + \
-    ",".join(CI_CLASS_LIST) + "," + ",".join(CI_OVERALL_LIST)
 
 
 MULTICLASS_RECOMMEND = [
@@ -530,9 +520,10 @@
 DEFAULT_BACKGROUND_COLOR = "transparent"
 RECOMMEND_HTML_MESSAGE = '<span style="color:red;">Note 1</span> : Recommended statistics for this type of classification highlighted in <span style="color :{0};">{0}</span>'.format(
     RECOMMEND_BACKGROUND_COLOR)
-RECOMMEND_WARNING = "The recommender system assumes that the input is the result of classification over the whole data" \
-                    " rather than just a part of it.\nIf the confusion matrix is the result of test data classification" \
-                    ", the recommendation is not valid."
+RECOMMEND_WARNING = ("The recommendation system assumes the input is the result of classification over the entire"
+                     " dataset, not just a subset. If the confusion matrix is based on test data classification,"
+                     " the recommendation may not be valid.")
+
 RECOMMEND_HTML_MESSAGE2 = '<span style="color:red;">Note 2</span> : {0}'.format(
     RECOMMEND_WARNING)