From 451e1a41f91023507757e5048d74185606c8a6b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Boull=C3=A9?= Date: Mon, 8 Jan 2024 16:53:15 +0100 Subject: [PATCH] Update LearningTest scripts New scripts now capture fatal errors and are more resilient to warning from json and kdic files. --- test/LearningTest/cmd/python/apply_command.py | 4 +- .../cmd/python/apply_command_all.py | 1 - test/LearningTest/cmd/python/check_results.py | 657 ++++++++---------- test/LearningTest/cmd/python/help_options.py | 2 +- .../cmd/python/learning_test.config | 20 + .../cmd/python/learning_test_env.py | 5 +- test/LearningTest/cmd/python/test_khiops.py | 96 ++- .../cmd/python/test_khiops_all.py | 84 +-- test/LearningTest/cmd/python/utils.py | 2 +- 9 files changed, 456 insertions(+), 415 deletions(-) create mode 100644 test/LearningTest/cmd/python/learning_test.config diff --git a/test/LearningTest/cmd/python/apply_command.py b/test/LearningTest/cmd/python/apply_command.py index 77d67fe99..15714be0e 100644 --- a/test/LearningTest/cmd/python/apply_command.py +++ b/test/LearningTest/cmd/python/apply_command.py @@ -363,7 +363,7 @@ def apply_command_performance(work_dir): dir_name = os.path.basename(work_dir) root_name = os.path.basename(os.path.dirname(work_dir)) results_dir = os.path.join(work_dir, "results") - if os.path.isdir((results_dir)): + if os.path.isdir(results_dir): test_pattern = "TestEvaluationReport.xls" for file_name in os.listdir(results_dir): if test_pattern in file_name: @@ -389,7 +389,7 @@ def apply_command_performance_ref(work_dir): dir_name = os.path.basename(work_dir) root_name = os.path.basename(os.path.dirname(work_dir)) results_dir = os.path.join(work_dir, "results.ref") - if os.path.isdir((results_dir)): + if os.path.isdir(results_dir): test_pattern = "TestEvaluationReport.xls" for file_name in os.listdir(results_dir): if test_pattern in file_name: diff --git a/test/LearningTest/cmd/python/apply_command_all.py b/test/LearningTest/cmd/python/apply_command_all.py index 6447899ba..9f938d758 100644 --- a/test/LearningTest/cmd/python/apply_command_all.py +++ b/test/LearningTest/cmd/python/apply_command_all.py @@ -4,7 +4,6 @@ import apply_command import test_khiops - if __name__ == "__main__": all_commands, standard_command_number = apply_command.register_all_commands() diff --git a/test/LearningTest/cmd/python/check_results.py b/test/LearningTest/cmd/python/check_results.py index e8cfdcc8d..67a3c6e98 100644 --- a/test/LearningTest/cmd/python/check_results.py +++ b/test/LearningTest/cmd/python/check_results.py @@ -1,27 +1,57 @@ import os.path -import sys -import shutil -import csv -import string -import subprocess +import re -def print_error(log_file, error_message): - print(error_message) +def print_message(log_file, message): + print(message) + write_message(log_file, message) + + +def write_message(log_file, message): # on encode en utf-8 en ignorant les erreurs pour eviter un erreur lors de l'encodage automatique - log_file.write(error_message.encode("utf-8", "ignore").decode("utf-8") + "\n") + log_file.write(message.encode("utf-8", "ignore").decode("utf-8") + "\n") -def print_detailed_error(log_file, error_message, number_print): +def print_detailed_message(log_file, message, number_print): number_print_max = 10 if number_print < number_print_max: - print(error_message) - log_file.write(error_message + "\n") + print(message) + log_file.write(message + "\n") if number_print == number_print_max: print("...\n") log_file.write("...\n") +# Parsers en variables globales, compiles une seule fois +token_parser = None +time_parser = None +numeric_parser = None + + +def initialize_parsers(): + """Initialisation des parsers globaux""" + global token_parser + global time_parser + global numeric_parser + if token_parser is not None: + return + # Delimiters pour els fichiers json et kdic + delimiters = ["\,", "\{", "\}", "\[", "\]", "\:", "\(", "\)", "\<", "\>", "\="] + numeric_pattern = "-?[0-9]+\.?[0-9]*(?:[Ee]-?[0-9]+)?" + string_pattern = ( + '"[^"]*"' # Sans les double-quotes dans les strings (dur a parser...) + ) + time_pattern = "\d{1,2}:\d{2}:\d{2}\.?\d*" + other_tokens = "[\w]+" + tokens = time_pattern + "|" + numeric_pattern + "|" + string_pattern + for delimiter in delimiters: + tokens += "|" + delimiter + tokens += "|" + other_tokens + token_parser = re.compile(tokens) + numeric_parser = re.compile(numeric_pattern) + time_parser = re.compile(time_pattern) + + def check_results(test): # compare les fichiers 2 a 2 et ecrit les resultat dans le fichier comparisonResults.log print("--Comparing results...") @@ -33,11 +63,14 @@ def check_results(test): if not os.path.isdir(test_dir): print("test directory (" + test_dir + ") not available") return 0 + number_fatal_errors = 0 number_errors = 0 number_warnings = 0 number_files = 0 log_file = open(os.path.join(os.getcwd(), test, "comparisonResults.log"), "w") - log_file.write(test.upper() + " comparison\n\n") + write_message(log_file, test.upper() + " comparison\n") + # Initialisation des parsers + initialize_parsers() # test des fichiers 2 a 2 for file_name in os.listdir(ref_dir): [errors, warnings] = check_file( @@ -48,14 +81,23 @@ def check_results(test): number_files = number_files + 1 number_errors = number_errors + errors number_warnings = number_warnings + warnings + # recherche des erreurs fatales + fatal_error_files = [ + "stdout_error.log", + "stderr_error.log", + "return_code_error.log", + ] + for file_name in os.listdir(test_dir): + if file_name in fatal_error_files: + number_fatal_errors = number_fatal_errors + 1 # comparaison du nombre de fichiers if len(os.listdir(ref_dir)) == 0: - print_error(log_file, "no comparison: missing reference result files") + print_message(log_file, "no comparison: missing reference result files") number_errors = number_errors + 1 if len(os.listdir(ref_dir)) > 0 and len(os.listdir(ref_dir)) != len( os.listdir(test_dir) ): - print_error( + print_message( log_file, "number of results files (" + str(len(os.listdir(test_dir))) @@ -69,7 +111,7 @@ def check_results(test): err_file = open(err_file_name, "r", errors="ignore") for s in err_file: if s.find("error") >= 0: - print_detailed_error( + print_detailed_message( log_file, err_file_name + ": " + s, number_errors + number_warnings ) number_errors = number_errors + 1 @@ -78,18 +120,20 @@ def check_results(test): and not s.find("converted in 0") >= 0 and not s.find("...") >= 0 ): - print_detailed_error( + print_detailed_message( log_file, err_file_name + ": " + s, number_errors + number_warnings ) number_warnings = number_warnings + 1 if s.find("failure") >= 0: - print_detailed_error( + print_detailed_message( log_file, err_file_name + ": " + s, number_errors + number_warnings ) number_errors = number_errors + 1 err_file.close() - log_file.write("\n" + str(number_warnings) + " warning(s)\n") - log_file.write(str(number_errors) + " error(s)") + print_message(log_file, "\n" + str(number_warnings) + " warning(s)") + print_message(log_file, str(number_errors) + " error(s)") + if number_fatal_errors > 0: + print_message(log_file, "FATAL ERROR") log_file.close() print( @@ -100,6 +144,7 @@ def check_results(test): + " error(s), " + str(number_warnings) + " warning(s)" + + ("\nFATAL ERROR" if number_fatal_errors > 0 else "") ) print( "log writed in " @@ -199,46 +244,45 @@ def filter_khiops_temp_dir(value): # warning : 2 cellules contiennent des valeurs numeriques avec une difference relative toleree # error : les cellules sont differentes if not os.path.isfile(path_ref): - print_error(log_file, "file " + path_ref + " is missing") + print_message(log_file, "file " + path_ref + " is missing") return [1, 0] if not os.path.isfile(path_test): - print_error(log_file, "file " + path_test + " is missing") + print_message(log_file, "file " + path_test + " is missing") return [1, 0] - log_file.write( - "\nfile " + path_test.encode("utf-8", "ignore").decode("utf-8") + "\n" - ) + # En-tete de comparaison des fichiers + write_message(log_file, "\nfile " + path_test) + + # Recherche du fichier compare et de son extension + file_name = os.path.basename(path_ref) + assert file_name == os.path.basename(path_test) + _, file_extension = os.path.splitext(file_name) # test si fichier de temps - is_time_file = os.path.basename(path_ref) == "time.log" + is_time_file = file_name == "time.log" # test si fichier histogramme - is_histogram_file = "histogram" in os.path.basename( - path_ref - ) and ".log" in os.path.basename(path_ref) + is_histogram_file = "histogram" in file_name and file_extension == ".log" # test si fichier d'erreur - is_error_file = os.path.basename(path_ref) == "err.txt" + is_error_file = file_name == "err.txt" # test si fichier de benchmark - is_benchmark_file = os.path.basename(path_ref) == "benchmark.xls" + is_benchmark_file = file_name == "benchmark.xls" + + # test si fichier dictionnaire + is_kdic_file = file_extension == ".kdic" # Test si fichier json - is_json_file = ".json" in os.path.basename(path_ref) - is_json_file = is_json_file or ".khj" in os.path.basename(path_ref) - is_json_file = is_json_file or ".khvj" in os.path.basename(path_ref) - is_json_file = is_json_file or ".khcj" in os.path.basename(path_ref) - is_json_file = is_json_file or ".kdicj" in os.path.basename(path_ref) + is_json_file = file_extension in [".json", ".khj", ".khvj", ".khcj", ".kdicj"] # Cas particulier des fichier .bad qui sont en fait des fichier json (ex: LearningTest\TestKhiops\Advanced\AllResultsApiMode - if ".bad" in os.path.basename(path_ref): + if file_extension == ".bad": if ( os.path.isfile(path_ref.replace(".bad", ".khj")) or os.path.isfile(path_ref.replace(".bad", ".khj")) or os.path.isfile(path_ref.replace(".bad", ".kdicj")) ): is_json_file = True - json_errors = 0 - json_derivatrion_rules_errors = 0 # initialisation des nombres d'erreurs error = 0 @@ -248,347 +292,261 @@ def filter_khiops_temp_dir(value): if is_time_file: return [error, warning] - # nombre de lignes de chaque fichier - file_ref = open(path_ref, "r", errors="ignore") - file_test = open(path_test, "r", errors="ignore") - file_test_line_number = 0 + # lecture des lignes de chaque fichier try: - file_test_line_number = len(file_test.readlines()) + with open(path_ref, "r", errors="ignore") as file_ref: + file_ref_lines = file_ref.readlines() except BaseException as message: - print("Error: can't compute line number (" + str(message) + ")") - file_ref_line_number = 0 + error += 1 + print_message( + log_file, "Error: can't open file " + path_ref + " (" + str(message) + ")" + ) + return [error, warning] + assert file_ref_lines is not None try: - file_ref_line_number = len(file_ref.readlines()) + with open(path_test, "r", errors="ignore") as file_test: + file_test_lines = file_test.readlines() except BaseException as message: - print("Error: can't compute line number (" + str(message) + ")") + error += 1 + print_message( + log_file, "Error: can't open file " + path_test + " (" + str(message) + ")" + ) + return [error, warning] + assert file_test_lines is not None + + # Comparaison des nombres de lignes + file_ref_line_number = len(file_ref_lines) + file_test_line_number = len(file_test_lines) if file_test_line_number != file_ref_line_number: - log_file.write( + write_message( + log_file, "test file has " + str(file_test_line_number) + " lines and reference file has " + str(file_ref_line_number) - + " lines\n" + + " lines", ) error = error + 1 - file_ref.close() - file_test.close() - - # ouverture des fichiers consideres commes des tableaux de cellules separees par des tabulation - csv.field_size_limit(500000000) - file_ref = open(path_ref, "r", errors="ignore") - file_test = open(path_test, "r", errors="ignore") - # Cas particulier du fichier d'erreur, que l'on ouvre en ignorant les tabulations - if is_error_file: - file_ref_csv = csv.reader(file_ref, delimiter="\n") - file_test_csv = csv.reader(file_test, delimiter="\n") - else: - file_ref_csv = csv.reader(file_ref, delimiter="\t") - file_test_csv = csv.reader(file_test, delimiter="\t") # comparaison ligne a ligne max_threshold = 0 - number_print_max = 10 + max_print_error = 10 max_field_length = 100 - line = 0 - skip_benchmark_lines = 0 - try: - for row_t in file_test_csv: - line += 1 - if line > file_test_line_number or line > file_ref_line_number: - break - - # parcours des fichiers ligne par ligne - row_r = next(file_ref_csv) - length_r = len(row_r) - length_t = len(row_t) - i = 0 + skip_benchmark_lines = False + line_number = min(file_ref_line_number, file_test_line_number) + for index in range(line_number): + line = index + 1 + line_ref = file_ref_lines[index].rstrip() + line_test = file_test_lines[index].rstrip() + + # cas special des fichiers de benchmark: + # on saute les blocs de ligne dont le role est le reporting de temps de calcul + # ("Time" dans le premier champ d'entete) + if is_benchmark_file and line_ref.find("Time") != -1: + skip_benchmark_lines = True + continue + if is_benchmark_file and skip_benchmark_lines: + # fin de bloc si ligne vide + if line_ref.find("\t") == -1: + skip_benchmark_lines = False + if skip_benchmark_lines: + continue + + # Ok si lignes egales + if line_ref == line_test: + continue + + # cas special du fichier d'erreur: on tronque les lignes qui font du reporting de temps de calcul (" time:") + if ( + is_error_file + and line_ref.find(" time:") != -1 + and line_test.find(" time:") != -1 + ): + line_ref = filter_time(line_ref) + line_test = filter_time(line_test) - # comparaison des nombre de colonnes - if length_r != length_t: - log_file.write( - "test file (line " - + str(line) - + ") has " - + str(length_t) - + " columns and reference file has " - + str(length_r) - + " columns\n" - ) - error = error + 1 - break + # cas special du fichier d'erreur: + # on saute les lignes qui font du reporting de temps de calcul ("interrupted ") + if ( + is_error_file + and line_ref.lower().find(" interrupted ") != -1 + and line_test.lower().find(" interrupted ") != -1 + ): + continue - # cas special du fichier d'erreur: on tronque les lignes qui font du reporting de temps de calcul (" time:") - if ( - is_error_file - and length_r > 0 - and row_r[i].find(" time:") != -1 - and length_t > 0 - and row_t[i].find(" time:") != -1 - ): - row_r[i] = filter_time(row_r[i]) - row_t[i] = filter_time(row_t[i]) + # cas special du fichier d'erreur, pour le message "(Operation canceled)" qui n'est pas case sensitive + if is_error_file: + if line_ref.find("(Operation canceled)") != -1: + line_ref = line_ref.replace( + "(Operation canceled)", "(operation canceled)" + ) + if line_test.find("(Operation canceled)") != -1: + line_test = line_test.replace( + "(Operation canceled)", "(operation canceled)" + ) - # cas special du fichier d'erreur: - # on saute les lignes qui font du reporting de temps de calcul ("interrupted after") - if ( - is_error_file - and length_r > 0 - and row_r[i].find(" interrupted ") != -1 - and length_t > 0 - and row_t[i].find(" interrupted ") != -1 - ): - continue + # cas special du fichier d'erreur en coclustering: + # on saute les lignes d'ecritire de rapport intermediaire qui different par le temps + # ("Write intermediate coclustering report") + if ( + is_error_file + and line_ref.find("Write intermediate coclustering report") != -1 + and line_test.find("Write intermediate coclustering report") != -1 + ): + continue - # cas special du fichier d'erreur, pour le message "(Operation canceled)" qui n'est pas case sensistive - if is_error_file: - if length_r > 0 and row_r[i].find("(Operation canceled)") != -1: - row_r[i] = row_r[i].replace( - "(Operation canceled)", "(operation canceled)" - ) - if length_t > 0 and row_t[i].find("(Operation canceled)") != -1: - row_t[i] = row_t[i].replace( - "(Operation canceled)", "(operation canceled)" - ) + # cas special du fichier d'histogramme: + # on tronque les lignes qui font du reporting de temps de calcul (" time\t") + if ( + is_histogram_file + and line_ref.find("time") != -1 + and line_test.find("time") != -1 + ): + line_ref = line_ref[: line_ref.find("time")] + line_test = line_test[: line_test.find("time")] + # cas special du fichier d'histogramme: + # on ignore le champ tronque les lignes qui font du reporting de temps de calcul (" time\t") + if ( + is_histogram_file + and line_ref.find("Version") != -1 + and line_test.find("Version") != -1 + ): + line_ref = "" + line_test = "" - # cas special du fichier d'erreur en coclustering: - # on saute les lignes d'ecritire de rapport intermediaire qui different par le temps - # ("Write intermediate coclustering report") - if ( - is_error_file - and length_r > 0 - and row_r[i].find("Write intermediate coclustering report") != -1 - and length_t > 0 - and row_t[i].find("Write intermediate coclustering report") != -1 - ): - continue + # cas special du caractere # en tete de premiere ligne de fichier (identifiant de version d'application) + if line == 1 and line_ref.find("#") == 0 and line_test.find("#") == 0: + continue - # cas special du fichier d'histogramme: - # on tronque les lignes qui font du reporting de temps de calcul (" time\t") - if ( - is_histogram_file - and length_r > 2 - and row_r[1].find("time") != -1 - and length_t > 2 - and row_t[1].find("time") != -1 - ): - row_r[2] = "" - row_t[2] = "" - # cas special du fichier d'histogramme: - # on ignore le champ tronque les lignes qui font du reporting de temps de calcul (" time\t") - if ( - is_histogram_file - and length_r >= 2 - and row_r[0].find("Version") != -1 - and length_t >= 2 - and row_t[0].find("Version") != -1 - ): - row_r[1] = "" - row_t[1] = "" + # idem pour des informations de licences d'un fichier d'erreur + if ( + is_error_file + and line == 2 + and line_ref.find("Khiops ") == 0 + and line_test.find("Khiops ") == 0 + ): + continue - # cas special du caractere # en tete de premiere ligne de fichier (identifiant de version d'application) - if ( - line == 1 - and length_r > 0 - and row_r[0].find("#") == 0 - and length_t > 0 - and row_t[0].find("#") == 0 - ): - continue - # idem pour des informations de licences d'un fichier d'erreur + # cas special du champ version des fichiers json (identifiant de version d'application) + if ( + is_json_file + and line_ref.find('"version": ') >= 0 + and line_test.find('"version": ') >= 0 + ): + continue + + # Sinon, on analyse les champs + line_fields_ref = line_ref.split("\t") + line_fields_test = line_test.split("\t") + + # comparaison des nombres de champs + field_number_ref = len(line_fields_ref) + field_number_test = len(line_fields_test) + if field_number_ref != field_number_test: + if error < max_print_error: + write_message( + log_file, + "test file (line " + + str(line) + + ") has " + + str(field_number_test) + + " columns and reference file has " + + str(field_number_ref) + + " columns", + ) + elif error == max_print_error: + write_message(log_file, "...") + error = error + 1 + + # comparaison des champs + field_number_length = min(field_number_ref, field_number_test) + for i in range(field_number_length): + field_ref = line_fields_ref[i] + field_test = line_fields_test[i] + + # parcours des lignes cellule par cellule + # cas special du fichier d'erreur ou json: on tronque les chemins vers les repertoires temporaires de Khiops if ( - is_error_file - and line == 2 - and length_r > 0 - and row_r[0].find("Khiops ") == 0 - and length_t > 0 - and row_t[0].find("Khiops ") == 0 + (is_error_file or is_json_file) + and field_ref.find("~Khiops") != -1 + and field_test.find("~Khiops") != -1 ): - continue + field_ref = filter_khiops_temp_dir(field_ref) + field_test = filter_khiops_temp_dir(field_test) - # cas special du champ version des fichiers json (identifiant de version d'application) + # cas special du fichier d'erreur ou khj: on tronque le compte des lignes avec des warning sur le nombre de records secondaires if ( - is_json_file > 0 - and length_r == 2 - and row_r[1].find("version") == 0 - and length_t == 2 - and row_t[1].find("version") == 0 + (is_error_file or is_json_file) + and "warning" in field_ref + and " after reading " in field_ref + and " secondary records " in field_ref + and "warning" in field_test + and " after reading " in field_test + and " secondary records " in field_test ): - continue - - # cas special des fichiers de benchmark: - # on saute les blocs de ligne dont le role est le reporting de temps de calcul - # ("Time" dans le premier champ d'entete) - if is_benchmark_file and length_r > 0 and row_r[i].find("Time") != -1: - skip_benchmark_lines = 1 - continue - if is_benchmark_file and skip_benchmark_lines: - # fin de bloc si ligne vide - skip_benchmark_lines = 0 - j = 0 - while j < length_r: - if row_r[j] != "": - skip_benchmark_lines = 1 - j += 1 - if skip_benchmark_lines: - continue - - # comparaison des cellules - while i < length_r: - # parcours des lignes cellule par cellule - if i < length_t: - # cas special du fichier d'erreur ou json: on tronque les chemins vers les repertoires temporaires de Khiops - if ( - (is_error_file or is_json_file) - and length_r > i - and row_r[i].find("~Khiops") != -1 - and length_t > i - and row_t[i].find("~Khiops") != -1 - ): - row_r[i] = filter_khiops_temp_dir(row_r[i]) - row_t[i] = filter_khiops_temp_dir(row_t[i]) - - # cas special du fichier d'erreur ou khj: on tronque le compte des lignes avec des warning sur le nombre de records secondaires - if ( - (is_error_file or is_json_file) - and length_r > 0 - and "warning" in row_r[i] - and " after reading " in row_r[i] - and " secondary records " in row_r[i] - and length_t > 0 - and "warning" in row_t[i] - and " after reading " in row_t[i] - and " secondary records " in row_t[i] - ): - row_r[i] = filter_secondary_record(row_r[i]) - row_t[i] = filter_secondary_record(row_t[i]) - - # cas general de comparaison de cellules - [eval_res, threshold_res] = check_cell(row_r[i], row_t[i]) - # truncature des champs affiches dans les messages d'erreur - row_t_i = row_t[i] - if len(row_t_i) > max_field_length: - row_t_i = row_t_i[0:max_field_length] + "..." - row_r_i = row_r[i] - if len(row_r_i) > 5: - row_r_i = row_r_i[0:max_field_length] + "..." - # messages d'erreur - if eval_res == 0: - if error < number_print_max or threshold_res > max_threshold: - log_file.write( - "l" - + str(file_test_csv.line_num) - + " c" - + str(i + 1) - + " " - + row_t_i - + " -> " - + row_r_i - + "\n" - ) - elif error == number_print_max: - log_file.write("...\n") - error = error + 1 - # Cas particulier des erreurs dans le fichier json, si elles sont dues a la regle de derivation - if is_json_file: - json_errors += 1 - if row_t[i].find("derivationRule:") >= 0: - json_derivatrion_rules_errors += 1 - elif eval_res == 2: - warning = warning + 1 - if threshold_res > max_threshold: - max_threshold = threshold_res - else: - # apparemment il y a des cellules vides en plus dans le fichier de reference... - if len(row_r[i]) != 0: - if error < number_print_max: - log_file.write( - "l" - + str(file_test_csv.line_num) - + " c" - + str(i + 1) - + " " - + row_r[i] - + " disappeared\n" - ) - elif error == number_print_max: - log_file.write("...\n") - error = error + 1 - i = i + 1 - # end while - # end for - except BaseException as message: - print( - "Error: can't compare file csv cells in " - + os.path.basename(path_ref) - + " (" - + str(message) - + ")" - ) - file_ref.close() - file_test.close() - # print(str(error)+" error(s)") + field_ref = filter_secondary_record(field_ref) + field_test = filter_secondary_record(field_test) + + # cas general de comparaison de cellules + [eval_res, threshold_res] = check_cell(field_ref, field_test) + # truncature des champs affiches dans les messages d'erreur + if len(field_test) > max_field_length: + field_test = field_test[0:max_field_length] + "..." + if len(field_ref) > max_field_length: + field_ref = field_ref[0:max_field_length] + "..." + # messages d'erreur + if eval_res == 0: + if error < max_print_error or threshold_res > max_threshold: + write_message( + log_file, + "l" + + str(line) + + " c" + + str(i + 1) + + " " + + field_test + + " -> " + + field_ref, + ) + elif error == max_print_error: + write_message(log_file, "...") + error = error + 1 + elif eval_res == 2: + warning = warning + 1 + max_threshold = max(threshold_res, max_threshold) if warning > 0: - log_file.write(str(warning) + " warning(s) (epsilon difference)\n") + write_message(log_file, str(warning) + " warning(s) (epsilon difference)") if error == 0: - log_file.write("OK\n") + write_message(log_file, "OK") elif max_threshold > 0: - log_file.write("max relative difference: " + str(max_threshold) + "\n") + write_message(log_file, "max relative difference: " + str(max_threshold)) if error > 0: - log_file.write(str(error) + " error(s)") - if is_json_file and 0 < json_errors == json_derivatrion_rules_errors: - log_file.write(" (only in derivation rules)") - log_file.write("\n") + write_message(log_file, str(error) + " error(s)") return [error, warning] def split_cell(cell): - # decoupe une chaine de caractere en un tableau de sous-chaines, qui sont des portions numeriques, soit non numeriques - max_substrings = 10000 - length = len(cell) - i = 0 - substrings = [] - substring = "" - float_type = 1 - string_type = 2 - previous_type = 0 - while i < length: - c = cell[i] - if c in ".0123456789": - cell_type = float_type - else: - cell_type = string_type - if type == previous_type: - substring = substring + c - else: - if previous_type != 0: - substrings.append(substring) - substring = "" + c - previous_type = cell_type - i = i + 1 - if i == length: - substrings.append(substring) - if len(substrings) > max_substrings: - return substrings + # Pour gerer les double-quotes a l'interieur des strings, pour les format json et kdic + cell = cell.replace('\\"', "'") + cell = cell.replace('""', "'") + substrings = token_parser.findall(cell) return substrings # return true if time format - - def is_time(val): # si format time (?h:mm:ss), on ignore en renvoyant OK - time = val.strip() - for c in time: - if c not in ":0123456789": - return False - if 7 <= len(time) <= 8: - if time.find(":") >= 1 and time.find(":", 3) >= 4: - return True - return False + return time_parser.match(val.strip()) def check_value(val1, val2): - # check_cell, dans le cas de valeurs elementaires + # Comparaison de deux valeurs numeriques + # renvoie deux valeur: + # - result: + # - 1 si les cellules sont identiques + # - 2 si les la difference relative est toleree + # - 0 si les cellules sont differentes + # - threshold: differe,ce relative si result = 2 # Ok si valeurs egales if val1 == val2: return [1, 0] @@ -611,9 +569,12 @@ def check_value(val1, val2): def check_cell(cell1, cell2): # comparaison de deux cellules # pour les valeurs numeriques, une diffence relative de 0.00001 est toleree - # renvoi 1 si les cellules sont identiques - # 2 si les la difference relative est toleree - # 0 si les cellules sont differentes + # renvoie deux valeur: + # - result: + # - 1 si les cellules sont identiques + # - 2 si les la difference relative est toleree + # - 0 si les cellules sont differentes + # - threshold: differe,ce relative si result = 2 if cell1 == cell2: return [1, 0] @@ -635,7 +596,7 @@ def check_cell(cell1, cell2): if ( cell1.find("Unable to open file") != -1 - and cell2.find("nable to access file") != -1 + and cell2.find("Unable to access file") != -1 ): return [1, 0] @@ -675,17 +636,10 @@ def check_cell(cell1, cell2): # sinon c'est peut etre un pbm d'arrondi # on accepte les differences relatives faibles - threshold = float(0.00001) - try: - float1 = float(cell1) - float2 = float(cell2) - res = ( - 0.5 * abs(float1 - float2) / (abs(float1) / 2 + abs(float2) / 2 + threshold) - ) - if res <= threshold: - return [2, res] - return [0, res] - except ValueError: + if numeric_parser.match(cell1) and numeric_parser.match(cell2): + [eval_result, threshold_result] = check_value(cell1, cell2) + return [eval_result, threshold_result] + else: # on arrive pas a le convertir en float, ce n'est pas un nombre # on decoupe chaque cellule sous la forme d'un ensemble de sous-chaines qui sont soit # des libelles, soit des float @@ -709,7 +663,6 @@ def check_cell(cell1, cell2): return [0, 0] if eval_result == 2: full_eval = 2 - if threshold_result > full_threshold: - full_threshold = threshold_result + full_threshold = max(threshold_result, full_threshold) i = i + 1 return [full_eval, full_threshold] diff --git a/test/LearningTest/cmd/python/help_options.py b/test/LearningTest/cmd/python/help_options.py index 3940a4310..befcfadfe 100644 --- a/test/LearningTest/cmd/python/help_options.py +++ b/test/LearningTest/cmd/python/help_options.py @@ -3,7 +3,7 @@ print( "KhiopsBatchMode: " + str(os.getenv("KhiopsBatchMode")) - + "\n\t true, false (default: false)" + + "\n\t true, false (default: true)" ) print( diff --git a/test/LearningTest/cmd/python/learning_test.config b/test/LearningTest/cmd/python/learning_test.config new file mode 100644 index 000000000..55a616012 --- /dev/null +++ b/test/LearningTest/cmd/python/learning_test.config @@ -0,0 +1,20 @@ +# The config file learning_test.config must be in directory LearningTest\cmd\python +# It is optional, in which case all keys are set to empty +# It contains the following key=value pairs that allows a personnalisation of the environment: +# - path: additional path (eg: to access to java runtime) +# - classpath: additional classpath for java libraries +# - learningtest_root: alternative root dir to use where LearningTest is located +# - learning_release_dir: dir where the release developement binaries are located (to enable the 'r' alias') +# - learning_debug_dir: dir where the debug developement binaries are located (to enable the 'd' alias') + +# Uncomment the following keys by removing the leading '#' and assigning a path + +# path= + +# classpath= + +# learningtest_root= + +# learning_release_dir= + +# learning_debug_dir= diff --git a/test/LearningTest/cmd/python/learning_test_env.py b/test/LearningTest/cmd/python/learning_test_env.py index d1b832e12..70194cbc1 100644 --- a/test/LearningTest/cmd/python/learning_test_env.py +++ b/test/LearningTest/cmd/python/learning_test_env.py @@ -106,9 +106,8 @@ def load_learning_test_config(): # Fill missing keys with empty values, the as when the config file is missing if ok: if len(learning_test_config_keys) != len(config_dic): - missing_keys = "" for key in learning_test_config_keys: - if not key in config_dic: + if key not in config_dic: config_dic[key] = "" # Return if ok if ok: @@ -119,7 +118,7 @@ def load_learning_test_config(): print( "The config file " + learning_test_config_file_name - + " must be in directory LearningTest\cmd\python" + + " must be in directory LearningTest\\cmd\\python" ) print("It is optional, in which case all keys are set to empty") print( diff --git a/test/LearningTest/cmd/python/test_khiops.py b/test/LearningTest/cmd/python/test_khiops.py index f352c3ee7..18852150c 100644 --- a/test/LearningTest/cmd/python/test_khiops.py +++ b/test/LearningTest/cmd/python/test_khiops.py @@ -95,6 +95,23 @@ def test(modl_path, samples_path, sample_test): # verifie l'existence du repertoire et du fichier de sample_test # et lance la comparaison pour le sample 'sample_test' + def filter_lines(lines, filtered_pattern): + """retourne les lignes sans celles contenant le pattern en parametre""" + output_lines = [] + for line in lines: + if filtered_pattern not in line: + output_lines.append(line) + return output_lines + + def filter_empty_lines(lines): + """retourne les lignes sans les lignes vides""" + output_lines = [] + for line in lines: + line = line.strip() + if line != "": + output_lines.append(line) + return output_lines + # check MODL path if modl_path != "nul": if not os.path.isfile(modl_path): @@ -184,7 +201,7 @@ def test(modl_path, samples_path, sample_test): time_file_name = os.path.join( os.getcwd(), os.path.join(test_dir, "results.ref", "time.log") ) - print(time_file_name) ## + print(time_file_name) if os.path.isfile(time_file_name): file_time = open(time_file_name, "r") lines = file_time.readlines() @@ -252,12 +269,6 @@ def test(modl_path, samples_path, sample_test): # un plantagephysique de l'allocateur en cas de depassement des contraintes memoires des scenarios os.putenv("KhiopsHardMemoryLimitMode", "true") - # khiops en mode multitable via une variable d'environnement - os.putenv("KhiopsMultiTableMode", "true") - - # khiops en mode Text via une variable d'environnement - # os.putenv('KhiopsTextVariableMode', 'true') - # khiops en mode crash test via une variable d'environnement os.putenv("KhiopsCrashTestMode", "true") @@ -270,14 +281,12 @@ def test(modl_path, samples_path, sample_test): khiops_params.append("-n") khiops_params.append(khiops_mpi_process_number) khiops_params.append(modl_path) - if os.getenv("KhiopsBatchMode") == "true": + if os.getenv("KhiopsBatchMode") != "false": khiops_params.append("-b") khiops_params.append("-i") khiops_params.append(os.path.join(os.getcwd(), "test.prm")) khiops_params.append("-e") - khiops_params.append( - os.path.join(os.getcwd(), os.path.join(test_dir, "results", "err.txt")) - ) + khiops_params.append(os.path.join(os.getcwd(), test_dir, "results", "err.txt")) if os.getenv("KhiopsOutputScenarioMode") == "true": khiops_params.append("-o") khiops_params.append(os.path.join(os.getcwd(), "test.output.prm")) @@ -287,10 +296,67 @@ def test(modl_path, samples_path, sample_test): # Lancement de khiops time_start = time.time() - try: - subprocess.run(khiops_params) - except Exception as error: - print("Execution failed:" + str(error)) + with subprocess.Popen( + khiops_params, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) as khiops_process: + stdout, stderr = khiops_process.communicate() + + # En cas d'anomalie, memorisation du contenu de des sorties standard + if stdout != "": + is_kni = "KNI" in modl_path + is_coclustering = "Coclustering" in modl_path + lines = stdout.split("\n") + lines = filter_empty_lines(lines) + # Pour les test KNI, le stdout contient une ligne avec le nombre de records + if is_kni: + lines = filter_lines(lines, "Recoded record number:") + # Cas particulier du coclustering en mode debug + if is_coclustering: + lines = filter_lines( + lines, "BEWARE: Optimization level set to 0 in debug mode only!!!" + ) + # Exception egalement pour cas des lancement en mode parallele simule + lines = filter_lines(lines, "Warning : simulated parallel mode") + # Exception en mode debug, pour les stats memoire + if "Memory stats (number of pointers, and memory space)" in stdout: + ok = True + # Parcours des lignes pour voir si ce sont bien des messages de stats, y compris en parallel + # En parallele, on a l'id du process entre crochets en tete de chaque ligne + for line in lines: + ok = ( + (line[0] == "[" and line[-1] == "]") + or "Memory stats (number of pointers, and memory space)" in line + or "Alloc: " in line + or "Requested: " in line + ) + else: + ok = len(lines) == 0 + if not ok: + with open( + os.path.join(os.getcwd(), test_dir, "results", "stdout_error.log"), + "w", + ) as stdout_file: + stdout_file.write(stdout) + if stderr != "": + with open( + os.path.join(os.getcwd(), test_dir, "results", "stderr_error.log"), "w" + ) as stderr_file: + stderr_file.write(stderr) + if khiops_process.returncode != 0 and khiops_process.returncode != 2: + with open( + os.path.join(os.getcwd(), test_dir, "results", "return_code_error.log"), + "w", + ) as return_code_file: + return_code_file.write( + "Wrong return code: " + + str(khiops_process.returncode) + + " (should be 0 or 2)" + ) + time_stop = time.time() print(sample_test.upper() + " test done") diff --git a/test/LearningTest/cmd/python/test_khiops_all.py b/test/LearningTest/cmd/python/test_khiops_all.py index 6a530016d..9eaf7dd5e 100644 --- a/test/LearningTest/cmd/python/test_khiops_all.py +++ b/test/LearningTest/cmd/python/test_khiops_all.py @@ -6,40 +6,33 @@ import stat -# lance les tests de khiops sur tous les repertoires contenus dans la liste "tests" - - -def remove_old_tests_resuls(): - """Efface tous les resultats de TOUS les tests (meme ceux qui ne sont pas specifier dans la variable tests)""" - all_tools_test_root = os.path.join( - learning_test_env.learning_test_root, "LearningTest" - ) - for tool_test_dir in os.listdir(all_tools_test_root): - tools_test_root = os.path.join(all_tools_test_root, tool_test_dir) - if os.path.isdir(tools_test_root) and tool_test_dir.find("Test") == 0: - for test in os.listdir(tools_test_root): - test_path = os.path.join(tools_test_root, test) - if os.path.isdir(test_path): - for sub_test in os.listdir(test_path): - sub_test_path = os.path.join(test_path, sub_test) - if os.path.isfile( - os.path.join(sub_test_path, "comparisonResults.log") - ): - print(sub_test_path) - # os.remove(os.path.join(sub_test_path, 'comparisonResults.log')) - result_dir = os.path.join(sub_test_path, "results") - if os.path.isdir(result_dir) and False: - for file_name in os.listdir(result_dir): - file_path = os.path.join(result_dir, file_name) - os.chmod(file_path, stat.S_IWRITE) - os.remove(file_path) - - +# lance les tests de khiops sur tous les repertoires contenus dans la liste "tool_test_dirs" def test_khiops_tool(tool_name, tool_version, tool_test_dirs): """Run tool on test dirs""" # Build tool exe path name from version tool_exe_name, tool_test_sub_dir = test_khiops.retrieve_tool_info(tool_name) tool_exe_path = test_khiops.build_tool_exe_path(tool_exe_name, tool_version) + # Clean results + for test in tool_test_dirs: + tool_samples_path = os.path.join( + learning_test_env.learning_test_root, + "LearningTest", + tool_test_sub_dir, + test, + ) + if os.path.isdir(tool_samples_path): + for sub_test in os.listdir(tool_samples_path): + sub_test_path = os.path.join(tool_samples_path, sub_test) + file_path = os.path.join(sub_test_path, "comparisonResults.log") + if os.path.isfile(file_path): + os.chmod(file_path, stat.S_IWRITE) + os.remove(file_path) + result_dir = os.path.join(sub_test_path, "results") + if os.path.isdir(result_dir): + for file_name in os.listdir(result_dir): + file_path = os.path.join(result_dir, file_name) + os.chmod(file_path, stat.S_IWRITE) + os.remove(file_path) # Run tests for test in tool_test_dirs: print("\n\n--------------------------------------------------------") @@ -55,14 +48,19 @@ def test_khiops_tool(tool_name, tool_version, tool_test_dirs): if __name__ == "__main__": - if len(sys.argv) != 2: - print("testAll [version]") + if len(sys.argv) != 2 and len(sys.argv) != 3: + print("testAll [version] ") print(" run all tests for all Khiops tools") print("\tversion: version of the tool") print("\t d: debug version in developpement environnement") print("\t r: release version in developpement environnement") print("\t ver: ..exe in directory LearningTest\\cmd\\modl") print("\t nul: for comparison with the test results only") + print("\t full exe path, if parameter is used") + print("\ttool: all tools if not specified, one specified tool otherwise") + print("\t Khiops") + print("\t Coclustering") + print("\t KNI") exit(0) # Info on complete tests @@ -75,9 +73,6 @@ def test_khiops_tool(tool_name, tool_version, tool_test_dirs): sys.stdout = test_khiops.Unbuffered(sys.stdout) - # Remove old results (not activated) - # remove_old_tests_resuls() - # Passage en mode batch os.environ["KhiopsBatchMode"] = "true" @@ -85,6 +80,11 @@ def test_khiops_tool(tool_name, tool_version, tool_test_dirs): version = sys.argv[1] assert version is not None + # Retrieve tool + tool = "" + if len(sys.argv) == 3: + tool = sys.argv[2] + # Khiops tool khiops_tests = [ "Standard", @@ -97,9 +97,6 @@ def test_khiops_tool(tool_name, tool_version, tool_test_dirs): "MultipleTargets", "MultiTables", "DeployCoclustering", - "Histograms", - "HistogramsLimits", - "TextVariables", "SparseData", "SparseModeling", "ParallelTask", @@ -111,6 +108,10 @@ def test_khiops_tool(tool_name, tool_version, tool_test_dirs): "CrashTests", "SmallInstability", ] + # V11 "Histograms", + # V11 "HistogramsLimits", + # V11 "TextVariables", + # Following tests are very long, instable and not usefull: if os.getenv("KhiopsCompleteTests") == "true": khiops_tests.append("Classification") @@ -118,14 +119,17 @@ def test_khiops_tool(tool_name, tool_version, tool_test_dirs): khiops_tests.append("MTClassification") khiops_tests.append("Regression") khiops_tests.append("ChallengeAutoML") - test_khiops_tool("Khiops", version, khiops_tests) + if tool == "" or tool == "Khiops": + test_khiops_tool("Khiops", version, khiops_tests) # Coclustering tool coclustering_tests = ["Standard", "Bugs", "NewPriorV9", "SmallInstability"] - test_khiops_tool("Coclustering", version, coclustering_tests) + if tool == "" or tool == "Coclustering": + test_khiops_tool("Coclustering", version, coclustering_tests) # KNI tool KNI_tests = ["Standard", "MultiTables", "SmallInstability"] - test_khiops_tool("KNI", version, KNI_tests) + if tool == "" or tool == "KNI": + test_khiops_tool("KNI", version, KNI_tests) print("all tests are done") diff --git a/test/LearningTest/cmd/python/utils.py b/test/LearningTest/cmd/python/utils.py index bb8b5e35b..867400fe1 100644 --- a/test/LearningTest/cmd/python/utils.py +++ b/test/LearningTest/cmd/python/utils.py @@ -39,7 +39,7 @@ def copyFilesWithoutExtension(src, dest, tabExt): for fileName in os.listdir(src): if not os.path.isdir(os.path.join(src, fileName)): (root, extension) = os.path.splitext(fileName) - if not extension in tabExt: + if extension not in tabExt: copy(os.path.join(src, fileName), dest)