diff --git a/.clang-format b/.clang-format index c7fdca608..016985d5a 100644 --- a/.clang-format +++ b/.clang-format @@ -2,6 +2,7 @@ BasedOnStyle: LLVM Language: Cpp ColumnLimit: 120 +ReflowComments: false IndentWidth: 8 AccessModifierOffset: -8 BreakBeforeBraces: Allman diff --git a/.github/actions/test-khiops-install/action.yml b/.github/actions/test-khiops-install/action.yml index a44d7c275..4d6c131f1 100644 --- a/.github/actions/test-khiops-install/action.yml +++ b/.github/actions/test-khiops-install/action.yml @@ -61,11 +61,11 @@ runs: touch test/LearningTest/TestCoclustering/Standard/Iris/results/time.log touch test/LearningTest/TestKhiops/Standard/Iris/results/time.log echo "Check test results" - cd test/LearningTest/cmd/python/ - $PYTHON test_khiops.py Khiops nul Standard - $PYTHON test_khiops.py Coclustering nul Standard - $PYTHON apply_command.py errors ../../TestKhiops/Standard/ Iris | tee /tmp/khiops-log.txt - $PYTHON apply_command.py errors ../../TestCoclustering/Standard/ Iris | tee /tmp/coclustering-log.txt + cd test/LearningTestTool/py/ + $PYTHON kht_test.py ../../LearningTest/TestKhiops/Standard/Iris check + $PYTHON kht_test.py ../../LearningTest/TestCoclustering/Standard/Iris check + $PYTHON kht_apply.py ../../LearningTest/TestKhiops/Standard/Iris errors | tee /tmp/khiops-log.txt + $PYTHON kht_apply.py ../../LearningTest/TestCoclustering/Standard/Iris errors | tee /tmp/coclustering-log.txt if (grep -q error /tmp/khiops-log.txt || grep -q error /tmp/coclustering-log.txt); then echo "::error::Errors in Khiops run" false diff --git a/.github/workflows/run-standard-tests.yml b/.github/workflows/run-standard-tests.yml index b7f2aa012..9545b5975 100644 --- a/.github/workflows/run-standard-tests.yml +++ b/.github/workflows/run-standard-tests.yml @@ -107,8 +107,9 @@ jobs: - name: Set environment variables shell: bash run: | - echo "TEST_PY=test/LearningTest/cmd/python/test_khiops.py" >> "$GITHUB_ENV" - echo 
"APPLY_PY=test/LearningTest/cmd/python/apply_command.py" >> "$GITHUB_ENV" + echo "TEST_PY=test/LearningTestTool/py/kht_test.py" >> "$GITHUB_ENV" + echo "APPLY_PY=test/LearningTestTool/py/kht_apply.py" >> "$GITHUB_ENV" + echo "COLLECT_PY=test/LearningTestTool/py/kht_collect_results.py" >> "$GITHUB_ENV" echo "BIN_PATH=${{ github.workspace }}/build/${{ env.PRESET_NAME }}/bin" >> "$GITHUB_ENV" - name: Setup MPI (windows) if: ${{ runner.os == 'Windows' }} @@ -142,17 +143,16 @@ jobs: export KhiopsMPIProcessNumber=4 fi if [[ "${{ matrix.config }}" == "release" ]] ; then - python3 $TEST_PY Khiops ${BIN_PATH}/MODL${EXT} Standard + python $TEST_PY test/LearningTest/TestKhiops/Standard ${BIN_PATH} if [[ "${{ matrix.running-mode }}" != "parallel" ]] ; then - python $TEST_PY Coclustering ${BIN_PATH}/MODL_Coclustering${EXT} Standard - python $TEST_PY KNI ${BIN_PATH}/KNITransfer${EXT} Standard - python $TEST_PY KNI ${BIN_PATH}/KNITransfer${EXT} MultiTables SpliceJunction + python $TEST_PY test/LearningTest/TestCoclustering/Standard ${BIN_PATH} + python $TEST_PY test/LearningTest/TestKNI/Standard ${BIN_PATH} fi else - python $TEST_PY Khiops ${BIN_PATH}/MODL${EXT} Standard IrisLight + python $TEST_PY test/LearningTest/TestKhiops/Standard/IrisLight ${BIN_PATH} if [[ "${{ matrix.running-mode }}" != "parallel" ]] ; then - python $TEST_PY Coclustering ${BIN_PATH}/MODL_Coclustering${EXT} Standard Iris - python $TEST_PY KNI ${BIN_PATH}/KNITransfer${EXT} Standard Iris + python $TEST_PY test/LearningTest/TestCoclustering/Standard/Iris ${BIN_PATH} + python $TEST_PY test/LearningTest/TestKNI/Standard/Iris ${BIN_PATH} fi fi - name: Collect results @@ -160,41 +160,43 @@ jobs: if: success() || failure() run: | if [[ "${{ matrix.config }}" == "release" ]] ; then - python $APPLY_PY errors test/LearningTest/TestKhiops/Standard | tee test/LearningTest/TestKhiops/Standard/errors.txt + python $APPLY_PY test/LearningTest/TestKhiops/Standard errors | tee -a results/errors.txt if [[ "${{ 
matrix.running-mode }}" != "parallel" ]] ; then - python $APPLY_PY errors test/LearningTest/TestCoclustering/Standard | tee test/LearningTest/TestCoclustering/Standard/errors.txt - python $APPLY_PY errors test/LearningTest/TestKNITransfer/Standard | tee test/LearningTest/TestKNITransfer/Standard/errors.txt - python $APPLY_PY errors test/LearningTest/TestKNITransfer/MultiTables SpliceJunction | tee test/LearningTest/TestKNITransfer/MultiTables/errors.txt + python $APPLY_PY test/LearningTest/TestCoclustering/Standard errors | tee -a results/errors.txt + python $APPLY_PY test/LearningTest/TestKNI/Standard errors | tee -a results/errors.txt fi else - python $APPLY_PY errors test/LearningTest/TestKhiops/Standard IrisLight | tee test/LearningTest/TestKhiops/Standard/errors.txt + python $APPLY_PY test/LearningTest/TestKhiops/Standard/IrisLight errors | tee -a results/errors.txt if [[ "${{ matrix.running-mode }}" != "parallel" ]] ; then - python $APPLY_PY errors test/LearningTest/TestCoclustering/Standard Iris | tee test/LearningTest/TestCoclustering/Standard/errors.txt - python $APPLY_PY errors test/LearningTest/TestKNITransfer/Standard Iris | tee test/LearningTest/TestKNITransfer/Standard/errors.txt + python $APPLY_PY test/LearningTest/TestCoclustering/Standard/Iris errors | tee -a results/errors.txt + python $APPLY_PY test/LearningTest/TestKNI/Standard/Iris errors | tee -a results/errors.txt fi fi + python $COLLECT_PY test/LearningTest/ results --collect-type warnings -f basic - name: Check results + # We escape eol in the results file to print multilines in the github UI https://github.com/orgs/community/discussions/26288 shell: bash run: | - if grep -qr "error" --include="errors.txt" test/LearningTest/ ; then - echo "::error::Errors in test" + MSG=$(: full path of the executable - d: debug version in developpement environnement - r: release version in developpement environnement - ver: ..exe in directory LearningTest\cmd\modl - nul: for comparison with the test 
results only - testName: name of the tool test directory (Standard, MultiTables...) - subTestName: optional, name of the tool test sub-directory (Adult,Iris...) -~~~~ - -#### test_khiops_all.py -Example -- python [ScriptPath]/test_khiops_all.py r -- python [ScriptPath]/test_khiops_all.py [MODL_PATH] Khiops -~~~~ -testAll [version] - run all tests for all Khiops tools - version: version of the tool - d: debug version in developpement environnement - r: release version in developpement environnement - ver: ..exe in directory LearningTest\cmd\modl - nul: for comparison with the test results only - full exe path, if parameter is used - tool: all tools if not specified, one specified tool otherwise - Khiops - Coclustering - KNI -~~~~ - -#### apply_command.py -Example: -- python [ScriptPath]/apply_command.py errors TestKhiops/Standard - -~~~~ -apply_command [command] [root_path] ([dir_name]) - apply command on a directory structure - command: name of the command - rootPath is the path of the root directory - dirName is the name of one specific sub-directory - or all (default) for executing on all sub-directories - example: applyCommand list TestKhiops\Standard - example: applyCommand list TestKhiops\Standard Adult - - List of available standard commands (* for all commands): - list: list of sub-directories - errors: report errors and warnings - logs: detailed report errors and warnings - compareTimes: compare time with ref time and report warnings only - compareTimesVerbose: compare time with ref time and report all - performance: report SNB test accuracy - performanceRef: report ref SNB test accuracy - clean: delete results files - cleanref: delete results.ref files - makeref: copy results files to results.ref - copyref: copy results.ref files to results - checkHDFS: check if parameter files are compliant with HDFS - transformHDFS: transform parameter files to be compliant with HDFS - transformHDFSresults: transform results files to be compliant with HDFS -~~~~ - -#### 
apply_command_all.py -Example: -- python [ScriptPath]/apply_command_all.py errors - -~~~~ -applyCommandAll [command] <*> - apply command on all test sub-directories - command: name of the command - *: to include 'unofficial' sub-directories, such as z_work - Type applyCommand to see available commands -~~~~ - - -### Using LearningTest under Windows - -Commands are available using command files (.cmd) located in directory LearningTest/cmd, that are simply wrappers to the python scripts: -- helpOptions -- testKhiops -- testCoclustering -- testKNI -- testAll -- applyCommand -- applyCommandAll - -Typical use -- open a shell -- run a command found in learningTest/cmd -- run the tests, for example -~~~ - TestKhiops r Standard Adult - TestKhiops r Standard - TestAll r -~~~ -- analyze results, for example -~~~ - ApplyCommand errors TestKhiops/Standard - ApplyCommandAll errors -~~~ - - -## Test methodology - -### Test hierarchy - -The set of non-regression tests is voluminous. In practice, the tests are run in stages: -- elementary: TestKhiops Standard IrisLight, less than one second -- standard: TestKhiops Standard, less than one minute -- all : TestAll, less than two hours -- complete: TestAll in KhiopsCompleteTests mode (see help_options), more than one day -- release: the multiplication of test conditions reinforces the tool's robustness - - TestAll under different platforms - - TestAll in sequential or parallel mode (cf. 
KhiopMPIProcessNumber) - - Test in debug mode for short test runs (cf KhiopsMinTestTime, KhiopsMaxTestTime) - - - diff --git a/test/LearningTest/cmd/python/apply_command.py b/test/LearningTest/cmd/python/apply_command.py deleted file mode 100644 index 766d15e5c..000000000 --- a/test/LearningTest/cmd/python/apply_command.py +++ /dev/null @@ -1,1479 +0,0 @@ -import os.path -import sys -import stat -import utils -import test_khiops -import check_results as cr -from test_dir_management import * - - -# Imports de pykhiops a effectuer au cas par cas dans chaque methode, car ralentissant trop les scripts -# import pykhiops as pk - - -def file_read_lines(file_name): - with open(file_name, "r", errors="ignore") as input_file: - lines = input_file.readlines() - return lines - - -def file_write_lines(file_path, lines): - with open(file_path, "w", errors="ignore") as the_file: - for line in lines: - the_file.write(line) - - -def file_search(file_name, search_text): - # search in a file - the_file = open(file_name, "r", errors="ignore") # Opens the file in read-mode - text = the_file.read() # Reads the file and assigns the value to a variable - the_file.close() # Closes the file (read session) - if text.find(search_text) >= 0: - return True - else: - return False - - -def file_content_search(file_lines, search_text): - # search in a file - for line in file_lines: - if line.find(search_text) >= 0: - return True - return False - - -def file_content_search_count(file_lines, search_text): - # search in a file - count = 0 - for line in file_lines: - if line.find(search_text) >= 0: - count += 1 - return count - - -def file_replace(file_name, source_text, replace_text): - # search/replace in a file - the_file = open(file_name, "r", errors="ignore") # Opens the file in read-mode - text = the_file.read() # Reads the file and assigns the value to a variable - the_file.close() # Closes the file (read session) - # Opens the file again, this time in write-mode - the_file = open(file_name, 
"w", errors="ignore") - # replaces all instance_number of our keyword - the_file.write(text.replace(source_text, replace_text)) - # and writes the whole output when done, wiping over the old contents of the file - the_file.close() # Closes the file (write session) - - -def file_compare(file_name1: str, file_name2: str, skip_patterns: list = None): - """Compare whether two file have the same content - :param file_name1: - :param file_name2: - :param skip_patterns: does not compare lines containing one of the string in the list of patterns - :return: - """ - compare_ok = os.path.isfile(file_name1) and os.path.isfile(file_name2) - lines1 = [] - lines2 = [] - if compare_ok: - file1 = open(file_name1, "r", errors="ignore") - lines1 = file1.readlines() - file1.close() - file2 = open(file_name2, "r", errors="ignore") - lines2 = file2.readlines() - file1.close() - compare_ok = len(lines1) == len(lines2) - if not compare_ok: - print( - "\tdifferent line number (" - + str(len(lines1)) - + " vs " - + str(len(lines2)) - + ")" - ) - if compare_ok: - for i in range(len(lines1)): - if skip_patterns is not None: - skip = False - for pattern in skip_patterns: - if pattern in lines1[i]: - skip = True - break - if skip: - continue - if lines1[i] != lines2[i]: - compare_ok = False - error_line = lines1[i].replace("\t", " ") - if len(error_line) > 70: - error_line = error_line[:70] + " ..." 
- print("\tdifferent line (" + str(i + 1) + "): " + error_line) - break - return compare_ok - - -def file_compare_line_number(file_name1: str, file_name2: str): - """Compare whether two file have the same number of lines - :param file_name1: - :param file_name2: - :return: - """ - compare_ok = os.path.isfile(file_name1) and os.path.isfile(file_name2) - if compare_ok: - file1 = open(file_name1, "r", errors="ignore") - lines1 = file1.readlines() - file1.close() - file2 = open(file_name2, "r", errors="ignore") - lines2 = file2.readlines() - file1.close() - compare_ok = len(lines1) == len(lines2) - return compare_ok - - -def apply_command_list(work_dir): - # list directory names, with reference results info - results_ref_dir, candidate_dirs = get_results_ref_dir(work_dir, show=True) - results_ref_info = "" - if results_ref_dir is None: - results_ref_info = ", invalid " + RESULTS_REF + " dirs " + str(candidate_dirs) - elif results_ref_dir == RESULTS_REF and len(candidate_dirs) == 0: - results_ref_info = ", missing " + RESULTS_REF + " dir" - elif len(candidate_dirs) >= 2: - results_ref_info = ( - ", used " + results_ref_dir + " dir among " + str(candidate_dirs) - ) - print(work_dir + results_ref_info) - - -def analyse_tests_results(work_dir): - """Analyse test directories for warning, errors or fatal errors - Returns: - - warning number - - erreur number - - message related to special files (optional) - - message related to file extensions (optional) - - specific message (optional) - - portability message (optional) - """ - - def extract_number(message): - assert message != "" - fields = message.split() - assert fields[0].isdigit() - number = int(fields[0]) - return number - - # Traitement des erreurs memorisee dans le log - log_file_name = os.path.join(work_dir, COMPARISON_RESULTS_LOG) - error_number = 0 - warning_number = 0 - special_file_message = "" - message_extension = "" - specific_message = "" - portability_message = "" - if os.path.isfile(log_file_name): - 
log_file = open(log_file_name, "r", errors="ignore") - begin_summary = False - for line in log_file: - line = line.strip() - # Recherche du debug de la synthese - if line == cr.SUMMARY_TITLE: - begin_summary = True - - # Analyse de la synthese - if begin_summary: - if line.find(cr.SUMMARY_WARNING_KEY) >= 0: - warning_number = extract_number(line) - if line.find(cr.SUMMARY_ERROR_KEY) >= 0: - error_number = extract_number(line) - for key in cr.SUMMARY_SPECIAL_FILE_KEYS: - if line == key: - special_file_message = key - break - if line.find(cr.SUMMARY_FILE_TYPES_KEY) == 0: - message_extension = line - if line.find(cr.SUMMARY_NOTE_KEY) == 0: - specific_message = line - if line.find(cr.SUMMARY_PORTABILITY_KEY) == 0: - portability_message = line - if not begin_summary: - assert error_number == 0 - error_number = 1 - specific_message = ( - "Section '" - + cr.SUMMARY_TITLE - + "' not found in " - + COMPARISON_RESULTS_LOG - ) - - # Fermeture du fichier - log_file.close() - else: - error_number = 1 - specific_message = "The test has not been launched" - return ( - warning_number, - error_number, - special_file_message, - message_extension, - specific_message, - portability_message, - ) - - -def apply_command_errors(work_dir): - # list test directories with errors or warnings - # outpout in standard output stream - test_dir_name = os.path.basename(work_dir) - family_dir_name = os.path.basename(os.path.dirname(work_dir)) - tool_name = os.path.basename(os.path.dirname(os.path.dirname(work_dir))) - ( - warning_number, - error_number, - special_file_message, - message_extension, - specific_message, - portability_message, - ) = analyse_tests_results(work_dir) - if ( - warning_number != 0 - or error_number != 0 - or special_file_message != "" - or portability_message != "" - ): - message = "\t" + tool_name + "\t" - message += family_dir_name + "\t" - message += test_dir_name + "\t" - if warning_number > 0: - message += "warnings\t" + str(warning_number) + "\t" - else: - message += 
"\t\t" - if error_number > 0: - message += "errors\t" + str(error_number) + "\t" - else: - message += "\t\t" - if special_file_message != "": - message += special_file_message - message += "\t" + message_extension - message += "\t" + specific_message - message += "\t" + portability_message - print(message) - - -def apply_command_logs(work_dir): - # list test directories with logs - # outpout in standard output stream - dir_name = os.path.basename(work_dir) - root_name = os.path.basename(os.path.dirname(work_dir)) - ( - warning_number, - error_number, - special_file_message, - message_extension, - specific_message, - portability_message, - ) = analyse_tests_results(work_dir) - if ( - warning_number != 0 - or error_number != 0 - or special_file_message != "" - or portability_message != "" - ): - log_file_name = os.path.join(work_dir, COMPARISON_RESULTS_LOG) - if os.path.isfile(log_file_name): - print("==================================================================") - print(root_name + " " + dir_name) - print("==================================================================") - print(log_file_name) - log_file = open(log_file_name, "r", errors="ignore") - for s in log_file: - s = s.replace("\n", "") - print(" " + s) - log_file.close() - - -def apply_command_compare_times(work_dir, verbose=False): - def print_log_message(message): - print("\t" + tool_name + "\t" + root_name + "\t" + dir_name + "\t" + message) - - def clean_time_value(value): - found_pos = value.find(" (") - if found_pos >= 0: - value = value[:found_pos] - found_pos = value.find(")") - if found_pos >= 0: - value = value[:found_pos] - return value - - def time_to_seconds(value): - fields = value.split(":") - computed_seconds = ( - float(fields[0]) * 3600 + float(fields[1]) * 60 + float(fields[2]) - ) - return computed_seconds - - dir_name = os.path.basename(work_dir) - root_name = os.path.basename(os.path.dirname(work_dir)) - tool_name = os.path.basename(os.path.dirname(os.path.dirname(work_dir))) - 
# Recherche du repertoire des resultats de reference - results_dir_err_file = os.path.join(work_dir, RESULTS, ERR_TXT) - results_ref, _ = get_results_ref_dir(work_dir, show=verbose) - is_valid = results_ref is not None - if is_valid: - results_ref_dir_err_file = os.path.join(work_dir, results_ref, ERR_TXT) - if not os.path.isfile(results_dir_err_file): - is_valid = False - if verbose: - print_log_message( - "\t\t\t\t\terror : missing " - + ERR_TXT - + " file in " - + RESULTS - + " dir" - ) - if is_valid and not os.path.isfile(results_ref_dir_err_file): - is_valid = False - if verbose: - print_log_message( - "\t\t\t\t\terror : missing " - + ERR_TXT - + " file in " - + results_ref - + " dir" - ) - if is_valid: - with open(results_dir_err_file, "r", errors="ignore") as fErr: - lines = fErr.readlines() - with open(results_ref_dir_err_file, "r", errors="ignore") as f_err_ref: - lines_ref = f_err_ref.readlines() - if len(lines) != len(lines_ref): - print_log_message( - "\t\t\t\t\terror : " + ERR_TXT + " files with different number of lines" - ) - else: - pattern = "time: " - for i in range(len(lines)): - line = lines[i] - line_ref = lines_ref[i] - if pattern in line_ref: - pos = line_ref.find(pattern) - time_label = line_ref[: pos + len(pattern)] - time_ref_value = line_ref[len(time_label) : -1] - if time_label not in line: - print_log_message( - str(i + 1) - + "\t" - + time_label[:-2] - + "\t" - + "???" 
- + "\t" - + clean_time_value(time_ref_value) - + "\tERROR no time found" - ) - else: - time_value = line[len(time_label) : -1] - seconds = time_to_seconds(clean_time_value(time_value)) - seconds_ref = time_to_seconds(clean_time_value(time_ref_value)) - warning = "" - if ( - seconds + seconds_ref > 0.3 - and abs(seconds - seconds_ref) > 0.1 - and abs(seconds - seconds_ref) - > 0.1 * (seconds + seconds_ref) / 2 - ): - warning = ( - "\tWARNING\t" - + str( - int( - (seconds - seconds_ref) - * 100.0 - / (seconds_ref if seconds_ref > 0 else 0.01) - ) - ) - + "%" - ) - if verbose or warning != "": - label = time_label[: -len(pattern) - 1] - pos = label.find("(") - if pos > 0: - label = label[pos + 1 :] - print_log_message( - str(i + 1) - + "\t" - + label - + "\t" - + clean_time_value(time_value) - + "\t" - + clean_time_value(time_ref_value) - + "\t" - + warning - ) - - -def apply_command_compare_times_verbose(work_dir): - apply_command_compare_times(work_dir, verbose=True) - - -def apply_command_performance(work_dir): - # list test directories with SNB performance - dir_name = os.path.basename(work_dir) - root_name = os.path.basename(os.path.dirname(work_dir)) - results_dir = os.path.join(work_dir, RESULTS) - if os.path.isdir(results_dir): - test_pattern = "TestEvaluationReport.xls" - for file_name in os.listdir(results_dir): - if test_pattern in file_name: - test_eval_file_name = os.path.join(results_dir, file_name) - test_eval_file = open(test_eval_file_name, "r", errors="ignore") - for s in test_eval_file: - if s.find("Selective Naive Bayes", 0) == 0: - # comma to avoid doubling "\n" - print( - root_name - + "\t" - + dir_name - + "\t" - + file_name[: -len(test_pattern)] - + "\t" - + s, - ) - test_eval_file.close() - - -def apply_command_performance_ref(work_dir): - # list test directories with SNB ref performance - dir_name = os.path.basename(work_dir) - root_name = os.path.basename(os.path.dirname(work_dir)) - results_ref, _ = get_results_ref_dir(work_dir, show=True) 
- if results_ref is not None: - results_dir = os.path.join(work_dir, results_ref) - if os.path.isdir(results_dir): - test_pattern = "TestEvaluationReport.xls" - for file_name in os.listdir(results_dir): - if test_pattern in file_name: - test_eval_file_name = os.path.join(results_dir, file_name) - test_eval_file = open(test_eval_file_name, "r", errors="ignore") - for s in test_eval_file: - if s.find("Selective Naive Bayes", 0) == 0: - # comma to avoid doubling "\n" - print( - root_name - + "\t" - + dir_name - + "\t" - + file_name[: -len(test_pattern)] - + "\t" - + s, - ) - test_eval_file.close() - - -def apply_command_check_fnb(work_dir): - from pykhiops import core as pk - - def to_s(value): - # return str(value) - return str("{:.4g}".format(value)) - - def print_stats( - result_file_name, report, criterion, new_value, ref_value, maximize - ): - fstats.write(tool_name + "\t" + root_name + "\t" + dir_name + "\t") - fstats.write(result_file_name + "\t" + report + "\t" + criterion + "\t") - fstats.write(to_s(new_value) + "\t" + to_s(ref_value) + "\t") - diff = new_value - ref_value - if maximize: - fstats.write(to_s(diff)) - alert = diff < 0 - else: - fstats.write(to_s(-diff)) - alert = diff > 0 - if alert and abs(diff) > 0.01 * (abs(ref_value) + abs(new_value)) / 2: - fstats.write("\tALERT") - fstats.write("\n") - - def print_error(message): - print(tool_name + "\t" + root_name + "\t" + dir_name + "\terror\t" + message) - - test_dir = os.path.join(work_dir, RESULTS) - ref_dir, _ = get_results_ref_dir(work_dir, show=True) - if ref_dir is None: - return - dir_name = os.path.basename(work_dir) - root_name = os.path.basename(os.path.dirname(work_dir)) - tool_name = os.path.basename(os.path.dirname(os.path.dirname(work_dir))) - - # Creation d'un fichier de collecte des stats - fstats = None - root_dir = os.path.dirname(os.path.dirname(os.path.dirname(work_dir))) - stats_file_name = os.path.join(root_dir, "stats.FNB.log") - if os.path.isfile(stats_file_name): - fstats = 
open(stats_file_name, "a", errors="ignore") - else: - fstats = open(stats_file_name, "w", errors="ignore") - fstats.write( - "Tool\tRoot\tDir\tFile\tReport\tCriterion\tValue\tRef value\tDiff\n" - ) - - if ref_dir is not None and os.path.isdir(ref_dir): - for file_name in os.listdir(ref_dir): - ref_file_path = os.path.join(ref_dir, file_name) - test_file_path = os.path.join(test_dir, file_name) - - ##### - if not os.path.isfile(test_file_path): - print_error("Missing file " + test_file_path) - continue - - # Comparaison du fichier d'erreur - if file_name == ERR_TXT: - if not file_compare( - ref_file_path, test_file_path, skip_patterns=["time"] - ): - print_error(file_name + " are different") - # Comparaison si preparation - elif "PreparationReport" in file_name: - if not file_compare( - ref_file_path, test_file_path, skip_patterns=["#Khiops "] - ): - print_error(file_name + " are different") - elif ".khj" in file_name: - # Read json result file - try: - ref_report = pk.AnalysisResults() - ref_report.read_khiops_json_file(ref_file_path) - test_report = pk.AnalysisResults() - test_report.read_khiops_json_file(test_file_path) - except Exception as e: - print_error(file_name + "\tparsing alert: " + str(e)) - continue - # Analyse modeling results - if ref_report.modeling_report is not None: - if test_report.modeling_report is None: - print_error(file_name + "\tmissing modeling report") - else: - ref_snb_predictor = ref_report.modeling_report.get_predictor( - "Selective Naive Bayes" - ) - test_snb_predictor = test_report.modeling_report.get_predictor( - "Selective Naive Bayes" - ) - if ref_snb_predictor is not None: - if test_snb_predictor is None: - print_error( - file_name - + "\tmissing SNB predictor in modeling report" - ) - else: - print_stats( - file_name, - test_report.modeling_report.report_type, - "Sel. 
vars", - test_snb_predictor.variables, - ref_snb_predictor.variables, - False, - ) - # Analyse evaluation results - ref_evaluation_reports = list() - test_evaluation_reports = list() - ref_evaluation_reports.append(ref_report.train_evaluation_report) - ref_evaluation_reports.append(ref_report.test_evaluation_report) - ref_evaluation_reports.append(ref_report.evaluation_report) - test_evaluation_reports.append(test_report.train_evaluation_report) - test_evaluation_reports.append(test_report.test_evaluation_report) - test_evaluation_reports.append(test_report.evaluation_report) - for i in range(len(ref_evaluation_reports)): - ref_evaluation_report = ref_evaluation_reports[i] - test_evaluation_report = test_evaluation_reports[i] - if ref_evaluation_report is not None: - if test_evaluation_report is None: - print_error( - file_name - + "\tmissing " - + ref_evaluation_report.report_type - + " " - + ref_evaluation_report.evaluation_type - + " report" - ) - else: - ref_snb_performance = ( - ref_evaluation_report.get_predictor_performance( - "Selective Naive Bayes" - ) - ) - test_snb_performance = ( - test_evaluation_report.get_predictor_performance( - "Selective Naive Bayes" - ) - ) - if ref_snb_performance is not None: - if test_snb_performance is None: - print_error( - file_name - + "\tmissing SNB performance in " - + ref_evaluation_report.report_type - + " " - + ref_evaluation_report.evaluation_type - + " report" - ) - else: - if test_snb_performance.type == "Classifier": - print_stats( - file_name, - ref_evaluation_report.report_type - + " " - + ref_evaluation_report.evaluation_type, - "accuracy", - test_snb_performance.accuracy, - ref_snb_performance.accuracy, - True, - ) - print_stats( - file_name, - ref_evaluation_report.report_type - + " " - + ref_evaluation_report.evaluation_type, - "compression", - test_snb_performance.compression, - ref_snb_performance.compression, - True, - ) - print_stats( - file_name, - ref_evaluation_report.report_type - + " " - + 
ref_evaluation_report.evaluation_type, - "auc", - test_snb_performance.auc, - ref_snb_performance.auc, - True, - ) - if test_snb_performance.type == "Regressor": - print_stats( - file_name, - ref_evaluation_report.report_type - + " " - + ref_evaluation_report.evaluation_type, - "rmse", - test_snb_performance.rmse, - ref_snb_performance.rmse, - False, - ) - print_stats( - file_name, - ref_evaluation_report.report_type - + " " - + ref_evaluation_report.evaluation_type, - "mae", - test_snb_performance.mae, - ref_snb_performance.mae, - False, - ) - print_stats( - file_name, - ref_evaluation_report.report_type - + " " - + ref_evaluation_report.evaluation_type, - "nlpd", - test_snb_performance.nlpd, - ref_snb_performance.nlpd, - False, - ) - # Ecriture des stats - fstats.close() - - -def apply_command_bench(work_dir): - # build bench parameter file - def extract_info(line): - start, end = line.split(" ", 1) - field, comment = end.split("//", 1) - return field - - # extraction des renseignement du fichier de parametrage - class_file_name = "" - class_name = "" - database_name = "" - target_attribute_name = "" - prm_file_name = os.path.join(work_dir, TEST_PRM) - prm_file = open(prm_file_name, "r", errors="ignore") - for s in prm_file: - if s.find("class_file_name") >= 0 and class_file_name == "": - class_file_name = extract_info(s) - if s.find("class_name") >= 0 and class_name == "": - class_name = extract_info(s) - if s.find("TrainDatabase.database_name") >= 0 and database_name == "": - database_name = extract_info(s) - if ( - s.find("AnalysisSpec.target_attribute_name") >= 0 - and target_attribute_name == "" - ): - target_attribute_name = extract_info(s) - prm_file.close() - # affichage des lignes de fichier de bencgmark correspondant - print("") - print("BenchmarkSpecs.InsertItemAfter // Insert after") - print("// -> Benchmark") - print("class_file_name " + class_file_name + " // Dictionary file") - print("class_name " + class_name + " // Dictionary") - 
print("target_attribute_name " + target_attribute_name + " // Target variable") - print("database_name " + database_name + " // Database file") - print("Exit // OK") - print("// <- Benchmark") - - -def apply_command_clean(work_dir): - # clean comparison log file - file_path = os.path.join(work_dir, cr.COMPARISON_LOG_FILE_NAME) - if os.path.isfile(file_path): - utils.remove_file(file_path) - - # clean test results directory - test_dir = os.path.join(work_dir, RESULTS) - if os.path.isdir(test_dir): - for file_name in os.listdir(test_dir): - file_path = os.path.join(test_dir, file_name) - utils.remove_file(file_path) - utils.remove_dir(test_dir) - - -def apply_command_clean_ref(work_dir): - # clean reference results directory - ref_dir, _ = get_results_ref_dir(work_dir, show=True) - if ref_dir is not None and os.path.isdir(ref_dir): - for file_name in os.listdir(ref_dir): - file_path = os.path.join(ref_dir, file_name) - utils.remove_file(file_path) - - -def apply_command_delete_ref(work_dir): - # delete reference results files and directories for all reference contexts - current_ref_dir, all_ref_dirs = get_results_ref_dir(work_dir, show=True) - if current_ref_dir is not None: - for ref_dir in all_ref_dirs: - for file_name in os.listdir(ref_dir): - file_path = os.path.join(ref_dir, file_name) - utils.remove_file(file_path) - utils.remove_dir(ref_dir) - - -def apply_command_make_ref(work_dir): - # copy results files to from test to reference dir - test_dir = os.path.join(work_dir, RESULTS) - ref_dir, _ = get_results_ref_dir(work_dir, show=True) - if ref_dir is not None: - if not os.path.isdir(ref_dir): - os.mkdir(ref_dir) - if os.path.isdir(ref_dir): - for file_name in os.listdir(ref_dir): - file_path = os.path.join(ref_dir, file_name) - utils.remove_file(file_path) - if os.path.isdir(test_dir) and os.path.isdir(ref_dir): - for file_name in os.listdir(test_dir): - utils.copy( - os.path.join(test_dir, file_name), os.path.join(ref_dir, file_name) - ) - - -def 
apply_command_copy_ref(work_dir): - # copy results files to from reference to test dir - test_dir = os.path.join(work_dir, RESULTS) - ref_dir, _ = get_results_ref_dir(work_dir, show=True) - if ref_dir is not None: - if not os.path.isdir(test_dir): - os.mkdir(test_dir) - if os.path.isdir(test_dir): - for file_name in os.listdir(test_dir): - file_path = os.path.join(test_dir, file_name) - utils.remove_file(file_path) - if os.path.isdir(test_dir) and os.path.isdir(ref_dir): - for file_name in os.listdir(ref_dir): - utils.copy( - os.path.join(ref_dir, file_name), os.path.join(test_dir, file_name) - ) - - -def apply_command_check_hdfs(work_dir): - def parameter_exist(line, searched_keyword): - # Test if there is at least one parameter in a line - fields = ( - line[line.find(searched_keyword) + len(searched_keyword) :] - .strip() - .split("//") - ) - return len(fields) > 0 and len(fields[0]) > 0 - - # Check compliance to HDFS system - keywords = [ - "class_file_name", - "ResultFilesDirectory", - ".database_name", - "DataTableName", - ".EvaluationFileName", - "ReportFileName", - "InputCoclusteringFileName", - ] - prm_file_name = os.path.join(work_dir, TEST_PRM) - print(work_dir) - with open(prm_file_name, "r", errors="ignore") as prm_file: - line_index = 1 - for s in prm_file: - # Test comments - if "//" in s: - comment_pos = s.find("//") - if ( - comment_pos > 0 - and s[comment_pos - 1] != " " - and s[comment_pos - 1] != "\t" - ): - if s[comment_pos + 2 :].find("//") >= 0: - print( - str(line_index) - + ": \tWARNING: Multiple '//' in line -> " - + s[:-1] - ) - else: - print( - str(line_index) - + ": \tComment without blank ' //' -> " - + s[:-1] - ) - # Test each keyword - for keyword in keywords: - if ( - s.find(keyword) >= 0 - and s.find(" ../../../datasets") <= 0 - and s.find(" ../../../MTdatasets") <= 0 - and s.find(" ./") <= 0 - ): - if parameter_exist(s, keyword): - print(str(line_index) + ": \t" + s[:-1]) - line_index += 1 - - -def 
apply_command_transform_hdfs(work_dir): - def parameter_exist(line, searched_keyword): - # Test if there is at least one parameter in a line and a value for this parameter - fields = ( - line[line.find(searched_keyword) + len(searched_keyword) :] - .strip() - .split("//") - ) - return len(fields) > 0 and len(fields[0]) > 0 - - # Create new file test.prm.hdfs compliant with hdfs - keywords = [ - "class_file_name", - "ResultFilesDirectory", - ".database_name", - ".DataTableName", - "ReportFileName", - "InputCoclusteringFileName", - ] - # PostProcessedCoclusteringFileName CoclusteringDictionaryFileName removed - # The name of the coclustering dictionary should not be a path - prm_file_name = os.path.join(work_dir, TEST_PRM) - prm_file = open(prm_file_name, "r", errors="ignore") - prm_file_lines = prm_file.readlines() - prm_file.close() - prm_file = open(prm_file_name, "w", errors="ignore") - for s in prm_file_lines: - new_line = s - # Test comments - if "//" in s: - comment_pos = s.find("//") - if ( - comment_pos > 0 - and s[comment_pos - 1] != " " - and s[comment_pos - 1] != "\t" - ): - if s[comment_pos + 2 :].find("//") >= 0: - print( - "\tWARNING: Multiple '//' in line (NO TRANSFORM) -> " + s[:-1] - ) - else: - new_line = s.replace("//", " //") - # Test each keyword - for keyword in keywords: - if ( - s.find(keyword) >= 0 - and s.find(" ../../../datasets") <= 0 - and s.find(" ../../../MTdatasets") <= 0 - and s.find(" ./") <= 0 - ): - if parameter_exist(s, keyword): - space_pos = s.find(" ") - new_line = s[: space_pos + 1] + "./" + s[space_pos + 1 :] - break - - # Special case for the token "EvaluationFileName", must not be confused with TestEvaluationFileName - if ( - s.find("EvaluationFileName") == 0 - and not s.find(" ./") - and parameter_exist(s, "EvaluationFileName") - ): - space_pos = s.find(" ") - new_line = s[: space_pos + 1] + "./" + s[space_pos + 1 :] - - prm_file.write(new_line) - prm_file.close() - # Transform errror file in reference results dir - do_it 
= False - results_ref, _ = get_results_ref_dir(work_dir, show=True) - if results_ref is not None: - err_ref_file_name = os.path.join(work_dir, results_ref, ERR_TXT) - if do_it and os.path.isfile(err_ref_file_name): - err_file = open(err_ref_file_name, "r", errors="ignore") - err_file_lines = err_file.readlines() - err_file.close() - err_file = open(err_ref_file_name, "w", errors="ignore") - for s in err_file_lines: - new_line = s - new_line = new_line.replace(" " + RESULTS + "/", " ./" + RESULTS + "/") - new_line = new_line.replace( - " " + RESULTS + "\\", " ./" + RESULTS + "\\" - ) - err_file.write(new_line) - err_file.close() - - -def escape_for_json(token): - return token.replace("/", "\\/") - - -def apply_command_transform_hdfs_results(work_dir): - hdfs_test_dir = "hdfs:///user/bguerraz/LearningTest/TestKhiops/" - hdfs_data_dir = "hdfs:///user/bguerraz/LearningTest/" - - std_data_dir = "../../../" - datasets = "datasets" - mt_datasets = "MTdatasets" - - head, sub_test_name = os.path.split(work_dir) - _, test_name = os.path.split(head) - - hdfs_local_dir = hdfs_test_dir + test_name + "/" + sub_test_name - - test_dir = os.path.join(work_dir, RESULTS) - if os.path.isdir(test_dir): - for file_name in os.listdir(test_dir): - file_path = os.path.join(test_dir, file_name) - - # Read in the file - with open(file_path, "r", errors="ignore") as file: - file_data = file.read() - - # search and replace - if ".khj" in file_name: - # datasets - file_data = file_data.replace( - escape_for_json(hdfs_data_dir + datasets), - escape_for_json(std_data_dir + datasets), - ) - file_data = file_data.replace( - escape_for_json(hdfs_data_dir + mt_datasets), - escape_for_json(std_data_dir + mt_datasets), - ) - - # current dir ./ - file_data = file_data.replace( - escape_for_json(hdfs_local_dir + "/" + RESULTS), - escape_for_json("./" + RESULTS), - ) # ou RESULTS sans "./"" ? 
- - # files in current dir - - # file_data = file_data.replace(escape_for_json( - # hdfs_local_dir+"/"), "") - file_data = file_data.replace( - escape_for_json(hdfs_local_dir + "/"), escape_for_json("./") - ) - - else: - # datasets - file_data = file_data.replace( - hdfs_data_dir + datasets, std_data_dir + datasets - ) - file_data = file_data.replace( - hdfs_data_dir + mt_datasets, std_data_dir + mt_datasets - ) - - # current dir ./ - file_data = file_data.replace( - hdfs_local_dir + "/" + RESULTS, "./" + RESULTS - ) - - # files in current dir - file_data = file_data.replace(hdfs_local_dir + "/", "./") - - # Write the file in place - os.chmod(file_path, stat.S_IWRITE | stat.S_IREAD) - with open(file_path, "w", errors="ignore") as output_file: - output_file.write(file_data) - - -def apply_command_make_ref_err(work_dir): - test_dir = os.path.join(work_dir, RESULTS) - ref_dir, _ = get_results_ref_dir(work_dir, show=True) - if ref_dir is not None: - if not os.path.isdir(ref_dir): - os.mkdir(ref_dir) - if os.path.isdir(ref_dir): - file_path = os.path.join(ref_dir, ERR_TXT) - utils.remove_file(file_path) - if os.path.isdir(test_dir) and os.path.isdir(ref_dir): - utils.copy(os.path.join(test_dir, ERR_TXT), os.path.join(ref_dir, ERR_TXT)) - - -def apply_command_make_ref_time(work_dir): - # copy time file to reference results dir - test_dir = os.path.join(work_dir, RESULTS) - ref_dir, _ = get_results_ref_dir(work_dir, show=True) - if ref_dir is not None: - if not os.path.isdir(ref_dir): - os.mkdir(ref_dir) - if os.path.isdir(ref_dir): - file_path = os.path.join(ref_dir, TIME_LOG) - if os.path.isfile(file_path): - utils.remove_file(file_path) - if os.path.isdir(test_dir) and os.path.isdir(ref_dir): - utils.copy( - os.path.join(test_dir, TIME_LOG), - os.path.join(ref_dir, TIME_LOG), - ) - - -def apply_command_work(work_dir): - test_dir = os.path.join(work_dir, RESULTS) - ref_dir, _ = get_results_ref_dir(work_dir, show=True) - if ref_dir is None: - return - dir_name = 
os.path.basename(work_dir) - root_name = os.path.basename(os.path.dirname(work_dir)) - tool_name = os.path.basename(os.path.dirname(os.path.dirname(work_dir))) - - # Transformation du fichier .prm - transform_prm = False - if transform_prm: - file_path = os.path.join(work_dir, TEST_PRM) - try: - lines = file_read_lines(file_path) - with open(file_path, "w", errors="ignore") as the_file: - for line in lines: - if line.find("EpsilonBinNumber") >= 0: - continue - if line.find("OutlierManagementHeuristic") >= 0: - continue - if line.find("OptimalAlgorithm") >= 0: - continue - if line.find("EpsilonBinWidth") >= 0: - continue - if line.find("MaxIntervalNumber") >= 0: - continue - if line.find("HistogramCriterion") >= 0: - continue - if line.find("MaxHierarchyLevel") >= 0: - continue - the_file.write(line) - except Exception as e: - print("BUG: " + file_path + " : " + str(e)) - - # Parcours du repertoire de reference - compare_histograms = True - if compare_histograms: - print("COMPARE " + work_dir) - indicators = [ - "Null cost", - "Reference null cost", - "Cost", - "Level", - "Partition cost", - ] - if os.path.isdir(ref_dir): - for file_name in os.listdir(ref_dir): - ref_file_path = os.path.join(ref_dir, file_name) - test_file_path = os.path.join(test_dir, file_name) - if not os.path.isfile(test_file_path): - print("Missing ref file: " + file_name) - elif "istogram.log" in file_name: - ref_lines = file_read_lines(ref_file_path) - test_lines = file_read_lines(test_file_path) - ref_indicators = {} - test_indicators = {} - ref_histogram = [] - test_histogram = [] - # Analyse des resultats de references - for line in ref_lines: - # Collecte des indicateurs - for indicator in indicators: - if len(line) < 70 and indicator in line: - fields = line[:-1].split("\t") - try: - ref_indicators[indicator] = float( - fields[len(fields) - 1] - ) - except Exception as e: - print( - " " - + file_name - + ": Ref conversion error: " - + line[:-1] - + " " - + str(e) - ) - # Collectes des 
lignes de l'histogramme - if ( - len(ref_histogram) > 0 - or "Lower bound\tUpper bound\tFrequency" in line - ): - ref_histogram.append(line) - # Analyse des resultats de test - for line in test_lines: - # Collecte des indicateurs - for indicator in indicators: - if len(line) < 70 and indicator in line: - fields = line[:-1].split("\t") - try: - test_indicators[indicator] = float( - fields[len(fields) - 1] - ) - except Exception as e: - print( - " " - + file_name - + ": Test conversion error: " - + line[:-1] - + " " - + str(e) - ) - # Collectes des lignes de l'histogramme - if ( - len(test_histogram) > 0 - or "Lower bound\tUpper bound\tFrequency" in line - ): - test_histogram.append(line) - # Comparaison des resultats - for indicator in indicators: - ref_value = ref_indicators[indicator] - test_value = test_indicators[indicator] - if ( - abs(ref_value - test_value) - > abs(ref_value + test_value) / 100000 - ): - print( - " " - + file_name - + ": Difference in " - + indicator - + ": " - + str(ref_value) - + " vs " - + str(test_value) - ) - if len(ref_histogram) != len(test_histogram): - print( - " " - + file_name - + ": Difference in interval number: " - + str(len(ref_histogram) - 1) - + " vs " - + str(len(test_histogram) - 1) - ) - else: - for i in range(len(ref_histogram)): - ref_line = ref_histogram[i] - test_line = test_histogram[i] - ref_line_fields = ref_line.split("\t") - test_line_fields = test_line.split("\t") - # Comparaison des 9 permiers champs - compare_ok = True - for f in range(8): - compare_ok = ( - compare_ok - and ref_line_fields[f] == test_line_fields[f] - ) - if not compare_ok: - print( - " " - + file_name - + ": Difference in interval " - + str(i) - + " field " - + str(f + 1) - + ": \n\t" - + ref_line - + "\t" - + test_line - ) - break - - -def apply_command_template(work_dir): - test_dir = os.path.join(work_dir, RESULTS) - ref_dir, _ = get_results_ref_dir(work_dir, show=True) - if ref_dir is None: - return - dir_name = os.path.basename(work_dir) 
- root_name = os.path.basename(os.path.dirname(work_dir)) - tool_name = os.path.basename(os.path.dirname(os.path.dirname(work_dir))) - - -def register_command( - available_commands: dict, command_id: str, command_function, command_label: str -): - """Register a command in a dictionnary of commands""" - assert command_id not in available_commands - assert command_id != "" - assert command_label != "" - available_commands[command_id] = (command_function, command_label) - - -def display_commands(available_commands: dict, max_number=None): - """Display available commands, with their id and label""" - assert max_number is None or max_number > 0 - # Print generic info - print("apply_command [command] [root_path] ([dir_name])") - print(" apply command on a directory structure") - print("\tcommand: name of the command") - print("\trootPath is the path of the root directory") - print("\tdirName is the name of one specific sub-directory") - print("\t or all (default) for executing on all sub-directories") - print(" example: applyCommand list TestKhiops\\Standard") - print(" example: applyCommand list TestKhiops\\Standard Adult") - print("\n List of available standard commands (* for all commands):") - # Print list of available commands - for i, command_id in enumerate(available_commands): - if max_number is None or i < max_number: - (command_function, command_label) = available_commands[command_id] - print("\t" + command_id + ": " + command_label) - - -def execute_command( - available_commands: dict, command_id, root_path, test_dir_name=None -): - """Internal use. 
- Same as apply_command, with a dictionnary of commands as first parameter - """ - assert command_id != "" - assert root_path != "" - # Verification des operandes - if command_id not in available_commands: - print("error: wrong command " + command_id) - exit(0) - if not os.path.isdir(root_path): - print("error: root directory " + root_path + " does not exist") - exit(0) - root_path = os.path.abspath(root_path) - # Verification de l'utilisation d'un repertoire de test d'un des outils - tool_ok = False - for name in test_khiops.khiops_tool_names: - tool_directory = test_khiops.khiops_test_sub_dirs[name] - tool_ok = tool_ok or tool_directory in root_path - if not tool_ok: - print( - "error: root directory " - + root_path - + " should contain a test dir for one of the tools " - + str(test_khiops.khiops_tool_names) - .replace("'", "") - .replace("[", "(") - .replace("]", ")") - ) - exit(0) - # Recherche des sous-repertoires a exploiter - test_list = [] - if test_dir_name is None: - for name in os.listdir(root_path): - if os.path.isdir(os.path.join(root_path, name)): - test_list.append(name) - else: - if os.path.isdir(os.path.join(root_path, test_dir_name)): - test_list.append(test_dir_name) - else: - print( - "error: sub-directory " - + test_dir_name - + " of " - + root_path - + " does not exist" - ) - exit(0) - if len(test_list) == 0: - print("error: no sub-directory is available in " + root_path) - exit(0) - # Sort test list - test_list.sort() - # Execution de la commande - (command_function, command_label) = available_commands[command_id] - for name in test_list: - # lanceur de commande sur un directory - work_dir = os.path.join(root_path, name) - # verification de l'existence du directory - if not os.path.isdir(work_dir): - print("error: directory " + work_dir + " does not exist") - return 0 - # Lancement de la commande dans son repertoire de travail - os.chdir(work_dir) - command_function(work_dir) - os.chdir(root_path) - # Message synthetique de fin si famille de 
jeu de tests - family_dir_name = os.path.basename(root_path) - tool_test_sub_dir = os.path.basename(os.path.dirname(root_path)) - if test_dir_name is None: - print("DONE\t" + tool_test_sub_dir + "\t" + family_dir_name) - - -def register_all_commands(): - """Register all available commands - Return a dictionary of the registered commands - """ - # Gestion de l'ensemble des commandes dans un dictionnaire contenant pour chaque identifiant de commande - # une paire (comannd, libelle) - all_available_commands = {} - - # Enregistrement des commandes standard - register_command( - all_available_commands, - "list", - apply_command_list, - "list of sub-directories, with results.ref info", - ) - register_command( - all_available_commands, - "errors", - apply_command_errors, - "report errors and warnings", - ) - register_command( - all_available_commands, - "logs", - apply_command_logs, - "detailed report errors and warnings", - ) - register_command( - all_available_commands, - "compareTimes", - apply_command_compare_times, - "compare time with ref time and report warnings only", - ) - register_command( - all_available_commands, - "compareTimesVerbose", - apply_command_compare_times_verbose, - "compare time with ref time and report all", - ) - register_command( - all_available_commands, - "performance", - apply_command_performance, - "report SNB test accuracy", - ) - register_command( - all_available_commands, - "performanceRef", - apply_command_performance_ref, - "report ref SNB test accuracy", - ) - register_command( - all_available_commands, - "clean", - apply_command_clean, - "delete test result files and comparison log file", - ) - register_command( - all_available_commands, - "cleanref", - apply_command_clean_ref, - "delete reference result files for current reference context", - ) - register_command( - all_available_commands, - "deleteref", - apply_command_delete_ref, - "delete reference result files and directories for all reference context", - ) - register_command( - 
all_available_commands, - "makeref", - apply_command_make_ref, - "copy test result files to reference dir for current reference context", - ) - register_command( - all_available_commands, - "copyref", - apply_command_copy_ref, - "copy reference result files to test dir for current reference context", - ) - register_command( - all_available_commands, - "checkHDFS", - apply_command_check_hdfs, - "check if parameter files are compliant with HDFS", - ) - register_command( - all_available_commands, - "transformHDFS", - apply_command_transform_hdfs, - "transform parameter files to be compliant with HDFS", - ) - register_command( - all_available_commands, - "transformHDFSresults", - apply_command_transform_hdfs_results, - "transform results files to be compliant with HDFS", - ) - standard_command_number = len(all_available_commands) - - # Enregistrement des commandes internes - register_command( - all_available_commands, - "makereftime", - apply_command_make_ref_time, - "copy time file to reference results dir", - ) - register_command( - all_available_commands, - "makereferr", - apply_command_make_ref_err, - "copy err file to reference results dir", - ) - register_command( - all_available_commands, - "bench", - apply_command_bench, - "build bench parameter file", - ) - register_command( - all_available_commands, - "checkfnb", - apply_command_check_fnb, - "check fnb results (deprecated)", - ) - register_command( - all_available_commands, - "work", - apply_command_work, - "last work command (temporary and anonymous)", - ) - return all_available_commands, standard_command_number - - -if __name__ == "__main__": - all_commands, standard_command_number = register_all_commands() - - # Affichage des commandes si pas de parametres ou mauvais nombre de parametres - if len(sys.argv) <= 2: - display_all = len(sys.argv) == 2 and sys.argv[1] == "*" - display_commands( - all_commands, max_number=None if display_all else standard_command_number - ) - exit(0) - - # Recherche des 
parametres sur la ligne de commande - command_param = sys.argv[1] - root_path_param = sys.argv[2] - if len(sys.argv) == 4: - test_dir_param = sys.argv[3] - else: - test_dir_param = None - - # Lancement de la commande - execute_command(all_commands, command_param, root_path_param, test_dir_param) diff --git a/test/LearningTest/cmd/python/apply_command_all.py b/test/LearningTest/cmd/python/apply_command_all.py deleted file mode 100644 index 2d27e7e65..000000000 --- a/test/LearningTest/cmd/python/apply_command_all.py +++ /dev/null @@ -1,54 +0,0 @@ -import os.path -import sys -import learning_test_env -import apply_command -import test_khiops -import test_families - -if __name__ == "__main__": - all_commands, standard_command_number = apply_command.register_all_commands() - - include_unofficial_sub_dirs = False - ok = len(sys.argv) == 2 - if not ok: - ok = len(sys.argv) == 3 and sys.argv[2] == "*" - include_unofficial_sub_dirs = ok - if not ok: - print("applyCommandAll [command] <*>") - print(" apply command on all test sub-directories") - print("\tcommand: name of the command") - print("\t*: to include 'unofficial' sub-directories, such as z_work") - print(" Type applyCommand to see available commands") - exit(0) - - # Acces to command - command = sys.argv[1] - - # Browse main directories to fin test directories per tool - for khiops_tool_name in test_khiops.khiops_tool_names: - exe_name, test_sub_dir = test_khiops.retrieve_tool_info(khiops_tool_name) - tool_test_path = os.path.join( - learning_test_env.learning_test_root, "LearningTest", test_sub_dir - ) - # Get standard families to initialize directories to use - test_family = test_families.get_test_family(khiops_tool_name) - used_dir_names = test_family.copy() - # Add unofficial test directories if requested - if include_unofficial_sub_dirs: - # Sort all actual directories by name to ensure stability accross platforms - all_dir_names = os.listdir(tool_test_path) - all_dir_names.sort() - for dir_name in 
all_dir_names: - if not dir_name in test_family: - # Unofficial directopries are with an '_' in second char (e.g. z_work) - if dir_name.find("_") == 1: - root_path = os.path.join(tool_test_path, dir_name) - if os.path.isdir(root_path): - used_dir_names.append(dir_name) - # Execute command on all used directories - for dir_name in used_dir_names: - root_path = os.path.join(tool_test_path, dir_name) - if os.path.isdir(root_path): - apply_command.execute_command(all_commands, command, root_path) - else: - print("error: directory not found: " + root_path) diff --git a/test/LearningTest/cmd/python/help_options.py b/test/LearningTest/cmd/python/help_options.py deleted file mode 100644 index a6c60c714..000000000 --- a/test/LearningTest/cmd/python/help_options.py +++ /dev/null @@ -1,95 +0,0 @@ -import os -from test_dir_management import * - -print( - "KhiopsBatchMode: " - + str(os.getenv("KhiopsBatchMode")) - + "\n\ttrue, false (default: true)" -) - -print( - "KhiopsMinTestTime: " - + str(os.getenv("KhiopsMinTestTime")) - + "\n\trun only tests where run time (in file " - + TIME_LOG - + ") is beyond a threshold" -) - -print( - "KhiopsMaxTestTime: " - + str(os.getenv("KhiopsMaxTestTime")) - + "\n\trun only tests where run time (in file " - + TIME_LOG - + ") is below a threshold" -) - -print( - "KhiopsTestTimeoutLimit: " - + str(os.getenv("KhiopsTestTimeoutLimit")) - + "\n\tkill overlengthy process (default: 300 s)" -) - -print( - "KhiopsMPIProcessNumber: " - + str(os.getenv("KhiopsMPIProcessNumber")) - + "\n\tNumber of MPI process in paralle mode (default: None)" -) - -print( - "KhiopsExpertMode: " - + str(os.getenv("KhiopsExpertMode")) - + "\n\tKhiops expert mode true, false (default: false)" -) - -print( - "KhiopsTaskFileMode: " - + str(os.getenv("KhiopsTaskFileMode")) - + "\n\tCreate a task file task.log (-t option) (default: None)" -) - -print( - "KhiopsOutputScenarioMode: " - + str(os.getenv("KhiopsOutputScenarioMode")) - + "\n\tCreate an output scenario test.output.prm 
(-o option) (default: None)" -) - -print( - "KhiopsCompleteTests: " - + str(os.getenv("KhiopsCompleteTests")) - + "\n\tPerform all tests, even the longest ones (default: false)" -) - - -print("") -print( - "KhiopsComparisonPlatform: " - + str(os.getenv("KhiopsComparisonPlatform")) - + "\n\tplatform (Windows, Linux, Darwin, WSL) used to compare test results (default: None, to use that of current OS)" -) - -print( - "KhiopsPreparationTraceMode: " - + str(os.getenv("KhiopsPreparationTraceMode")) - + "\n\tTrace for dimensionnining of preparation tasks (default: false)" -) -print( - "KhiopsParallelTrace: " - + str(os.getenv("KhiopsParallelTrace")) - + "\n\tTrace for parallel tasks (0 to 3)" -) - -print( - "Analysis of memory stats\n" - + "\tKhiopsMemStatsLogFileName: " - + str(os.getenv("KhiopsMemStatsLogFileName")) - + ", memory stats log file name\n" - + "\tKhiopsMemStatsLogFrequency: " - + str(os.getenv("KhiopsMemStatsLogFrequency")) - + ", frequency of allocator stats collection (0, 100000, 1000000,...) 
\n" - + "\tKhiopsMemStatsLogToCollect: " - + str(os.getenv("KhiopsMemStatsLogToCollect")) - + ", stats to collect (8193: only time and labels, 16383: all,...)\n" - + "\tKhiopsIOTraceMode: " - + str(os.getenv("KhiopsIOTraceMode")) - + ", to collect IO trace (false, true)" -) diff --git a/test/LearningTest/cmd/python/learning_test.config b/test/LearningTest/cmd/python/learning_test.config deleted file mode 100644 index 55a616012..000000000 --- a/test/LearningTest/cmd/python/learning_test.config +++ /dev/null @@ -1,20 +0,0 @@ -# The config file learning_test.config must be in directory LearningTest\cmd\python -# It is optional, in which case all keys are set to empty -# It contains the following key=value pairs that allows a personnalisation of the environment: -# - path: additional path (eg: to access to java runtime) -# - classpath: additional classpath for java libraries -# - learningtest_root: alternative root dir to use where LearningTest is located -# - learning_release_dir: dir where the release developement binaries are located (to enable the 'r' alias') -# - learning_debug_dir: dir where the debug developement binaries are located (to enable the 'd' alias') - -# Uncomment the following keys by removing the leading '#' and assigning a path - -# path= - -# classpath= - -# learningtest_root= - -# learning_release_dir= - -# learning_debug_dir= diff --git a/test/LearningTest/cmd/python/learning_test_env.py b/test/LearningTest/cmd/python/learning_test_env.py deleted file mode 100644 index 70194cbc1..000000000 --- a/test/LearningTest/cmd/python/learning_test_env.py +++ /dev/null @@ -1,234 +0,0 @@ -import os - -""" -Specification of test environment. -A config file (see below) allow to personalize the test environment. - -The launch of a test from LearningTest can used an executable with its full path as as parameter. -This is the standard way, used for example under linux platforms. 
-For a Khiops developer, it may convenient to use directly the executable obtained from compilation, -either with is release or debug version. -The choice of debug versus release is made using a parameter (r or d) instead of the full path of the executable. -To benefit from these "short cut" parameters, the config file must be correctly specified. - -""" - -""" name of config file """ -learning_test_config_file_name = "learning_test.config" - -""" List of keys in config file """ -learning_test_config_keys = { - "path": "additional path (eg: to access to java runtime)", - "classpath": "additional classpath for java libraries", - "learningtest_root": "alternative root dir to use where LearningTest is located", - "learning_release_dir": "dir where the release developement binaries are located (to enable the 'r' alias')", - "learning_debug_dir": "dir where the debug developement binaries are located (to enable the 'd' alias')", -} - - -def load_learning_test_config(): - """Load config file, check it - Return config in case of dictionary, and quit the program otherwise""" - ok = True - config_dic = {} - # Get the full path to the directory a Python file is contained in - containing_dir_path = os.path.dirname(os.path.realpath(__file__)) - # Get the file name of the config file - config_file_path = os.path.join(containing_dir_path, learning_test_config_file_name) - # If file does not exist, use empty values for each key - if not os.path.isfile(config_file_path): - for key in learning_test_config_keys: - config_dic[key] = "" - return config_dic - # Read file - if ok: - try: - with open(config_file_path, "r") as config_file: - lines = config_file.readlines() - except Exception as e: - print( - "Error in config file " - + learning_test_config_file_name - + ": read error (" - + str(e) - + ")" - ) - ok = False - # Analyse key value pairs - if ok: - for n, line in enumerate(lines): - line = line.strip() - # Skip comment lines or empty lines - if len(line) == 0 or line.find("#") 
== 0: - continue - # Split key=value pair - fields = line.split("=") - # Test syntax - if len(fields) != 2: - print( - "error in config file " - + learning_test_config_file_name - + " line " - + str(n + 1) - + ": bad field number" - ) - ok = False - break - # Test validity of key - if not fields[0] in learning_test_config_keys: - print( - "error in config file " - + learning_test_config_file_name - + " line " - + str(n + 1) - + ": unknown key <" - + fields[0] - + ">" - ) - ok = False - break - else: - # Test unicity of key - if config_dic.get(fields[0]) is not None: - print( - "error in config file " - + learning_test_config_file_name - + " line " - + str(n + 1) - + ": key <" - + fields[0] - + "> not unique" - ) - ok = False - break - else: - config_dic[fields[0]] = fields[1] - # Fill missing keys with empty values, the as when the config file is missing - if ok: - if len(learning_test_config_keys) != len(config_dic): - for key in learning_test_config_keys: - if key not in config_dic: - config_dic[key] = "" - # Return if ok - if ok: - return config_dic - else: - # Print help message - print("") - print( - "The config file " - + learning_test_config_file_name - + " must be in directory LearningTest\\cmd\\python" - ) - print("It is optional, in which case all keys are set to empty") - print( - "It contains the following key=value pairs that allows a personnalisation of the environment:" - ) - for key in learning_test_config_keys: - print("\t" + key + ": " + learning_test_config_keys[key]) - quit() - - -""" Global config dictionary """ -learning_test_config = load_learning_test_config() - - -def search_learning_test_root(): - """Extract root directory of LearningTest""" - # Case where an alternative toot directory is specified in the config file - test_root = learning_test_config["learningtest_root"] - if test_root != "": - # Test if valid directory - if not os.path.isdir(test_root): - print( - "error in config file " - + learning_test_config_file_name - + ": key 
learningtest_root contains value <" - + test_root - + "> that is not a valid directory" - ) - quit() - if not os.path.isdir(os.path.join(test_root, "LearningTest")): - print( - "error in config file " - + learning_test_config_file_name - + ": key learningtest_root (" - + test_root - + ") should contain LearningTest dir" - ) - quit() - - # Case where an alternative toot directory is specified in the config file - else: - # Get the full path to the directory a Python file is contained in - containing_dir_path = os.path.dirname(os.path.realpath(__file__)) - assert "learningtest" in containing_dir_path.lower(), ( - "LearningTest dir not found in path " + containing_dir_path - ) - # Extract start of the path before "LearningTest" - path = containing_dir_path - while "learningtest" in path.lower(): - path = os.path.dirname(path) - test_root = path - return test_root - - -# Specification of path environment variable -path_env = learning_test_config["path"] -if path_env != "": - if os.environ.get("path") is None: - os.environ["path"] = path_env - else: - os.environ["path"] = path_env + ";" + os.environ["path"] - -# Specification of classpath environment variable -class_path_env = learning_test_config["classpath"] -if class_path_env != "": - if os.environ.get("CLASSPATH") is None: - os.environ["CLASSPATH"] = class_path_env - else: - os.environ["CLASSPATH"] = class_path_env + ";" + os.environ["CLASSPATH"] - -# Root dir for LearningTest -learning_test_root = search_learning_test_root() - - -def build_dev_tool_exe_path(exe_name, version): - """Build path of exe in developpement environmement for a given version (d: debug our r: release)""" - assert version in ["d", "r"], version + " must be d or r" - if version == "r": - config_key = "learning_release_dir" - else: - config_key = "learning_debug_dir" - learning_dev_dir = learning_test_config[config_key] - # Check directory - if learning_dev_dir == "": - print( - "error in config file " - + learning_test_config_file_name - + ": 
key " - + config_key - + " must be specified to use '" - + version - + "' alias" - ) - quit() - elif not os.path.isdir(learning_dev_dir): - print( - "error in config file " - + learning_test_config_file_name - + ": key " - + config_key - + " (" - + learning_dev_dir - + ") should be a valid directory" - ) - quit() - # Build tool path - tool_exe_path = os.path.join(learning_dev_dir, exe_name) - if os.name == "nt": - tool_exe_path += ".exe" - if not os.path.isfile(tool_exe_path): - print("error: excutable (" + tool_exe_path + ") should be a valid file") - quit() - return tool_exe_path diff --git a/test/LearningTest/cmd/python/make_learning_test_version.py b/test/LearningTest/cmd/python/make_learning_test_version.py deleted file mode 100644 index b40d7a814..000000000 --- a/test/LearningTest/cmd/python/make_learning_test_version.py +++ /dev/null @@ -1,209 +0,0 @@ -# Sauvegarde du contenu de LearningTest d'un directory local, sur le reseau, pour une version donnee - -# Variables d'environnement necessaires: -# netroot: racine des directories utilisateur sur le reseau - -# La sauvergarde de LearningTest se fait sous netroot\archive\dir\version - -import os -import sys -import os.path -import shutil -import learning_test_env -from test_dir_management import * - - -def make_learning_test_version(version: str, option: str = ""): - """ - Make version of LearningTest - :param version: version of Learning test - :param option: option (default: ""), - "scripts" for scritf files only - "dataset" for datasets only, - "reference" for default plus reference results - :return: - """ - - def is_empty_dir(source_dir, ignore_list=None): - """ - Check if a directory is empty - :param source_dir: name of files or directories to ignore silently - :param ignore_list: - :return: - """ - names = os.listdir(source_dir) - for name in names: - if ignore_list is not None: - if name not in names: - return False - elif RESULTS_REF in ignore_list and is_candidate_results_ref_dir(name): - return 
False - else: - return False - return True - - def copy_subdirs( - source_dir, target_dir, ignore_list=None, warning_list=None, script_only=False - ): - """ - Copy sub-directories fom source root to target root - :param source_dir: - :param target_dir: - :param ignore_list: name of files or directories to ignore silently - :param warning_list: name of files or directories to ignore with a warning - :param script_only: to indicate to save only script files - :return: - """ - # Return if directory is empty - if is_empty_dir(source_dir): - return - # Create target dir - if not os.path.isdir(target_dir): - os.mkdir(target_dir) - # Browse source dir content ot propagate copy - names = os.listdir(source_dir) - # Test whether we are in an elementary test directory - is_test_dir = False - for name in names: - if name == TEST_PRM: - is_test_dir = True - # Copy content - for name in names: - copy = True - if ignore_list is not None: - if name in ignore_list: - copy = False - elif RESULTS_REF in ignore_list and is_candidate_results_ref_dir(name): - copy = False - if warning_list is not None: - if name in warning_list: - copy = False - elif RESULTS_REF in warning_list and is_candidate_results_ref_dir(name): - copy = False - if not copy: - print( - "warning: found " - + name - + " in directory " - + source_root - + " (ignored)" - ) - if is_test_dir and script_only and name != TEST_PRM: - copy = False - if copy: - source_path = os.path.join(source_dir, name) - target_path = os.path.join(target_dir, name) - if os.path.isdir(source_path): - try: - os.mkdir(target_path) - except (IOError, os.error) as why: - print( - "Cannot create directory %s: %s" % (target_path, str(why)) - ) - copy_subdirs( - source_path, - target_path, - ignore_list=ignore_list, - warning_list=warning_list, - script_only=(option == "scripts"), - ) - elif os.path.isfile(source_path): - try: - shutil.copyfile(source_path, target_path) - except (IOError, os.error) as why: - print( - "Cannot copy file %s to %s: 
%s" - % (source_path, target_path, str(why)) - ) - - assert ( - option == "" - or option == "datasets" - or option == "references" - or option == "scripts" - ) - # check environment variables - netroot = os.getenv("NETROOT") - if netroot is None: - print("variable NETROOT must be defined") - exit(0) - - # Get source dir - source_root = os.path.join(learning_test_env.learning_test_root, "LearningTest") - - # Le repertoire source sera sauvegarde sous netroot\archive\dir\version - target_root = os.path.join(netroot, "archive", "LearningTest", version) - if option != "": - target_root = target_root + "_" + option - - # Verification de la version a creer - if os.path.isdir(target_root): - option_label = "" if option == "" else "(" + option + ")" - print( - "Version " - + version - + " of LearningTest" - + option_label - + " already exists on " - + os.path.join(netroot, "archive") - ) - exit(0) - - # creation du directory de la version a sauvegarder - if not os.path.isdir(target_root): - os.makedirs(target_root) - - # Parametrage des noms de fichiers ou repertoire specifiques - dataset_dirs = ["datasets", "MTdatasets", "TextDatasets", "UnusedDatasets"] - test_dirs = ["cmd", "doc", "TestCoclustering", "TestKhiops", "TestKNITransfer"] - forbidden_names = ["__pycache__", "modl", RESULTS, COMPARISON_RESULTS_LOG] - if option != "references": - forbidden_names.append(RESULTS_REF) - - # Cas de la copie des jeux de donnees - if option == "datasets": - for dataset in dataset_dirs: - copy_subdirs( - os.path.join(source_root, dataset), - os.path.join(target_root, dataset), - warning_list=forbidden_names, - ) - # Cas de la copie des script et eventuellement des resultats de reference - else: - for dataset in test_dirs: - copy_subdirs( - os.path.join(source_root, dataset), - os.path.join(target_root, dataset), - ignore_list=forbidden_names, - script_only=True, - ) - - -if __name__ == "__main__": - # check parameters - if len(sys.argv) < 2 or len(sys.argv) > 3: - 
print("MakeLearningTestVersion [version] [Option]") - print( - "Copy LearningTest from local directory in a version under network archive directory" - ) - print( - " Copy most files, except from datasets, " - + RESULTS - + ", or " - + RESULTS_REF - + " directories, if no option is specified." - ) - print(" Available options:") - print(" scripts: only script files") - print(" references: default plus reference result files") - print(" datasets: copy only datasets") - exit(0) - elif len(sys.argv) == 2: - make_learning_test_version(sys.argv[1]) - elif len(sys.argv) == 3: - options = ["scripts", "references", "datasets"] - if sys.argv[2] in options: - make_learning_test_version(sys.argv[1], option=sys.argv[2]) - else: - print("MakeLearningTestVersion: Invalid option " + sys.argv[2]) - print("DONE") diff --git a/test/LearningTest/cmd/python/test_families.py b/test/LearningTest/cmd/python/test_families.py deleted file mode 100644 index 80c570818..000000000 --- a/test/LearningTest/cmd/python/test_families.py +++ /dev/null @@ -1,50 +0,0 @@ -import os - - -def get_test_family(tool): - """Return list of tes families per tool - Account for 'KhiopsCompleteTests' env var for extended families""" - # Khiops tool - if tool == "Khiops": - test_family = [ - "Standard", - "SideEffects", - "Rules", - "MissingValues", - "Advanced", - "Bugs", - "BugsMultiTables", - "MultipleTargets", - "MultiTables", - "DeployCoclustering", - "SparseData", - "SparseModeling", - "ParallelTask", - "NewPriorV9", - "DTClassification", - "VariableConstruction", - "NewV10", - "CrashTests", - "SmallInstability", - "CharacterEncoding", - ] - # V11 "KIInterpretation", - # V11 "Histograms", - # V11 "HistogramsLimits", - # V11 "TextVariables", - # Following tests are very long, unstable and not useful: - if os.getenv("KhiopsCompleteTests") == "true": - test_family.append("Classification") - test_family.append("MTClassification") - test_family.append("Regression") - test_family.append("ChallengeAutoML") - # V11 
test_family.append("TextClassification") - - # Coclustering tool - if tool == "Coclustering": - test_family = ["Standard", "Bugs", "NewPriorV9", "SmallInstability"] - - # KNI tool - if tool == "KNI": - test_family = ["Standard", "MultiTables", "SmallInstability"] - return test_family diff --git a/test/LearningTest/cmd/python/test_khiops.py b/test/LearningTest/cmd/python/test_khiops.py deleted file mode 100644 index 123843fbc..000000000 --- a/test/LearningTest/cmd/python/test_khiops.py +++ /dev/null @@ -1,661 +0,0 @@ -import os.path -import sys -import shutil -import stat -import subprocess -import time -import learning_test_env -import check_results -from test_dir_management import * - - -# mpiexec sous Windows -if os.name == "nt": - mpi_exe_name = "mpiexec.exe" -# mpiexec sous Linux -else: - mpi_exe_name = "mpiexec" - -""" -A chaque nom d'outil Khiops correspond un nom d'exe et un sous-repertoire de LearningTest associe. -On peut egalement specifier si l'outil est lancable en parallel ou non. - -Les listes et dictionnaires ci-dessous permettent d'ajouter des outils si besoin. 
-""" - -""" Liste des noms des outils Khiops """ -khiops_tool_names = ["Khiops", "Coclustering", "KNI"] - -""" Dictionnaire des noms d'executable avec le nom d'outil en cle """ -khiops_exe_names = { - "Khiops": "MODL", - "Coclustering": "MODL_Coclustering", - "KNI": "KNITransfer", -} - -""" Dictionnaire des noms des sous-repertoires de LearningTest avec le nom d'outil en cle """ -khiops_test_sub_dirs = { - "Khiops": "TestKhiops", - "Coclustering": "TestCoclustering", - "KNI": "TestKNITransfer", -} - -""" Liste des outils de Khiops qui tournent en parallele (les seuls que l'on peut lancer avec mpiexec) """ -khiops_parallel_tools = ["Khiops"] - - -def retrieve_tool_info(khiops_tool_name): - """Retrieve tool info from a Khiops tool name - return exe name, test sub dir related to tool - """ - assert khiops_tool_name in khiops_tool_names, print( - "toolName must in " + str(khiops_tool_names) - ) - exe_name = khiops_exe_names.get(khiops_tool_name) - test_sub_dir = khiops_test_sub_dirs.get(khiops_tool_name) - assert exe_name is not None and test_sub_dir is not None - return exe_name, test_sub_dir - - -def build_tool_exe_path(khiops_tool_exe_name, khiops_tool_version): - """Build tool exe path name from exe name and version""" - assert khiops_tool_version is not None - - # Version "nul" for results comparison only - if khiops_tool_version == "nul": - khiops_tool_exe_path = "nul" - # Version "d" or "r" for debug or release development version on windows - elif khiops_tool_version in ["d", "r"]: - khiops_tool_exe_path = learning_test_env.build_dev_tool_exe_path( - khiops_tool_exe_name, khiops_tool_version - ) - # Case where the full path of the exe if given - elif os.path.isfile(khiops_tool_version): - khiops_tool_exe_path = khiops_tool_version - # Case when the exe is in the LearningTest/cmd/mod directly with a name suffixed by "." + khiops_tool_version - # Actually, the LearningTest/cmd/mod can contains executables, such as e.g. 
MODL.V10.0.exe, to - # launch previous version of the tools, using the version in the command (ex: testkhiops V10.0 Standard) - else: - khiops_tool_exe_path = os.path.join( - learning_test_env.learning_test_root, - "LearningTest", - "cmd", - "modl", - khiops_tool_exe_name + "." + khiops_tool_version, - ) - if os.name == "nt": - khiops_tool_exe_path += ".exe" - - # Test if tool exe dir exists - if khiops_tool_version != "nul": - if not os.path.isfile(khiops_tool_exe_path): - print("Khiops tool path : " + khiops_tool_exe_path + " does not exist") - exit(1) - return khiops_tool_exe_path - - -def evaluate_tool(tool_exe_path, tool_test_family_path, test_name): - """Evaluation d'un outil sur un repertoire de test terminal et comparaison des resultats - Parametres: - - tool_exe_path: path de l'outil a tester, ou nul si on ne veut faire que la comparaison - - tool_test_family_path: repertoire racine du repertoire de test - - test_name: repertoire de test terminal""" - - def get_env_var_positive_value(env_var_name, is_int=False): - """Retourne la valeur numerique d'une variable d'environnement representant une duree - Renvoie None si la variable n'est pas definie - Sort du programme avec une erreur si elle ne correspond pas a une valeur numerique positive - """ - value = os.getenv(env_var_name) - if value is not None: - try: - if is_int: - value = int(value) - else: - value = float(value) - if value < 0: - raise ValueError("should be positive") - except ValueError as exception: - value = None - print( - "error : env var " - + env_var_name - + " (" - + str(os.getenv(env_var_name)) - + ") :", - exception, - ) - exit(1) - return value - - def filter_lines(lines, filtered_pattern): - """retourne les lignes sans celles contenant le pattern en parametre""" - output_lines = [] - for line in lines: - if filtered_pattern not in line: - output_lines.append(line) - return output_lines - - def filter_empty_lines(lines): - """retourne les lignes sans les lignes vides""" - output_lines = 
[] - for line in lines: - line = line.strip() - if line != "": - # En parallelle, une ligne vide contient le numero du process entre crochets - is_process_id = line[0] == "[" and line[-1] == "]" - if is_process_id: - is_process_id = line[1:-1].isdigit() - if not is_process_id: - output_lines.append(line) - return output_lines - - def filter_copyright_lines(lines): - """retourne les lignes sans les lignes de copyright, presentes en mode UI""" - output_lines = lines - is_copyright = False - if len(lines) >= 2: - copyright_line = lines[1].strip() - is_copyright = ( - copyright_line.find("(c)") >= 0 - and copyright_line.find("Orange - All rights reserved.") >= 0 - ) - if is_copyright: - output_lines = lines[2:] - return output_lines - - # check MODL path - if tool_exe_path != "nul": - if not os.path.isfile(tool_exe_path): - print("MODL path : " + tool_exe_path + " is not correct") - return 0 - test_dir = os.path.join(tool_test_family_path, test_name) - - # get executable path and set path for exe and dll - tool_dir = os.path.dirname(tool_exe_path) - if os.name == "nt": - initial_path = os.getenv("path") - os.environ["path"] = tool_dir + ";" + os.getenv("path") - else: - initial_path = os.getenv("LD_LIBRARY_PATH", "") - os.environ["LD_LIBRARY_PATH"] = ( - tool_dir + ":" + os.getenv("LD_LIBRARY_PATH", "") - ) - - # verification de l'integrite du repertoire de test - if not os.path.isdir(test_dir): - print("error: test " + test_name + " is not available") - return 0 - os.chdir(test_dir) - - # Extraction des repertoires principaux - family_dir_name = os.path.basename(tool_test_family_path) - tool_test_sub_dir = os.path.basename(os.path.dirname(tool_test_family_path)) - - # Recherche du nom du module Khiops - module_name = os.path.basename(tool_exe_path) - module_name = module_name.lower() - if "." 
in module_name: - fields = module_name.split(".") - module_name = fields[0] - - # Recherche des exe correspondant a des outils pouvant tourner en parallel - khiops_parallel_modules = [] - for name in khiops_tool_names: - if name in khiops_parallel_tools: - exe_name = khiops_exe_names[name] - khiops_parallel_modules.append(exe_name.lower()) - - # Recherche du contexte parallele - khiops_mpi_process_number = get_env_var_positive_value( - "KhiopsMPIProcessNumber", is_int=True - ) - if module_name not in khiops_parallel_modules: - khiops_mpi_process_number = None - - # Affichage du debut des tests ou de la comparaison - action_name = "Test" - if tool_exe_path == "nul": - action_name = "Comparison" - print( - "starting " - + action_name - + " " - + module_name - + " " - + family_dir_name - + " " - + test_name - + " (MPI: " - + str(khiops_mpi_process_number) - + ", platform: " - + get_context_platform_type() - + ")" - ) - - # Lancement des tests - if tool_exe_path != "nul": - # Recherche dans les variable d'environnement du paramtrage des temps min et max - # pour declencher les test selon le temps des resultat de reference, et de la limite de timeout - khiops_min_test_time = get_env_var_positive_value("KhiopsMinTestTime") - khiops_max_test_time = get_env_var_positive_value("KhiopsMaxTestTime") - khiops_test_timeout_limit = get_env_var_positive_value("KhiopsTestTimeoutLimit") - - # Recherche du temps des resultats de reference dans le fichier de temps - results_ref, _ = get_results_ref_dir(test_dir) - results_ref_test_time = None - if results_ref is not None: - time_file_name = os.path.join( - os.getcwd(), os.path.join(test_dir, results_ref, TIME_LOG) - ) - if os.path.isfile(time_file_name): - file_time = open(time_file_name, "r", errors="ignore") - lines = file_time.readlines() - file_time.close() - if len(lines) > 0: - line = lines[0] - line = line[:-1] - fields = line.split(":") - if len(fields) == 2: - time_field = fields[1] - try: - results_ref_test_time = 
float(time_field) - except ValueError: - results_ref_test_time = None - - # Arret si test trop long ou trop court - if results_ref_test_time is not None and ( - ( - ( - khiops_max_test_time is not None - and results_ref_test_time > khiops_max_test_time - ) - or ( - khiops_min_test_time is not None - and results_ref_test_time < khiops_min_test_time - ) - ) - ): - print( - test_name - + " test not launched (test time: " - + str(results_ref_test_time) - + ")\n" - ) - return - - # Nettoyage du repertoire de resultats - result_dir = os.path.join(test_dir, RESULTS) - if os.path.isdir(result_dir): - for file_name in os.listdir(result_dir): - file_path = os.path.join(result_dir, file_name) - try: - os.chmod(file_path, stat.S_IWRITE) - os.remove(file_path) - except Exception as e: - print("error: unable to remove file " + file_path + " : " + str(e)) - - # khiops en mode expert via une variable d'environnement - os.putenv("KhiopsExpertMode", "true") - # os.putenv('KhiopsForestExpertMode', 'true') - - # khiops en mode HardMemoryLimit via une variable d'environnement pour provoquer - # un plantage physique de l'allocateur en cas de depassement des contraintes memoires des scenarios - os.putenv("KhiopsHardMemoryLimitMode", "true") - - # khiops en mode crash test via une variable d'environnement - os.putenv("KhiopsCrashTestMode", "true") - - # Construction des parametres - khiops_params = [] - if khiops_mpi_process_number is not None: - khiops_params.append(mpi_exe_name) - # if platform.system() == "Darwin": - # khiops_params.append("-host") - # khiops_params.append("localhost") - # if platform.system() != "Windows": - # khiops_params.append("--allow-run-as-root") - # khiops_params.append("--oversubscribe") - khiops_params.append("-n") - khiops_params.append(str(khiops_mpi_process_number)) - khiops_params.append(tool_exe_path) - if os.getenv("KhiopsBatchMode") != "false": - khiops_params.append("-b") - khiops_params.append("-i") - khiops_params.append(os.path.join(os.getcwd(), 
TEST_PRM)) - khiops_params.append("-e") - khiops_params.append(os.path.join(os.getcwd(), test_dir, RESULTS, ERR_TXT)) - if os.getenv("KhiopsOutputScenarioMode") == "true": - khiops_params.append("-o") - khiops_params.append(os.path.join(os.getcwd(), "test.output.prm")) - if os.getenv("KhiopsTaskFileMode") == "true": - khiops_params.append("-p") - khiops_params.append(os.path.join(os.getcwd(), "task.log")) - - # Calcul d'un time_out en fonction du temps de reference, uniquement si celui est disponible - MIN_TIMEOUT = 300 - TIMEOUT_RATIO = 5 - MAX_TIMEOUT = 3600 - timeout = None - if results_ref_test_time is not None: - if khiops_test_timeout_limit is None: - khiops_test_timeout_limit = MIN_TIMEOUT - timeout = khiops_test_timeout_limit + TIMEOUT_RATIO * results_ref_test_time - - # Lancement de khiops - MAX_RUN_NUMBER = 3 - timeout_expiration_lines = [] - overall_time_start = time.time() - for run_number in range(MAX_RUN_NUMBER): - run_completed = True - time_start = time.time() - with subprocess.Popen( - khiops_params, - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - ) as khiops_process: - try: - stdout, stderr = khiops_process.communicate(timeout=timeout) - except subprocess.TimeoutExpired: - run_completed = False - khiops_process.kill() - stdout, stderr = khiops_process.communicate() - time_stop = time.time() - # Memorisation du probleme en cas d'echec - if not run_completed: - killing_time = time_stop - time_start - timeout_expiration_lines.append( - "Trial " - + str(run_number + 1) - + " : process killed after " - + "{:.1f}".format(killing_time) - + "s (reference time=" - + "{:.1f}".format(results_ref_test_time) - + "s)" - ) - # Arret si ok - if run_completed: - break - # Arret si on a depense globalement trop de temps - overall_time = time_stop - overall_time_start - if overall_time > MAX_TIMEOUT and run_number < MAX_RUN_NUMBER - 1: - timeout_expiration_lines.append( - "No more trial: overall trial time 
is " - + "{:.1f}".format(overall_time) - + "s (limit=" - + "{:.1f}".format(MAX_TIMEOUT) - + "s)" - ) - break - - # Memorisation des infos sur les run en cas de timeout - if len(timeout_expiration_lines) > 0: - with open( - os.path.join(os.getcwd(), test_dir, RESULTS, PROCESS_TIMEOUT_ERROR_LOG), - "w", - errors="ignore", - ) as timeout_file: - for line in timeout_expiration_lines: - timeout_file.write(line + "\n") - - # En cas d'anomalie, memorisation du contenu des sorties standard - if stdout != "": - # Affichage sur la console, utile par exemple en mode debug pour avoir les stats memoire - print(stdout) - - # Pretraitement des lignes pour supprimer les lignes normales - # parfois specifiques a certains outils - is_kni = "KNI" in tool_exe_path - is_coclustering = "Coclustering" in tool_exe_path - lines = stdout.split("\n") - lines = filter_empty_lines(lines) - lines = filter_copyright_lines(lines) - # Pour les test KNI, le stdout contient une ligne avec le nombre de records - if is_kni: - lines = filter_lines(lines, "Recoded record number:") - lines = filter_lines(lines, "Error : Finish opening stream error:") - # Cas particulier du coclustering en mode debug - if is_coclustering: - lines = filter_lines( - lines, "BEWARE: Optimization level set to 0 in debug mode only!!!" 
- ) - # Exception egalement pour cas des lancement en mode parallele simule - lines = filter_lines(lines, "Warning : simulated parallel mode") - # Exception en mode debug, pour les stats memoire - if "Memory stats (number of pointers, and memory space)" in stdout: - ok = True - # Parcours des lignes pour voir si ce sont bien des messages de stats, y compris en parallel - # En parallele, on a l'id du process entre crochets en tete de chaque ligne - for line in lines: - # Recherche d'un pattern de message de l'allocateur - ok = ( - "Memory stats (number of pointers, and memory space)" in line - or "Alloc: " in line - or "Requested: " in line - ) - # Recherche additionnelle de "Process " en tete de ligne - # En effet, parfois en parallel, le debut d'un message commencant par "Process " - # est emis sur une lige de stdout, et la fin sur une autre ligne - if not ok: - ok = line.find("Process ") >= 0 - break - if not ok: - break - else: - ok = len(lines) == 0 - if not ok: - try: - with open( - os.path.join(os.getcwd(), test_dir, RESULTS, STDOUT_ERROR_LOG), - "w", - errors="ignore", - ) as stdout_file: - stdout_file.write(stdout) - except Exception as exception: - print( - "Enable to write file " - + STDOUT_ERROR_LOG - + " in " - + RESULTS - + " dir", - exception, - ) - if stderr != "": - print(stderr, file=sys.stderr) - try: - with open( - os.path.join(os.getcwd(), test_dir, RESULTS, STDERR_ERROR_LOG), - "w", - errors="ignore", - ) as stderr_file: - stderr_file.write(stderr) - except Exception as exception: - print( - "Enable to write file " - + STDERR_ERROR_LOG - + " in " - + RESULTS - + " dir", - exception, - ) - if khiops_process.returncode != 0 and khiops_process.returncode != 2: - try: - with open( - os.path.join(os.getcwd(), test_dir, RESULTS, RETURN_CODE_ERROR_LOG), - "w", - errors="ignore", - ) as return_code_file: - return_code_file.write( - "Wrong return code: " - + str(khiops_process.returncode) - + " (should be 0 or 2)" - ) - except Exception as exception: - 
print( - "Enable to write file " - + RETURN_CODE_ERROR_LOG - + " in " - + RESULTS - + " dir", - exception, - ) - # Message de fin de test - print( - tool_test_sub_dir + " " + family_dir_name + " " + test_name + " test done" - ) - - # Memorisation d'un fichier contenant le temp global - try: - with open( - os.path.join(os.getcwd(), os.path.join(test_dir, RESULTS, TIME_LOG)), - "w", - errors="ignore", - ) as time_file: - time_file.write("Overal time: " + str(time_stop - time_start) + "\n") - except Exception as exception: - print( - "Enable to write file " + TIME_LOG + " in " + RESULTS + " dir", - exception, - ) - - # Restore initial path - if os.name == "nt": - os.environ["path"] = initial_path - else: - os.environ["LD_LIBRARY_PATH"] = initial_path - - # Comparaison des resultats - os.chdir(tool_test_family_path) - check_results.check_results(test_name) - - -def evaluate_tool_on_family(tool_exe_path, tool_test_family_path, test_name=None): - """Evaluation d'un outil sur une famille de test et comparaison des resultats - Parametres: - - tool_exe_path: path de l'outil a tester, ou nul si on ne veut faire que la comparaison - - tool_test_family_path: repertoire racine du repertoire de test - - test_name: repertoire de test terminal""" - - # Echec si le nombre de processus est parametre et mpiexec n'est pas dans le path - if shutil.which(mpi_exe_name) is None and "KhiopsMPIProcessNumber" in os.environ: - print("error : KhiopsMPIProcessNumber set but mpiexec not found in path.") - exit(1) - - # Collect sub-directories of samples_path - test_list = [] - for file_name in os.listdir(tool_test_family_path): - if os.path.isdir(os.path.join(tool_test_family_path, file_name)): - test_list.append(file_name) - - # Error if no sub-directory - if len(test_list) == 0: - print("error : no test is available in " + tool_test_family_path) - exit(1) - - # Case of a specific sub-directory - if test_name is not None: - evaluate_tool(tool_exe_path, tool_test_family_path, test_name) - # Case 
of all sub-directories - else: - for name in test_list: - evaluate_tool(tool_exe_path, tool_test_family_path, name) - # Message global - family_dir_name = os.path.basename(tool_test_family_path) - tool_test_sub_dir = os.path.basename(os.path.dirname(tool_test_family_path)) - action_name = "TEST" - if tool_exe_path == "nul": - action_name = "COMPARISON" - print(action_name + " DONE\t" + tool_test_sub_dir + "\t" + family_dir_name) - - -# Pour ouvir un fichier avec un flush systematique -class Unbuffered(object): - def __init__(self, stream): - self.stream = stream - - def write(self, data): - # on encode en utf-8 en ignorant les erreurs pour eviter un erreur lors de l'encodage automatique - self.stream.write(data.encode("utf-8", "ignore").decode("utf-8")) - self.stream.flush() - - def writelines(self, datas): - # on encode en utf-8 en ignorant les erreurs pour eviter un erreur lors de l'encodage automatique - self.stream.writelines( - [data.encode("utf-8", "ignore").decode("utf-8") for data in datas] - ) - self.stream.flush() - - def __getattr__(self, attr): - return getattr(self.stream, attr) - - -if __name__ == "__main__": - if len(sys.argv) < 4 or len(sys.argv) > 5: - # Specific help if only one parameter with the tool name - if len(sys.argv) == 2: - tool_name = sys.argv[1] - print("test" + tool_name + " [version] [testFamilyName] ([testName])") - print(" run tests for the " + tool_name + " tool") - else: - print("test [toolName] [version] [testFamilyName] ([testName])") - print(" run tests of one of the Khiops tools") - print("\ttool_name: name of the tool, among Khiops, Coclustering, KNI") - print("\tversion: version of the tool, one of the following options") - print("\t : full path of the executable") - print("\t d: debug version in developpement environnement") - print("\t r: release version in developpement environnement") - print("\t ver: ..exe in directory LearningTest\\cmd\\modl") - print("\t nul: for comparison only with test results") - print( - 
"\ttestFamilyName: name of the tool test family directory (Standard, MultiTables...)" - ) - print("\ttestName: optional, name of the tool test directory (Adult,Iris...)") - exit(1) - - sys.stdout = Unbuffered(sys.stdout) - - # Retrieve tool info - tool_name = sys.argv[1] - tool_exe_name, tool_test_sub_dir = retrieve_tool_info(tool_name) - - # Build tool exe path name from version - version = sys.argv[2] - tool_exe_path = build_tool_exe_path(tool_exe_name, version) - - # Test is tool test dir exists - test_family_name = sys.argv[3] - assert test_family_name is not None - tool_test_family_path = os.path.join( - learning_test_env.learning_test_root, - "LearningTest", - tool_test_sub_dir, - test_family_name, - ) - if not os.path.isdir(tool_test_family_path): - print( - "error : test family directory " + tool_test_family_path + " does not exist" - ) - exit(1) - - # Test is tool test sub dir exists - test_name = None - if len(sys.argv) == 5: - test_name = sys.argv[4] - assert test_name is not None - samples_sub_path = os.path.join( - learning_test_env.learning_test_root, - "LearningTest", - tool_test_sub_dir, - test_family_name, - test_name, - ) - if not os.path.isdir(samples_sub_path): - print( - "error : samples sub directory " + samples_sub_path + " does not exist" - ) - exit(1) - - # Start evaluation - evaluate_tool_on_family(tool_exe_path, tool_test_family_path, test_name) diff --git a/test/LearningTest/cmd/python/test_khiops_all.py b/test/LearningTest/cmd/python/test_khiops_all.py deleted file mode 100644 index 71f103202..000000000 --- a/test/LearningTest/cmd/python/test_khiops_all.py +++ /dev/null @@ -1,104 +0,0 @@ -import learning_test_env -import test_khiops -import test_families -import os.path -import sys -import stat -from test_dir_management import * - - -# lance les tests de khiops sur tous les repertoires contenus dans la liste "tool_test_dirs" -def test_khiops_tool(tool_name, tool_version, tool_test_dirs): - """Run tool on test dirs""" - # Build tool exe 
path name from version - tool_exe_name, tool_test_sub_dir = test_khiops.retrieve_tool_info(tool_name) - tool_exe_path = test_khiops.build_tool_exe_path(tool_exe_name, tool_version) - # Clean results - for test in tool_test_dirs: - tool_samples_path = os.path.join( - learning_test_env.learning_test_root, - "LearningTest", - tool_test_sub_dir, - test, - ) - if os.path.isdir(tool_samples_path): - for sub_test in os.listdir(tool_samples_path): - sub_test_path = os.path.join(tool_samples_path, sub_test) - file_path = os.path.join(sub_test_path, COMPARISON_RESULTS_LOG) - if os.path.isfile(file_path): - os.chmod(file_path, stat.S_IWRITE) - os.remove(file_path) - result_dir = os.path.join(sub_test_path, "results") - if os.path.isdir(result_dir): - for file_name in os.listdir(result_dir): - file_path = os.path.join(result_dir, file_name) - os.chmod(file_path, stat.S_IWRITE) - os.remove(file_path) - # Run tests - for test in tool_test_dirs: - print("\n\n--------------------------------------------------------") - print("\tRunning " + tool_name + " " + test + " tests") - print("--------------------------------------------------------") - tool_samples_path = os.path.join( - learning_test_env.learning_test_root, - "LearningTest", - tool_test_sub_dir, - test, - ) - test_khiops.evaluate_tool_on_family(tool_exe_path, tool_samples_path, None) - - -if __name__ == "__main__": - if len(sys.argv) != 2 and len(sys.argv) != 3: - print("testAll [version] ") - print(" run all tests for all Khiops tools") - print("\tversion: version of the tool") - print("\t d: debug version in developpement environnement") - print("\t r: release version in developpement environnement") - print("\t ver: ..exe in directory LearningTest\\cmd\\modl") - print("\t nul: for comparison with the test results only") - print("\t full exe path, if parameter is used") - print("\ttool: all tools if not specified, one specified tool otherwise") - print("\t Khiops") - print("\t Coclustering") - print("\t KNI") - exit(0) - 
- # Info on complete tests - if os.getenv("KhiopsCompleteTests") != "true": - print("\n--------------------------------------------------------") - print("Set env var KhiopsCompleteTests=true") - print("\tto run all long, instable and unusefull tests") - print("--------------------------------------------------------\n") - print("\n\n--------------------------------------------------------") - - sys.stdout = test_khiops.Unbuffered(sys.stdout) - - # Passage en mode batch - os.environ["KhiopsBatchMode"] = "true" - - # Retrieve version - version = sys.argv[1] - assert version is not None - - # Retrieve tool - tool = "" - if len(sys.argv) == 3: - tool = sys.argv[2] - - # Khiops tool - if tool == "" or tool == "Khiops": - khiops_tests = test_families.get_test_family("Khiops") - test_khiops_tool("Khiops", version, khiops_tests) - - # Coclustering tool - if tool == "" or tool == "Coclustering": - coclustering_tests = test_families.get_test_family("Coclustering") - test_khiops_tool("Coclustering", version, coclustering_tests) - - # KNI tool - if tool == "" or tool == "KNI": - KNI_tests = test_families.get_test_family("KNI") - test_khiops_tool("KNI", version, KNI_tests) - - print("all tests are done") diff --git a/test/LearningTest/cmd/python/utils.py b/test/LearningTest/cmd/python/utils.py deleted file mode 100644 index 279ef3782..000000000 --- a/test/LearningTest/cmd/python/utils.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -import os.path -import shutil -import stat - - -def copy(src, dest): - try: - shutil.copy(src, dest) - except BaseException as message: - print("can't copy " + src + " (" + str(message) + ")") - - -def remove_file(file_path): - """Remove file""" - os.chmod(file_path, stat.S_IWRITE) - try: - os.remove(file_path) - except (IOError, os.error) as why: - print("Cannot remove file %s: %s" % (file_path, str(why))) - - -def remove_dir(dir_path): - """Remove empty directory""" - try: - os.rmdir(dir_path) - except (IOError, os.error) as why: - print("Cannot 
remove directory %s: %s" % (dir_path, str(why))) diff --git a/test/LearningTestTool/README.md b/test/LearningTestTool/README.md new file mode 100644 index 000000000..d74144d08 --- /dev/null +++ b/test/LearningTestTool/README.md @@ -0,0 +1,443 @@ +# Khiops test tool: LearningTest and LearningTestTool + +LearningTest +- created in March 2009 +- automated test of Khiops tools +- versions synchronized with delivered Khiops versions + - one version per delivered Khiops version, with same tag + - one version for the current branch under development + +Non-regression tests consist of over 600 test sets organized into around 40 suites, mainly for Khiops, but also for Khiops coclustering and the KNI DLL. +They collectively occupy around 10 GB, including 5 GB for the databases and 3.5 GB for the test scripts with the reference results. + +The following components are involved in the Khiops tests +- tool binaries: binaries of the tools (Khiops, Coclustering and KNI) to test +- LearningTestTool: scripts that launch the tests and analyse the results +- LearningTest: datasets, tool scenarios that describe the tests and reference results + + +## LearningTest + +The LearningTest directory stores all the datasets, tool scenarios and reference results. + +### LearningTest directory tree + +The _LearningTest dir_ contains one sub-directory per _collection of datasets_: +- datasets: standard datasets +- MTdatasets: multi-table datasets +- TextDatasets: text datasets +- UnusedDatasets: datasets not currently in use + +And one _tool dir_ per set of tests related to each tool +- TestKhiops: tests for Khiops +- TestCoclustering: tests for Coclustering +- TestKNI: tests for KNI + +Each tool dir is a two-level tree: +- Level 1: one _suite dir_ per test suite, e.g. _TestKhiops/Standard_ +- Level 2: one _test dir_ per test, e.g. _TestKhiops/Standard/Iris_ + +Suite dirs prefixed with y_ or z_ (e.g. z_Work) are temporary suites.
+ + ### Test dirs + +Each test dir is organized as follows: +- test.prm: scenario file to run the test +- ... : test-specific local data files, if any +- readme.txt: optional file with short information relative to the test +- results: sub-directory containing the results of running the tool with the script +- results.ref: sub-directory containing reference results +- comparisonResults.log: comparison report obtained by comparing results and results.ref + +### Normalisation of paths in scenarios + +The test.prm scenario files must be written independently of the LearningTest location, by specifying +the paths of the files concerned, which must be relative to the LearningTest tree structure, with linux-type syntax. + +For example: +- `./SNB_Modeling.kdic` to access a specific local dictionary in the test dir + - except for datasets defined in LearningTest/dataset root trees, + datasets can have specific dictionaries or data per test dir +- `../../../datasets/Adult/Adult.txt` to access a dataset data file from a collection of datasets +- `./results/T_Adult.txt` for a result in the results sub-directory + +### Variants of reference results + +The reference results in the `results.ref` dir may have variations, depending on the context +- computing type: sequential, parallel, +- platform: Windows, Linux, Darwin (macOS) + +In this case, multiple reference result directories are used to store the results: +- naming convention: `results.ref<-(computing type)><-(platforms)>` + - the '-' char is used to separate the context information + - the '_' char is used in case of several values per context +- the set of `results.ref` directories must cover all possible contexts + - the base `results.ref` dir (without context) is used when specific contexts are not necessary +- examples of valid results.ref directories: + - `results.ref-sequential`, `results.ref-parallel` + - `results.ref-Windows`, `results.ref-Darwin_Linux` + - `results.ref-parallel-Darwin_Linux` +- examples of
valid sets of results.ref directories + - [results.ref]: standard case + - [results.ref, results.ref-parallel]: specialisation in case of parallel computing + - [results.ref, results.ref-Darwin_Linux]: specialisation for Linux and Mac platforms + - [results.ref, results.ref-parallel, results.ref-parallel-Darwin_Linux]: more complex contexts + + +## LearningTestTool + +The LearningTestTool directory stores all the scripts to test the Khiops tools and analyse the results, using a LearningTest directory. + +### Terminology + +The following terminology is used throughout the test environment: +- tools + - `Khiops`: Khiops tool, with tool binary name `MODL` + - `Coclustering`: Khiops tool, with tool binary name `MODL_Coclustering` + - `KNI`: Khiops tool, with tool binary name `KNITransfer` +- typology of directories + - `test dir`: one terminal test, e.g. `/LearningTest/TestKhiops/Standard/Iris` + - `suite dir`: a suite of tests, e.g. `/LearningTest/TestKhiops/Standard` + - `tool dir`: all suites for a given tool, e.g. `/LearningTest/TestKhiops` + - `learning test dir`: home directory of LearningTest, containing all tool dirs, e.g. `/LearningTest` +- test family: subset of test suites per tool dir + - `basic`: `Standard` suite only per tool, around one minute of overall execution time + - `full`: suites used for non-regression tests (default family), around one hour of overall execution time + - `fullNoKNI`: same as `full`, without the KNI tool (e.g. to test the tool binaries of an installed Khiops desktop application) + - `complete`: same as `full`, plus large scale tests, around one day of overall execution time + - `all`: all suites (used to manage test dirs exhaustively, not to run tests) +- miscellaneous + - `command`: one of the LearningTest scripts + - `instruction`: instruction to perform on test dirs (e.g. `errors` to collect error stats) + - `dataset collection`: collection of datasets (e.g. `datasets`, `MTdatasets`,...)
+ +A subset of test dirs is defined using both a `LearningTest source dir` and a family: +- test dir: a single test dir +- suite dir: all test dirs of a test suite +- tool dir: all test suites for a specific tool in a given family +- LearningTest dir: all test suites for all tools in a given family + +## LearningTest commands + +All LearningTest commands are prefixed by 'kht_'. + +Available LearningTest commands are +- **_kht_test_** (LearningTest source dir) (tool binaries dir) [options] + - test a tool on a subset of test dirs + - options: + - family, + - processes + - forced-platform: for the context of reference results + - min-test-time, max-test-time, test-timeout-limit + - user-interface, task-file, output-scenario +- **_kht_apply_** (LearningTest source dir) (instruction) [options] + - apply an instruction on a subset of test dirs + - options: + - family, + - processes, + - forced-platform: for the context of reference results + - min-test-time, max-test-time +- **_kht_collect_results_** (LearningTest source dir) (target dir) [options] + - collect results from a subset of test dirs in a sub-dir of the target dir named LearningTest_results + - options: + - collect types: _errors_, _messages_, _all_ (default: _errors_) + - family +- **_kht_export_** (LearningTest source dir) (target dir) [options] + - export LearningTest tree for a subset of test dirs, to a sub-dir of the target dir named: + - LearningTest if export type is _all_ + - LearningTest_(export type) otherwise + - options: + - export types: _all_, _scripts_, _references_, _datasets_ (default: _all_) + - family +- **_kht_env_** + - show the status of the main environment variables used by the tool binaries +- **_kht_help_** + - show this help message + +Detailed help is available by typing in the name of a specific command. 
+ +Notes regarding the directory parameters: +- the kht commands can be applied on any LearningTest directory tree +- the directories can be absolute or relative +- commands benefit from shell autocompletion on most file systems to complete the parameters for the LearningTest|tool|family|test directory + + +### LearningTestTool directory tree + +The LearningTestTool directory contains: +- files + - README.md +- directories + - py: all commands written in python + - callable using `python (command).py (parameters)` + - cmd: all commands contained in Windows shell files + - callable from a Windows shell using `(command) (parameters)` + - sh: all commands contained in Linux shell files + - callable from a Linux (or Mac) shell using `(command) (parameters)` + +### Implementation of LearningTestTool + +Naming conventions +- follow the terminology presented above + - except for the LearningTest dir, mostly named home dir in the code for concision reasons +- file and directory names and paths + - files: error_file_name vs error_file_path + - directories: suite_dir_name vs suite_dir (path not used in variable name) + +All commands are implemented using the python language: +- LearningTestTool/py + - one (command).py file per command + - plus internal python files, prefixed by '_' + - _kht_constants.py: common constants + - _kht_utils.py: common utility functions + - _kht_families.py: definition of families + - _kht_results_management.py: management of context of reference results, sequential vs parallel and platform + - _kht_standard_instructions.py: standard maintained instruction functions for kht_apply command + - _kht_one_shot_instructions.py: one-shot instruction functions for kht_apply command + - _kht_check_results.py: deep check by comparison of test and reference results +- LearningTestTool/cmd + - one (command).cmd file per command + - same implementation: redirection to the python command file +- LearningTestTool/sh + - one (command) file per command + - same implementation: redirection to the python command file + +The
kht_test.py source file: +- starts the tool according to execution options +- collects the standard outputs from stdout and stderr and the return code +- manages a timeout for overly long execution times +- is resilient to some expected output (ex: memory messages in debug mode) +- compares test and reference results, with a summary in file comparisonResults.log +- ... + +The _kht_check_results.py source file is the most critical one +- objective: + - resilient cross-platform comparison of test and reference results + - fully automatic, while being as robust as a human expert comparison +- by far the most complex portion of code + - but pragmatic code fast to implement and test, avoiding over-design + - constrained by fast evolution, reuse by copy-paste is acceptable + - it is sometimes simpler to rework the content of specific test dirs +- main features to be robust to changes across contexts, with resilience to + - value of the tool version is ignored everywhere + - computing times in err.txt file are ignored + - varying results depending on the context, sequential vs parallel, plus platform, with dedicated results.ref directories + - messages in relation to resource limits, which may vary slightly depending on execution modes and platforms + - "normal" failure in scenario ("Batch mode failure") + - byte encoding of file names across platforms + - poor handling of accented file names by zip + - varying patterns in error messages + - specific messages that occur only in sequential mode (100th, 1000th warning...) + - varying order of messages in some rare cases, in parallel mode + - approximate AUC estimate in the event of limited resources + - epsilon variation of numerical results + - ...
+ +A difficult task is to design the Khiops algorithms with the aim of being reproducible, which has so far been achieved: +- using a portability layer that provides an abstraction of the underlying platform +- using the Parallel library that guarantees the same results whatever the resources (RAM, number of cores) + - a few exceptions: + - for optimisation purposes, some tasks are only run in parallel mode (e.g. pre-indexation of files) + - the AUC criterion can be evaluated on a subset of the database when there is not enough RAM +- using truncations in some numerical results +- fixing the random seeds +- using sort with secondary sort criteria in case of equality, including fixed random values when a random choice is used + - this is important, because the ANSI sort function exploits a stable sort only on some platforms +- ... + +## Running Khiops tests + +Installing LearningTest on a new machine +- LearningTest + - copy the LearningTest tree on local disk + - or obtain it using a clone from gitlab (TODO) +- LearningTestTool + - copy the LearningTestTool tree on local disk + - or obtain it using a clone from the KhiopsML/khiops repo on github (TODO) + - add the script directory (cmd or sh) to the path +- Install python + - add python to the path + +All commands are directly callable from any platform. + +## Main usages + +### Test methodology + +The set of non-regression tests is very large, up to 10 GB and one day of computation time.
+In practice, the tests are run in stages: +- **scale** + - elementary: TestKhiops/Standard/IrisLight, around one second, even in debug mode + - standard: TestKhiops/Standard, around one minute + - _full_ family (default family): all non-regression tests, around one hour + - _complete_ family: same as full, plus large scale tests, around one day +- **execution mode** + - _full_ using sequential or parallel computing (use process number = 4) + - debug mode for test dirs with short test time (typically max-test-time=5) +- **portability** + - _full_ under different platforms +- **stress tests** + - huge scale: monster datasets (tens to hundreds of GB) on powerful servers are sometimes used for stress tests: + - datasets from the large scale learning challenge (DNA, 50 million instances with 200 records per instance) + - Criteo (45 million instances) + - OrangeCDR: 100000 instances with 35 million CDRs + - ... + - parallelization and performance: the same test scenario is used, but with different resources, RAM and number of processes. + - ... + +### Non-regression tests for development + +The aim is to quickly check that the new source code has not introduced any regressions. + +The main use of LearningTest is to carry out non-regression tests regularly during development. +The kht_test command is by far the most commonly used. + +It is used in stages, by increasing _scale_, up to the _full_ family. + +In case of bug fix or new feature, new test dirs may be added to LearningTest: see section "Evolutions of LearningTest" + +### Non-regression tests for release + +The aim is to detect as many potential bugs as possible, in an extensive manner, in order to deliver a high-quality version of the tool. + +In case of a release, massive tests are performed by _scale_, _execution mode_, _portability_, _stress tests_. + +### Portability on new platform + +The aim is to check that the new platform is supported by the tool.
+ +The tests must be run on the new platform, using the _full_ family. + +### CI/CD + +The tests can be run at different steps of the development process: +- pull request: automatic run of the tests using the _basic_ family + - minimum check that the code compiles and runs on three platforms on standard test dirs + - rapid and sufficient in most cases + - in some cases, the requirement may be higher + - for example, in the case of a major feature + - it is up to the reviewer to impose this requirement, in agreement with the developer + - the tests can be run manually using the _full_ family on the developer's platform +- release + - the tests must be run using the _full_ family on all the supported platforms + - this must be triggered manually before the release + - this can also be triggered periodically (for example, once a month), for verification purposes + +## Evolutions of LearningTest + +### New test dir + +A new test dir may be necessary in the following cases: +- fix a bug +- new minor feature + +In this case, a new test dir is first developed in a temporary working suite, +(e.g. `LearningTest/TestKhiops/z_Work`). + +When the new test dir is completed with its scenario and reference results dir, it can be copied in an existing suite, +(e.g. `LearningTest/TestKhiops/BugsMultiTables`). + +It might be referenced in the commit notes of the relevant github issue. + +### New test suite + +A new test suite may be necessary in the following case: +- new major feature + +In this case, a new test suite is first developed in a new temporary working suite, +(e.g. `LearningTest/TestKhiops/z_TextVariables`). + +When the development of the major feature is completed, the new test suite can become an "official suite" +(e.g. `LearningTest/TestKhiops/TextVariables`), and be referenced in the _kht_families.py source of LearningTestTool.
+ + +### Evolution of scenarios + +The Khiops scenarios mirror the structure of the user interface, and are likely to evolve in a major release: +- when the user interface is redesigned for greater simplicity +- when major features are added, with new user parameters + +In this case, the scenarios `test.prm` within each test dir have to be updated: +- first, save all the current LearningTest directory tree + - using gitlab + - or locally on your disk using the kht_export command +- implement manually a prototype of the new scenario + - copy one representative test dir in a working suite + (ex: `LearningTest/TestKhiops/Standard/Adult` in `LearningTest/TestKhiops/z_Work`) + - record the new scenario using the tool in user interface mode with the -o option + - adapt the scenario to the LearningTest constraints, such as normalisation of paths... +- implement a one-shot instruction (ex: `transformPrm`) in python source file `_kht_one_shot_instructions.py` + to automatically transform the scenario from its old version to its new version +- use the test methodology to apply the `transformPrm` instruction by increasing scale, up to all the test dirs +- the reference results should remain the same, except in the case of a new feature +- _be pragmatic: coding the one-shot instruction may be tedious, and manual transformation of a + small proportion of scenarios is acceptable and more time efficient_ + + +### Evolution of reference results + +Caution: +- avoid mixing up the evolution of scenarios and reference results +- updating the reference results is: + - critical to keep quality + - tedious and time-consuming to avoid introducing regressions into the tests +- wait for stabilization of the new algorithms before updating the reference results + +When Khiops algorithms evolve, for example with a new version of the SNB method, reference results are likely to evolve.
+ +In this case, the reference results within each dir have to be updated: +- first, save all the current LearningTest directory tree + - using gitlab + - or locally on your disk using the kht_export command +- test your new algorithm on temporary work suites first +- compare your test and reference results manually by double-checking the results + - on one representative test dir (ex: `LearningTest/TestKhiops/Standard/Adult`) + - on one representative test suite (ex: `LearningTest/TestKhiops/Standard`) + - on some carefully chosen additional test dirs, representative of potential difficulties +- implement a one-shot instruction (ex: `compareSNBResults`) in python source file `_kht_one_shot_instructions.py` + to automatically compare the new test results to the old reference results + - ensure that everything except the new feature is unchanged (ex: data preparation results should be unchanged if only the SNB has evolved) + - check that the new performance indicators (accuracy, AUC...) remain close to the old ones + - check that the new computing times remain close to the old ones +- use the test methodology to apply the `compareSNBResults` instruction by increasing scale, up to all the test dirs +- when all is validated and double checked, update the new references using the `makeref` instruction +- _be rigorous and paranoid: the quality of the reference results is critical_ + +## Management of LearningTest and LearningTestTool + +The LearningTestTool and LearningTest projects are closely linked to the development of Khiops core, +but their pace of development is different: +- Khiops core evolves rapidly +- LearningTest evolves slowly: + - its main use is "read-only" for non-regression tests + - it sometimes undergoes changes to fix bugs or develop minor or major features +- LearningTestTool rarely evolves + - resilience methods sometimes need to be maintained as Khiops algorithms or output reports evolve + +Currently, LearningTestTool and a tiny part of LearningTest
(`basic` family) +are maintained in the Khiops github repo https://github.com/KhiopsML/khiops + +The full LearningTest directory tree cannot be embedded in the Khiops github repo for the following reasons: +- scalability issue: around ten GB are necessary to store a snapshot of one version of LearningTest +- confidentiality: some datasets or test dirs contain confidential data +- cost: storing and managing a large number of tests on dozens of platforms can be expensive in an external cloud + +A solution must be defined to meet the requirements of the processes summarised in this document. + +A potential solution, for a start: +- keep the development of LearningTest on the Khiops repo, with the `basic` sub-part of LearningTest +- exploit a gitlab repo to store and manage LearningTest: process to specify + - all files except scenarios (test.prm) managed using LFS + - synchronisation of version between the Khiops and LearningTest repos: use the closest former version + - keep a full snapshot of LearningTest for past versions of Khiops + - for the current dev branch of Khiops: + - keep only the last few versions of LearningTest + - clean the preceding former deprecated versions of LearningTest if necessary (never used again) + - ... +- exploit the system initialized by Stephane G to launch the tests on many platforms +- ...
+ + + + diff --git a/test/LearningTestTool/cmd/kht_apply.cmd b/test/LearningTestTool/cmd/kht_apply.cmd new file mode 100644 index 000000000..d09773963 --- /dev/null +++ b/test/LearningTestTool/cmd/kht_apply.cmd @@ -0,0 +1,2 @@ +@call python %~dp0..\py\%~n0.py %* + diff --git a/test/LearningTestTool/cmd/kht_collect_results.cmd b/test/LearningTestTool/cmd/kht_collect_results.cmd new file mode 100644 index 000000000..d09773963 --- /dev/null +++ b/test/LearningTestTool/cmd/kht_collect_results.cmd @@ -0,0 +1,2 @@ +@call python %~dp0..\py\%~n0.py %* + diff --git a/test/LearningTestTool/cmd/kht_env.cmd b/test/LearningTestTool/cmd/kht_env.cmd new file mode 100644 index 000000000..d09773963 --- /dev/null +++ b/test/LearningTestTool/cmd/kht_env.cmd @@ -0,0 +1,2 @@ +@call python %~dp0..\py\%~n0.py %* + diff --git a/test/LearningTestTool/cmd/kht_export.cmd b/test/LearningTestTool/cmd/kht_export.cmd new file mode 100644 index 000000000..d09773963 --- /dev/null +++ b/test/LearningTestTool/cmd/kht_export.cmd @@ -0,0 +1,2 @@ +@call python %~dp0..\py\%~n0.py %* + diff --git a/test/LearningTestTool/cmd/kht_help.cmd b/test/LearningTestTool/cmd/kht_help.cmd new file mode 100644 index 000000000..d09773963 --- /dev/null +++ b/test/LearningTestTool/cmd/kht_help.cmd @@ -0,0 +1,2 @@ +@call python %~dp0..\py\%~n0.py %* + diff --git a/test/LearningTestTool/cmd/kht_test.cmd b/test/LearningTestTool/cmd/kht_test.cmd new file mode 100644 index 000000000..d09773963 --- /dev/null +++ b/test/LearningTestTool/cmd/kht_test.cmd @@ -0,0 +1,2 @@ +@call python %~dp0..\py\%~n0.py %* + diff --git a/test/LearningTestTool/kht_shell.cmd b/test/LearningTestTool/kht_shell.cmd new file mode 100644 index 000000000..cd7cf10af --- /dev/null +++ b/test/LearningTestTool/kht_shell.cmd @@ -0,0 +1,8 @@ +#echo off +REM Start dos shell with the LearningTestTool command files in the path + +REM Add LearningTest script cmd dir in the path +set path=%~dp0cmd;%path% + +REM Open a new shell with given title and starting 
directory +start "%~n0" /D "%~dp0..\LearningTest" diff --git a/test/LearningTest/cmd/python/check_results.py b/test/LearningTestTool/py/_kht_check_results.py similarity index 56% rename from test/LearningTest/cmd/python/check_results.py rename to test/LearningTestTool/py/_kht_check_results.py index fabd50bbc..ed5e6e997 100644 --- a/test/LearningTest/cmd/python/check_results.py +++ b/test/LearningTestTool/py/_kht_check_results.py @@ -1,10 +1,47 @@ import os.path import re -from test_dir_management import * +import _kht_constants as kht +import _kht_utils as utils +import _kht_results_management as results + +""" +Verification des resultats d'un repertoire de test terminal + +La comparaison est effectue entre les resultats de test, et les resultats de reference +correspondant au contexte en cours (plateforme, parallel ou sequuentiel...). +Elle se fait sur tous les ficgier du repertoire de facon hierarchique +- nombre de fichier de chaque repertoire +- noms des fichiers +- pour chaque fichier + - nombre de ligne + - contenu + - comparaison des lignes + - si necessaire, comparaison des champs des lignes, pour un separateur tabulation + - si necessaire, comparaions des tokens du champs, + dans le cas de la tokenisation d'un fichier json ou kdic + +La comparaison se fait en etant tolerant aux variations 'normales' selon le contexte d'execution +- il peut y avoir des resultats de reference different selon le contexte +- on filtre prealablement certaines informations non presentes systematiquement + - copyright + - prefix de type '[0] ' lie au process, genere par mpiexec en parallele + - statistique sur la memoire ne mode debug + ... +- il y a une tolerance sur les valeur numeriques, ce qui entraine alors des warning et non des erreurs +- ... 
+ +En cas d'erreurs residuelles, plusieurs strategies de recouvrement des erreurs sont utilises, +a differents moments du processus de comparaison +- tolerance sur echec de scenario, si cela correspond au resultats de reference +- tolerance aux noms de fichier utilisant des caracteres accentues systeme dependant +- tolerance sur les messages d'erreurs differents en parallele et en sequentiel +- tolerance sur les message d'erreur lies au manque de ressource +... +""" # Nom du fichier de comparaison -COMPARISON_LOG_FILE_NAME = COMPARISON_RESULTS_LOG +COMPARISON_LOG_FILE_NAME = kht.COMPARISON_RESULTS_LOG # Constantes de la section SUMMARY des fichiers de log des resultats de comparaison SUMMARY_TITLE = "SUMMARY" @@ -15,145 +52,165 @@ SUMMARY_PORTABILITY_KEY = "Portability: " # Constantes pour la gestion des fichiers speciaux, par priorite decroissante -SUMMARY_TIMOUT_ERROR_KEY = "TIMOUT ERROR" +SUMMARY_TIMEOUT_ERROR_KEY = "TIMEOUT ERROR" SUMMARY_FATAL_ERROR_KEY = "FATAL ERROR" SUMMARY_UNEXPECTED_OUTPUT_KEY = "UNEXPECTED OUTPUT" SUMMARY_SPECIAL_FILE_KEYS = [ - SUMMARY_TIMOUT_ERROR_KEY, + SUMMARY_TIMEOUT_ERROR_KEY, SUMMARY_FATAL_ERROR_KEY, SUMMARY_UNEXPECTED_OUTPUT_KEY, ] # Association entre type de fichier special et cle de gestion dans le resume -SUMMARY_SPECIAL_FILE_KEYS_PER_FILE = {} -SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[STDOUT_ERROR_LOG] = SUMMARY_UNEXPECTED_OUTPUT_KEY -SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[STDERR_ERROR_LOG] = SUMMARY_UNEXPECTED_OUTPUT_KEY -SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[PROCESS_TIMEOUT_ERROR_LOG] = SUMMARY_TIMOUT_ERROR_KEY -SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[RETURN_CODE_ERROR_LOG] = SUMMARY_FATAL_ERROR_KEY -assert len(SUMMARY_SPECIAL_FILE_KEYS_PER_FILE) == len(SPECIAL_ERROR_FILES) - - -def write_message(message, log_file=None, show=False): - """Ecriture d'un message dans un fichier de log - Ecriture dans un fichier de log selon le le parametre log_file - Affichage sur la console selon le parametre show - Si ni log_file, ni show ne sont specifier, 
la methode est en mode silencieux +SUMMARY_SPECIAL_FILE_KEYS_PER_FILE = { + kht.STDOUT_ERROR_LOG: SUMMARY_UNEXPECTED_OUTPUT_KEY, + kht.STDERR_ERROR_LOG: SUMMARY_UNEXPECTED_OUTPUT_KEY, + kht.PROCESS_TIMEOUT_ERROR_LOG: SUMMARY_TIMEOUT_ERROR_KEY, + kht.RETURN_CODE_ERROR_LOG: SUMMARY_FATAL_ERROR_KEY, +} +assert len(SUMMARY_SPECIAL_FILE_KEYS_PER_FILE) == len(kht.SPECIAL_ERROR_FILES) + +# Ensemble des cle pouvant se trouver dans le resume +ALL_SUMMARY_KEYS = [ + SUMMARY_WARNING_KEY, + SUMMARY_ERROR_KEY, + SUMMARY_FILE_TYPES_KEY, + SUMMARY_PORTABILITY_KEY, +] + SUMMARY_SPECIAL_FILE_KEYS +assert len(set(ALL_SUMMARY_KEYS)) == len(ALL_SUMMARY_KEYS), ( + "Summary keys " + str(ALL_SUMMARY_KEYS) + " must not contain duplicates" +) + + +def analyse_comparison_log(test_dir): """ - cleaned_message = message.encode(encoding="utf-8", errors="ignore").decode( - encoding="utf-8" - ) - if show: - print(cleaned_message) - # on encode en utf-8 en ignorant les erreurs pour eviter un erreur lors de l'encodage automatique - if log_file is not None: - log_file.write(cleaned_message + "\n") - - -def read_file_lines(file_path, log_file=None, show=False): - """Chargement en memoire des lignes d'un fichier - Retourne la liste des fichier si ok, None sinon - Ecrit un message dans le log en cas d'erreur - """ - # lecture des lignes du fichier - try: - with open(file_path, "r", errors="ignore") as file: - file_lines = file.readlines() - except BaseException as exception: - write_message( - "Error : can't open file " + file_path + " (" + str(exception) + ")", - log_file=log_file, - show=show, - ) - file_lines = None - return file_lines - - -def write_file_lines( - file_path, file_lines, striped_lines_suffix="\n", log_file=None, show=False -): - """Ecriture d'une liste de ligne dans un fichier - Ajoute un suffix aux lignes sans caractere fin de ligne - Ecrit un message dans le log en cas d'erreur + Analyse du log de comparaison des resultats de test et de reference + present dans un repertoire de 
test + Renvoie: + - error_number + Le nombre d'erreurs deduit du resume + - warning_number + Le nombre de warnings deduit du resume + - summary_infos: + Un dictionnaire par avec une ligne de texte par cle de resume (ALL_SUMMARY_KEYS) + - files_infos: + Un dictionaire par nom de fichier contenant le resultat de la comparaison + pour ce fichier, sous la forme d'un texte potentiellement multi-lignes + Ce texte contient 'OK' uniquement si aucun problme n'est detecte + Il contient des lignes de texte, dont certain sont potentiellement prefixes par 'warning: ' + ou 'error : ' sinon + Si le log de comparaison n'est pas disponible ou exploitable, on retourne une erreur """ - # lecture des lignes du fichier - try: - with open(file_path, "w", errors="ignore") as file: - for line in file_lines: - file.write(line) - if len(line) == 0 or line[-1] != "\n": - file.write(striped_lines_suffix) - except BaseException as exception: - write_message( - "Error : can't open output file " + file_path + " (" + str(exception) + ")", - log_file=log_file, - show=show, - ) - -def find_in_lines(lines, substring): - """Recherche d'une chaine de caractere parmi un ensemble de lignes - Renvoie l'index de la premiere ligne dans laquelle la chaine apparait, -1 sinon""" - for i, line in enumerate(lines): - if line.find(substring) >= 0: - return i - return -1 + def extract_number(message): + assert message != "" + fields = message.split() + assert fields[0].isdigit() + number = int(fields[0]) + return number + utils.check_test_dir(test_dir) -def append_message(initial_messages, message): - """Ajout d'un message a un message existant, en ajoutant si necessaire ', ' - pour separer les messages si les deux sont non vides - Retourne un message complete du nouveau message""" - if message == "": - return initial_messages - elif initial_messages == "": - return message + # Initialisation des resultats + error_number = 0 + warning_number = 0 + summary_infos = {} + files_infos = {} + + # Traitement des erreurs 
memorisee dans le log + log_file_path = os.path.join(test_dir, kht.COMPARISON_RESULTS_LOG) + if not os.path.isfile(log_file_path): + # Erreur speciale si pas de fichier de comparaison + error_number = 1 + summary_infos[SUMMARY_NOTE_KEY] = "The test has not been launched" else: - return initial_messages + ", " + message - - -# Parsers en variables globales, compiles une seule fois -token_parser = None -time_parser = None -numeric_parser = None - - -def initialize_parsers(): - """Initialisation des parsers globaux""" - global token_parser - global time_parser - global numeric_parser - if token_parser is not None: - return - # Delimiters pour els fichiers json et kdic - delimiters = ["\,", "\{", "\}", "\[", "\]", "\:", "\(", "\)", "\<", "\>", "\="] - numeric_pattern = "-?[0-9]+\.?[0-9]*(?:[Ee]-?[0-9]+)?" - string_pattern = ( - '"[^"]*"' # Sans les double-quotes dans les strings (dur a parser...) - ) - time_pattern = "\d{1,2}:\d{2}:\d{2}\.?\d*" - other_tokens = "[\w]+" - tokens = time_pattern + "|" + numeric_pattern + "|" + string_pattern - for delimiter in delimiters: - tokens += "|" + delimiter - tokens += "|" + other_tokens - token_parser = re.compile(tokens) - numeric_parser = re.compile(numeric_pattern) - time_parser = re.compile(time_pattern) - + try: + with open(log_file_path, "r", errors="ignore") as log_file: + lines = log_file.readlines() + except Exception as exception: + # Erreur speciale si probleme de lecture du fichier de comparaison + lines = None + error_number = 1 + summary_infos[SUMMARY_NOTE_KEY] = ( + "Unable to read file " + kht.COMPARISON_RESULTS_LOG + str(exception) + ) + # Analyse du contenu du fichier + file_pattern = "file " + if lines is not None: + index = 0 + while index < len(lines): + line = lines[index] + index += 1 + line = line.strip() + + # Analyse des lignes concernant chaque fichier avant le resume + if line.find(file_pattern) == 0: + file_path = line[len(file_pattern) :] + file_name = os.path.basename(file_path) + file_info = "" + 
while index < len(lines): + line = lines[index] + index += 1 + line = line.strip() + if line == "": + break + else: + if file_info != "": + file_info += "\n" + file_info += line + files_infos[file_name] = file_info + continue + + # Analyse du resume jsuq'u la fin du fichier si debut de resume trouve + if line == SUMMARY_TITLE: + while index < len(lines): + line = lines[index] + index += 1 + line = line.strip() + for key in ALL_SUMMARY_KEYS: + if line.find(key) >= 0: + summary_infos[key] = line + if key == SUMMARY_WARNING_KEY: + warning_number = extract_number(line) + elif key == SUMMARY_ERROR_KEY: + error_number = extract_number(line) + + # Erreur speciale si le resume n'est pas trouve + if len(summary_infos) == 0: + assert error_number == 0 + error_number = 1 + specific_message = ( + "Section '" + + SUMMARY_TITLE + + "' not found in " + + kht.COMPARISON_RESULTS_LOG + ) + summary_infos[SUMMARY_NOTE_KEY] = specific_message + # Retour des resultats + return error_number, warning_number, summary_infos, files_infos -def check_results(test): - """Compare les fichiers de resultats de test et de reference 2 a 2 - et ecrit les resultats dans le fichier de log""" - assert os.path.isdir(test) - test_full_path = os.path.join(os.getcwd(), test) +def check_results(test_dir, forced_context=None): + """ + Fonction principale de comparaison des resultats de test et de reference + Les fichiers sont compares 2 a 2 et la synthese de la comparaison est ecrite + dans un fichier de log, avec un resume en fin de fichier, facile a parser + On retourne True s'il n'y a aucune erreur + + Le parametrage d'un contexte force en entree permete d'effectuer la comparaison avec + un contexte (parallel|sequential, platform) alternatif. 
Dans ce cas: + - l'objectif est essentiellement de renvoyer un indicateur global de succes de la comparaison + - on n'ecrit pas de fichier de comparaison + """ + utils.check_test_dir(test_dir) # Initialisation des stats de comparaison special_error_file_error_numbers = {} - for file_name in SPECIAL_ERROR_FILES: + for file_name in kht.SPECIAL_ERROR_FILES: special_error_file_error_numbers[file_name] = 0 error_number = 0 - warnings_number = 0 + warning_number = 0 + user_message_warning_number = 0 compared_files_number = 0 error_number_in_err_txt = 0 error_number_per_extension = {} @@ -162,65 +219,76 @@ def check_results(test): erroneous_test_file_lines = {} erroneous_file_names = [] extension_message = "" - recovery_message = "" specific_message = "" portability_message = "" + recovery_message = "" - # Ouverture du fichier de log de comparaison - log_file_path = os.path.join(test_full_path, COMPARISON_LOG_FILE_NAME) - try: - log_file = open(log_file_path, "w", errors="ignore") - except Exception as exception: - print("error : unable to create log file " + log_file_path, exception) - return - assert log_file is not None - write_message(test + " comparison", log_file=log_file) + # Ouverture du fichier de log de comparaison, sauf si lle contexte est force + log_file = None + if forced_context is None: + log_file_path = os.path.join(test_dir, COMPARISON_LOG_FILE_NAME) + try: + log_file = open(log_file_path, "w", errors="ignore") + except Exception as exception: + print("error : unable to create log file " + log_file_path, exception) + return + assert log_file is not None + utils.write_message( + utils.test_dir_name(test_dir) + " comparison", log_file=log_file + ) # Information sur le contexte courant de comparaison des resultats - current_context = get_current_results_ref_context() - write_message( - "current comparison context : " + str(current_context), - log_file=log_file, - ) + if forced_context is None: + current_context = results.get_current_results_ref_context() + 
utils.write_message( + "current comparison context : " + str(current_context), + log_file=log_file, + ) + else: + current_context = forced_context # Test de presence du repertoire de test a comparer - test_dir = os.path.join(test_full_path, RESULTS) - if not os.path.isdir(test_dir): - write_message( - "error : no comparison, test directory not available (" + test_dir + ")", + results_dir = os.path.join(test_dir, kht.RESULTS) + if not os.path.isdir(results_dir): + utils.write_message( + "error : no comparison, test directory not available (" + results_dir + ")", log_file=log_file, show=True, ) error_number = error_number + 1 # Recherche du repertoire courant des resultats de reference - results_ref, candidate_dirs = get_results_ref_dir( - test_full_path, log_file=log_file, show=True + results_ref, candidate_dirs = results.get_results_ref_dir( + test_dir, forced_context=forced_context, log_file=log_file, show=True ) if results_ref is None: - write_message( - "error : invalid " + RESULTS_REF + " dirs " + str(candidate_dirs), + utils.write_message( + "error : invalid " + + kht.RESULTS_REF + + " dirs " + + utils.list_to_label(candidate_dirs), log_file=log_file, show=True, ) error_number = error_number + 1 elif len(candidate_dirs) >= 2: portability_message = ( - "used " + results_ref + " dir among " + str(candidate_dirs) + "used " + results_ref + " dir among " + utils.list_to_label(candidate_dirs) ) - write_message( + utils.write_message( portability_message, log_file=log_file, show=True, ) # Test de presence du repertoire de reference a comparer + results_ref_dir = "" if error_number == 0: - ref_dir = os.path.join(test_full_path, results_ref) - if not os.path.isdir(ref_dir): - write_message( + results_ref_dir = os.path.join(test_dir, results_ref) + if not os.path.isdir(results_ref_dir): + utils.write_message( "error : no comparison, reference directory not available (" - + ref_dir + + results_ref_dir + ")", log_file=log_file, show=True, @@ -229,16 +297,13 @@ def 
check_results(test): # Comparaison effective si possible if error_number == 0: - # Initialisation des parsers - initialize_parsers() - - # Acces aux fichiers des repertoire de reference et de test + # Acces aux fichiers des repertoires de reference et de test # On passe par le format bytes des nom de fichier pour avoir acces # aux fichier quelque soit la plateforme # - Windows ne supporte que l'utf8 # - Linux stocke les nom directement sous la forme de bytes - ref_byte_file_names = os.listdir(os.fsencode(ref_dir)) - test_byte_file_names = os.listdir(os.fsencode(test_dir)) + ref_byte_file_names = os.listdir(os.fsencode(results_ref_dir)) + test_byte_file_names = os.listdir(os.fsencode(results_dir)) # On memorise les noms de fichiers sous forme de string pour faciliter le reporting # Tout en gardant l'association entre le nom python (utf8) et les nom en bytes @@ -258,14 +323,15 @@ def check_results(test): file_name = os.fsdecode(byte_file_name) cleaned_file_name = file_name.encode("ascii", "ignore").decode("ascii") if cleaned_file_name != file_name: - write_message( + utils.write_message( "warning : reference file name with a byte encoding (" + str(byte_file_name) - + ") used under utf8 name (" + + ") used under ascii name (" + cleaned_file_name - + ")" + + ")", + log_file=log_file, ) - warnings_number += 1 + warning_number += 1 recovery = True ref_file_names.append(cleaned_file_name) dic_ref_byte_file_names[cleaned_file_name] = byte_file_name @@ -276,29 +342,30 @@ def check_results(test): file_name = os.fsdecode(byte_file_name) cleaned_file_name = file_name.encode("ascii", "ignore").decode("ascii") if cleaned_file_name != file_name: - write_message( + utils.write_message( "warning : test file name with a byte encoding (" + str(byte_file_name) - + ") used under utf8 name (" + + ") used under ascii name (" + cleaned_file_name - + ")" + + ")", + log_file=log_file, ) - warnings_number += 1 + warning_number += 1 recovery = True test_file_names.append(cleaned_file_name) 
dic_test_byte_file_names[cleaned_file_name] = byte_file_name # Message de recuperation d'erreur si necessaire if recovery: - write_message( + utils.write_message( "\nRecovery from errors caused by byte encoding of file names in another platform", log_file=log_file, ) - portability_message = append_message( - portability_message, "recovery of type byte enncoding of file names" + recovery_message = utils.append_message( + recovery_message, "Recovery of type byte encoding of file names" ) - # On les tri pour ameliorer la statbilite du reporting inter plateformes + # On tri par nom de fichier pour ameliorer la stabilite du reporting inter plateformes ref_file_names.sort() test_file_names.sort() @@ -306,14 +373,14 @@ def check_results(test): ref_result_file_number = len(ref_file_names) test_result_file_number = len(test_file_names) if ref_result_file_number == 0: - write_message( + utils.write_message( "error : no comparison, missing reference result files", log_file=log_file, show=True, ) error_number = error_number + 1 elif ref_result_file_number != test_result_file_number: - write_message( + utils.write_message( "\nerror : number of results files (" + str(test_result_file_number) + ") should be " @@ -322,40 +389,40 @@ def check_results(test): show=True, ) error_number = error_number + 1 - # Affichage des nom des fichier supplementaires + # Affichage des noms des fichier supplementaires max_file_reported = 20 if test_result_file_number > ref_result_file_number: # Message specifique en cas de fichiers en trop - specific_message = append_message( + specific_message = utils.append_message( specific_message, "additional result files" ) - write_message( - "Additional files in " + RESULTS + " dir:", log_file=log_file + utils.write_message( + "Additional files in " + kht.RESULTS + " dir:", log_file=log_file ) file_reported = 0 for file_name in test_file_names: if file_name not in ref_file_names: if file_reported < max_file_reported: - write_message("\t" + file_name, 
log_file=log_file) + utils.write_message("\t" + file_name, log_file=log_file) else: - write_message("\t...", log_file=log_file) + utils.write_message("\t...", log_file=log_file) break file_reported += 1 elif test_result_file_number < ref_result_file_number: # Message specifique en cas de fichiers manquants - specific_message = append_message( + specific_message = utils.append_message( specific_message, "missing result files" ) - write_message( - "Missing files in " + RESULTS + " dir:", log_file=log_file + utils.write_message( + "Missing files in " + kht.RESULTS + " dir:", log_file=log_file ) file_reported = 0 for file_name in ref_file_names: if file_name not in test_file_names: if file_reported < max_file_reported: - write_message("\t" + file_name, log_file=log_file) + utils.write_message("\t" + file_name, log_file=log_file) else: - write_message("\t...", log_file=log_file) + utils.write_message("\t...", log_file=log_file) break file_reported += 1 @@ -364,28 +431,32 @@ def check_results(test): compared_files_number = compared_files_number + 1 # Path des fichier utilises pour le reporting - ref_file_path = os.path.join(ref_dir, file_name) - test_file_path = os.path.join(test_dir, file_name) + ref_file_path = os.path.join(results_ref_dir, file_name) + test_file_path = os.path.join(results_dir, file_name) # En-tete de comparaison des fichiers - write_message("\nfile " + test_file_path, log_file=log_file) + utils.write_message("\nfile " + test_file_path, log_file=log_file) - # On utilise si possible le path des fichiers en byte pour s'adapter aux contraintes de la plateforme + # On utilise si possible le path des fichiers en bytes pour s'adapter aux contraintes de la plateforme # Les erreurs seront diagnostiquees si necessaire lors de la lecture des fichiers used_ref_file_path = ref_file_path if dic_ref_byte_file_names.get(file_name) is not None: used_ref_file_path = os.path.join( - os.fsencode(ref_dir), dic_ref_byte_file_names.get(file_name) + 
os.fsencode(results_ref_dir), dic_ref_byte_file_names.get(file_name) ) used_test_file_path = test_file_path if dic_test_byte_file_names.get(file_name) is not None: used_test_file_path = os.path.join( - os.fsencode(test_dir), dic_test_byte_file_names.get(file_name) + os.fsencode(results_dir), dic_test_byte_file_names.get(file_name) ) # Lecture des fichiers - ref_file_lines = read_file_lines(used_ref_file_path, log_file=log_file) - test_file_lines = read_file_lines(used_test_file_path, log_file=log_file) + ref_file_lines = utils.read_file_lines( + used_ref_file_path, log_file=log_file + ) + test_file_lines = utils.read_file_lines( + used_test_file_path, log_file=log_file + ) if ref_file_lines is None: error_number = error_number + 1 if test_file_lines is None: @@ -393,26 +464,29 @@ def check_results(test): # Comparaison si ok if ref_file_lines is not None and test_file_lines is not None: - # Cas des fichier stdout et stderr, que l'on filtre du prefix de process id - if file_name in [STDOUT_ERROR_LOG, STDERR_ERROR_LOG]: - ref_file_lines = filter_process_id_prefix_from_lines(ref_file_lines) - test_file_lines = filter_process_id_prefix_from_lines( + # Cas des fichier stdout et stderr, que l'on filtre du prefix de process id presnet en parallele + if file_name in [kht.STDOUT_ERROR_LOG, kht.STDERR_ERROR_LOG]: + ref_file_lines = utils.filter_process_id_prefix_from_lines( + ref_file_lines + ) + test_file_lines = utils.filter_process_id_prefix_from_lines( test_file_lines ) - # Mise en forme specifique des message utilisateur (error, warning) pour les traiter des facon identique - # dans les cas des fichiers de log utilisateur et json + # Mise en forme specifique des messages utilisateurs (error, warning) pour les traiter + # de facon identique dans les cas des fichiers de log utilisateur et json contains_user_messages = False # Cas du fichier de log utilisateur - if file_name == ERR_TXT: + if file_name == kht.ERR_TXT: contains_user_messages = True # Identification des lignes 
de message ref_file_lines = strip_user_message_lines(ref_file_lines) test_file_lines = strip_user_message_lines(test_file_lines) - # Cas des fichier json (il faut passer le path en entier pour gerer certaines exceptions) - elif is_file_with_json_extension(ref_file_path): + # Cas des fichier json + elif is_file_with_json_extension(file_name): contains_user_messages = True - # Pretraitement des ligne de message pour les mettre dans le meme format que pour les fichier d'erreur + # Pretraitement des lignes de message pour les mettre dans le meme format + # que pour les fichier d'erreur ref_file_lines = strip_user_message_lines_in_json_file( ref_file_lines ) @@ -430,7 +504,7 @@ def check_results(test): ) # Comparaison des fichiers pre-traites - errors, warnings = check_file_lines( + errors, warnings, user_message_warnings = check_file_lines( ref_file_path, test_file_path, ref_file_lines, @@ -438,7 +512,8 @@ def check_results(test): log_file=log_file, ) error_number += errors - warnings_number += warnings + warning_number += warnings + user_message_warning_number += user_message_warnings # Memorisation des statistiques par extension if errors > 0: @@ -446,40 +521,41 @@ def check_results(test): error_number_per_file[file_name] = errors erroneous_ref_file_lines[file_name] = ref_file_lines erroneous_test_file_lines[file_name] = test_file_lines - if file_name == ERR_TXT: + if file_name == kht.ERR_TXT: error_number_in_err_txt += errors else: _, file_extension = os.path.splitext(file_name) error_number_per_extension[file_extension] = ( error_number_per_extension.get(file_extension, 0) + errors ) + + # Message synthetique de recuperation des warnng sur les message utilisateur si necessaire + if user_message_warning_number > 0: + recovery_message = utils.append_message( + recovery_message, "Recovery from varying patterns in user messages" + ) + # Recherche des erreurs fatales, avec tentative de recuperation - # On accepte les erreurs fatales que si on les meme en test et 
reference, + # On accepte les erreurs fatales que si on ales meme en test et reference, # et uniquement dans le cas du pattern particulier du "Batch mode failure" qui est du # a des scenario n'ayant pas pu s'excuter entierement pour des raison de portabilite fatal_error_recovery = True - STDERR_ERROR_LOG_RECOVERY_PATTERN = ( - "fatal error : Command file : Batch mode failure" - ) - RETURN_CODE_ERROR_LOG_RECOVERY_PATTERN = ( - "Wrong return code: 1 (should be 0 or 2)" - ) for file_name in test_file_names: # Cas d'une erreur fatale - if file_name in SPECIAL_ERROR_FILES: + if file_name in kht.SPECIAL_ERROR_FILES: special_error_file_error_numbers[file_name] = ( special_error_file_error_numbers[file_name] + 1 ) error_number += 1 special_error = SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[file_name].lower() - write_message( + utils.write_message( "\n" + special_error + " : found file " + file_name, log_file=log_file, ) # La tentative de recuperation des erreurs fatales echoue si on ne respecte # pas toutes les conditions necessaires - if file_name not in [STDERR_ERROR_LOG, RETURN_CODE_ERROR_LOG]: + if file_name not in [kht.STDERR_ERROR_LOG, kht.RETURN_CODE_ERROR_LOG]: fatal_error_recovery = False else: # Les fichiers doivent etre les memes @@ -491,60 +567,69 @@ def check_results(test): # Test que le fichier est reduit au pattern accepte if not fatal_error_recovery: # Lecture des lignes du fichier - test_file_path = os.path.join(test_dir, file_name) - test_file_lines = read_file_lines( + test_file_path = os.path.join(results_dir, file_name) + test_file_lines = utils.read_file_lines( test_file_path, log_file=log_file ) # Pattern dans le cas de sdterr - if file_name == STDERR_ERROR_LOG: + fatal_error_pattern = ( + "fatal error : Command file : Batch mode failure" + ) + if file_name == kht.STDERR_ERROR_LOG: if ( len(test_file_lines) == 0 - or test_file_lines[0].strip() - != STDERR_ERROR_LOG_RECOVERY_PATTERN + or test_file_lines[0].strip() != fatal_error_pattern ): 
fatal_error_recovery = False # Pattern dans le cas du code retour - if file_name == RETURN_CODE_ERROR_LOG: + return_code_error_pattern = ( + "Wrong return code: 1 (should be 0 or 2)" + ) + if file_name == kht.RETURN_CODE_ERROR_LOG: if ( len(test_file_lines) == 0 or test_file_lines[0].strip() - != RETURN_CODE_ERROR_LOG_RECOVERY_PATTERN + != return_code_error_pattern ): fatal_error_recovery = False # Message de recuperation si necessaire - if special_error_file_error_numbers[RETURN_CODE_ERROR_LOG] > 0: + if special_error_file_error_numbers[kht.RETURN_CODE_ERROR_LOG] > 0: # Cas de la recuperation if fatal_error_recovery: - error_number -= special_error_file_error_numbers[RETURN_CODE_ERROR_LOG] - error_number -= special_error_file_error_numbers[STDERR_ERROR_LOG] - special_error_file_error_numbers[RETURN_CODE_ERROR_LOG] = 0 - special_error_file_error_numbers[STDERR_ERROR_LOG] = 0 - write_message( + error_number -= special_error_file_error_numbers[ + kht.RETURN_CODE_ERROR_LOG + ] + error_number -= special_error_file_error_numbers[kht.STDERR_ERROR_LOG] + special_error_file_error_numbers[kht.RETURN_CODE_ERROR_LOG] = 0 + special_error_file_error_numbers[kht.STDERR_ERROR_LOG] = 0 + utils.write_message( "\nRecovery from fatal errors caused solely by a 'Batch mode failure' in another platform", log_file=log_file, ) - portability_message = append_message( - portability_message, "recovery of type 'Batch mode failure'" + recovery_message = utils.append_message( + recovery_message, "Recovery of type 'Batch mode failure'" ) # Ecriture des premieres lignes des fichiers d'erreur fatales ou de timeout si necessaire for file_name in test_file_names: if ( - file_name in SPECIAL_ERROR_FILES + file_name in kht.SPECIAL_ERROR_FILES and special_error_file_error_numbers[file_name] > 0 ): # Lecture des lignes du fichier - test_file_path = os.path.join(test_dir, file_name) - test_file_lines = read_file_lines(test_file_path, log_file=log_file) - write_message( + test_file_path = 
os.path.join(results_dir, file_name) + test_file_lines = utils.read_file_lines( + test_file_path, log_file=log_file + ) + utils.write_message( "\nspecial error file " + test_file_path, log_file=log_file ) max_print_lines = 10 for i, line in enumerate(test_file_lines): if i < max_print_lines: - write_message("\t" + line.rstrip(), log_file=log_file) + utils.write_message("\t" + line.rstrip(), log_file=log_file) else: - write_message("\t...", log_file=log_file) + utils.write_message("\t...", log_file=log_file) break # Il y a plusieurs tentatives de recuperation des erreurs pour des jeux de test ou des variation normales @@ -570,28 +655,25 @@ def check_results(test): # Filtrage d'un certain type de warning pour recommencer la comaraison if varying_warning_messages_in_err_txt_recovery: # Acces aux lignes des fichier - ref_file_lines = erroneous_ref_file_lines.get(ERR_TXT) - test_file_lines = erroneous_test_file_lines.get(ERR_TXT) + ref_file_lines = erroneous_ref_file_lines.get(kht.ERR_TXT) + test_file_lines = erroneous_test_file_lines.get(kht.ERR_TXT) # Filtrage des lignes selon le motif en nombre variable warning_pattern1 = "warning : Data table slice " warning_pattern2 = " : Read data table slice interrupted by user" filtered_ref_file_lines = [] + filtered_test_file_lines = [] for line in ref_file_lines: if line.find(warning_pattern1) != 0 or line.find(warning_pattern2) < 0: filtered_ref_file_lines.append(line) - filtered_test_file_lines = [] - for line in test_file_lines: - if ( - line.find(warning_pattern1) != 0 - or line.find(warning_pattern2) < 0 - ): - filtered_test_file_lines.append(line) + for line in test_file_lines: + if line.find(warning_pattern1) != 0 or line.find(warning_pattern2) < 0: + filtered_test_file_lines.append(line) - # Comparaison a nouveau des fichier, en mode non verbeux - errors, warnings = check_file_lines( - ERR_TXT, - ERR_TXT, + # Comparaison a nouveau des fichiers, en mode non verbeux + errors, warnings, user_message_warnings = 
check_file_lines( + kht.ERR_TXT, + kht.ERR_TXT, filtered_ref_file_lines, filtered_test_file_lines, ) @@ -603,24 +685,24 @@ def check_results(test): if varying_warning_messages_in_err_txt_recovery: # Messages sur la recuperation recovery_summary = ( - "Recovery from varying warning number in " + ERR_TXT + " file only" + "Recovery from varying warning number in " + kht.ERR_TXT + " file only" ) - recovery_message = recovery_summary.lower() - write_message("\n" + recovery_summary + ":", log_file=log_file) - write_message( + recovery_message = utils.append_message(recovery_message, recovery_summary) + utils.write_message("\n" + recovery_summary + ":", log_file=log_file) + utils.write_message( "\tall errors come from the warning in " - + ERR_TXT + + kht.ERR_TXT + " file only, du to varying number of active process number", log_file=log_file, ) - write_message( + utils.write_message( "\t" + str(error_number) + " errors converted to warnings", log_file=log_file, ) # On transforme les erreur en warning - warnings_number += error_number + warning_number += error_number error_number = 0 - # On reinitialise egalement les stats d'erreur pour les extensuon concernees + # On reinitialise egalement les stats d'erreur pour les extensions concernees error_number_in_err_txt = 0 # Tentative de recuperation des erreurs si la seule difference est une difference d'ordre @@ -650,22 +732,22 @@ def check_results(test): def filter_record_index_from_lines(lines): """Filtrage avance des lignes en supprimant le debut de ligne jusqu'a l'index de record""" filtered_lines = [] - warning_pattern = "warning : Data table " - record_pattern = " : Record " - for line in lines: - pos1 = line.find(warning_pattern) + record_index_pattern = [ + "warning : Data table ", + " : Record ", + " : Field ", + ] + for input_line in lines: + pos1 = utils.find_pattern_in_line(input_line, record_index_pattern) if pos1 >= 0: - pos2 = line.find(record_pattern) - if pos2 > pos1: - pos3 = line[pos2 + len(record_pattern) 
:].find(" : ") - if pos3 > 0: - line = line[pos2 + len(record_pattern) + pos3 :] - filtered_lines.append(line) + input_line = input_line[ + input_line.find(record_index_pattern[-1]) : + ] + filtered_lines.append(input_line) return filtered_lines # Parcours des fichiers concerne pour reanalyser leur lignes specifiques aux erreurs user_message_error_number = 0 - user_message_warning_number = 0 recovered_error_number = 0 recovered_warning_number = 0 for file_name in erroneous_file_names: @@ -679,21 +761,20 @@ def filter_record_index_from_lines(lines): ref_file_lines = extract_striped_lines(ref_file_lines) # Comparaison de la partie des fichiers pre-traites relative aux messages utilisateur # La comparaison se fait de facon muette, sans passer par le fichier de log - errors, warnings = check_file_lines( + errors, warnings, user_message_warnings = check_file_lines( file_name, file_name, ref_file_lines, test_file_lines, ) user_message_error_number += errors - user_message_warning_number += warnings - # Comparaison filtree les messages utilisateurs jusq'aux index des records, + # Comparaison filtree les messages utilisateurs jusqu'aux index des records, # qui peuvent varier d'une execution a l'autre, puis les avoir trier test_file_lines = filter_record_index_from_lines(test_file_lines) ref_file_lines = filter_record_index_from_lines(ref_file_lines) test_file_lines.sort() ref_file_lines.sort() - errors, warnings = check_file_lines( + errors, warnings, user_message_warnings = check_file_lines( file_name, file_name, ref_file_lines, @@ -715,22 +796,22 @@ def filter_record_index_from_lines(lines): if unsorted_user_messages_recovery: # Messages sur la recuperation recovery_summary = "Recovery from unsorted user messages" - recovery_message = recovery_summary.lower() - write_message("\n" + recovery_summary + ":", log_file=log_file) - write_message( + recovery_message = utils.append_message(recovery_message, recovery_summary) + utils.write_message("\n" + recovery_summary + ":", 
log_file=log_file) + utils.write_message( "\tall errors come from the users messages in " - + ERR_TXT + + kht.ERR_TXT + " and in json reports, with a different order and possibly different record indexes", log_file=log_file, ) - write_message( + utils.write_message( "\t" + str(error_number) + " errors converted to warnings", log_file=log_file, ) # On transforme les erreur en warning - warnings_number += error_number + warning_number += error_number error_number = 0 - # On reinitialise egalement les stats d'erreur pour les extensuon concernees + # On reinitialise egalement les stats d'erreur pour les extensions concernees error_number_per_extension[".khj"] = 0 error_number_per_extension[".khcj"] = 0 error_number_in_err_txt = 0 @@ -740,26 +821,28 @@ def filter_record_index_from_lines(lines): if error_number > 0: roc_curve_recovery = True - # On verifie d'abord qu'il y a un warning correspondant dans le log utiliusateur + # On verifie d'abord qu'il y a un warning correspondant dans le log utilisateur if roc_curve_recovery: # On doit potentiellement relire ce fichier, car ce type de message correspond - # a un motif resilient qui ne genere pas d'erreur - err_file_lines = erroneous_test_file_lines.get(ERR_TXT) + # a un motif USER qui ne genere pas d'erreur + err_file_lines = erroneous_test_file_lines.get(kht.ERR_TXT) if err_file_lines is None: - err_file_path = os.path.join(test_dir, ERR_TXT) - err_file_lines = read_file_lines(err_file_path) + err_file_path = os.path.join(results_dir, kht.ERR_TXT) + err_file_lines = utils.read_file_lines(err_file_path) if err_file_lines is None: roc_curve_recovery = False else: searched_warning = ( - "warning : Evaluation Selective Naive Bayes : Not enough memory to compute the exact AUC:" - " estimation made on a sub-sample of size" + "warning : Evaluation Selective Naive Bayes : " + + "Not enough memory to compute the exact AUC:" + + " estimation made on a sub-sample of size" ) roc_curve_recovery = ( - find_in_lines(err_file_lines, 
searched_warning) >= 0 + utils.find_pattern_in_lines(err_file_lines, [searched_warning]) >= 0 ) # Comptage des erreurs pour les fichier d'evaluation au format xls + error_number_in_json_report_files = 0 if roc_curve_recovery: error_number_in_evaluation_xls = 0 for file_name in erroneous_file_names: @@ -768,7 +851,7 @@ def filter_record_index_from_lines(lines): error_number_in_evaluation_xls += error_number_per_file.get( file_name ) - # On test si les nombre d'erreurs se rappartis dans le fichier de log utilisateur, + # On teste si les nombre d'erreurs se rappartis dans le fichier de log utilisateur, # les rapports json et les fichiers d'evalauation au format xls error_number_in_json_report_files = error_number_per_extension.get( ".khj", 0 @@ -781,19 +864,20 @@ def filter_record_index_from_lines(lines): ) # Analyse specifique des rapports json en excluant la partie lie a la courbe de ROC + roc_curve_error_number = 0 + roc_curve_warning_number = 0 if roc_curve_recovery: for file_name in erroneous_file_names: _, file_extension = os.path.splitext(file_name) if file_extension == ".khj": # Parcours des fichiers concerne pour reanalyser leur lignes specifiques aux erreurs - roc_curve_error_number = 0 - roc_curve_warning_number = 0 test_file_lines = erroneous_test_file_lines.get(file_name) ref_file_lines = erroneous_ref_file_lines.get(file_name) assert test_file_lines is not None assert ref_file_lines is not None - # Extraction des qui correspond au calcul de l'AUC et des courbes de ROC + # Extraction des champs qui correspondent au calcul de l'AUC et des courbes de ROC for key in ["auc", "values"]: + # Selection d'un champ selon sa valeur selected_test_file_lines = ( extract_key_matching_lines_in_json_file( test_file_lines, key @@ -804,7 +888,7 @@ def filter_record_index_from_lines(lines): ) # Comparaison de la partie des fichiers pre-traites relative aux messages utilisateur # La comparaison se fait de facon muette, sans passer par le ficheir de log - errors, warnings = 
check_file_lines( + errors, warnings, user_message_warnings = check_file_lines( file_name, file_name, selected_test_file_lines, @@ -812,6 +896,7 @@ def filter_record_index_from_lines(lines): ) roc_curve_error_number += errors roc_curve_warning_number += warnings + # Le recouvrement est possible si le nombre d'erreurs trouves specifiquement pour le calcul # de l'AUC et des courbes de ROC correspond au nombre d'eerur total assert roc_curve_error_number <= error_number_in_json_report_files @@ -823,44 +908,127 @@ def filter_record_index_from_lines(lines): if roc_curve_recovery: # Messages sur la recuperation recovery_summary = "Recovery from AUC rough estimate" - recovery_message = recovery_summary.lower() - write_message("\n" + recovery_summary + ":", log_file=log_file) - write_message( + recovery_message = utils.append_message(recovery_message, recovery_summary) + utils.write_message("\n" + recovery_summary + ":", log_file=log_file) + utils.write_message( "\tall errors in json report file come from AUC rough estimate", log_file=log_file, ) - write_message( + utils.write_message( "\t" + str(roc_curve_error_number) + " errors in json report files converted to warnings", log_file=log_file, ) - write_message( + utils.write_message( "\t" + str(error_number - roc_curve_error_number) + " errors in evaluation xls files ignored and converted to warnings", log_file=log_file, ) # On transforme les erreur en warning - warnings_number += error_number + warning_number += error_number error_number = 0 - # On reinitialise egalement les stats d'erreur pour les extensuon concernees + # On reinitialise egalement les stats d'erreur pour les extensions concernees error_number_per_extension[".khj"] = 0 error_number_per_extension[".xls"] = 0 + # Tentative de recuperation des erreurs dans le cas tres particulier des caracteres accentues sous Windows, + # ou on observe un comportement local a la machine de developement sous Windows different de celui + # observe sur la machine Windows cloud, 
pourl aquelle certains fichiers sources avec caracteres + # accentues n'ont pas pu etre dezippes correctement et conduisent a des erreurs de lecture + # Dans ce cas uniquement, on tente de se comparer a une version linux de reference, pour laquelle + # on a le meme probleme et on observe le meme comportement + # Pas de recuperation d'erreur avancee si un contexte est force + if error_number > 0 and forced_context is None: + zip_encoding_recovery = True + + # On verifie d'abord que les conditions sont reunies + linux_context = None + if zip_encoding_recovery: + # On doit etre sous Windows + zip_encoding_recovery = results.get_context_platform_type() == "Windows" + + # Le fichier err.txt doit comporter une erreur de lecture + if zip_encoding_recovery: + read_error_pattern = ["error : File ./", " : Unable to open file ("] + err_file_path = os.path.join(results_dir, kht.ERR_TXT) + err_file_lines = utils.read_file_lines(err_file_path) + zip_encoding_recovery = err_file_lines is not None + # On doit trouver le pattern d'erreur + if zip_encoding_recovery: + line_index = utils.find_pattern_in_lines( + err_file_lines, read_error_pattern + ) + zip_encoding_recovery = line_index >= 0 + # La ligne concernee doit avoir un probleme de caracrete accentue + if zip_encoding_recovery: + erronneous_line = err_file_lines[line_index] + ascii_erronneous_line = erronneous_line.encode( + "ascii", "ignore" + ).decode("ascii") + zip_encoding_recovery = ascii_erronneous_line != erronneous_line + + # Il doit y avoir un des resultats de references specifiques pour Linux + if zip_encoding_recovery: + assert forced_context is None + windows_results_ref_dir, _ = results.get_results_ref_dir(test_dir) + linux_context = [results.get_context_computing_type(), "Linux"] + linux_results_ref_dir, _ = results.get_results_ref_dir( + test_dir, forced_context=linux_context + ) + zip_encoding_recovery = windows_results_ref_dir != linux_results_ref_dir + + # Comparaison des resultats de test avec ceux de 
reference sous linux + if zip_encoding_recovery: + results_ref_dir = os.path.join(test_dir, linux_results_ref_dir) + assert linux_context is not None + # Comparaison "pragmatique" entre les fichiers des repertoires de test et de reference + # en forcant le contexte, sans tentative de recuperation d'erreur avancee + zip_encoding_recovery = check_results( + test_dir, forced_context=linux_context + ) + + # Recuperation effective des erreurs si possible + if zip_encoding_recovery: + # Messages sur la recuperation + recovery_summary = ( + "Recovery from poor handling of accented file names by zip" + ) + recovery_message = utils.append_message(recovery_message, recovery_summary) + utils.write_message("\n" + recovery_summary + ":", log_file=log_file) + utils.write_message( + "\tcomparison for Windows test results is performed using Linux reference results", + log_file=log_file, + ) + utils.write_message( + "\t" + str(error_number) + " errors converted to warnings", + log_file=log_file, + ) + # On transforme les erreur en warning + warning_number += error_number + error_number = 0 + # On reinitialise egalement les stats d'erreur + for extension in error_number_per_extension: + error_number_per_extension[extension] = 0 + for file_name in kht.SPECIAL_ERROR_FILES: + special_error_file_error_numbers[file_name] = 0 + # Message dedies aux fichiers speciaux special_error_file_message = "" - for file_name in SPECIAL_ERROR_FILES: + for file_name in kht.SPECIAL_ERROR_FILES: if special_error_file_error_numbers[file_name] > 0: special_error_file_message = SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[file_name] break # Ecriture d'un resume synthetique - write_message("\n" + SUMMARY_TITLE, log_file=log_file) - write_message(str(warnings_number) + " " + SUMMARY_WARNING_KEY, log_file=log_file) - write_message(str(error_number) + " " + SUMMARY_ERROR_KEY, log_file=log_file) + utils.write_message("\n" + SUMMARY_TITLE, log_file=log_file) + utils.write_message( + str(warning_number) + " " + 
SUMMARY_WARNING_KEY, log_file=log_file + ) + utils.write_message(str(error_number) + " " + SUMMARY_ERROR_KEY, log_file=log_file) if special_error_file_message != "": - write_message(special_error_file_message, log_file=log_file) + utils.write_message(special_error_file_message, log_file=log_file) if error_number > 0: # Tri des extensions file_extensions = [] @@ -869,40 +1037,48 @@ def filter_record_index_from_lines(lines): file_extensions.sort() # Message specifique si erreurs dans un seul type de fichier if error_number_in_err_txt > 0: - extension_message += ERR_TXT + extension_message = utils.append_message(extension_message, kht.ERR_TXT) if error_number_in_err_txt == error_number: - specific_message = append_message( - specific_message, "errors only in " + ERR_TXT + specific_message = utils.append_message( + specific_message, "errors only in " + kht.ERR_TXT ) if len(file_extensions) > 0: for file_extension in file_extensions: - extension_message += file_extension + extension_message = utils.append_message( + extension_message, file_extension + ) if error_number_per_extension[file_extension] == error_number: - specific_message = append_message( + specific_message = utils.append_message( specific_message, "errors only in " + file_extension + " files" ) # Ecriture des messages additionnels if extension_message != "": - write_message(SUMMARY_FILE_TYPES_KEY + extension_message, log_file=log_file) + utils.write_message( + SUMMARY_FILE_TYPES_KEY + extension_message, log_file=log_file + ) if specific_message != "": - write_message(SUMMARY_NOTE_KEY + specific_message, log_file=log_file) + utils.write_message(SUMMARY_NOTE_KEY + specific_message, log_file=log_file) # Ecriture d'un message additionnel lie a la portabilite - portability_message = append_message(portability_message, recovery_message) + portability_message = utils.append_message(portability_message, recovery_message) if portability_message != "": - write_message(SUMMARY_PORTABILITY_KEY + portability_message, 
log_file=log_file) + utils.write_message( + SUMMARY_PORTABILITY_KEY + portability_message, log_file=log_file + ) - # Affichage d'un message de fin sur la console - final_message = "--Comparison done : " - final_message += str(compared_files_number) + " files(s) compared, " - final_message += str(error_number) + " error(s), " - final_message += str(warnings_number) + " warning(s)" - if special_error_file_message != "": - final_message += ", " + special_error_file_message - if recovery_message != "": - final_message += ", Recovery from errors" - print(final_message) - print("log writed in " + log_file_path + "\n") + # Affichage d'un message de fin sur la console si le contexte n'est pas force + if forced_context is None: + final_message = "--Comparison done : " + final_message += str(compared_files_number) + " files(s) compared, " + final_message += str(error_number) + " error(s), " + final_message += str(warning_number) + " warning(s)" + if special_error_file_message != "": + final_message += ", " + special_error_file_message + if recovery_message != "": + final_message += ", Recovery from errors" + print(final_message) + print(" log file: " + log_file_path + "\n") + return error_number == 0 def is_file_with_json_extension(file_path): @@ -911,20 +1087,13 @@ def is_file_with_json_extension(file_path): file_name = os.path.basename(file_path) _, file_extension = os.path.splitext(file_name) - # Test si fichier json + # Extension json de base json_file_extensions = [".json", ".khj", ".khvj", ".khcj", ".kdicj"] + # On rajoute les extension en les suffisant par "bad" pour permettre + # de gerer des tests de fichier corrects avec une extension erronnee + for extension in json_file_extensions.copy(): + json_file_extensions.append(extension + "bad") is_json_file = file_extension in json_file_extensions - # Cas particulier des fichier .bad qui sont en fait des fichiers json - # On test ici l'existence d'un fichier ne differenent que par l'extenion - # Attention: test adhoc en 
dur pour quelques jeu de test de LearningTesrt - # (ex: LearningTest\TestKhiops\Advanced\AllResultsApiMode) - if file_extension == ".bad": - if ( - os.path.isfile(file_path.replace(".bad", ".khj")) - or os.path.isfile(file_path.replace(".bad", ".khj")) - or os.path.isfile(file_path.replace(".bad", ".kdicj")) - ): - is_json_file = True return is_json_file @@ -961,18 +1130,18 @@ def strip_user_message_lines_in_json_file(lines): ce type d'analyse et de diagnostic """ - def clean_message_line(line): + def clean_message(message): """Nettoyage d'une ligne de message, entre '"' et potentiellement suivi d'une ',' Cela ne gere pas tous les cas d'encodage json, mais cela est suffisant la plupart du temps """ - cleaned_line = line.strip() + cleaned_message = message.strip() # Cas d'un milieur de section, avec ',' en fin de ligne - if cleaned_line[-1] == ",": - cleaned_line = cleaned_line[1:-2] + if cleaned_message[-1] == ",": + cleaned_message = cleaned_message[1:-2] # Cas d'une fin de section else: - cleaned_line = cleaned_line[1:-1] - return cleaned_line + cleaned_message = cleaned_message[1:-1] + return cleaned_message # Recherche des ligne du fichier dans les sections "messages" in_message_section = False @@ -985,7 +1154,7 @@ def clean_message_line(line): in_message_section = line.strip() != "]" # Nettoyage des lignes dans la section message if in_message_section: - line = clean_message_line(line) + line = clean_message(line) # Cas hors de la section des message else: # Detection du debut de section @@ -1082,7 +1251,7 @@ def is_specific_line_pair_sequential(line1, line2): i += 1 # Message si lignes filtrees if filtered_line_number > 0: - write_message( + utils.write_message( "Specific sequential messages (100th...): " + str(filtered_line_number) + " lines filtered", @@ -1091,50 +1260,71 @@ def is_specific_line_pair_sequential(line1, line2): return result_lines -def filter_process_id_prefix_from_lines(lines): - """retourne les lignes sans l'eventuel prefixe de process id, 
du type '[0] '""" - output_lines = [] - for line in lines: - # En parallelle, une ligne vide peut contenir le numero du process entre crochets - pos_end = -1 - is_process_id = len(line) > 0 and line[0] == "[" - if is_process_id: - pos_end = line.find("]") - is_process_id = pos_end > 0 and line[1:pos_end].isdigit() - if is_process_id: - line = line[pos_end + 1 :].lstrip() - output_lines.append(line) - return output_lines +""" Liste de motifs pour lesquels ont admet une variation normale s'il font parti de la comparaison + dans une paire de lignes. Dans ce cas, on ignore la comparaison +""" +RESILIENCE_USER_MESSAGE_PATTERNS = [ + [ + "system resources are not sufficient to run the task (need ", + " of additional memory)", + ], + [ + "error : ", + "Database basic stats ", + "Too much memory necessary to store the values of the target variable ", + " (more than ", + ], + [ + "warning : Evaluation Selective Naive Bayes : Not enough memory to compute the exact AUC: " + + "estimation made on a sub-sample of size " + ], + [ + "warning : Database ", + ": Record ", + " : Single instance ", + "uses too much memory (more than ", + " after reading ", + " secondary records ", + ], + ["error : ", " : Not enough memory "], +] def check_file_lines( - ref_file_path: str, test_file_path, ref_file_lines, test_file_lines, log_file=None + ref_file_path: str, + test_file_path: str, + ref_file_lines, + test_file_lines, + log_file=None, ): - """Comparaison d'un fichier de test et d'un fihcier de reference + """ + Comparaison d'un fichier de test et d'un fichier de reference Parametres: - - ref_file_path: chemin du fichier de refence + - ref_file_path: chemin du fichier de reference - test_file_path: chemin du fichier de test - ref_file_lines: liste des lignes du fichier de reference - test_file_lines: liste des lignes du fichier de test - - log file: fichier de log ouvert dans le quel des messages sont ecrits (seulement si log_file est sepcifie) + - log file: fichier de log ouvert dans le 
quel des messages sont ecrits (seulement si log_file est specifie) Retourne - - error: nombre d'erreurs - - warning: nombre de warning + - errors: nombre d'erreurs + - warnings: nombre de warning + - user_message_warnings: nombre de warning lie a une tolerance sur la variation des messages utilisateurs + (ex: "too much memory") - Les nom des fichiers en parametre permettent de specialiser les comparaisons selon le type de fichier + Les noms des fichiers en parametre permettent de specialiser les comparaisons selon le type de fichier Les listes de lignes en entree permettent d'eviter de relire un fichier dont on connait le nom et dont on a deja lu les lignes. Cela permet par exemple de reutiliser les methodes de comparaison apres avoir filtre le fichier de sous-parties que l'on ne souhaite pas comparer. - Compare les fichiers ligne par ligne, cellule par cellule (separateur '\t'), et token par token + Compare les fichiers ligne par ligne, champ par champ (separateur '\t'), et token par token dans le cas des fichier json ou dictionnaire On a avec des tolerances selon le type de fichier. 
Pour les valeurs numeriques, une difference relative de 0.00001 est toleree - ecrit les difference dans le fichier log_file et affiche le nb d'erreur dans le terminal - - warning : 2 cellules contiennent des valeurs numeriques avec une difference relative toleree - - error : les cellules sont differentes + - warning : 2 champs contiennent des valeurs numeriques avec une difference relative toleree + - error : les champs sont differents """ def filter_time(value): @@ -1154,38 +1344,6 @@ def filter_time(value): filtered_value = value return filtered_value - def filter_secondary_record(value): - # Supression d'un pattern de nombre de records secondaires - - pos_start1 = value.find(" uses too much memory (more than ") - if pos_start1 == -1: - return value - pos_start2 = value.find(" after reading ") - if pos_start2 == -1: - return value - pos_start3 = value.find(" secondary records ") - if pos_start3 == -1: - return value - if pos_start1 >= 0 and pos_start2 > pos_start1 and pos_start3 > pos_start2: - filtered_value = value[:pos_start1] + " uses too much memory..." 
- else: - filtered_value = value - return filtered_value - - def filter_secondary_table_stats(value): - # Supression d'un pattern de nombre de records secondaires - pos_start1 = value.find("Table ") - if pos_start1 == -1: - return value - pos_start2 = value.find(" Records: ") - if pos_start2 == -1: - return value - if pos_start1 >= 0 and pos_start2 > pos_start1: - filtered_value = value[:pos_start2] + " Records: " - else: - filtered_value = value - return filtered_value - def filter_khiops_temp_dir(value): # Nettoyage de la partie temp directory d'une valeur pos_khiops_temp_dir = value.find("~Khiops") @@ -1220,20 +1378,20 @@ def filter_khiops_temp_dir(value): end_value = "" # Filtrage de l'eventuel nom de fichier en remplacant les chiffres par le pattern XXX # pour se rendre independant des eventuels index de fichiers temporaires - i = 0 - while i < len(filename): - c = filename[i] + pos = 0 + while pos < len(filename): + c = filename[pos] if c != "_" and not c.isdigit(): filtered_filename += c else: filtered_filename += "XXX" - while i < len(filename): - c = filename[i] + while pos < len(filename): + c = filename[pos] if c != "_" and not c.isdigit(): filtered_filename += c break - i += 1 - i += 1 + pos += 1 + pos += 1 filtered_value = ( begin_value + " KHIOPS_TMP_DIR/" + filtered_filename + end_value ) @@ -1253,41 +1411,38 @@ def filter_khiops_temp_dir(value): _, file_extension = os.path.splitext(file_name) # test si fichier de temps - is_time_file = file_name == TIME_LOG + is_time_file = file_name == kht.TIME_LOG # test si fichier histogramme is_histogram_file = "histogram" in file_name and file_extension == ".log" # test si fichier d'erreur - is_error_file = file_name == ERR_TXT + is_error_file = file_name == kht.ERR_TXT # test si fichier de benchmark is_benchmark_file = file_name == "benchmark.xls" - # test si fichier dictionnaire - is_kdic_file = file_extension == ".kdic" - # Test si fichier json is_json_file = is_file_with_json_extension(file_name) # 
initialisation des nombres d'erreurs et de warning - error = 0 - warning = 0 - numerical_warning = 0 # Lie a une tolerance dee difference de valeur numerique - resilience_warning = ( + errors = 0 + warnings = 0 + numerical_warnings = 0 # Lie a une tolerance dee difference de valeur numerique + user_message_warnings = ( 0 # Lie a un pattern de message avec tolerance (ex: "Not enough memory") ) # Pas de controle si fichier de temps if is_time_file: - write_message("OK", log_file=log_file) - return error, warning + utils.write_message("OK", log_file=log_file) + return errors, warnings, user_message_warnings # Comparaison des nombres de lignes file_ref_line_number = len(ref_file_lines) file_test_line_number = len(test_file_lines) if file_test_line_number != file_ref_line_number: - write_message( + utils.write_message( "test file has " + str(file_test_line_number) + " lines and reference file has " @@ -1295,21 +1450,20 @@ def filter_khiops_temp_dir(value): + " lines", log_file=log_file, ) - error = error + 1 + errors = errors + 1 # comparaison ligne a ligne max_threshold = 0 max_print_error = 10 max_field_length = 100 skip_benchmark_lines = False - filter_secondary_record_detected = False line_number = min(file_ref_line_number, file_test_line_number) for index in range(line_number): line = index + 1 line_ref = ref_file_lines[index].rstrip() line_test = test_file_lines[index].rstrip() - # cas special des fichiers de benchmark: + # Cas special des fichiers de benchmark: # on saute les blocs de ligne dont le role est le reporting de temps de calcul # ("Time" dans le premier champ d'entete) if is_benchmark_file and line_ref.find("Time") != -1: @@ -1326,16 +1480,26 @@ def filter_khiops_temp_dir(value): if line_ref == line_test: continue - # cas special du fichier d'erreur: on tronque les lignes qui font du reporting de temps de calcul (" time:") + # Cas special du fichier d'erreur: on tronque les lignes qui font du reporting de temps de calcul (" time:") if ( is_error_file - 
and line_ref.find(" time:") != -1 - and line_test.find(" time:") != -1 + and line_ref.find(" time: ") != -1 + and line_test.find(" time: ") != -1 ): line_ref = filter_time(line_ref) line_test = filter_time(line_test) - # cas special du fichier d'erreur: + # Cas special du fichier d'erreur: on tronque les lignes de stats sur les records des tables + if is_error_file: + record_stats_pattern = [" Table ", " Records: "] + if ( + utils.find_pattern_in_line(line_ref, record_stats_pattern) == 0 + and utils.find_pattern_in_line(line_test, record_stats_pattern) == 0 + ): + line_ref = line_ref[: line_ref.find(record_stats_pattern[-1])] + line_test = line_test[: line_test.find(record_stats_pattern[-1])] + + # Cas special du fichier d'erreur: # on saute les lignes qui font du reporting de temps de calcul ("interrupted ") if ( is_error_file @@ -1344,7 +1508,7 @@ def filter_khiops_temp_dir(value): ): continue - # cas special du fichier d'erreur, pour le message "(Operation canceled)" qui n'est pas case sensitive + # Cas special du fichier d'erreur, pour le message "(Operation canceled)" qui n'est pas case sensitive if is_error_file: if line_ref.find("(Operation canceled)") != -1: line_ref = line_ref.replace( @@ -1355,8 +1519,8 @@ def filter_khiops_temp_dir(value): "(Operation canceled)", "(operation canceled)" ) - # cas special du fichier d'erreur en coclustering: - # on saute les lignes d'ecritire de rapport intermediaire qui different par le temps + # Cas special du fichier d'erreur en coclustering: + # on saute les lignes d'ecriture de rapport intermediaire qui different par le temps # ("Write intermediate coclustering report") if ( is_error_file @@ -1365,7 +1529,7 @@ def filter_khiops_temp_dir(value): ): continue - # cas special du fichier d'histogramme: + # Cas special du fichier d'histogramme: # on tronque les lignes qui font du reporting de temps de calcul (" time\t") if ( is_histogram_file @@ -1374,30 +1538,26 @@ def filter_khiops_temp_dir(value): ): line_ref = 
line_ref[: line_ref.find("time")] line_test = line_test[: line_test.find("time")] - # cas special du fichier d'histogramme: - # on ignore le champ tronque les lignes qui font du reporting de temps de calcul (" time\t") + # Cas special du fichier d'histogramme: + # on ignore les ligne avec le numero de version if ( is_histogram_file and line_ref.find("Version") != -1 and line_test.find("Version") != -1 ): - line_ref = "" - line_test = "" - - # cas special du caractere # en tete de premiere ligne de fichier (identifiant de version d'application) - if line == 1 and line_ref.find("#") == 0 and line_test.find("#") == 0: continue - # idem pour des informations de licences d'un fichier d'erreur + # Cas special du caractere # en tete de premiere ligne de fichier + # pour l'identifiant de version d'application (ex: #Khiops 10.2.0) + tool_version_pattern = ["#", " "] if ( - is_error_file - and line == 2 - and line_ref.find("Khiops ") == 0 - and line_test.find("Khiops ") == 0 + line == 1 + and utils.find_pattern_in_line(line_ref, tool_version_pattern) == 0 + and utils.find_pattern_in_line(line_test, tool_version_pattern) == 0 ): continue - # cas special du champ version des fichiers json (identifiant de version d'application) + # Cas special du champ version des fichiers json (identifiant de version d'application) if ( is_json_file and line_ref.find('"version": ') >= 0 @@ -1405,6 +1565,32 @@ def filter_khiops_temp_dir(value): ): continue + # Traitement des patterns toleres pour la comparaison + if is_error_file or is_json_file: + resilience_found = False + for pattern in RESILIENCE_USER_MESSAGE_PATTERNS: + if ( + utils.find_pattern_in_line(line_ref, pattern) != -1 + and utils.find_pattern_in_line(line_test, pattern) != -1 + ): + # On renvoie un warning, en indiquant qu'il s'agit d'un warning de resilience + warnings += 1 + user_message_warnings += 1 + # Ecriture d'un warning + utils.write_message( + "warning : line " + + str(line) + + " " + + line_test.strip() + + " -> " + + 
line_ref.strip(), + log_file=log_file, + ) + resilience_found = True + break + if resilience_found: + continue + # Sinon, on analyse les champs line_fields_ref = line_ref.split("\t") line_fields_test = line_test.split("\t") @@ -1413,8 +1599,8 @@ def filter_khiops_temp_dir(value): field_number_ref = len(line_fields_ref) field_number_test = len(line_fields_test) if field_number_ref != field_number_test: - if error < max_print_error: - write_message( + if errors < max_print_error: + utils.write_message( "test file (line " + str(line) + ") has " @@ -1424,9 +1610,9 @@ def filter_khiops_temp_dir(value): + " columns", log_file=log_file, ) - elif error == max_print_error: - write_message("...", log_file=log_file) - error = error + 1 + elif errors == max_print_error: + utils.write_message("...", log_file=log_file) + errors = errors + 1 # comparaison des champs field_number_length = min(field_number_ref, field_number_test) @@ -1434,7 +1620,7 @@ def filter_khiops_temp_dir(value): field_ref = line_fields_ref[i] field_test = line_fields_test[i] - # parcours des lignes cellule par cellule + # parcours des lignes champ par champs # cas special du fichier d'erreur ou json: on tronque les chemins vers les repertoires temporaires de Khiops if ( (is_error_file or is_json_file) @@ -1444,21 +1630,8 @@ def filter_khiops_temp_dir(value): field_ref = filter_khiops_temp_dir(field_ref) field_test = filter_khiops_temp_dir(field_test) - # cas special du fichier d'erreur ou khj - # on tronque le compte des lignes avec des warning sur le nombre de records secondaires - if is_error_file or is_json_file: - filter_secondary_record_detected = True - field_ref = filter_secondary_record(field_ref) - field_test = filter_secondary_record(field_test) - - # Cas particulier du nombre de record secondaire raporte dans le fichier d'erreur, - # si des enregistrement secondaire ont ete detectes - if is_error_file: - field_ref = filter_secondary_table_stats(field_ref) - field_test = 
filter_secondary_table_stats(field_test) - - # cas general de comparaison de cellules - [eval_res, threshold_res] = check_cell(field_ref, field_test) + # cas general de comparaison de champs + [eval_res, threshold_res] = check_field(field_ref, field_test) # truncature des champs affiches dans les messages d'erreur if len(field_test) > max_field_length: @@ -1467,11 +1640,11 @@ def filter_khiops_temp_dir(value): field_ref = field_ref[0:max_field_length] + "..." # messages d'erreur if eval_res == 0: - if error < max_print_error or threshold_res > max_threshold: - write_message( - "l" + if errors < max_print_error or threshold_res > max_threshold: + utils.write_message( + "line " + str(line) - + " c" + + " field " + str(i + 1) + " " + field_test @@ -1479,61 +1652,62 @@ def filter_khiops_temp_dir(value): + field_ref, log_file=log_file, ) - elif error == max_print_error: - write_message("...", log_file=log_file) - error += 1 + elif errors == max_print_error: + utils.write_message("...", log_file=log_file) + errors += 1 elif eval_res == 2: - warning += 1 - if threshold_res == 0: - resilience_warning += 1 - else: - numerical_warning += 1 + warnings += 1 + if threshold_res > 0: + numerical_warnings += 1 max_threshold = max(threshold_res, max_threshold) - if warning > 0: - if numerical_warning > 0: - write_message( - str(numerical_warning) + " warning(s) (epsilon difference)", + if warnings > 0: + if numerical_warnings > 0: + utils.write_message( + str(numerical_warnings) + " warning(s) (epsilon difference)", log_file=log_file, ) - if resilience_warning > 0: - write_message( - str(resilience_warning) - + " warning(s) (resilience to specific message patterns)", + if user_message_warnings > 0: + utils.write_message( + str(user_message_warnings) + + " warning(s) (resilience to specific user message patterns)", log_file=log_file, ) - if error == 0: - write_message("OK", log_file=log_file) - if error > 0: - message = str(error) + " error(s)" + if errors == 0: + 
utils.write_message("OK", log_file=log_file) + if errors > 0: + message = str(errors) + " error(s)" if max_threshold > 0: message += " (max relative difference: " + str(max_threshold) + ")" - write_message(message, log_file=log_file) - - return error, warning + utils.write_message(message, log_file=log_file) + return errors, warnings, user_message_warnings -def split_cell(cell): +def split_field(field_value): + """Decoupage d'un champ (champ d'une ligne avec separateur tabulation) + en un ensemble de tokens elementaire pour le parsing d'un fichier json ou kdic + Permet ensuite de comparer chaque valeur de token, pour avoir une tolerance par rapport aux + mirco-variations des valeurs numeriques""" # Pour gerer les double-quotes a l'interieur des strings, pour les format json et kdic - cell = cell.replace('\\"', "'") - cell = cell.replace('""', "'") - substrings = token_parser.findall(cell) - return substrings + field_value = field_value.replace('\\"', "'") + field_value = field_value.replace('""', "'") + sub_fields = TOKEN_PARSER.findall(field_value) + return sub_fields -# return true if time format def is_time(val): - # si format time (?h:mm:ss), on ignore en renvoyant OK - return time_parser.match(val.strip()) + """Indique si une valeur est de type temps hh:mm:ss.ms""" + return TIME_PARSER.match(val.strip()) def check_value(val1, val2): - # Comparaison de deux valeurs numeriques - # renvoie deux valeur: - # - result: - # - 1 si les cellules sont identiques - # - 2 si les la difference relative est toleree - # - 0 si les cellules sont differentes - # - threshold: difference relative si result = 2 + """Comparaison de deux valeurs numeriques + Renvoie deux valeur: + - result: + - 1 si les valeurs sont identiques + - 2 si les la difference relative est toleree + - 0 si les valeurs sont differentes + - threshold: difference relative si result = 2 + """ # Ok si valeurs egales if val1 == val2: return [1, 0] @@ -1553,88 +1727,67 @@ def check_value(val1, val2): return [0, 
0] -# Liste de motifs pour lesquels ont admet une variation normale s'il font parti de la comparaison -# dans une paire de cellules -# Dans ce cas, on ignore la comparaison -RESILIENCE_PATTERNS = [ - "system resources", # Gestion des ressources systemes - "Unable to access file", # Acces a un fichier - "Unable to open file", # Ouverture d'un fichier - " : Not enough memory to ", # Manque de memoire - " : Not enough memory for ", # Manque de memoire (variante) - "Too much memory necessary to store the values", # Manque de memoire (variante) -] - - -def check_cell(cell1, cell2): - # comparaison de deux cellules - # pour les valeurs numeriques, une diffence relative de 0.00001 est toleree - # renvoie deux valeur: - # - result: - # - 1 si les cellules sont identiques - # - 2 si les la difference relative est toleree (warning) - # - 0 si les cellules sont differentes (error) - # - threshold: difference relative liee au cas erreur ou warning - - if cell1 == cell2: +def check_field(field1, field2): + """ " Comparaison de deux champs + Pour les valeurs numeriques, une diffence relative de 0.00001 est toleree + Renvoie deux valeur: + - result: + - 1 si les champs sont identiques + - 2 si les la difference relative est toleree (warning) + - 0 si les champs sont differents (error) + - threshold: difference relative liee au cas erreur ou warning + """ + if field1 == field2: return [1, 0] - # si les deux cellules sont des time, on renvoie OK pour ignorer la comparaison - if is_time(cell1) and is_time(cell2): + # si les deux champs sont des time, on renvoie OK pour ignorer la comparaison + if is_time(field1) and is_time(field2): return [1, 0] - # Traitement des patterns toleres pour la comparaison - for pattern in RESILIENCE_PATTERNS: - if cell1.find(pattern) != -1 and cell2.find(pattern) != -1: - # On renvoie un warning, mais avec 0 indique la tolerance - return [2, 0] - # uniformisation entre windows et linux pour les chemins de fichier # on va remplacer les \ par des / - 
string1 = cell1.replace("\\", "/") - string2 = cell2.replace("\\", "/") - + string1 = field1.replace("\\", "/") + string2 = field2.replace("\\", "/") # Tolerance temporaire pour le passage au format hdfs - # hdfs_value1 = cell1.replace("./", "") + # hdfs_value1 = field1.replace("./", "") # hdfs_value1 = hdfs_value1.replace(".\\/..\\/", "") # hdfs_value1 = hdfs_value1.replace("..\\/", "") # hdfs_value1 = hdfs_value1.replace(".\\/", "") - # hdfs_value2 = cell2.replace("./", "") + # hdfs_value2 = field2.replace("./", "") # hdfs_value2 = hdfs_value2.replace(".\\/..\\/", "") # hdfs_value2 = hdfs_value2.replace("..\\/", "") # hdfs_value2 = hdfs_value2.replace(".\\/", "") # if hdfs_value1 == hdfs_value2: # return [1, 0] - if string1 == string2: return [1, 0] - # sinon c'est peut etre un pbm d'arrondi + # sinon c'est peut etre un probleme d'arrondi # on accepte les differences relatives faibles - if numeric_parser.match(cell1) and numeric_parser.match(cell2): - [eval_result, threshold_result] = check_value(cell1, cell2) + if NUMERIC_PARSER.match(field1) and NUMERIC_PARSER.match(field2): + [eval_result, threshold_result] = check_value(field1, field2) return [eval_result, threshold_result] else: # on arrive pas a le convertir en float, ce n'est pas un nombre - # on decoupe chaque cellule sous la forme d'un ensemble de sous-chaines qui sont soit + # on decoupe chaque champ sous la forme d'un ensemble de sous-chaines qui sont soit # des libelles, soit des float - substrings1 = split_cell(cell1) - substrings2 = split_cell(cell2) + sub_fields1 = split_field(field1) + sub_fields2 = split_field(field2) # nombre de sous-chaines differentes: il y a erreur - if len(substrings1) != len(substrings2): + if len(sub_fields1) != len(sub_fields2): return [0, 0] # comparaison pas a pas else: i = 0 - length = len(substrings1) + length = len(sub_fields1) warnings = 0 errors = 0 max_warning_threshold = 0 max_error_threshold = 0 while i < length: [eval_result, threshold_result] = check_value( - 
substrings1[i], substrings2[i] + sub_fields1[i], sub_fields2[i] ) # Traitement des erreurs if eval_result == 0: @@ -1651,3 +1804,46 @@ def check_cell(cell1, cell2): return [2, max_warning_threshold] else: return [1, 0] + + +def initialize_parsers(): + """Initialisation de parsers sont compile une fois pour toutes + Retourne les parsers de token, de numeric et de time + """ + # Delimiters pour les fichiers json et kdic + delimiters = [ + "\\,", + "\\{", + "\\}", + "\\[", + "\\]", + "\\:", + "\\(", + "\\)", + "\\<", + "\\>", + "\\=", + ] + numeric_pattern = "-?[0-9]+\\.?[0-9]*(?:[Ee]-?[0-9]+)?" + string_pattern = ( + '"[^"]*"' # Sans les double-quotes dans les strings (dur a parser...) + ) + time_pattern = "\\d{1,2}:\\d{2}:\\d{2}\\.?\\d*" + other_tokens = "[\\w]+" + tokens = time_pattern + "|" + numeric_pattern + "|" + string_pattern + for delimiter in delimiters: + tokens += "|" + delimiter + tokens += "|" + other_tokens + token_parser = re.compile(tokens) + numeric_parser = re.compile(numeric_pattern) + time_parser = re.compile(time_pattern) + return token_parser, numeric_parser, time_parser + + +# Parsers en variables globales, compiles une seule fois au chargement du module +# - le parser de tokens permet d'analyser de facon detaillee le contenu d'un +# fichier json ou dictionnaire (.kdic) en le decomposant en une suite de tokens +# separateur, valeur numerique opu categorielle entre double-quotes. 
+# - le parser de numerique est specialise pour les valeurs numeriques au format scientifique +# - le parser de time est specialise pour le format time hh:mm:ss.ms +TOKEN_PARSER, NUMERIC_PARSER, TIME_PARSER = initialize_parsers() diff --git a/test/LearningTestTool/py/_kht_constants.py b/test/LearningTestTool/py/_kht_constants.py new file mode 100644 index 000000000..e4543f227 --- /dev/null +++ b/test/LearningTestTool/py/_kht_constants.py @@ -0,0 +1,145 @@ +""" +Constantes permettant la gestion de la structure des repertoires de LearningTest +et l'analyse des resultats par repertoire de test +""" + +""" Repertoire racine de l'arborescence de de l'outil de test """ +LEARNING_TEST_TOOL = "LearningTestTool" + +""" Repertoire racine de l'arborescence de test """ +LEARNING_TEST = "LearningTest" + +""" Repertoires des resultats de test et de reference """ +RESULTS = "results" +RESULTS_REF = "results.ref" + +""" Fichiers se trouvant d'un repertoire de test """ +TEST_PRM = "test.prm" +COMPARISON_RESULTS_LOG = "comparisonResults.log" + +""" Fichiers se trouvant d'un repertoire de resultats """ +ERR_TXT = "err.txt" +TIME_LOG = "time.log" + +""" Fichiers speciaux, par priorite decroissante """ +PROCESS_TIMEOUT_ERROR_LOG = "process_timeout_error.log" +RETURN_CODE_ERROR_LOG = "return_code_error.log" +STDOUT_ERROR_LOG = "stdout_error.log" +STDERR_ERROR_LOG = "stderr_error.log" +SPECIAL_ERROR_FILES = [ + PROCESS_TIMEOUT_ERROR_LOG, + RETURN_CODE_ERROR_LOG, + STDOUT_ERROR_LOG, + STDERR_ERROR_LOG, +] + +""" +Liste des outils de Khiops Core +A chaque nom d'outil Khiops correspond un nom d'exe et un sous-repertoire de LearningTest associe. +On peut egalement specifier si l'outil est lancable en parallel ou non. + +Les listes et dictionnaires ci-dessous permettent d'ajouter des outils si besoin. 
+""" + +""" Liste des noms des outils """ +KHIOPS = "Khiops" +COCLUSTERING = "Coclustering" +KNI = "KNI" +TOOL_NAMES = [KHIOPS, COCLUSTERING, KNI] + +""" Dictionnaire des noms d'executable avec le nom d'outil en cle """ +TOOL_EXE_NAMES = { + KHIOPS: "MODL", + COCLUSTERING: "MODL_Coclustering", + KNI: "KNITransfer", +} +assert set(TOOL_EXE_NAMES) == set(TOOL_NAMES), "Exe names must be defined for each tool" + +""" Dictionnaire des noms des sous-repertoires de LearningTest avec le nom d'outil en cle """ +TOOL_DIR_NAMES = { + KHIOPS: "TestKhiops", + COCLUSTERING: "TestCoclustering", + KNI: "TestKNI", +} +assert set(TOOL_DIR_NAMES) == set(TOOL_NAMES), "Tool dir must be defined for each tool" + +# Alias pour des nom speciaux +ALIAS_CHECK = "check" # Pour declencher une comparaions entre resultats de test et de reference, plutot qu'un test +ALIAS_R = "r" # Designe le repertoire des binaires des outils en release dans l'environnement de developpement +ALIAS_D = "d" # Designe le repertoire des binaires des outils en debug dans l'environnement de developpement + +""" Dictionnaire inverse des noms d'outil avec les nom d'exe en cle """ +TOOL_NAMES_PER_EXE_NAME = dict((v, k) for k, v in TOOL_EXE_NAMES.items()) +assert set(TOOL_NAMES_PER_EXE_NAME.values()) == set(TOOL_NAMES) + +""" Dictionnaire inverse des noms d'outil avec les nom des sous-repertoires en cle """ +TOOL_NAMES_PER_DIR_NAME = dict((v, k) for k, v in TOOL_DIR_NAMES.items()) +assert set(TOOL_NAMES_PER_DIR_NAME.values()) == set(TOOL_NAMES) + +""" Liste des outils de Khiops qui tournent en parallele (les seuls que l'on peut lancer avec mpiexec) """ +PARALLEL_TOOL_NAMES = [KHIOPS] +assert set(PARALLEL_TOOL_NAMES) <= set(TOOL_NAMES), ( + "Parallel tools " + str(PARALLEL_TOOL_NAMES) + " must be a subset of Khiops tools" +) + +""" Liste des repertoires de LearningTest contenant les jeux de donnees """ +DATASET_COLLECTION_NAMES = [ + "datasets", + "MTdatasets", + "TextDatasets", + "UnusedDatasets", +] + +""" +Typologie des 
resultats de reference +""" + +""" Type de resultats de reference """ +COMPUTING = "computing" +PLATFORM = "platform" +RESULTS_REF_TYPES = [COMPUTING, PLATFORM] + +""" Valeurs par type de resultats de refences """ +RESULTS_REF_TYPE_VALUES = { + COMPUTING: ["parallel", "sequential"], + PLATFORM: ["Darwin", "Linux", "Windows"], +} +assert set(RESULTS_REF_TYPE_VALUES) == set( + RESULTS_REF_TYPES +), "Values must be defined for each type or reference results" + +# Caracteres separateurs utilises dans l'analyse des type de repertoire de reference +AND = "-" +OR = "_" + + +""" +Variables d'environnement influant le comportement des outils Khiops +""" +# Variables documentees pour l'utilisateur +KHIOPS_PREPARATION_TRACE_MODE = "KhiopsPreparationTraceMode" +KHIOPS_PARALLEL_TRACE = "KhiopsParallelTrace" +KHIOPS_MEM_STATS_LOG_FILE_NAME = "KhiopsMemStatsLogFileName" +KHIOPS_MEM_STATS_LOG_FREQUENCY = "KhiopsMemStatsLogFrequency" +KHIOPS_MEM_STATS_LOG_TO_COLLECT = "KhiopsMemStatsLogToCollect" +KHIOPS_IO_TRACE_MODE = "KhiopsIOTraceMode" + +# Variables non documentee documentees, utilisee systematiquyement pour les tests +KHIOPS_EXPERT_MODE = "KhiopsExpertMode" +KHIOPS_CRASH_TEST_MODE = "KhiopsCrashTestMode" +KHIOPS_HARD_MEMORY_LIMIT_MODE = "KhiopsHardMemoryLimitMode" + +""" +Gestion du timeout pour un jeu de test +""" +# Temps minimum avant un timeout +MIN_TIMEOUT = 300 + +# Ratio de temps par rapport au temps des resultats de reference avant le timeout +TIMEOUT_RATIO = 5 + +# Temps au dela duquel on ne tente pas de relancer un test +MAX_TIMEOUT = 3600 + +# Nombre maximum de lancement de test dans le cas d'un depassement du timeout +MAX_RUN_NUMBER = 3 diff --git a/test/LearningTestTool/py/_kht_families.py b/test/LearningTestTool/py/_kht_families.py new file mode 100644 index 000000000..ff4b5a74e --- /dev/null +++ b/test/LearningTestTool/py/_kht_families.py @@ -0,0 +1,116 @@ +import os + +import _kht_constants as kht +import _kht_utils as utils + +""" +Definition des familles de 
test +Chaque famille de test est une liste de suite de test par outil +""" + + +def check_family(family): + """Test d'existence d'une famille, avec sortie en erreur fatale si non trouvee""" + if family not in TEST_FAMILIES: + utils.fatal_error( + "Family " + + family + + " should be in available families " + + utils.list_to_label(TEST_FAMILIES) + ) + + +# Liste des familles, de la plus simple a la plus complete +BASIC = "basic" # Famille elementaire, tres rapide a tester +FULL = "full" # Famille correspondant a tous les tests de non regression (environ une heure a tester) +FULL_NO_KNI = "full-no-kni" # Idem, sans KNI +COMPLETE = "complete" # Famille complete, tres lourde a tester (environ une journee) +ALL = "all" # Tous les repertoire de suite exhaustivement: ne pas utiliser pour les test, mais pour leur gestion +TEST_FAMILIES = [BASIC, FULL, FULL_NO_KNI, COMPLETE, ALL] +assert len(set(TEST_FAMILIES)) == len(TEST_FAMILIES), ( + "Families " + str(TEST_FAMILIES) + " must not contain duplicates" +) + +# Fammile par defaut +DEFAULT_TEST_FAMILY = FULL +assert DEFAULT_TEST_FAMILY in TEST_FAMILIES + +# Cas de la version V11, qui inclus des suites de test additionnelles +KHIOPS_V11 = True + +# Ensuite des suites de test par famille et par outils +# Pour le cas particlier le la famille ALL, les suite ne sont pas specifiee: +# il fautparcours les sous repertoire des tool dir exhaustivement +FAMILY_TEST_SUITES = {} + +# Famille basique +FAMILY_TEST_SUITES[BASIC, kht.KHIOPS] = ["Standard"] +FAMILY_TEST_SUITES[BASIC, kht.COCLUSTERING] = ["Standard"] +FAMILY_TEST_SUITES[BASIC, kht.KNI] = ["Standard"] + +# Famille full +FAMILY_TEST_SUITES[FULL, kht.KHIOPS] = [ + "Standard", + "SideEffects", + "Rules", + "MissingValues", + "Advanced", + "Bugs", + "BugsMultiTables", + "MultipleTargets", + "MultiTables", + "DeployCoclustering", + "SparseData", + "SparseModeling", + "ParallelTask", + "NewPriorV9", + "DTClassification", + "VariableConstruction", + "NewV10", + "CrashTests", + 
"SmallInstability", + "CharacterEncoding", +] +FAMILY_TEST_SUITES[FULL, kht.COCLUSTERING] = [ + "Standard", + "Bugs", + "NewPriorV9", + "SmallInstability", +] +FAMILY_TEST_SUITES[FULL, kht.KNI] = ["Standard", "MultiTables", "SmallInstability"] + +# Nouvelle suites specifique a la version 11 +if KHIOPS_V11: + FAMILY_TEST_SUITES[FULL, kht.KHIOPS] = FAMILY_TEST_SUITES[FULL, kht.KHIOPS] + [ + "KIInterpretation", + "Histograms", + "HistogramsLimits", + "TextVariables", + ] + +# Famille full sans KNI +FAMILY_TEST_SUITES[FULL_NO_KNI, kht.KHIOPS] = FAMILY_TEST_SUITES[ + FULL, kht.KHIOPS +].copy() +FAMILY_TEST_SUITES[FULL_NO_KNI, kht.COCLUSTERING] = FAMILY_TEST_SUITES[ + FULL, kht.COCLUSTERING +].copy() +FAMILY_TEST_SUITES[FULL_NO_KNI, kht.KNI] = [] + +# Famille Complete +FAMILY_TEST_SUITES[COMPLETE, kht.KHIOPS] = FAMILY_TEST_SUITES[ + FULL, kht.KHIOPS +].copy() + [ + "Classification", + "MTClassification", + "Regression", + "ChallengeAutoML", +] +FAMILY_TEST_SUITES[COMPLETE, kht.COCLUSTERING] = FAMILY_TEST_SUITES[ + FULL, kht.COCLUSTERING +].copy() +FAMILY_TEST_SUITES[COMPLETE, kht.KNI] = FAMILY_TEST_SUITES[FULL, kht.KNI].copy() +if KHIOPS_V11: + FAMILY_TEST_SUITES[COMPLETE, kht.KHIOPS] = FAMILY_TEST_SUITES[ + COMPLETE, kht.KHIOPS + ] + ["TextClassification"] diff --git a/test/LearningTestTool/py/_kht_one_shot_instructions.py b/test/LearningTestTool/py/_kht_one_shot_instructions.py new file mode 100644 index 000000000..69bbfe99a --- /dev/null +++ b/test/LearningTestTool/py/_kht_one_shot_instructions.py @@ -0,0 +1,559 @@ +import os.path +import sys +import stat + +import _kht_constants as kht +import _kht_utils as utils +import _kht_check_results as check +import _kht_results_management as results +import _kht_standard_instructions as standard_instructions + +""" +Instruction pour des usages uniques +Peu documente et developpe rapidment sous forme de prototype +Exemples: +- manipulation a faire une fois sur l'ensemble des repertoire de test +- modification des scenario 
selon evoilution de l'ergonomie de Khiops core +- evaluation de l'impcat sur les performances d'une evolution des algorithme de Khiops core +- ... +""" + +# Imports de pykhiops a effectuer au cas par cas dans chaque methode, car ralentissant trop les scripts +# import khiops as pk + + +def instruction_make_ref_time(test_dir): + # Copie du fichier de temps vers le repertoire des resultats de reference + results_dir = os.path.join(test_dir, kht.RESULTS) + results_ref_dir, _ = results.get_results_ref_dir(test_dir, show=True) + if results_ref_dir is not None: + if not os.path.isdir(results_ref_dir): + os.mkdir(results_ref_dir) + if os.path.isdir(results_ref_dir): + file_path = os.path.join(results_ref_dir, kht.TIME_LOG) + if os.path.isfile(file_path): + utils.remove_file(file_path) + if os.path.isdir(results_dir) and os.path.isdir(results_ref_dir): + utils.copy_file( + os.path.join(results_dir, kht.TIME_LOG), + os.path.join(results_ref_dir, kht.TIME_LOG), + ) + + +def instruction_make_ref_err(test_dir): + results_dir = os.path.join(test_dir, kht.RESULTS) + results_ref_dir, _ = results.get_results_ref_dir(test_dir, show=True) + if results_ref_dir is not None: + if not os.path.isdir(results_ref_dir): + os.mkdir(results_ref_dir) + if os.path.isdir(results_ref_dir): + file_path = os.path.join(results_ref_dir, kht.ERR_TXT) + utils.remove_file(file_path) + if os.path.isdir(results_dir) and os.path.isdir(results_ref_dir): + utils.copy_file( + os.path.join(results_dir, kht.ERR_TXT), + os.path.join(results_ref_dir, kht.ERR_TXT), + ) + + +def instruction_bench(test_dir): + # Construction de scenario de benchmark + def extract_info(line): + start, end = line.split(" ", 1) + field, comment = end.split("//", 1) + return field + + # extraction des renseignement du fichier de parametrage + class_file_name = "" + class_name = "" + database_name = "" + target_attribute_name = "" + prm_file_path = os.path.join(test_dir, kht.TEST_PRM) + prm_file = open(prm_file_path, "r", 
errors="ignore") + for s in prm_file: + if s.find("class_file_name") >= 0 and class_file_name == "": + class_file_name = extract_info(s) + if s.find("class_name") >= 0 and class_name == "": + class_name = extract_info(s) + if s.find("TrainDatabase.database_name") >= 0 and database_name == "": + database_name = extract_info(s) + if ( + s.find("AnalysisSpec.target_attribute_name") >= 0 + and target_attribute_name == "" + ): + target_attribute_name = extract_info(s) + prm_file.close() + # affichage des lignes de fichier de bencgmark correspondant + print("") + print("BenchmarkSpecs.InsertItemAfter // Insert after") + print("// -> Benchmark") + print("class_file_name " + class_file_name + " // Dictionary file") + print("class_name " + class_name + " // Dictionary") + print("target_attribute_name " + target_attribute_name + " // Target variable") + print("database_name " + database_name + " // Database file") + print("Exit // OK") + print("// <- Benchmark") + + +def instruction_check_fnb(test_dir): + from khiops import core as pk + + def to_s(value): + return str("{:.4g}".format(value)) + + def print_stats( + result_file_name, report, criterion, new_value, ref_value, maximize + ): + fstats.write( + "\t" + tool_dir_name + "\t" + suite_dir_name + "\t" + test_dir_name + "\t" + ) + fstats.write(result_file_name + "\t" + report + "\t" + criterion + "\t") + fstats.write(to_s(new_value) + "\t" + to_s(ref_value) + "\t") + diff = new_value - ref_value + if maximize: + fstats.write(to_s(diff)) + alert = diff < 0 + else: + fstats.write(to_s(-diff)) + alert = diff > 0 + if alert and abs(diff) > 0.01 * (abs(ref_value) + abs(new_value)) / 2: + fstats.write("\tALERT") + fstats.write("\n") + + def print_error(message): + print( + "\t" + + tool_dir_name + + "\t" + + suite_dir_name + + "\t" + + test_dir_name + + "\terror\t" + + message + ) + + results_dir = os.path.join(test_dir, kht.RESULTS) + results_ref_dir, _ = results.get_results_ref_dir(test_dir, show=True) + if results_ref_dir is 
None: + return + test_dir_name = utils.test_dir_name(test_dir) + suite_dir_name = utils.suite_dir_name(test_dir) + tool_dir_name = utils.tool_dir_name(test_dir) + + # Analyse du log de comparaison + ( + error_number, + warning_number, + summary_infos, + files_infos, + ) = check.analyse_comparison_log(test_dir) + + # On verifie que les resultats hors SNB sont correct (ex: preparation) + preparation_ok = True + if error_number >= 0: + for file_name in files_infos: + if "Preparation" in file_name and ".xls" in file_name: + preparation_ok = preparation_ok and files_infos[file_name] == "OK" + + # Creation d'un fichier de collecte des stats + fstats = None + home_dir = utils.get_home_dir(test_dir) + stats_file_path = os.path.join(home_dir, tool_dir_name, "stats.FNB.log") + if os.path.isfile(stats_file_path): + fstats = open(stats_file_path, "a", errors="ignore") + else: + fstats = open(stats_file_path, "w", errors="ignore") + fstats.write( + "Tool\tRoot\tDir\tFile\tReport\tCriterion\tValue\tRef value\tDiff\n" + ) + + if results_ref_dir is not None and os.path.isdir(results_ref_dir): + for file_name in os.listdir(results_ref_dir): + ref_file_path = os.path.join(results_ref_dir, file_name) + test_file_path = os.path.join(results_dir, file_name) + + ##### + if not os.path.isfile(test_file_path): + print_error("Missing file " + test_file_path) + continue + + # Comparaison du fichier d'erreur + if file_name == kht.ERR_TXT: + if not standard_instructions.file_compare( + ref_file_path, test_file_path, skip_patterns=["time"] + ): + print_error(file_name + " are different") + # Comparaison si preparation + elif "PreparationReport" in file_name: + if not standard_instructions.file_compare( + ref_file_path, test_file_path, skip_patterns=["#Khiops "] + ): + print_error(file_name + " are different") + elif ".khj" in file_name: + # Lecture du fichier de resultats json + try: + ref_report = pk.AnalysisResults() + ref_report.read_khiops_json_file(ref_file_path) + test_report = 
pk.AnalysisResults() + test_report.read_khiops_json_file(test_file_path) + except Exception as e: + print_error(file_name + "\tparsing alert: " + str(e)) + continue + # Analyse des resultats de modelisation + if ref_report.modeling_report is not None: + if test_report.modeling_report is None: + print_error(file_name + "\tmissing modeling report") + else: + ref_snb_predictor = ref_report.modeling_report.get_predictor( + "Selective Naive Bayes" + ) + test_snb_predictor = test_report.modeling_report.get_predictor( + "Selective Naive Bayes" + ) + if ref_snb_predictor is not None: + if test_snb_predictor is None: + print_error( + file_name + + "\tmissing SNB predictor in modeling report" + ) + else: + print_stats( + file_name, + test_report.modeling_report.report_type, + "Sel. vars", + test_snb_predictor.variables, + ref_snb_predictor.variables, + False, + ) + # Analyse des resultats d'evaluation + ref_evaluation_reports = list() + test_evaluation_reports = list() + ref_evaluation_reports.append(ref_report.train_evaluation_report) + ref_evaluation_reports.append(ref_report.test_evaluation_report) + ref_evaluation_reports.append(ref_report.evaluation_report) + test_evaluation_reports.append(test_report.train_evaluation_report) + test_evaluation_reports.append(test_report.test_evaluation_report) + test_evaluation_reports.append(test_report.evaluation_report) + for i in range(len(ref_evaluation_reports)): + ref_evaluation_report = ref_evaluation_reports[i] + test_evaluation_report = test_evaluation_reports[i] + if ref_evaluation_report is not None: + if test_evaluation_report is None: + print_error( + file_name + + "\tmissing " + + ref_evaluation_report.report_type + + " " + + ref_evaluation_report.evaluation_type + + " report" + ) + else: + ref_snb_performance = ( + ref_evaluation_report.get_predictor_performance( + "Selective Naive Bayes" + ) + ) + test_snb_performance = ( + test_evaluation_report.get_predictor_performance( + "Selective Naive Bayes" + ) + ) + if 
ref_snb_performance is not None: + if test_snb_performance is None: + print_error( + file_name + + "\tmissing SNB performance in " + + ref_evaluation_report.report_type + + " " + + ref_evaluation_report.evaluation_type + + " report" + ) + else: + if test_snb_performance.type == "Classifier": + print_stats( + file_name, + ref_evaluation_report.report_type + + " " + + ref_evaluation_report.evaluation_type, + "accuracy", + test_snb_performance.accuracy, + ref_snb_performance.accuracy, + True, + ) + print_stats( + file_name, + ref_evaluation_report.report_type + + " " + + ref_evaluation_report.evaluation_type, + "compression", + test_snb_performance.compression, + ref_snb_performance.compression, + True, + ) + print_stats( + file_name, + ref_evaluation_report.report_type + + " " + + ref_evaluation_report.evaluation_type, + "auc", + test_snb_performance.auc, + ref_snb_performance.auc, + True, + ) + if test_snb_performance.type == "Regressor": + print_stats( + file_name, + ref_evaluation_report.report_type + + " " + + ref_evaluation_report.evaluation_type, + "rmse", + test_snb_performance.rmse, + ref_snb_performance.rmse, + False, + ) + print_stats( + file_name, + ref_evaluation_report.report_type + + " " + + ref_evaluation_report.evaluation_type, + "mae", + test_snb_performance.mae, + ref_snb_performance.mae, + False, + ) + print_stats( + file_name, + ref_evaluation_report.report_type + + " " + + ref_evaluation_report.evaluation_type, + "nlpd", + test_snb_performance.nlpd, + ref_snb_performance.nlpd, + False, + ) + # Ecriture des stats + fstats.close() + + +def instruction_work(test_dir): + results_dir = os.path.join(test_dir, kht.RESULTS) + results_ref_dir, _ = results.get_results_ref_dir(test_dir, show=True) + if results_ref_dir is None: + return + test_dir_name = utils.test_dir_name(test_dir) + suite_dir_name = utils.suite_dir_name(test_dir) + tool_dir_name = utils.tool_dir_name(test_dir) + + # Transformation du fichier .prm + transform_prm = False + if 
transform_prm: + file_path = os.path.join(test_dir, kht.TEST_PRM) + lines = utils.read_file_lines(file_path) + try: + with open(file_path, "w", errors="ignore") as the_file: + for line in lines: + if line.find("EpsilonBinNumber") >= 0: + continue + if line.find("OutlierManagementHeuristic") >= 0: + continue + if line.find("OptimalAlgorithm") >= 0: + continue + if line.find("EpsilonBinWidth") >= 0: + continue + if line.find("MaxIntervalNumber") >= 0: + continue + if line.find("HistogramCriterion") >= 0: + continue + if line.find("MaxHierarchyLevel") >= 0: + continue + the_file.write(line) + except Exception as e: + print("BUG: " + file_path + " : " + str(e)) + + # Parcours du repertoire de reference + compare_histograms = True + if compare_histograms: + print("COMPARE " + test_dir) + indicators = [ + "Null cost", + "Reference null cost", + "Cost", + "Level", + "Partition cost", + ] + if os.path.isdir(results_ref_dir): + for file_name in os.listdir(results_ref_dir): + ref_file_path = os.path.join(results_ref_dir, file_name) + test_file_path = os.path.join(results_dir, file_name) + if not os.path.isfile(test_file_path): + print("Missing ref file: " + file_name) + elif "istogram.log" in file_name: + ref_lines = utils.read_file_lines(ref_file_path) + test_lines = utils.read_file_lines(test_file_path) + ref_indicators = {} + test_indicators = {} + ref_histogram = [] + test_histogram = [] + # Analyse des resultats de references + for line in ref_lines: + # Collecte des indicateurs + for indicator in indicators: + if len(line) < 70 and indicator in line: + fields = line[:-1].split("\t") + try: + ref_indicators[indicator] = float( + fields[len(fields) - 1] + ) + except Exception as e: + print( + " " + + file_name + + ": Ref conversion error: " + + line[:-1] + + " " + + str(e) + ) + # Collectes des lignes de l'histogramme + if ( + len(ref_histogram) > 0 + or "Lower bound\tUpper bound\tFrequency" in line + ): + ref_histogram.append(line) + # Analyse des resultats de test + 
for line in test_lines: + # Collecte des indicateurs + for indicator in indicators: + if len(line) < 70 and indicator in line: + fields = line[:-1].split("\t") + try: + test_indicators[indicator] = float( + fields[len(fields) - 1] + ) + except Exception as e: + print( + " " + + file_name + + ": Test conversion error: " + + line[:-1] + + " " + + str(e) + ) + # Collectes des lignes de l'histogramme + if ( + len(test_histogram) > 0 + or "Lower bound\tUpper bound\tFrequency" in line + ): + test_histogram.append(line) + # Comparaison des resultats + for indicator in indicators: + ref_value = ref_indicators[indicator] + test_value = test_indicators[indicator] + if ( + abs(ref_value - test_value) + > abs(ref_value + test_value) / 100000 + ): + print( + " " + + file_name + + ": Difference in " + + indicator + + ": " + + str(ref_value) + + " vs " + + str(test_value) + ) + if len(ref_histogram) != len(test_histogram): + print( + " " + + file_name + + ": Difference in interval number: " + + str(len(ref_histogram) - 1) + + " vs " + + str(len(test_histogram) - 1) + ) + else: + for i in range(len(ref_histogram)): + ref_line = ref_histogram[i] + test_line = test_histogram[i] + ref_line_fields = ref_line.split("\t") + test_line_fields = test_line.split("\t") + # Comparaison des 9 permiers champs + compare_ok = True + for f in range(8): + compare_ok = ( + compare_ok + and ref_line_fields[f] == test_line_fields[f] + ) + if not compare_ok: + print( + " " + + file_name + + ": Difference in interval " + + str(i) + + " field " + + str(f + 1) + + ": \n\t" + + ref_line + + "\t" + + test_line + ) + break + + +def instruction_template(test_dir): + results_dir = os.path.join(test_dir, kht.RESULTS) + results_ref_dir, _ = results.get_results_ref_dir(test_dir, show=True) + if results_ref_dir is None: + return + test_dir_name = utils.test_dir_name(test_dir) + suite_dir_name = utils.suite_dir_name(test_dir) + tool_dir_name = utils.tool_dir_name(test_dir) + + +""" +Enregistrement des instructions 
+""" + + +def register_one_shot_instructions(): + """ + Enregistrement des instructions a usage unique + Retourne un dictionnaire d'instructions + """ + + # Gestion de l'ensemble des instructions dans un dictionnaire contenant pour chaque identifiant d'instruction + # une paire (instruction, libelle) + available_instructions = {} + + # Enregistrement des instructions + standard_instructions.register_instruction( + available_instructions, + "makereftime", + instruction_make_ref_time, + "copy time file to reference results dir", + ) + standard_instructions.register_instruction( + available_instructions, + "makereferr", + instruction_make_ref_err, + "copy err file to reference results dir", + ) + standard_instructions.register_instruction( + available_instructions, + "bench", + instruction_bench, + "build bench parameter file", + ) + standard_instructions.register_instruction( + available_instructions, + "checkfnb", + instruction_check_fnb, + "check fnb results (deprecated)", + ) + standard_instructions.register_instruction( + available_instructions, + "work", + instruction_work, + "last work instruction (temporary and uncommented)", + ) + return available_instructions diff --git a/test/LearningTest/cmd/python/test_dir_management.py b/test/LearningTestTool/py/_kht_results_management.py similarity index 67% rename from test/LearningTest/cmd/python/test_dir_management.py rename to test/LearningTestTool/py/_kht_results_management.py index a949a499c..ae92d1194 100644 --- a/test/LearningTest/cmd/python/test_dir_management.py +++ b/test/LearningTestTool/py/_kht_results_management.py @@ -1,53 +1,22 @@ import os import platform +import _kht_constants as kht +import _kht_utils as utils """ -Constantes permettant la gestion de la structure des repertoires de LearningTest -et l'analyse des resultats par repertorie de test -""" - -# Repertoire racine de l'arborescence de test -LEARNING_TEST = "LearningTest" - -# Repertoires des resultats de test et de reference -RESULTS = 
"results" -RESULTS_REF = "results.ref" - -# Fichiers se trouvant d'un repertoire de test -TEST_PRM = "test.prm" -COMPARISON_RESULTS_LOG = "comparisonResults.log" - -# Fichiers se trouvant d'un repertoire de resultats -ERR_TXT = "err.txt" -TIME_LOG = "time.log" - -# Fichiers speciaux, par priorite decroissante -PROCESS_TIMEOUT_ERROR_LOG = "process_timeout_error.log" -RETURN_CODE_ERROR_LOG = "return_code_error.log" -STDOUT_ERROR_LOG = "stdout_error.log" -STDERR_ERROR_LOG = "stderr_error.log" -SPECIAL_ERROR_FILES = [ - PROCESS_TIMEOUT_ERROR_LOG, - RETURN_CODE_ERROR_LOG, - STDOUT_ERROR_LOG, - STDERR_ERROR_LOG, -] - -""" -Gestion de la typologie des resultats de test de reference, selon les axes suivants -- computing - - parallel: si la variable d'environnement KhiopsMPIProcessNumber est definie +Gestion de la typologie des resultats de test de reference, selon les axes suivants definis + dans (RESULTS_REF_TYPES) +- COMPUTING + - parallel: si la variable process_number a une valeur strictement plus grande que 1 - sequential -- platform +- PLATFORM - fourni par la fonction python platform.system() - - plus os.name pour detecter le cas WSL (plateforme=Windows et os=posix) - - peut etre forcee par la variable d'environnement KhiopsComparisonPlatform + - peut etre forcee par la variable forced_platform - valeurs possibles - Darwin (Mac) - Linux - Windows - - WSL On peut memoriser des variantes de resultats de reference selon leur type si necessaire Dans ce cas, la typlogie est indiquee en suffix du nom de repertoire 'results.ref' @@ -76,19 +45,14 @@ - [results.ref, results.ref-Parallel, results.ref-Parallel-Darwin_Linux] """ -# Type de resultats de reference -COMPUTING = "computing" -PLATFORM = "platform" -RESULTS_REF_TYPES = [COMPUTING, PLATFORM] +"""Variable globale de gestion du contexte des resultsta de reference""" -# Valeurs par type de resultats de refences -RESULTS_REF_TYPE_VALUES = {} -RESULTS_REF_TYPE_VALUES[COMPUTING] = ["parallel", "sequential"] 
-RESULTS_REF_TYPE_VALUES[PLATFORM] = ["Darwin", "Linux", "Windows", "WSL"] +# Nombre de process utilisex +process_number = 1 -# Caracteres separateurs utilises dans l'analyse des type de repertoire de reference -AND = "-" -OR = "_" +# Memorisation d'une plateforme force pour la comparaison entre resultstats de test de de reference +# Par defaut, on utilise la platforme courante +forced_platform = None def check_all_type_values(): @@ -96,8 +60,8 @@ def check_all_type_values(): all_values_list = [] all_values_dic = {} # Collecte de toutes les valeurs pour verifier leur validite et leur unicite - for results_ref_types in RESULTS_REF_TYPES: - results_ref_types_values = RESULTS_REF_TYPE_VALUES[results_ref_types] + for results_ref_types in kht.RESULTS_REF_TYPES: + results_ref_types_values = kht.RESULTS_REF_TYPE_VALUES[results_ref_types] for value in results_ref_types_values: # Une valeur ne doit contenu que des caracteres alphabetiques assert value.isalpha(), ( @@ -128,7 +92,7 @@ def check_all_type_values(): return True -# Verification une seule fois de la sepcification correcte des resultats de reference +# Verification une seule fois de la specification correcte des resultats de reference assert check_all_type_values(), "Invalid specification of reference results dirs" @@ -138,97 +102,135 @@ def get_current_results_ref_context(log_file=None, show=False): Une trace est ecrite dans un fichier de log et affichees sur la console si besoin """ return [ - get_context_computing_type(log_file, show), - get_context_platform_type(log_file, show), + get_context_computing_type(log_file=log_file, show=show), + get_context_platform_type(log_file=log_file, show=show), ] def get_context_computing_type(log_file=None, show=False): """Retourne le type de computing courant - Base sur la variable d'environnement KhiopsMPIProcessNumber + Base sur la variable process_number Une trace est ecrite dans un fichier de log et affichees sur la console si besoin """ - khiops_mpi_process_number = 
os.getenv("KhiopsMPIProcessNumber") - if khiops_mpi_process_number is None: + if process_number is None or process_number == 1: computing_type = "sequential" else: computing_type = "parallel" - assert computing_type in RESULTS_REF_TYPE_VALUES[COMPUTING], ( - COMPUTING + assert computing_type in kht.RESULTS_REF_TYPE_VALUES[kht.COMPUTING], ( + kht.COMPUTING + " type (" + computing_type + ") should be in " - + str(RESULTS_REF_TYPE_VALUES[COMPUTING]) + + str(kht.RESULTS_REF_TYPE_VALUES[kht.COMPUTING]) ) - # Affichhe d'une trace - if log_file is not None or show: - message = COMPUTING + " type: " + computing_type - if khiops_mpi_process_number is not None: - message += " (process number: " + str(khiops_mpi_process_number) + ")" - if log_file is not None: - log_file.write(message + "\n") - if show: - print(message) + # Affichage d'une trace + message = kht.COMPUTING + " type: " + computing_type + if process_number > 1: + message += " (process number: " + str(process_number) + ")" + utils.write_message(message, log_file=log_file, show=show) return computing_type def get_context_platform_type(log_file=None, show=False): """Retourne le type de computing courant - Base sur l'OS courant, ou force selon la variable d'environnement KhiopsComparisonPlatform + Base sur l'OS courant, ou force selon la variable forced_platform Une trace est ecrite dans un fichier de log et affichees sur la console si besoin """ - platform_type = os.getenv("KhiopsComparisonPlatform") - forced_platform_type = platform_type is not None - if not forced_platform_type: + if forced_platform is not None: + platform_type = forced_platform + else: platform_type = platform.system() - if platform_type == "Windows" and os.name == "posix": - platform_type = "WSL" - assert platform_type in RESULTS_REF_TYPE_VALUES[PLATFORM], ( - PLATFORM + assert platform_type in kht.RESULTS_REF_TYPE_VALUES[kht.PLATFORM], ( + kht.PLATFORM + " type (" + platform_type + ") should be in " - + str(RESULTS_REF_TYPE_VALUES[PLATFORM]) + 
+ str(kht.RESULTS_REF_TYPE_VALUES[kht.PLATFORM]) ) - # Affichhe d'une trace - if log_file is not None or show: - message = PLATFORM + " type: " + platform_type - if forced_platform_type: - message += " (forced using 'KhiopsComparisonPlatform' env var)" - if log_file is not None: - log_file.write(message + "\n") - if show: - print(message) + # Affichage d'une trace + message = kht.PLATFORM + " type: " + platform_type + if forced_platform is not None: + message += " (forced using cmamand line option)" + utils.write_message(message, log_file=log_file, show=show) return platform_type -def get_results_ref_dir(test_dir, log_file=None, show=False): +def get_results_ref_dir(test_dir, forced_context=None, log_file=None, show=False): """Recherche du repertoire de reference correspondant au contexte courant Retourne: - le nom du repertoire, ou None en cas d'erreur - la liste des repertoires de references candidats, qu'il y a ait erreur ou non + On utilise le contexte courant, sauf si un contexte est force en entree On retourne results.ref s'il n'y a aucun repertoire de reference ou si c'est le seul rerpertoire candidat Les erreurs sont ecrites dans un fichier de log et affichees sur la console si besoin """ - assert LEARNING_TEST in test_dir, ( - test_dir + " must be in a sub-directory of " + LEARNING_TEST - ) - results_ref_context = get_current_results_ref_context() + utils.check_test_dir(test_dir) + test_dir_name = utils.test_dir_name(test_dir) + if forced_context is None: + results_ref_context = get_current_results_ref_context() + else: + results_ref_context = forced_context candidate_results_ref_dirs = get_candidate_results_ref_dirs(test_dir) results_ref_dir = _search_results_ref_dir( candidate_results_ref_dirs, results_ref_context, - test_dir_name=os.path.basename(test_dir), + test_dir_name=test_dir_name, log_file=log_file, show=show, ) return results_ref_dir, candidate_results_ref_dirs -def is_candidate_results_ref_dir(test_dir): +def get_results_ref_dir_time(test_dir): 
+ """Recherche du temps de dans le fichier time.log du repertoire de reference + correspondant au contexte courant + Retourne le temps en secondes si le fichier existe et si le temps est valide + Retourne none sinon + """ + results_ref_dir, _ = get_results_ref_dir(test_dir) + results_ref_test_time = None + if results_ref_dir is not None: + time_file_path = os.path.join( + os.getcwd(), os.path.join(test_dir, results_ref_dir, kht.TIME_LOG) + ) + if os.path.isfile(time_file_path): + file_time = open(time_file_path, "r", errors="ignore") + lines = file_time.readlines() + file_time.close() + if len(lines) > 0: + line = lines[0] + line = line[:-1] + fields = line.split( + " " + ) # Pour etre resilient aux formats 'Overal time: