Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix remaining instabilities in LearningTest scripts #148

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 23 additions & 24 deletions test/LearningTest/cmd/python/apply_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,20 @@


def file_read_lines(file_name):
with open(file_name, "r") as input_file:
with open(file_name, "r", errors="ignore") as input_file:
lines = input_file.readlines()
return lines


def file_write_lines(file_path, lines):
with open(file_path, "w") as the_file:
with open(file_path, "w", errors="ignore") as the_file:
for line in lines:
the_file.write(line)


def file_search(file_name, search_text):
# search in a file
the_file = open(file_name, "r") # Opens the file in read-mode
the_file = open(file_name, "r", errors="ignore") # Opens the file in read-mode
text = the_file.read() # Reads the file and assigns the value to a variable
the_file.close() # Closes the file (read session)
if text.find(search_text) >= 0:
Expand Down Expand Up @@ -53,11 +53,11 @@ def file_content_search_count(file_lines, search_text):

def file_replace(file_name, source_text, replace_text):
# search/replace in a file
the_file = open(file_name, "r") # Opens the file in read-mode
the_file = open(file_name, "r", errors="ignore") # Opens the file in read-mode
text = the_file.read() # Reads the file and assigns the value to a variable
the_file.close() # Closes the file (read session)
# Opens the file again, this time in write-mode
the_file = open(file_name, "w")
the_file = open(file_name, "w", errors="ignore")
# replaces all instance_number of our keyword
the_file.write(text.replace(source_text, replace_text))
# and writes the whole output when done, wiping over the old contents of the file
Expand All @@ -75,10 +75,10 @@ def file_compare(file_name1: str, file_name2: str, skip_patterns: list = None):
lines1 = []
lines2 = []
if compare_ok:
file1 = open(file_name1, "r")
file1 = open(file_name1, "r", errors="ignore")
lines1 = file1.readlines()
file1.close()
file2 = open(file_name2, "r")
file2 = open(file_name2, "r", errors="ignore")
lines2 = file2.readlines()
file1.close()
compare_ok = len(lines1) == len(lines2)
Expand Down Expand Up @@ -118,10 +118,10 @@ def file_compare_line_number(file_name1: str, file_name2: str):
"""
compare_ok = os.path.isfile(file_name1) and os.path.isfile(file_name2)
if compare_ok:
file1 = open(file_name1, "r")
file1 = open(file_name1, "r", errors="ignore")
lines1 = file1.readlines()
file1.close()
file2 = open(file_name2, "r")
file2 = open(file_name2, "r", errors="ignore")
lines2 = file2.readlines()
file1.close()
compare_ok = len(lines1) == len(lines2)
Expand Down Expand Up @@ -283,7 +283,7 @@ def apply_command_logs(work_dir):
print(root_name + " " + dir_name)
print("==================================================================")
print(log_file_name)
log_file = open(log_file_name, "r")
log_file = open(log_file_name, "r", errors="ignore")
for s in log_file:
s = s.replace("\n", "")
print(" " + s)
Expand Down Expand Up @@ -340,10 +340,9 @@ def time_to_seconds(value):
+ " dir"
)
if is_valid:
with open(results_dir_err_file, "r") as fErr:
with open(results_dir_err_file, "r", errors="ignore") as fErr:
lines = fErr.readlines()
# with open(results_ref_dir_err_file, "r", encoding='utf-8') as fErrRef:
with open(results_ref_dir_err_file, "r") as f_err_ref:
with open(results_ref_dir_err_file, "r", errors="ignore") as f_err_ref:
lines_ref = f_err_ref.readlines()
if len(lines) != len(lines_ref):
print_log_message(
Expand Down Expand Up @@ -423,7 +422,7 @@ def apply_command_performance(work_dir):
for file_name in os.listdir(results_dir):
if test_pattern in file_name:
test_eval_file_name = os.path.join(results_dir, file_name)
test_eval_file = open(test_eval_file_name, "r")
test_eval_file = open(test_eval_file_name, "r", errors="ignore")
for s in test_eval_file:
if s.find("Selective Naive Bayes", 0) == 0:
# comma to avoid doubling "\n"
Expand Down Expand Up @@ -451,7 +450,7 @@ def apply_command_performance_ref(work_dir):
for file_name in os.listdir(results_dir):
if test_pattern in file_name:
test_eval_file_name = os.path.join(results_dir, file_name)
test_eval_file = open(test_eval_file_name, "r")
test_eval_file = open(test_eval_file_name, "r", errors="ignore")
for s in test_eval_file:
if s.find("Selective Naive Bayes", 0) == 0:
# comma to avoid doubling "\n"
Expand Down Expand Up @@ -507,9 +506,9 @@ def print_error(message):
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(work_dir)))
stats_file_name = os.path.join(root_dir, "stats.FNB.log")
if os.path.isfile(stats_file_name):
fstats = open(stats_file_name, "a")
fstats = open(stats_file_name, "a", errors="ignore")
else:
fstats = open(stats_file_name, "w")
fstats = open(stats_file_name, "w", errors="ignore")
fstats.write(
"Tool\tRoot\tDir\tFile\tReport\tCriterion\tValue\tRef value\tDiff\n"
)
Expand Down Expand Up @@ -695,7 +694,7 @@ def extract_info(line):
database_name = ""
target_attribute_name = ""
prm_file_name = os.path.join(work_dir, TEST_PRM)
prm_file = open(prm_file_name, "r")
prm_file = open(prm_file_name, "r", errors="ignore")
for s in prm_file:
if s.find("class_file_name") >= 0 and class_file_name == "":
class_file_name = extract_info(s)
Expand Down Expand Up @@ -814,7 +813,7 @@ def parameter_exist(line, searched_keyword):
]
prm_file_name = os.path.join(work_dir, TEST_PRM)
print(work_dir)
with open(prm_file_name, "r") as prm_file:
with open(prm_file_name, "r", errors="ignore") as prm_file:
line_index = 1
for s in prm_file:
# Test comments
Expand Down Expand Up @@ -875,7 +874,7 @@ def parameter_exist(line, searched_keyword):
prm_file = open(prm_file_name, "r", errors="ignore")
prm_file_lines = prm_file.readlines()
prm_file.close()
prm_file = open(prm_file_name, "w")
prm_file = open(prm_file_name, "w", errors="ignore")
for s in prm_file_lines:
new_line = s
# Test comments
Expand Down Expand Up @@ -922,10 +921,10 @@ def parameter_exist(line, searched_keyword):
if results_ref is not None:
err_ref_file_name = os.path.join(work_dir, results_ref, ERR_TXT)
if do_it and os.path.isfile(err_ref_file_name):
err_file = open(err_ref_file_name, "r")
err_file = open(err_ref_file_name, "r", errors="ignore")
err_file_lines = err_file.readlines()
err_file.close()
err_file = open(err_ref_file_name, "w")
err_file = open(err_ref_file_name, "w", errors="ignore")
for s in err_file_lines:
new_line = s
new_line = new_line.replace(" " + RESULTS + "/", " ./" + RESULTS + "/")
Expand Down Expand Up @@ -1007,7 +1006,7 @@ def apply_command_transform_hdfs_results(work_dir):

# Write the file in place
os.chmod(file_path, stat.S_IWRITE | stat.S_IREAD)
with open(file_path, "w") as output_file:
with open(file_path, "w", errors="ignore") as output_file:
output_file.write(file_data)


Expand Down Expand Up @@ -1057,7 +1056,7 @@ def apply_command_work(work_dir):
file_path = os.path.join(work_dir, TEST_PRM)
try:
lines = file_read_lines(file_path)
with open(file_path, "w") as the_file:
with open(file_path, "w", errors="ignore") as the_file:
for line in lines:
if line.find("EpsilonBinNumber") >= 0:
continue
Expand Down
45 changes: 34 additions & 11 deletions test/LearningTest/cmd/python/check_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ def write_message(message, log_file=None, show=False):
Affichage sur la console selon le parametre show
Si ni log_file, ni show ne sont specifies, la methode est en mode silencieux
"""
cleaned_message = message.encode("utf-8", "ignore").decode("utf-8")
cleaned_message = message.encode(encoding="utf-8", errors="ignore").decode(
encoding="utf-8"
)
if show:
print(cleaned_message)
# on encode en utf-8 en ignorant les erreurs pour eviter une erreur lors de l'encodage automatique
Expand Down Expand Up @@ -167,7 +169,7 @@ def check_results(test):
# Ouverture du fichier de log de comparaison
log_file_path = os.path.join(test_full_path, COMPARISON_LOG_FILE_NAME)
try:
log_file = open(log_file_path, "w")
log_file = open(log_file_path, "w", errors="ignore")
except Exception as exception:
print("error : unable to create log file " + log_file_path, exception)
return
Expand Down Expand Up @@ -243,7 +245,7 @@ def check_results(test):
#
# Attention, la methode fsdecode utilise des 'surrogate characters' invisible
# permettant de garder trace des bytes non utf8 pour le re-encodage par fsencode si necessaire
# On passe par une version 'nettoyee' de ces caracteres speciaux pour memoriser
# On passe par une version 'nettoyee' purement ascii de ces caracteres speciaux pour memoriser
# l'association entre un nom de fichier de type string et un nom de type bytes
# Dans ce cas, il suffit de memoriser dans les resultats de reference la
# version du nom de fichier sans bytes (valide quelque soit la plateforme)
Expand All @@ -254,7 +256,7 @@ def check_results(test):
recovery = False
for byte_file_name in ref_byte_file_names:
file_name = os.fsdecode(byte_file_name)
cleaned_file_name = file_name.encode("utf-8", "ignore").decode("utf-8")
cleaned_file_name = file_name.encode("ascii", "ignore").decode("ascii")
if cleaned_file_name != file_name:
write_message(
"warning : reference file name with a byte encoding ("
Expand All @@ -272,7 +274,7 @@ def check_results(test):
dic_test_byte_file_names = {}
for byte_file_name in test_byte_file_names:
file_name = os.fsdecode(byte_file_name)
cleaned_file_name = file_name.encode("utf-8", "ignore").decode("utf-8")
cleaned_file_name = file_name.encode("ascii", "ignore").decode("ascii")
if cleaned_file_name != file_name:
write_message(
"warning : test file name with a byte encoding ("
Expand All @@ -289,11 +291,11 @@ def check_results(test):
# Message de recuperation d'erreur si necessaire
if recovery:
write_message(
"\nRecovery from errors caused byte encoding of file names in another platform",
"\nRecovery from errors caused by byte encoding of file names in another platform",
log_file=log_file,
)
portability_message = append_message(
portability_message, "recovery of type byte enencoding of file names"
portability_message, "recovery of type byte enncoding of file names"
)

# On les trie pour ameliorer la stabilite du reporting inter plateformes
Expand Down Expand Up @@ -368,7 +370,7 @@ def check_results(test):
# En-tete de comparaison des fichiers
write_message("\nfile " + test_file_path, log_file=log_file)

# On utilise si possible le path des fichiers en byte pour s'adapter aux contrainte de la plateforme
# On utilise si possible le path des fichiers en byte pour s'adapter aux contraintes de la plateforme
# Les erreurs seront diagnostiquees si necessaire lors de la lecture des fichiers
used_ref_file_path = ref_file_path
if dic_ref_byte_file_names.get(file_name) is not None:
Expand Down Expand Up @@ -644,6 +646,23 @@ def check_results(test):
# Analyse specifique de la sous partie des fichiers correspondant aux messages utilisateur,
# qui ont ete marque en stripant les lignes correspondantes
if unsorted_user_messages_recovery:

def filter_record_index_from_lines(lines):
    """Strip the leading part of data-table warning lines up to the record index.

    For every line containing "warning : Data table " followed later by
    " : Record ", the text from the start of the line through the record
    index is removed; the kept part starts at the " : " separator that
    follows the record index. All other lines are kept unchanged.

    Args:
        lines: iterable of message lines (strings).

    Returns:
        A new list with one entry per input line, in the same order.
        The input is not modified.
    """
    warning_pattern = "warning : Data table "
    record_pattern = " : Record "
    separator = " : "
    filtered_lines = []
    for line in lines:
        warning_pos = line.find(warning_pattern)
        if warning_pos >= 0:
            record_pos = line.find(record_pattern)
            # The record marker must come after the warning header
            if record_pos > warning_pos:
                index_start = record_pos + len(record_pattern)
                separator_pos = line.find(separator, index_start)
                # Strictly greater: a separator found right at index_start
                # (empty record index) leaves the line untouched
                if separator_pos > index_start:
                    line = line[separator_pos:]
        # NOTE(review): the extracted source had lost its indentation; the
        # append is placed at loop level so that every line is kept, filtered
        # or not -- confirm against the repository version
        filtered_lines.append(line)
    return filtered_lines

# Parcours des fichiers concernes pour reanalyser leurs lignes specifiques aux erreurs
user_message_error_number = 0
user_message_warning_number = 0
Expand All @@ -659,7 +678,7 @@ def check_results(test):
test_file_lines = extract_striped_lines(test_file_lines)
ref_file_lines = extract_striped_lines(ref_file_lines)
# Comparaison de la partie des fichiers pre-traites relative aux messages utilisateur
# La comparaison se fait de facon muette, sans passer par le ficheir de log
# La comparaison se fait de facon muette, sans passer par le fichier de log
errors, warnings = check_file_lines(
file_name,
file_name,
Expand All @@ -668,7 +687,10 @@ def check_results(test):
)
user_message_error_number += errors
user_message_warning_number += warnings
# Comparaison apres avoir triee les messages utilisateurs
# Comparaison apres avoir filtre les messages utilisateurs jusqu'aux index des records,
# qui peuvent varier d'une execution a l'autre, puis les avoir tries
test_file_lines = filter_record_index_from_lines(test_file_lines)
ref_file_lines = filter_record_index_from_lines(ref_file_lines)
test_file_lines.sort()
ref_file_lines.sort()
errors, warnings = check_file_lines(
Expand All @@ -679,6 +701,7 @@ def check_results(test):
)
recovered_error_number += errors
recovered_warning_number += warnings

# Il faut que les erreurs ne proviennent que des messages utilisateurs
if unsorted_user_messages_recovery:
unsorted_user_messages_recovery = (
Expand All @@ -697,7 +720,7 @@ def check_results(test):
write_message(
"\tall errors come from the users messages in "
+ ERR_TXT
+ " and in json reports, with a different order",
+ " and in json reports, with a different order and possibly different record indexes",
log_file=log_file,
)
write_message(
Expand Down
2 changes: 1 addition & 1 deletion test/LearningTest/cmd/python/help_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
print(
"KhiopsComparisonPlatform: "
+ str(os.getenv("KhiopsComparisonPlatform"))
+ "\n\tplatform (Windows, Linux, Darwin) used to compare test results (default: None, to use that of current OS)"
+ "\n\tplatform (Windows, Linux, Darwin, WSL) used to compare test results (default: None, to use that of current OS)"
)

print(
Expand Down
Loading
Loading