From b4c9b394af8fdde82cd8bfef584ab660486169d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Boull=C3=A9?= Date: Wed, 17 Jan 2024 11:06:34 +0100 Subject: [PATCH] Fix isprint portability instabilities Portability.h - p_isprint: mise en place d'une implementation portable - impacts sur tous les isprint existants: - KWCLex.lex - KWDatabaseFormatDetector::ComputeSeparatorPriority - KWDREncrypt::InitWorkingArrays - KWTest Test sur la stabilisation effective des resultats sur Windows, Linux, Mac - TestKhiops/Rules/EncryptRules - TestKhiops/Bugs/DicoSpecialChars Correction a reporter en V11 --- src/Learning/KWDRRuleLibrary/KWDRStringEncrypt.cpp | 4 ++-- src/Learning/KWData/KWCLex.inc | 4 ++-- src/Learning/KWData/KWCLex.lex | 4 ++-- src/Learning/KWData/KWDatabaseFormatDetector.cpp | 2 +- src/Learning/KWTest/Divers.cpp | 7 ++++--- src/Learning/KWTest/KWTextParser.cpp | 2 +- src/Norm/base/Portability.h | 12 ++++++++++++ 7 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/Learning/KWDRRuleLibrary/KWDRStringEncrypt.cpp b/src/Learning/KWDRRuleLibrary/KWDRStringEncrypt.cpp index e382a257f..9e4e4d09f 100644 --- a/src/Learning/KWDRRuleLibrary/KWDRStringEncrypt.cpp +++ b/src/Learning/KWDRRuleLibrary/KWDRStringEncrypt.cpp @@ -335,7 +335,7 @@ void KWDREncrypt::InitWorkingArrays(const Symbol& sKey) const { if (isalnum(i)) ivPureAlphanumChars.Add(i); - if (isprint(i) and not isalnum(i)) + if (p_isprint(i) and not isalnum(i)) ivPrintableNonAlphanumChars.Add(i); } @@ -365,7 +365,7 @@ void KWDREncrypt::InitWorkingArrays(const Symbol& sKey) const { // Caractere non imprimable transforme en blanc c = i; - if (c >= 128 or not isprint(c)) + if (c >= 128 or not p_isprint(c)) c = ' '; // Prefixe underscore rajoute diff --git a/src/Learning/KWData/KWCLex.inc b/src/Learning/KWData/KWCLex.inc index 986bb37be..df732c5d1 100644 --- a/src/Learning/KWData/KWCLex.inc +++ b/src/Learning/KWData/KWCLex.inc @@ -1141,7 +1141,7 @@ YY_RULE_SETUP // Initialisation de la valeur du token c = 
yytext[0]; - if (not isprint(c)) + if (not p_isprint(c)) { sToken += '['; sToken += IntToString((int)c); @@ -1160,7 +1160,7 @@ YY_RULE_SETUP nCorrectedLineNumber--; break; } - if (not isprint(c)) + if (not p_isprint(c)) { if (sToken.GetLength() < nMaxLength) { diff --git a/src/Learning/KWData/KWCLex.lex b/src/Learning/KWData/KWCLex.lex index 9f6768090..5d697d80a 100644 --- a/src/Learning/KWData/KWCLex.lex +++ b/src/Learning/KWData/KWCLex.lex @@ -252,7 +252,7 @@ name {letter}({letter}|{digit})* // Initialisation de la valeur du token c = yytext[0]; - if (not isprint(c)) + if (not p_isprint(c)) { sToken += '['; sToken += IntToString((int)c); @@ -271,7 +271,7 @@ name {letter}({letter}|{digit})* nCorrectedLineNumber--; break; } - if (not isprint(c)) + if (not p_isprint(c)) { if (sToken.GetLength() < nMaxLength) { diff --git a/src/Learning/KWData/KWDatabaseFormatDetector.cpp b/src/Learning/KWData/KWDatabaseFormatDetector.cpp index ec593d7a6..48c5cffc8 100644 --- a/src/Learning/KWData/KWDatabaseFormatDetector.cpp +++ b/src/Learning/KWData/KWDatabaseFormatDetector.cpp @@ -1227,7 +1227,7 @@ int KWDatabaseFormatDetector::ComputeSeparatorPriority(char cSeparator) const // Si non trouve, on prend le le cracater lui meme d'abord dans sa plage ascii, puis dans la plage ascii etendue if (nPriority == -1) { - if (isprint(cSeparator)) + if (p_isprint(cSeparator)) { if (cSeparator >= 0) nPriority = 1000 + cSeparator; diff --git a/src/Learning/KWTest/Divers.cpp b/src/Learning/KWTest/Divers.cpp index 167f2219b..4d58dea84 100644 --- a/src/Learning/KWTest/Divers.cpp +++ b/src/Learning/KWTest/Divers.cpp @@ -1829,9 +1829,9 @@ void AnyCharFileGenerator() char cChar; FileService::OpenOutputFile(sFileName, fTest); - fTest - << "Index\tChar\t" - "\tisupper\tislower\tisdigit\tisxdigit\tisalnum\tisspace\tispunct\tisprint\tisgraph\tiscntrl\tisascii\n"; + fTest << "Index\tChar\t" + "\tisupper\tislower\tisdigit\tisxdigit\tisalnum\tisspace\tispunct\tp_" + 
"isprint\tisprint\tisgraph\tiscntrl\tisascii\n"; for (i = 0; i < 20; i++) { for (nChar = 1; nChar < 256; nChar++) @@ -1861,6 +1861,7 @@ void AnyCharFileGenerator() fTest << (isalnum(nChar) != 0) << "\t"; fTest << (isspace(nChar) != 0) << "\t"; fTest << (ispunct(nChar) != 0) << "\t"; + fTest << (p_isprint(nChar) != 0) << "\t"; fTest << (isprint(nChar) != 0) << "\t"; fTest << (isgraph(nChar) != 0) << "\t"; fTest << (iscntrl(nChar) != 0) << "\t"; diff --git a/src/Learning/KWTest/KWTextParser.cpp b/src/Learning/KWTest/KWTextParser.cpp index 8932c880d..3363dc4a2 100644 --- a/src/Learning/KWTest/KWTextParser.cpp +++ b/src/Learning/KWTest/KWTextParser.cpp @@ -385,7 +385,7 @@ void KWTextParser::BuildLineWordDictionary(char* sLine, ObjectDictionary* odWord if (bInspectChars) { cout << i << "\t" << cLineChar << "\t" << isalnum(cLineChar) << "\t" << ispunct(cLineChar) - << "\t" << isspace(cLineChar) << "\t" << isprint(cLineChar) << endl; + << "\t" << isspace(cLineChar) << "\t" << p_isprint(cLineChar) << endl; } // Transformation des caracteres accentues diff --git a/src/Norm/base/Portability.h b/src/Norm/base/Portability.h index 01705253f..041da8679 100644 --- a/src/Norm/base/Portability.h +++ b/src/Norm/base/Portability.h @@ -216,6 +216,7 @@ FILE* p_fopen(const char* filename, const char* mode); char* p_strcpy(char* strDestination, const char* strSource); char* p_strncpy(char* strDest, const char* strSource, size_t count); char* p_strcat(char* strDestination, const char* strSource); +int p_isprint(int ch); // Le locale de l'application est parametre de facon a etre independant de la machine, // pour assurer l'unicite des conversions numeriques et de leur format d'export, des tris, @@ -396,3 +397,14 @@ inline char* p_strcat(char* strDestination, const char* strSource) } #endif // _WIN32 + +//////////////////////////////////////////////////// +// Implementation portable pour tous les OS + +// isprint a un comportement qui depend de l'OS et de la locale +// Par exemple; la 
tabulation est imprimable sous Windows, mais pas sous Linux +// L'implementation ci-dessous est portable sur tous les OS testes (Windows, Linux, Mac) +inline int p_isprint(int ch) +{ + return (0 <= ch and ch < 128 and isprint(ch) and not iscntrl(ch)); +}