Skip to content

Commit

Permalink
WIP Step 11
Browse files Browse the repository at this point in the history
Portability.h
- p_isprint: mise en place d'une implementation portable
- impacts sur tous les isprint existants:
  - KWCLex.lex
  - KWDatabaseFormatDetector::ComputeSeparatorPriority
  - KWDREncrypt::InitWorkingArrays
  - KWTest

KWDataGridManager::SortAttributeParts: reimplementation du tri avec random index en cas d'egalite

Ajout temporaire de tests pour verifier les corrections de portabilite
- instabilite du coclustering
- caracteres speciaux dans les dictionnaires
- regle Encrypt
- dans test\LearningTest\TestKhiops\Standard et test\LearningTest\TestCoclustering\Standard
  • Loading branch information
marcboulle committed Jan 17, 2024
1 parent 0fc7633 commit 546dfa8
Show file tree
Hide file tree
Showing 213 changed files with 690,399 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/Learning/KWDRRuleLibrary/KWDRStringEncrypt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ void KWDREncrypt::InitWorkingArrays(const Symbol& sKey) const
{
if (isalnum(i))
ivPureAlphanumChars.Add(i);
if (isprint(i) and not iscntrl(i) and not isalnum(i))
if (p_isprint(i) and not isalnum(i))
ivPrintableNonAlphanumChars.Add(i);
}

Expand Down Expand Up @@ -367,7 +367,7 @@ void KWDREncrypt::InitWorkingArrays(const Symbol& sKey) const
{
// Caractere non imprimable transforme en blanc
c = i;
if (c >= 128 or not(isprint(c) and not iscntrl(i)))
if (c >= 128 or not p_isprint(c))
c = ' ';

// Prefixe underscore rajoute
Expand Down
4 changes: 2 additions & 2 deletions src/Learning/KWData/KWCLex.lex
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ name {letter}({letter}|{digit})*

// Initialisation de la valeur du token
c = yytext[0];
if (not isprint(c))
if (not p_isprint(c))
{
sToken += '[';
sToken += IntToString((int)c);
Expand All @@ -271,7 +271,7 @@ name {letter}({letter}|{digit})*
nCorrectedLineNumber--;
break;
}
if (not isprint(c))
if (not p_isprint(c))
{
if (sToken.GetLength() < nMaxLength)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Learning/KWData/KWDatabaseFormatDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1227,7 +1227,7 @@ int KWDatabaseFormatDetector::ComputeSeparatorPriority(char cSeparator) const
// Si non trouve, on prend le le cracater lui meme d'abord dans sa plage ascii, puis dans la plage ascii etendue
if (nPriority == -1)
{
if (isprint(cSeparator))
if (p_isprint(cSeparator))
{
if (cSeparator >= 0)
nPriority = 1000 + cSeparator;
Expand Down
13 changes: 8 additions & 5 deletions src/Learning/KWDataPreparation/KWDataGridManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2921,11 +2921,12 @@ void KWDataGridManager::SortAttributeParts(KWDGAttribute* sourceAttribute, KWDGA
ObjectArray oaSourceParts;
ObjectArray oaAssociations;
KWSortableSymbol* association;
int nSource;
int n;
KWDGPart* sourcePart;
KWDGPart* groupedPart;
IntVector ivRandomIndexes;
int nSource;
int nRandomIndex;
int n;

require(sourceAttribute != NULL);
require(groupedAttribute != NULL);
Expand Down Expand Up @@ -2955,7 +2956,8 @@ void KWDataGridManager::SortAttributeParts(KWDGAttribute* sourceAttribute, KWDGA
oaAssociations.SetSize(oaSourceParts.GetSize());
for (n = 0; n < ivRandomIndexes.GetSize(); n++)
{
nSource = ivRandomIndexes.GetAt(n);
nRandomIndex = n;
nSource = ivRandomIndexes.GetAt(nRandomIndex);
sourcePart = cast(KWDGPart*, oaSourceParts.GetAt(nSource));

// Recherche de la partie groupee correspondante
Expand All @@ -2964,7 +2966,7 @@ void KWDataGridManager::SortAttributeParts(KWDGAttribute* sourceAttribute, KWDGA
// Creation de l'association entre index de partie et premiere valeur du groupe
association = new KWSortableSymbol;
oaAssociations.SetAt(nSource, association);
association->SetIndex(nSource);
association->SetIndex(nRandomIndex);
association->SetSortValue(groupedPart->GetValueSet()->GetHeadValue()->GetValue());
}

Expand All @@ -2981,7 +2983,8 @@ void KWDataGridManager::SortAttributeParts(KWDGAttribute* sourceAttribute, KWDGA
association = cast(KWSortableSymbol*, oaAssociations.GetAt(n));

// Recherche de la partie source
nSource = association->GetIndex();
nRandomIndex = association->GetIndex();
nSource = ivRandomIndexes.GetAt(nRandomIndex);
sourcePart = cast(KWDGPart*, oaSourceParts.GetAt(nSource));

// Recherche de la partie groupee correspondante
Expand Down
7 changes: 4 additions & 3 deletions src/Learning/KWTest/Divers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1829,9 +1829,9 @@ void AnyCharFileGenerator()
char cChar;

FileService::OpenOutputFile(sFileName, fTest);
fTest
<< "Index\tChar\t<Char>"
"\tisupper\tislower\tisdigit\tisxdigit\tisalnum\tisspace\tispunct\tisprint\tisgraph\tiscntrl\tisascii\n";
fTest << "Index\tChar\t<Char>"
"\tisupper\tislower\tisdigit\tisxdigit\tisalnum\tisspace\tispunct\tp_"
"isprint\tisprint\tisgraph\tiscntrl\tisascii\n";
for (i = 0; i < 20; i++)
{
for (nChar = 1; nChar < 256; nChar++)
Expand Down Expand Up @@ -1861,6 +1861,7 @@ void AnyCharFileGenerator()
fTest << (isalnum(nChar) != 0) << "\t";
fTest << (isspace(nChar) != 0) << "\t";
fTest << (ispunct(nChar) != 0) << "\t";
fTest << (p_isprint(nChar) != 0) << "\t";
fTest << (isprint(nChar) != 0) << "\t";
fTest << (isgraph(nChar) != 0) << "\t";
fTest << (iscntrl(nChar) != 0) << "\t";
Expand Down
2 changes: 1 addition & 1 deletion src/Learning/KWTest/KWTextParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ void KWTextParser::BuildLineWordDictionary(char* sLine, ObjectDictionary* odWord
if (bInspectChars)
{
cout << i << "\t" << cLineChar << "\t" << isalnum(cLineChar) << "\t" << ispunct(cLineChar)
<< "\t" << isspace(cLineChar) << "\t" << isprint(cLineChar) << endl;
<< "\t" << isspace(cLineChar) << "\t" << p_isprint(cLineChar) << endl;
}

// Transformation des caracteres accentues
Expand Down
12 changes: 12 additions & 0 deletions src/Norm/base/Portability.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ FILE* p_fopen(const char* filename, const char* mode);
char* p_strcpy(char* strDestination, const char* strSource);
char* p_strncpy(char* strDest, const char* strSource, size_t count);
char* p_strcat(char* strDestination, const char* strSource);
int p_isprint(int ch);

// Le locale de l'application est parametre de facon a etre independant de la machine,
// pour assurer l'unicite des conversions numeriques et de leur format d'export, des tris,
Expand Down Expand Up @@ -396,3 +397,14 @@ inline char* p_strcat(char* strDestination, const char* strSource)
}

#endif // _WIN32

////////////////////////////////////////////////////
// Implementation portable pour tous les OS

// Portable replacement for isprint.
// The behavior of the standard isprint depends on the OS and on the locale:
// for example, the tab character is reported as printable on Windows but not on Linux.
// This version only accepts 7-bit ASCII codes that the platform considers printable
// and that are not control characters; it was validated on Windows, Linux and Mac.
// Returns 1 when ch is printable, 0 otherwise.
inline int p_isprint(int ch)
{
	// Anything outside the 7-bit ASCII range is never considered printable
	if (ch < 0 or ch >= 128)
		return 0;

	// Control characters are rejected even when the platform isprint accepts them
	if (iscntrl(ch))
		return 0;

	// Normalize the platform answer to exactly 0 or 1
	return isprint(ch) ? 1 : 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#Khiops 10.1.5
Short description
Dimensions 2
Name Type Parts Initial parts Values Interest Description
occupation Categorical 3 3 12 1
education Categorical 3 3 13 1

Coclustering stats
Instances 240
Cells 9
Null cost 1066.368632
Cost 1051.316663
Level 0.01411516443
Initial dimensions 2
Frequency variable
Dictionary Adult
Database ../../../datasets/Adult/Adult.txt
Sample percentage 0.5
Sampling mode Include sample
Selection variable native_country
Selection value United-States

Hierarchy occupation
Cluster ParentCluster Frequency Interest HierarchicalLevel Rank HierarchicalRank
{Prof-specialty, Exec-managerial, Protective-serv, ...} A2 90 1 1 1 6
{Other-service, Craft-repair, Transport-moving} A4 70 0.812391 1 3 6
{Sales, Farming-fishing, Adm-clerical, ...} A4 80 0.655405 1 5 6
A2 240 0.830416 -0.727419 2 3
A4 A2 150 0.728665 0.72783 4 5

Hierarchy education
Cluster ParentCluster Frequency Interest HierarchicalLevel Rank HierarchicalRank
{Bachelors, Masters, Assoc-voc, ...} B1 83 1 1 1 6
{HS-grad, 11th, 10th, ...} B3 105 0.903801 1 3 6
{Some-college, 9th} B3 52 0.59125 1 5 6
B1 240 0.869351 0 2 2
B3 B1 157 0.800281 0.696343 4 4

Composition occupation
Cluster Value Frequency Typicality
{Prof-specialty, Exec-managerial, Protective-serv, ...} Prof-specialty 54 1
{Prof-specialty, Exec-managerial, Protective-serv, ...} Exec-managerial 23 0.342621
{Prof-specialty, Exec-managerial, Protective-serv, ...} Protective-serv 6 0.148043
{Prof-specialty, Exec-managerial, Protective-serv, ...} Tech-support 7 0.0502739
{Prof-specialty, Exec-managerial, Protective-serv, ...} * 0 0
{Other-service, Craft-repair, Transport-moving} Other-service 33 1
{Other-service, Craft-repair, Transport-moving} Craft-repair 27 0.682053
{Other-service, Craft-repair, Transport-moving} Transport-moving 10 0.398653
{Sales, Farming-fishing, Adm-clerical, ...} Sales 27 1
{Sales, Farming-fishing, Adm-clerical, ...} Farming-fishing 11 0.631255
{Sales, Farming-fishing, Adm-clerical, ...} Adm-clerical 21 0.428212
{Sales, Farming-fishing, Adm-clerical, ...} Handlers-cleaners 12 0.37083
{Sales, Farming-fishing, Adm-clerical, ...} Machine-op-inspct 9 0.322156

Composition education
Cluster Value Frequency Typicality
{Bachelors, Masters, Assoc-voc, ...} Bachelors 39 1
{Bachelors, Masters, Assoc-voc, ...} Masters 14 0.756619
{Bachelors, Masters, Assoc-voc, ...} Assoc-voc 10 0.272011
{Bachelors, Masters, Assoc-voc, ...} Assoc-acdm 10 0.245697
{Bachelors, Masters, Assoc-voc, ...} Prof-school 4 0.220568
{Bachelors, Masters, Assoc-voc, ...} Doctorate 2 0.166163
{Bachelors, Masters, Assoc-voc, ...} 12th 4 0.143425
{Bachelors, Masters, Assoc-voc, ...} * 0 0
{HS-grad, 11th, 10th, ...} HS-grad 82 1
{HS-grad, 11th, 10th, ...} 11th 12 0.318105
{HS-grad, 11th, 10th, ...} 10th 8 0.271284
{HS-grad, 11th, 10th, ...} 7th-8th 3 0.0969771
{Some-college, 9th} Some-college 47 1
{Some-college, 9th} 9th 5 0.300942

Cells
occupation education Frequency
{Prof-specialty, Exec-managerial, Protective-serv, ...} {Bachelors, Masters, Assoc-voc, ...} 60
{Sales, Farming-fishing, Adm-clerical, ...} {HS-grad, 11th, 10th, ...} 52
{Other-service, Craft-repair, Transport-moving} {HS-grad, 11th, 10th, ...} 42
{Other-service, Craft-repair, Transport-moving} {Some-college, 9th} 25
{Sales, Farming-fishing, Adm-clerical, ...} {Bachelors, Masters, Assoc-voc, ...} 20
{Prof-specialty, Exec-managerial, Protective-serv, ...} {Some-college, 9th} 19
{Prof-specialty, Exec-managerial, Protective-serv, ...} {HS-grad, 11th, 10th, ...} 11
{Sales, Farming-fishing, Adm-clerical, ...} {Some-college, 9th} 8
{Other-service, Craft-repair, Transport-moving} {Bachelors, Masters, Assoc-voc, ...} 3

Loading

0 comments on commit 546dfa8

Please sign in to comment.