Skip to content

Commit

Permalink
WIP Step 3
Browse files Browse the repository at this point in the history
Ajout d'un jeu de test minimaliste LearningTest\TestKhiops\Standard\BUGEncryptRules

Modification temporaire de run-standard-tests.yml pour n'executer les tests que sous linux en release

KWDataGridClusteringCosts::ComputeAttributeCost: ajout de trace pour le calcul des couts des attributs

KWDREncrypt::EncryptString: trace pour verifier le comportement de isprint
  • Loading branch information
marcboulle committed Jan 15, 2024
1 parent c4b4cb9 commit 18b9bde
Show file tree
Hide file tree
Showing 13 changed files with 682 additions and 4 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/run-standard-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,8 @@ jobs:
strategy:
matrix:
build-setup:
- {os: windows-2022, cmake-preset: windows-msvc}
- {os: ubuntu-latest, cmake-preset: linux-gcc}
- {os: macos-latest, cmake-preset: macos-clang}
config: [debug, release]
config: [release]
runs-on: ${{ matrix.build-setup.os }}
env:
PRESET_NAME: ${{ matrix.build-setup.cmake-preset }}-${{ matrix.config }}
Expand Down
14 changes: 14 additions & 0 deletions src/Learning/KWDRRuleLibrary/KWDRStringEncrypt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,20 @@ const ALString KWDREncrypt::EncryptString(ALString& sStringToEncrypt, const Symb
int nBlockCode;
char cLastChar;

// DDD
// Temporary diagnostic: prints, for every character code from 0 to 255, how isdigit,
// isalnum and isprint classify it, so that the behavior of isprint can be checked.
// The table is meant to be emitted only once, whatever the number of calls.
static boolean bTraceOnce = false;
if (not bTraceOnce)
{
	// Latch the flag right away: without this assignment the whole 256-line table
	// would be written again on every call
	bTraceOnce = true;

	cout << "Index\tChar\tisdigit\tisalnum\tisprint\n";
	for (i = 0; i < 256; i++)
	{
		cout << i << "\t";

		// Only echo the character itself when it is printable, to keep the table readable
		if (isprint(i))
			cout << (char)i;
		cout << "\t" << isdigit(i) << "\t" << isalnum(i) << "\t" << isprint(i) << endl;
	}
}

// Memorisation de la graine aleatoire initiale
nRandomSeed = GetRandomSeed();

Expand Down
6 changes: 6 additions & 0 deletions src/Learning/KWDataPreparation/KWAttributeSubsetStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,12 @@ boolean KWAttributeSubsetStats::ComputeStats(const KWTupleTable* tupleTable)
optimizedDataGrid->GetAttributeAt(0)->GetAttributeName() == "SPetalLength" and
optimizedDataGrid->GetAttributeAt(1)->GetAttributeName() == "SepalWidth")
{
SetGlobalTraceOn(true);
dataGridCosts->ComputeAttributeCost(optimizedDataGrid->GetAttributeAt(0),
optimizedDataGrid->GetAttributeAt(0)->GetPartNumber());
dataGridCosts->ComputeAttributeCost(optimizedDataGrid->GetAttributeAt(1),
optimizedDataGrid->GetAttributeAt(1)->GetPartNumber());
SetGlobalTraceOn(false);
cout << "All costs" << endl;
dataGridCosts->WriteDataGridAllCosts(optimizedDataGrid, cout);
}
Expand Down
74 changes: 74 additions & 0 deletions src/Learning/KWDataPreparation/KWDataGridCosts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@

#include "KWDataGridCosts.h"

// DDD
// Temporary debugging aid: switch for the global trace, private to this file and off by default
namespace
{
boolean bTraceEnabled = false;
} // namespace

// Returns the current value of the global trace switch
boolean GetGlobalTraceOn()
{
	return bTraceEnabled;
}

// Sets the global trace switch to bValue
void SetGlobalTraceOn(boolean bValue)
{
	bTraceEnabled = bValue;
}

////////////////////////////////////////////////////////////////////////////////////////
// Classe KWDataGridCosts

Expand Down Expand Up @@ -1398,6 +1411,15 @@ double KWDataGridClusteringCosts::ComputeAttributeCost(const KWDGAttribute* attr
// Une partition avec poubelle contient au moins 2 parties informatives + 1 groupe poubelle
require(attribute->GetGarbageModalityNumber() == 0 or nPartitionSize >= 3);

// DDD
if (GetGlobalTraceOn())
{
cout << "KWDataGridClusteringCosts::ComputeAttributeCost\t" << attribute->GetAttributeName() << endl;
cout << "\tnPartileNumber\t" << nPartileNumber << endl;
cout << "\tKWFrequencyTable::GetMinimumNumberOfModalitiesForGarbage()\t"
<< KWFrequencyTable::GetMinimumNumberOfModalitiesForGarbage() << endl;
}

// Cout nul si partition nulle
if (nPartitionSize == 1)
dAttributeCost = 0;
Expand Down Expand Up @@ -1425,38 +1447,90 @@ double KWDataGridClusteringCosts::ComputeAttributeCost(const KWDGAttribute* attr
if (nPartileNumber > KWFrequencyTable::GetMinimumNumberOfModalitiesForGarbage())
dAttributeCost += log(2.0);

if (GetGlobalTraceOn()) // DDD
{
cout << "\tattribute->GetCost()\t" << attribute->GetCost() << endl;
cout << "\tnGarbageModalityNumber\t" << nGarbageModalityNumber << endl;
cout << "\tdAttributeCost1\t" << dAttributeCost << endl;
}

// Cout de codage du choix des modalites informatives (hors poubelle)
if (nGarbageModalityNumber > 0)
{
// Cout de codage de la taille de la non-poubelle
dAttributeCost += KWStat::BoundedNaturalNumbersUniversalCodeLength(
nPartileNumber - nGarbageModalityNumber - 1, nPartileNumber - 2);
if (GetGlobalTraceOn()) // DDD
{
cout << "\tdAttributeCost2\t" << dAttributeCost << "\t"
<< KWStat::BoundedNaturalNumbersUniversalCodeLength(
nPartileNumber - nGarbageModalityNumber - 1, nPartileNumber - 2)
<< endl;
}

// Choix des modalites hors poubelle selon un tirage multinomial avec un tirage par
// variable
dAttributeCost +=
(nPartileNumber - nGarbageModalityNumber) * log(nPartileNumber * 1.0) -
KWStat::LnFactorial(nPartileNumber - nGarbageModalityNumber);
if (GetGlobalTraceOn()) // DDD
{
cout << "\tdAttributeCost3\t" << dAttributeCost << "\t"
<< (nPartileNumber - nGarbageModalityNumber) * log(nPartileNumber * 1.0) -
KWStat::LnFactorial(nPartileNumber - nGarbageModalityNumber)
<< endl;
}

// Cout de codage du nombre de parties informatives (nPartitionSize-1)
dAttributeCost += KWStat::BoundedNaturalNumbersUniversalCodeLength(
nPartitionSize - 2, nPartileNumber - nGarbageModalityNumber - 1);
if (GetGlobalTraceOn()) // DDD
{
cout << "\tdAttributeCost4\t" << dAttributeCost << "\t"
<< KWStat::BoundedNaturalNumbersUniversalCodeLength(
nPartitionSize - 2, nPartileNumber - nGarbageModalityNumber - 1)
<< endl;
}

// Cout de codage du choix des parties informatives (nPartitionSize - 1)
dAttributeCost +=
KWStat::LnBell(nPartileNumber - nGarbageModalityNumber, nPartitionSize - 1);
if (GetGlobalTraceOn()) // DDD
{
cout << "\tdAttributeCost5\t" << dAttributeCost << "\t"
<< KWStat::LnBell(nPartileNumber - nGarbageModalityNumber,
nPartitionSize - 1)
<< endl;
}
}
else
{
// Cout de codage du nombre de parties
dAttributeCost += KWStat::BoundedNaturalNumbersUniversalCodeLength(nPartitionSize - 1,
nPartileNumber - 1);
if (GetGlobalTraceOn()) // DDD
{
cout << "\tdAttributeCostBis2\t" << dAttributeCost << "\t"
<< KWStat::BoundedNaturalNumbersUniversalCodeLength(nPartitionSize - 1,
nPartileNumber - 1)
<< endl;
}

// Cout de codage du choix des parties
dAttributeCost += KWStat::LnBell(nPartileNumber, nPartitionSize);
if (GetGlobalTraceOn()) // DDD
{
cout << "\tdAttributeCostBis3\t" << dAttributeCost << "\t"
<< KWStat::LnBell(nPartileNumber, nPartitionSize) << endl;
}
}
}
}
if (GetGlobalTraceOn()) // DDD
{
cout << "\tdAttributeCostFinal\t" << dAttributeCost << endl;
}

return dAttributeCost;
}

Expand Down
6 changes: 6 additions & 0 deletions src/Learning/KWDataPreparation/KWDataGridCosts.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ class KWDataGridGeneralizedClassificationCosts;
#include "KWDataGridMerger.h"
#include "KWStat.h"

// DDD
// Global trace flag
// To be set and used temporarily, for the duration of a debugging session
// GetGlobalTraceOn returns the current value of the flag
boolean GetGlobalTraceOn();
// SetGlobalTraceOn replaces the value of the flag with bValue
void SetGlobalTraceOn(boolean bValue);

////////////////////////////////////////////////////////////////////////////
// Definition de la structure des couts d'une grille de donnees
// Les couts par entite, nuls par defaut, sont redefinissable dans des sous-classes
Expand Down
3 changes: 2 additions & 1 deletion src/Learning/MODL/MODL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ int main(int argc, char** argv)
// Choix du repertoire de lancement pour le debugage sous Windows (a commenter apres fin du debug)
// SetWindowsDebugDir("Standard", "IrisLight");
// SetWindowsDebugDir("Standard", "Iris2D");
SetWindowsDebugDir("z_Work", "BUGIrisU");
// SetWindowsDebugDir("z_Work", "BUGIrisU");
SetWindowsDebugDir("z_Work", "BUGEncryptRules");

// Parametrage des logs memoires depuis les variables d'environnement, pris en compte dans KWLearningProject
// KhiopsMemStatsLogFileName, KhiopsMemStatsLogFrequency, KhiopsMemStatsLogToCollect
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#Khiops 7.7.2i

Dictionary EncryptRules
{
Categorical Message ;
Categorical Key ;
Categorical AEncrypt = Encrypt(Message, Key) ;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Message Key
_
+2.99e-1
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Message Key AEncrypt
_ oHKW
+2.99e-1 mHnS4d6s1RmbB
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Deploy model EncryptRules
Input database ./EncryptRules.txt: Read records: 2
Output database ./results/T_EncryptRules.txt: Written records: 2
Model deployment time: 0:00:00.01

Loading

0 comments on commit 18b9bde

Please sign in to comment.