Skip to content

Commit

Permalink
Finalize histogram json format in .khj reports
Browse files Browse the repository at this point in the history
Modification des rapports de visualisation pour la gestion des histogrammes
- format json .khj:
  - ajout d'un tag "parts" dans les rapports en non supervise (comme en supervise),
    quand on produit des histogrammes numériques ou categoriels
    - a prendre en compte dans l'outil de visualisation et dans pykhiops
  - suppression des tags "centralBinExponent" et "lastCentralBinExponent" pour les histogrammes
    - non utile, ni maintenant, ni plus tard
- format .xls
  - ajout egalement d'une colonne "Part" en non supervise dans les tableaux synthetiques
    sur les variables numeriques et categorielles
- impacts
  - MHMODLHistogramAnalysisStats::WriteJSONKeyReport
  - KWAttributeStats::WriteJSONArrayFields
  - KWAttributeStats::WriteHeaderLineReport
  - KWAttributeStats::WriteLineReport
- tests
  - propagation des changements sur tout LearningTest
  - mises a jour mineures de quelques scripts de test\LearningTest\cmd\python
  - mise a jour des tests du repo de test\LearningTest\TestKhiops
  • Loading branch information
marcboulle committed Dec 5, 2023
1 parent 3d4cda6 commit 141675e
Show file tree
Hide file tree
Showing 131 changed files with 294 additions and 276 deletions.
46 changes: 37 additions & 9 deletions src/Learning/KWDataPreparation/KWAttributeStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,14 +269,14 @@ void KWAttributeStats::WriteHeaderLineReport(ostream& ost)
ost << "\tTarget intervals";
else if (GetTargetAttributeType() == KWType::Symbol and IsTargetGrouped())
ost << "\tTarget groups";

// Nombre de parties sources
if (nAttributeType == KWType::Continuous)
ost << "\tIntervals";
else if (nAttributeType == KWType::Symbol)
ost << "\tGroups";
}

// Nombre de parties sources, y compris en non supervise
if (nAttributeType == KWType::Continuous)
ost << "\tIntervals";
else if (nAttributeType == KWType::Symbol)
ost << "\tGroups";

// Statistiques descriptives
ost << "\t";
kwDescriptiveStats->WriteHeaderLineReport(ost);
Expand Down Expand Up @@ -330,7 +330,7 @@ void KWAttributeStats::WriteLineReport(ostream& ost)
// Initialisation
nSource = -1;

// Evaluation de la variable si discretisation pertinente
// Evaluation de la variable si discretisation ou groupement pertinente
// dans le cas supervise uniquement
if (GetTargetAttributeName() != "")
{
Expand Down Expand Up @@ -368,6 +368,23 @@ void KWAttributeStats::WriteLineReport(ostream& ost)
ost << "\t1";
}
}
// Nombre de partie uniquement dans le cas supervise
else
{
assert(GetTargetAttributeName() != "");

if (GetPreparedDataGridStats() != NULL)
{
assert(GetPreparedDataGridStats()->GetAttributeNumber() == 1);

ost << "\t" << GetPreparedDataGridStats()->GetAttributeAt(0)->GetPartNumber();
}
// Pas d'infos sinon
else
{
ost << "\t1";
}
}

// Statistiques descriptives
ost << "\t";
Expand Down Expand Up @@ -442,7 +459,7 @@ void KWAttributeStats::WriteJSONArrayFields(JSONFile* fJSON, boolean bSummary)
// Initialisation
nSource = -1;

// Evaluation de la variable si discretisation pertinente
// Evaluation de la variable si discretisation ou groupement pertinent
// dans le cas supervise uniquement
if (GetTargetAttributeName() != "")
{
Expand Down Expand Up @@ -488,6 +505,17 @@ void KWAttributeStats::WriteJSONArrayFields(JSONFile* fJSON, boolean bSummary)
fJSON->WriteKeyInt("parts", 1);
}
}
// Dans le cas non supervise, on ecrit eventuellement le nombre de parties
else
{
assert(GetTargetAttributeName() != "");
if (GetPreparedDataGridStats() != NULL)
{
assert(GetPreparedDataGridStats()->GetAttributeNumber() == 1);
fJSON->WriteKeyInt("parts",
GetPreparedDataGridStats()->GetAttributeAt(0)->GetPartNumber());
}
}

// Statistiques descriptives
kwDescriptiveStats->WriteJSONFields(fJSON);
Expand Down Expand Up @@ -550,7 +578,7 @@ void KWAttributeStats::WriteJSONArrayFields(JSONFile* fJSON, boolean bSummary)
{
descriptiveContinuousStats = cast(KWDescriptiveContinuousStats*, GetDescriptiveStats());

// On prend les bornes issues de l'histogramme dans le cas d'un discretisation non
// On prend les bornes issues de l'histogramme dans le cas d'une discretisation non
// supervisee MODL
if (modlHistogramResults != NULL)
{
Expand Down
4 changes: 0 additions & 4 deletions src/Learning/MHHistograms/MHMODLHistogramAnalysisStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,6 @@ void MHMODLHistogramAnalysisStats::WriteJSONKeyReport(JSONFile* fJSON, const ALS
// Nombre d'histogramms interpretables
fJSON->WriteKeyInt("interpretableHistogramNumber", GetInterpretableHistogramNumber());

// Central bin exponents, pour les histogrammes interpretables, et pour le dernier
fJSON->WriteKeyInt("centralBinExponent", GetCentralBinExponent());
fJSON->WriteKeyInt("lastCentralBinExponent", GetLastCentralBinExponent());

// Epsilon de troncature
fJSON->WriteKeyContinuous("truncationEpsilon", GetTruncationEpsilon());

Expand Down
2 changes: 1 addition & 1 deletion src/Learning/MODL/MODL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ int main(int argc, char** argv)
// Choix du repertoire de lancement pour le debugage sous Windows (a commenter apres fin du debug)
// SetWindowsDebugDir("Standard", "IrisLight");
// SetWindowsDebugDir("Standard", "Iris2D");
SetWindowsDebugDir("TextVariables", "TextLoadFile");
SetWindowsDebugDir("Standard", "IrisLightWithTrees");

// Parametrage des logs memoires depuis les variables d'environnement, pris en compte dans KWLearningProject
// KhiopsMemStatsLogFileName, KhiopsMemStatsLogFrequency, KhiopsMemStatsLogToCollect
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Khiops 10.4.6i
#Khiops 10.4.8i
Descriptive statistics


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Khiops 10.4.6i
#Khiops 10.4.8i
Descriptive statistics


Expand Down Expand Up @@ -34,24 +34,24 @@ Value grouping MODL

Categorical variables statistics

Rank Name Values Mode Mode coverage Missing number Sparse missing number Constr. cost Derivation rule
R01 Class 3 Iris-setosa 0.361905 0 0 3.17805
R02 Class1 2 0.638095 67 0 3.17805 IfC(EQc(Class, "Iris-setosa"), "setosa", "")
R03 Class2 2 0.695238 73 0 3.17805 IfC(EQc(Class, "Iris-versicolor"), "versicolor", "")
R09 SPetalLength 5 1 0.361905 0 0 3.17805 AsCategorical(Floor(PetalLength))
Rank Name Groups Values Mode Mode coverage Missing number Sparse missing number Constr. cost Derivation rule
R01 Class 3 3 Iris-setosa 0.361905 0 0 3.17805
R02 Class1 2 2 0.638095 67 0 3.17805 IfC(EQc(Class, "Iris-setosa"), "setosa", "")
R03 Class2 2 2 0.695238 73 0 3.17805 IfC(EQc(Class, "Iris-versicolor"), "versicolor", "")
R09 SPetalLength 5 5 1 0.361905 0 0 3.17805 AsCategorical(Floor(PetalLength))


Numerical variables statistics

Rank Name Values Min Max Mean Std dev Missing number Sparse missing number Constr. cost Derivation rule
R04 Dummy1 1 0 0 0 0 0 0 3.17805 Copy(0)
R05 Dummy2 105 0.005121241265 0.9859650261 0.5173966838 0.2650019122 0 0 3.17805 Random()
R06 LowerPetalLength 10 1 3 2.446666667 0.7433600251 0 0 3.17805 If(LE(PetalLength, 3), PetalLength, 3)
R07 PetalLength 36 1 6.9 3.686666667 1.80132579 0 0 3.17805
R08 PetalWidth 21 0.1 2.5 1.175238095 0.7880996979 0 0 3.17805
R10 SepalLength 31 4.3 7.7 5.827619048 0.8375127846 0 0 3.17805
R11 SepalWidth 23 2 4.4 3.081904762 0.4284592446 0 0 3.17805
R12 UpperPetalWidth 11 1.5 2.5 1.692380952 0.2962287527 0 0 3.17805 If(GE(PetalWidth, 1.5), PetalWidth, 1.5)
Rank Name Intervals Values Min Max Mean Std dev Missing number Sparse missing number Constr. cost Derivation rule
R04 Dummy1 1 1 0 0 0 0 0 0 3.17805 Copy(0)
R05 Dummy2 1 105 0.005121241265 0.9859650261 0.5173966838 0.2650019122 0 0 3.17805 Random()
R06 LowerPetalLength 4 10 1 3 2.446666667 0.7433600251 0 0 3.17805 If(LE(PetalLength, 3), PetalLength, 3)
R07 PetalLength 5 36 1 6.9 3.686666667 1.80132579 0 0 3.17805
R08 PetalWidth 5 21 0.1 2.5 1.175238095 0.7880996979 0 0 3.17805
R10 SepalLength 2 31 4.3 7.7 5.827619048 0.8375127846 0 0 3.17805
R11 SepalWidth 3 23 2 4.4 3.081904762 0.4284592446 0 0 3.17805
R12 UpperPetalWidth 2 11 1.5 2.5 1.692380952 0.2962287527 0 0 3.17805 If(GE(PetalWidth, 1.5), PetalWidth, 1.5)


--------------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"tool": "Khiops",
"version": "10.4.6i",
"version": "10.4.8i",
"shortDescription": "",
"preparationReport": {
"reportType": "Preparation",
Expand Down Expand Up @@ -40,6 +40,7 @@
"rank": "R01",
"name": "Class",
"type": "Categorical",
"parts": 3,
"values": 3,
"mode": "Iris-setosa",
"modeFrequency": 38,
Expand All @@ -51,6 +52,7 @@
"rank": "R02",
"name": "Class1",
"type": "Categorical",
"parts": 2,
"values": 2,
"mode": "",
"modeFrequency": 67,
Expand All @@ -63,6 +65,7 @@
"rank": "R03",
"name": "Class2",
"type": "Categorical",
"parts": 2,
"values": 2,
"mode": "",
"modeFrequency": 73,
Expand All @@ -75,6 +78,7 @@
"rank": "R04",
"name": "Dummy1",
"type": "Numerical",
"parts": 1,
"values": 1,
"min": 0,
"max": 0,
Expand All @@ -89,6 +93,7 @@
"rank": "R05",
"name": "Dummy2",
"type": "Numerical",
"parts": 1,
"values": 105,
"min": 0.005121241265,
"max": 0.9859650261,
Expand All @@ -103,6 +108,7 @@
"rank": "R06",
"name": "LowerPetalLength",
"type": "Numerical",
"parts": 4,
"values": 10,
"min": 1,
"max": 3,
Expand All @@ -117,6 +123,7 @@
"rank": "R07",
"name": "PetalLength",
"type": "Numerical",
"parts": 5,
"values": 36,
"min": 1,
"max": 6.9,
Expand All @@ -130,6 +137,7 @@
"rank": "R08",
"name": "PetalWidth",
"type": "Numerical",
"parts": 5,
"values": 21,
"min": 0.1,
"max": 2.5,
Expand All @@ -143,6 +151,7 @@
"rank": "R09",
"name": "SPetalLength",
"type": "Categorical",
"parts": 5,
"values": 5,
"mode": "1",
"modeFrequency": 38,
Expand All @@ -155,6 +164,7 @@
"rank": "R10",
"name": "SepalLength",
"type": "Numerical",
"parts": 2,
"values": 31,
"min": 4.3,
"max": 7.7,
Expand All @@ -168,6 +178,7 @@
"rank": "R11",
"name": "SepalWidth",
"type": "Numerical",
"parts": 3,
"values": 23,
"min": 2,
"max": 4.4,
Expand All @@ -181,6 +192,7 @@
"rank": "R12",
"name": "UpperPetalWidth",
"type": "Numerical",
"parts": 2,
"values": 11,
"min": 1.5,
"max": 2.5,
Expand Down Expand Up @@ -278,8 +290,6 @@
"modlHistograms": {
"histogramNumber": 1,
"interpretableHistogramNumber": 1,
"centralBinExponent": 0,
"lastCentralBinExponent": 0,
"truncationEpsilon": 0,
"removedSingularIntervalNumber": 0,
"granularities": [0],
Expand Down Expand Up @@ -318,8 +328,6 @@
"modlHistograms": {
"histogramNumber": 5,
"interpretableHistogramNumber": 4,
"centralBinExponent": 2,
"lastCentralBinExponent": 2,
"truncationEpsilon": 0.1,
"removedSingularIntervalNumber": 0,
"granularities": [0,2,3,5,28],
Expand Down Expand Up @@ -375,8 +383,6 @@
"modlHistograms": {
"histogramNumber": 5,
"interpretableHistogramNumber": 4,
"centralBinExponent": 4,
"lastCentralBinExponent": 3,
"truncationEpsilon": 0.1,
"removedSingularIntervalNumber": 0,
"granularities": [0,3,5,7,29],
Expand Down Expand Up @@ -432,8 +438,6 @@
"modlHistograms": {
"histogramNumber": 5,
"interpretableHistogramNumber": 4,
"centralBinExponent": -3,
"lastCentralBinExponent": -4,
"truncationEpsilon": 0.1,
"removedSingularIntervalNumber": 0,
"granularities": [0,1,2,3,30],
Expand Down Expand Up @@ -511,8 +515,6 @@
"modlHistograms": {
"histogramNumber": 2,
"interpretableHistogramNumber": 2,
"centralBinExponent": 2,
"lastCentralBinExponent": 2,
"truncationEpsilon": 0,
"removedSingularIntervalNumber": 0,
"granularities": [0,2],
Expand Down Expand Up @@ -554,8 +556,6 @@
"modlHistograms": {
"histogramNumber": 4,
"interpretableHistogramNumber": 3,
"centralBinExponent": 1,
"lastCentralBinExponent": 3,
"truncationEpsilon": 0.1,
"removedSingularIntervalNumber": 0,
"granularities": [0,1,2,29],
Expand Down Expand Up @@ -604,8 +604,6 @@
"modlHistograms": {
"histogramNumber": 4,
"interpretableHistogramNumber": 3,
"centralBinExponent": 2,
"lastCentralBinExponent": 2,
"truncationEpsilon": 0.1,
"removedSingularIntervalNumber": 0,
"granularities": [0,1,5,28],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Train unsupervised model
Database ../../../datasets/Iris/Iris.txt: Read records: 150 Selected records: 105
Evaluation of variable pairs
Data preparation time: 0:00:00.31
Data preparation time: 0:00:00.38
Write report ./results/AnalysisResults.khj

Original file line number Diff line number Diff line change
@@ -1 +1 @@
Overal time: 0.4437694549560547
Overal time: 0.9410936832427979
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Khiops 10.4.6i
#Khiops 10.4.8i
Modeling report

Dictionary Adult
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Khiops 10.4.6i
#Khiops 10.4.8i
Descriptive statistics


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Khiops 10.4.6i
#Khiops 10.4.8i
Test evaluation report

Dictionary Adult
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Khiops 10.4.6i
#Khiops 10.4.8i
Train evaluation report

Dictionary Adult
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"tool": "Khiops",
"version": "10.4.6i",
"version": "10.4.8i",
"shortDescription": "",
"modelingReport": {
"reportType": "Modeling",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Khiops 10.4.6i
#Khiops 10.4.8i

Dictionary BU1_Adult
<InitialDictionary="Adult"> <PredictorLabel="Univariate relationship"> <PredictorType="Classifier">
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"tool": "Khiops",
"version": "10.4.6i",
"version": "10.4.8i",
"evaluationReport": {
"reportType": "Evaluation",
"evaluationType": "",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Khiops 10.4.6i
#Khiops 10.4.8i
Predictor evaluation report

Dictionary Adult
Expand Down
Loading

0 comments on commit 141675e

Please sign in to comment.