Skip to content

Commit

Permalink
Refactor comparisons with new KWContinuous::CompareIndicatorValue method
Browse files Browse the repository at this point in the history
KWContinuous::CompareIndicatorValue
- Comparaison de deux doubles en les convertissant prealablement en Continuous
  Permet d'avoir un resultat de comparaison robuste, selon la precision des Continuous
- remplace avantageusement le passage par des longint intermediaires pour des comparaisons a 10 digits pres
- impacts
  - KWDGSymbolValue::CompareTypicality
  - KWDGVarPartValue::CompareTypicality
  - KWLearningReport::CompareValue
  - KWSelectedAttributeReport::CompareValue
  - KWDataPreparationAttributeCompareSortValue
  - KWAttributeStatsCompareLevel
  - DTTreeAttributeLevelCompare
  - DTTreeSpecsCompareLevels

Tests complets sur LearningTest
  • Loading branch information
marcboulle committed Nov 30, 2023
1 parent 88fb83a commit b49f428
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 67 deletions.
9 changes: 2 additions & 7 deletions src/Learning/DTForest/DTAttributeSelection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -729,22 +729,17 @@ longint DTTreeAttribute::GetUsedMemory() const

// Comparison function for DTTreeAttribute elements, based on their level.
// Each element is accessed as a pointer to an Object*, cast to DTTreeAttribute*.
// The levels are compared through KWContinuous::CompareIndicatorValue, i.e. at the precision
// of the Continuous type, so that differences of the order of epsilon do not change the result.
// The level comparison is negated: the result is negative when the first attribute has the
// higher level. Ties are resolved by DTTreeAttributeCompareName.
// (presumably used as a sort callback -- confirm against callers)
int DTTreeAttributeLevelCompare(const void* elem1, const void* elem2)
{
	int nCompare;

	DTTreeAttribute* i1 = (DTTreeAttribute*)*(Object**)elem1;
	DTTreeAttribute* i2 = (DTTreeAttribute*)*(Object**)elem2;

	// Comparison at the precision of the Continuous type, to avoid differences of the order of epsilon
	// (the former longint-based comparison was a dead store, overwritten by this assignment)
	nCompare = -KWContinuous::CompareIndicatorValue(i1->dLevel, i2->dLevel);

	// Comparison by name in case of a tie
	if (nCompare == 0)
		nCompare = DTTreeAttributeCompareName(elem1, elem2);

	return nCompare;
}

Expand Down
8 changes: 2 additions & 6 deletions src/Learning/DTForest/DTCreationReport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,17 +397,13 @@ void DTCreationReport::ComputeRankIdentifiers(ObjectArray* oaReports)

// Comparison function for DTDecisionTreeSpec elements, based on their level.
// The level comparison is negated, so it yields a negative value when the first
// spec has the higher level.
int DTTreeSpecsCompareLevels(const void* elem1, const void* elem2)
{
longint lLevel1;
longint lLevel2;
int nCompare;

// Each element is accessed as a pointer to an Object*, cast to the tree spec type
DTDecisionTreeSpec* s1 = (DTDecisionTreeSpec*)*(Object**)elem1;
DTDecisionTreeSpec* s2 = (DTDecisionTreeSpec*)*(Object**)elem2;

// Comparison of the levels, scaled by 1e10 and converted to longint
// NOTE(review): this result is overwritten by the assignment just below, so the
// longint-based computation looks like leftover code -- confirm before removing
lLevel1 = longint(floor(s1->GetLevel() * 1e10));
lLevel2 = longint(floor(s2->GetLevel() * 1e10));
nCompare = -CompareLongint(lLevel1, lLevel2);
// Comparison at the precision of the Continuous type, to avoid differences of the order of epsilon
nCompare = -KWContinuous::CompareIndicatorValue(s1->GetLevel(), s2->GetLevel());

// Comparison by tree name in case of a tie
if (nCompare == 0)
Expand Down
5 changes: 0 additions & 5 deletions src/Learning/DTForest/DTDecisionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -582,11 +582,6 @@ inline int DTSplitCompareSortValue(const void* elem1, const void* elem2)
// dSortValue2 = report2->GetTreeCost();
sSortValue1 = report1->GetSplittableNode()->GetNodeIdentifier();
sSortValue2 = report2->GetSplittableNode()->GetNodeIdentifier();

// On se base sur un comparaison a dix decimales pres
// lSortValue1 = longint(floor(dSortValue1 * 1e10));
// lSortValue2 = longint(floor(dSortValue2 * 1e10));
// nCompare = -CompareLongint(lSortValue1, lSortValue2);
nCompare = sSortValue1.Compare(sSortValue2);

// Comparaison si necessaire sur le nom
Expand Down
10 changes: 10 additions & 0 deletions src/Learning/KWData/KWContinuous.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ class KWContinuous : public Object
// Comparison of two Continuous values
static int Compare(Continuous cValue1, Continuous cValue2);

// Comparison of two indicator-type values, in principle between 0 and 1
// Gives a robust comparison result, according to the precision of the Continuous type
static int CompareIndicatorValue(double dValue1, double dValue2);

// Test of the functionalities
static void Test();

Expand Down Expand Up @@ -535,6 +539,12 @@ inline int KWContinuous::Compare(Continuous cValue1, Continuous cValue2)
return (cValue1 == cValue2 ? 0 : (cValue1 > cValue2 ? 1 : -1));
}

inline int KWContinuous::CompareIndicatorValue(double dValue1, double dValue2)
{
// On ajoute 1 pour avoir une precision de mantisse limitee de facon absolue par rapprt au 0
return Compare(DoubleToContinuous(1 + dValue1), DoubleToContinuous(1 + dValue2));
}

// Classe ContinuousObject

inline ContinuousObject::ContinuousObject()
Expand Down
8 changes: 2 additions & 6 deletions src/Learning/KWDataPreparation/KWAttributeStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1954,8 +1954,6 @@ int KWAttributeStatsCompareLevel(const void* elem1, const void* elem2)
{
KWAttributeStats* attributeStats1;
KWAttributeStats* attributeStats2;
longint lLevel1;
longint lLevel2;
int nCompare;

// Acces aux objects
Expand All @@ -1966,10 +1964,8 @@ int KWAttributeStatsCompareLevel(const void* elem1, const void* elem2)
assert(attributeStats1->Check());
assert(attributeStats2->Check());

// Comparaison des levels des attributs (ramanes a longint)
lLevel1 = longint(floor(attributeStats1->GetLevel() * 1e10));
lLevel2 = longint(floor(attributeStats2->GetLevel() * 1e10));
nCompare = -CompareLongint(lLevel1, lLevel2);
// Comparaison selon la precison du type Continuous, pour eviter les differences a epsilon pres
nCompare = -KWContinuous::CompareIndicatorValue(attributeStats1->GetLevel(), attributeStats2->GetLevel());

// Comparaison par nom si match nul
if (nCompare == 0)
Expand Down
10 changes: 4 additions & 6 deletions src/Learning/KWDataPreparation/KWDataGrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4762,9 +4762,8 @@ int KWDGSymbolValue::CompareTypicality(const KWDGValue* otherValue) const

require(otherValue != NULL);

// Comparaison de la typicalite selon la precison du type Continuous, pour eviter les differences a epsilon pres
nCompare = -KWContinuous::Compare(KWContinuous::DoubleToContinuous(GetTypicality()),
KWContinuous::DoubleToContinuous(otherValue->GetTypicality()));
// Comparaison selon la precison du type Continuous, pour eviter les differences a epsilon pres
nCompare = -KWContinuous::CompareIndicatorValue(GetTypicality(), otherValue->GetTypicality());

// Comparaison par effectif decroissaqnt si egalite
if (nCompare == 0)
Expand Down Expand Up @@ -4847,9 +4846,8 @@ int KWDGVarPartValue::CompareTypicality(const KWDGValue* otherValue) const

require(otherValue != NULL);

// Comparaison de la typicalite selon la precison du type Continuous, pour eviter les differences a epsilon pres
nCompare = -KWContinuous::Compare(KWContinuous::DoubleToContinuous(GetTypicality()),
KWContinuous::DoubleToContinuous(otherValue->GetTypicality()));
// Comparaison selon la precison du type Continuous, pour eviter les differences a epsilon pres
nCompare = -KWContinuous::CompareIndicatorValue(GetTypicality(), otherValue->GetTypicality());

// Comparaison par valeur si egalite
if (nCompare == 0)
Expand Down
16 changes: 2 additions & 14 deletions src/Learning/KWDataPreparation/KWLearningReport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,21 +104,9 @@ int KWLearningReport::CompareName(const KWLearningReport* otherReport) const
// Compares this report with another one according to their sort values.
// The sort-value comparison is negated, so it yields a negative value when this
// report has the higher sort value.
int KWLearningReport::CompareValue(const KWLearningReport* otherReport) const
{
int nCompare;
longint lSortValue1;
longint lSortValue2;

// Comparison based on ten decimal digits: each sort value is scaled by 1e10 and
// converted to longint, the sign being handled separately
// NOTE(review): the result of this longint-based comparison is overwritten by the
// CompareIndicatorValue assignment below, so it looks like leftover code -- confirm
if (GetSortValue() >= 0)
lSortValue1 = longint(GetSortValue() * 1e10);
else
lSortValue1 = -longint(-GetSortValue() * 1e10);
if (otherReport->GetSortValue() >= 0)
lSortValue2 = longint(otherReport->GetSortValue() * 1e10);
else
lSortValue2 = -longint(-otherReport->GetSortValue() * 1e10);

// Comparison of the integer values
nCompare = -CompareLongint(lSortValue1, lSortValue2);
// Comparison at the precision of the Continuous type, to avoid differences of the order of epsilon
// NOTE(review): KWContinuous::CompareIndicatorValue already adds 1 to both of its arguments;
// the extra "1 +" here looks redundant and differs from the other call sites -- confirm
nCompare = -KWContinuous::CompareIndicatorValue(1 + GetSortValue(), 1 + otherReport->GetSortValue());

// In case of a tie, fall back on the name
if (nCompare == 0)
Expand Down
15 changes: 3 additions & 12 deletions src/Learning/KWModeling/KWDataPreparationClass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1113,10 +1113,6 @@ int KWDataPreparationAttributeCompareSortValue(const void* elem1, const void* el
{
KWDataPreparationAttribute* dataPreparationAttribute1;
KWDataPreparationAttribute* dataPreparationAttribute2;
double dSortValue1;
double dSortValue2;
longint lSortValue1;
longint lSortValue2;
int nCompare;

check(elem1);
Expand All @@ -1132,14 +1128,9 @@ int KWDataPreparationAttributeCompareSortValue(const void* elem1, const void* el
check(dataPreparationAttribute2);
assert(dataPreparationAttribute2->Check());

// Evaluation univariee des attributs
dSortValue1 = dataPreparationAttribute1->GetPreparedStats()->GetSortValue();
dSortValue2 = dataPreparationAttribute2->GetPreparedStats()->GetSortValue();

// On se base sur un comparaison a dix decimales pres
lSortValue1 = longint(floor(dSortValue1 * 1e10));
lSortValue2 = longint(floor(dSortValue2 * 1e10));
nCompare = -CompareLongint(lSortValue1, lSortValue2);
// Comparaison selon la precison du type Continuous, pour eviter les differences a epsilon pres
nCompare = -KWContinuous::CompareIndicatorValue(dataPreparationAttribute1->GetPreparedStats()->GetSortValue(),
dataPreparationAttribute2->GetPreparedStats()->GetSortValue());

// Comparaison si necessaire sur le nom
if (nCompare == 0)
Expand Down
15 changes: 4 additions & 11 deletions src/Learning/KWModeling/KWPredictorReport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,22 +424,15 @@ int KWSelectedAttributeReport::CompareValue(const KWLearningReport* otherReport)
{
int nCompare;
KWSelectedAttributeReport* otherAttributeReport = cast(KWSelectedAttributeReport*, otherReport);
longint lSortValue1;
longint lSortValue2;

// On se base sur un comparaison a dix decimales pres
lSortValue1 = longint(floor(fabs(GetImportance()) * 1e10));
lSortValue2 = longint(floor(fabs(otherAttributeReport->GetImportance()) * 1e10));
assert(lSortValue1 >= 0);
assert(lSortValue2 >= 0);
nCompare = -CompareLongint(lSortValue1, lSortValue2);
// Comparaison selon la precison du type Continuous, pour eviter les differences a epsilon pres
nCompare = -KWContinuous::CompareIndicatorValue(GetImportance(), otherAttributeReport->GetImportance());

// En cas d'egalite, on se base sur l'evaluation univariee
if (nCompare == 0)
{
lSortValue1 = longint(floor(fabs(GetUnivariateEvaluation()) * 1e10));
lSortValue2 = longint(floor(fabs(otherAttributeReport->GetUnivariateEvaluation()) * 1e10));
nCompare = -CompareLongint(lSortValue1, lSortValue2);
nCompare = -KWContinuous::CompareIndicatorValue(GetUnivariateEvaluation(),
otherAttributeReport->GetUnivariateEvaluation());
}

// En cas d'egalite, on se base sur le nom
Expand Down

0 comments on commit b49f428

Please sign in to comment.