Skip to content

Commit

Permalink
WIP step 5
Browse files Browse the repository at this point in the history
CCCoclusteringBuilder::OptimizeVarPartDataGrid
- passage de variables locales de type pointeur (a allouer et desallouer localement) a des variables locales standard

KWDataGridManager::ComputeGranularizedTotalPartNumbers
- amelioration des performances par bufferisation des resultats de quantilisation
  • Loading branch information
marcboulle committed Nov 6, 2023
1 parent 1a67b6f commit 41b0a35
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 59 deletions.
14 changes: 11 additions & 3 deletions src/Learning/KWDataPreparation/KWDataGridManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -727,9 +727,17 @@ void KWDataGridManager::ComputeGranularizedTotalPartNumbers(const ObjectDictiona
{
quantileBuilder = cast(KWQuantileBuilder*, oaQuantilesBuilders.GetAt(nAttribute));

// Calcul du nombre effectif de quantiles
// (peut etre inferieur au nombre theorique du fait de doublons)
nActualPartileNumber = quantileBuilder->ComputeQuantiles(nPartileNumber);
// Calcul du nombre effectif de quantiles si necessaire
if (nGranularity == 0 or
quantileBuilder->GetComputedQuantileNumber() < quantileBuilder->GetValueNumber())
nActualPartileNumber = quantileBuilder->ComputeQuantiles(nPartileNumber);
// Sinon, on a atteint le nombre maximum de valeurs a une granularite precedente
else
{
assert(quantileBuilder->GetComputedQuantileNumber() ==
quantileBuilder->GetValueNumber());
nActualPartileNumber = quantileBuilder->GetValueNumber();
}

// Mise a jour du total
nTotalActualPartileNumber += nActualPartileNumber;
Expand Down
102 changes: 46 additions & 56 deletions src/Learning/MODL_Coclustering/CCCoclusteringBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -618,9 +618,9 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
boolean bDisplayPartitionLevel = false;
KWDataGridOptimizer dataGridOptimizer;
ObjectDictionary odInnerAttributesQuantileBuilders;
KWDataGrid* nullDataGrid;
KWDataGrid* partitionedDataGrid;
KWDataGrid* partitionedOptimizedDataGrid;
KWDataGrid nullDataGrid;
KWDataGrid partitionedDataGrid;
KWDataGrid partitionedOptimizedDataGrid;
KWDataGridManager dataGridManager;
IntVector ivMaxPartNumbers;
int nPrePartitionIndex;
Expand All @@ -638,7 +638,7 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
int nInnerAttributeCumulated;
KWDGAttribute* varPartAttribute;
KWDGAttribute* innerAttribute;
KWDataGrid* partitionedPostMergedOptimizedDataGrid;
KWDataGrid partitionedPostMergedOptimizedDataGrid;
KWDataGrid partitionedReferencePostMergedDataGrid;
double dMergedCost;
double dBestMergedCost;
Expand Down Expand Up @@ -682,7 +682,6 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
dataGridManager.SetSourceDataGrid(initialDataGrid);
dataGridManager.InitializeInnerAttributesQuantileBuilders(&odInnerAttributesQuantileBuilders,
&ivMaxPartNumbers);

if (bDisplayPrePartitioning)
{
cout << "ivMaxPartNumbers\t" << ivMaxPartNumbers;
Expand All @@ -695,10 +694,9 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti

// Export de la grille du (vrai) modele nul : un seul cluster par attribut et une seule partie de
// variable par attribut interne
nullDataGrid = new KWDataGrid;
dataGridManager.ExportNullDataGrid(nullDataGrid);
dAnyTimeDefaultCost = coclusteringDataGridCosts->ComputeDataGridTotalCost(nullDataGrid);
dataGridManager.CopyDataGrid(nullDataGrid, optimizedDataGrid);
dataGridManager.ExportNullDataGrid(&nullDataGrid);
dAnyTimeDefaultCost = coclusteringDataGridCosts->ComputeDataGridTotalCost(&nullDataGrid);
dataGridManager.CopyDataGrid(&nullDataGrid, optimizedDataGrid);
dBestCost = dAnyTimeDefaultCost;
dBestMergedCost = dBestCost;

Expand All @@ -719,21 +717,20 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti

// Pre-partitionnement des attributs internes de la grille initiale
dataGridManager.SetSourceDataGrid(initialDataGrid);
partitionedDataGrid = new KWDataGrid;
dataGridManager.ExportGranularizedDataGridForVarPartAttributes(partitionedDataGrid, nPrePartitionIndex,
dataGridManager.ExportGranularizedDataGridForVarPartAttributes(&partitionedDataGrid, nPrePartitionIndex,
&odInnerAttributesQuantileBuilders);

if (bDisplayResults)
{
cout << "CCOptimize :partitionedDataGrid pour le pre-partitionnement " << nPrePartitionIndex
<< endl;
partitionedDataGrid->Write(cout);
partitionedDataGrid.Write(cout);
}

// Etude du nombre de parties des attributs internes pour decider du traitement ou non de ce
// pre-partitionnement
nInnerAttributeCumulated = 0;
varPartAttribute = partitionedDataGrid->GetVarPartAttribute();
varPartAttribute = partitionedDataGrid.GetVarPartAttribute();
if (varPartAttribute != NULL)
{
for (nInnerAttribute = 0; nInnerAttribute < varPartAttribute->GetInnerAttributeNumber();
Expand Down Expand Up @@ -772,7 +769,7 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
if (bIsLastPrePartitioning)
// On positionne l'index de granularite au maximum afin que l'affichage soit adapte a ce
// cas
partitionedDataGrid->GetInnerAttributes()->SetVarPartGranularity(nPrePartitionMax);
partitionedDataGrid.GetInnerAttributes()->SetVarPartGranularity(nPrePartitionMax);

// Analyse du nombre de parties par attribut interne granularise pour determiner si la grille
// pre-partitionnee sera optimisee. Il faut pour cela qu'elle soit suffisamment differente de la
Expand All @@ -793,7 +790,7 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
}

// On ne traite pas les grilles avec un seul attribut informatif
if (partitionedDataGrid->GetInformativeAttributeNumber() <= 1)
if (partitionedDataGrid.GetInformativeAttributeNumber() <= 1)
bIsPrePartitioningSelected = false;

// Cas du traitement de la granularite courante
Expand All @@ -807,12 +804,9 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
// Memorisation des pre-partitionnements exploites
ivUsedPrePartitioning.Add(nPrePartitionIndex);

// Initialisation de la grille pre-partitionnee optimisee
partitionedOptimizedDataGrid = new KWDataGrid;

// Initialisation du modele par defaut : ce modele depend du partitionnement des
// attributs internes
coclusteringDataGridCosts->InitializeDefaultCosts(partitionedDataGrid);
coclusteringDataGridCosts->InitializeDefaultCosts(&partitionedDataGrid);

// Optimisation de la grille pre-partitionnee
// Le cout dPartitionBestCost est le cout de la grille antecedente de la meilleure
Expand All @@ -822,7 +816,7 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
KWDataGridOptimizer::GetProfiler()->WriteKeyString("Pre-partition index",
IntToString(nPrePartitionIndex));
dPartitionBestCost =
dataGridOptimizer.OptimizeDataGrid(partitionedDataGrid, partitionedOptimizedDataGrid);
dataGridOptimizer.OptimizeDataGrid(&partitionedDataGrid, &partitionedOptimizedDataGrid);
KWDataGridOptimizer::GetProfiler()->EndMethod("Optimize VarPart prepartition");

// Calcul du temps d'optimisation (depuis le debut)
Expand All @@ -833,40 +827,41 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
{
cout << "Apres OptimizeGranularizedDataGrid pour Granularite " << nPrePartitionIndex
<< "\t Cout " << dPartitionBestCost << endl;
partitionedOptimizedDataGrid->WriteAttributes(cout);
partitionedOptimizedDataGrid->WriteAttributeParts(cout);
partitionedOptimizedDataGrid.WriteAttributes(cout);
partitionedOptimizedDataGrid.WriteAttributeParts(cout);
}

// Creation d'une grille post-mergee pour cette granularite de pre-partitionnement
// Utilisation d'une grille post-mergee partitionedPostMergedOptimizedDataGrid
// pour cette granularite de pre-partitionnement
// CH AF Il faudrait aussi recalculer la post-optimisation VarPart de la grille
// post-mergee mais de toute facon cette meilleure grille aura ete memorisee avec
// HandleOptimisationStep dans l'algo VNS -> commentaire a comprendre
partitionedPostMergedOptimizedDataGrid = new KWDataGrid;

if (partitionedOptimizedDataGrid->GetInformativeAttributeNumber() > 0 and
if (partitionedOptimizedDataGrid.GetInformativeAttributeNumber() > 0 and
dataGridOptimizer.GetParameters()->GetVarPartPostMerge())
{
dataGridManager.SetSourceDataGrid(partitionedOptimizedDataGrid);
dataGridManager.SetSourceDataGrid(&partitionedOptimizedDataGrid);

// Creation d'une nouvelle grille avec nouvelle description des PV et calcul de
// la variation de cout liee a la fusion des PV
dFusionDeltaCost = dataGridManager.ExportDataGridWithVarPartMergeOptimization(
partitionedPostMergedOptimizedDataGrid, coclusteringDataGridCosts);
assert(not partitionedPostMergedOptimizedDataGrid->GetVarPartsShared());
&partitionedPostMergedOptimizedDataGrid, coclusteringDataGridCosts);
assert(not partitionedPostMergedOptimizedDataGrid.GetVarPartsShared());

// Calcul et verification du cout
dMergedCost = dPartitionBestCost + dFusionDeltaCost;
// Le cout precedent devra etre correct
assert(dMergedCost * (1 - dEpsilon) <
coclusteringDataGridCosts->ComputeDataGridTotalCost(
partitionedPostMergedOptimizedDataGrid));
&partitionedPostMergedOptimizedDataGrid));
assert(coclusteringDataGridCosts->ComputeDataGridTotalCost(
partitionedPostMergedOptimizedDataGrid) < dMergedCost * (1 + dEpsilon));
&partitionedPostMergedOptimizedDataGrid) < dMergedCost * (1 + dEpsilon));

if (bDisplayResults)
{
cout << "CCOptimize : Grille avant fusion \t" << dPartitionBestCost << "\n";
cout << "CCOptimize : Grille fusionnee \t" << dMergedCost << "\n";
partitionedPostMergedOptimizedDataGrid->Write(cout);
partitionedPostMergedOptimizedDataGrid.Write(cout);
cout << flush;
}
}
Expand All @@ -884,23 +879,23 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
<< endl;

// Memorisation de l'optimum post-fusionne
if (partitionedOptimizedDataGrid->GetInformativeAttributeNumber() > 0 and
if (partitionedOptimizedDataGrid.GetInformativeAttributeNumber() > 0 and
dataGridOptimizer.GetParameters()->GetVarPartPostMerge())
{
dataGridManager.CopyDataGrid(partitionedPostMergedOptimizedDataGrid,
dataGridManager.CopyDataGrid(&partitionedPostMergedOptimizedDataGrid,
optimizedDataGrid);

// Mise a jour de la propriete de la description des parties de variable
partitionedPostMergedOptimizedDataGrid->SetVarPartsShared(true);
partitionedPostMergedOptimizedDataGrid.SetVarPartsShared(true);
optimizedDataGrid->SetVarPartsShared(false);
}

else
{
dataGridManager.CopyDataGrid(partitionedOptimizedDataGrid, optimizedDataGrid);
dataGridManager.CopyDataGrid(&partitionedOptimizedDataGrid, optimizedDataGrid);

// Mise a jour de la propriete de la description des parties de variable
partitionedOptimizedDataGrid->SetVarPartsShared(true);
partitionedOptimizedDataGrid.SetVarPartsShared(true);
optimizedDataGrid->SetVarPartsShared(false);
}
}
Expand Down Expand Up @@ -948,33 +943,33 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
}
else
{
HandleOptimizationStep(optimizedDataGrid, partitionedDataGrid, true);
HandleOptimizationStep(optimizedDataGrid, &partitionedDataGrid, true);

if (optimizedDataGrid->GetInnerAttributes() ==
partitionedDataGrid->GetInnerAttributes())
partitionedDataGrid.GetInnerAttributes())
{
partitionedDataGrid->SetVarPartsShared(true);
partitionedDataGrid.SetVarPartsShared(true);
optimizedDataGrid->SetVarPartsShared(false);
}
}
}

if (coclusteringDataGrid != NULL and coclusteringDataGrid->IsVarPartDataGrid())
{
if (partitionedDataGrid->IsVarPartDataGrid() and
partitionedDataGrid->GetInnerAttributes() ==
if (partitionedDataGrid.IsVarPartDataGrid() and
partitionedDataGrid.GetInnerAttributes() ==
coclusteringDataGrid->GetInnerAttributes())
{
coclusteringDataGrid->SetVarPartsShared(false);
partitionedDataGrid->SetVarPartsShared(true);
partitionedDataGrid.SetVarPartsShared(true);
}

if (partitionedPostMergedOptimizedDataGrid->IsVarPartDataGrid() and
partitionedPostMergedOptimizedDataGrid->GetInnerAttributes() ==
if (partitionedPostMergedOptimizedDataGrid.IsVarPartDataGrid() and
partitionedPostMergedOptimizedDataGrid.GetInnerAttributes() ==
coclusteringDataGrid->GetInnerAttributes())
{
coclusteringDataGrid->SetVarPartsShared(false);
partitionedPostMergedOptimizedDataGrid->SetVarPartsShared(true);
partitionedPostMergedOptimizedDataGrid.SetVarPartsShared(true);
}

if (optimizedDataGrid->IsVarPartDataGrid() and
Expand All @@ -987,12 +982,11 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
}

// Nettoyage
delete partitionedPostMergedOptimizedDataGrid;
partitionedPostMergedOptimizedDataGrid.DeleteAll();
partitionedReferencePostMergedDataGrid.DeleteAll();

// Nettoyage de la grille optimisee pour cette granularite
delete partitionedOptimizedDataGrid;
partitionedOptimizedDataGrid = NULL;
partitionedOptimizedDataGrid.DeleteAll();

// Cas d'un temps limite : mise a jour du temps restant par retrait du temps consacre a
// cette granularite
Expand All @@ -1016,9 +1010,6 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
cout << "Totalite du temps alloue ecoule apres la granularite "
"de pre-partitionnement de l'attribut VarPart \t"
<< nPrePartitionIndex << endl;

// Nettoyage avant arret
delete partitionedDataGrid;
break;
}
}
Expand All @@ -1036,7 +1027,7 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti
}

// Nettoyage de la grille granularisee
delete partitionedDataGrid;
partitionedDataGrid.DeleteAll();

nPrePartitionIndex++;
}
Expand All @@ -1049,7 +1040,6 @@ void CCCoclusteringBuilder::OptimizeVarPartDataGrid(const KWDataGrid* inputIniti

// Nettoyage
odInnerAttributesQuantileBuilders.DeleteAll();
delete nullDataGrid;
// CH IV End

// CH IV probleme est ce que le code qui suit est a conserver
Expand Down Expand Up @@ -1139,9 +1129,9 @@ void CCCoclusteringBuilder::PROTO_OptimizeVarPartDataGrid(const KWDataGrid* inpu
&ivMaxPartNumbers);

//DDD
//IntVector ivGranularityTotalPartNumbers;
//dataGridManager.ComputeGranularizedTotalPartNumbers(&odInnerAttributesQuantileBuilders,
// &ivGranularityTotalPartNumbers);
//DDD IntVector ivGranularityTotalPartNumbers;
//DDD dataGridManager.ComputeGranularizedTotalPartNumbers(&odInnerAttributesQuantileBuilders,
//DDD &ivGranularityTotalPartNumbers);

if (bDisplayPrePartitioning)
{
Expand Down

0 comments on commit 41b0a35

Please sign in to comment.