From 2f5112d23aaaaef5dc800165026fcc07ed748925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Boull=C3=A9?= Date: Mon, 23 Oct 2023 10:40:10 +0200 Subject: [PATCH] WIP step 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Debut de refactoring de NEWKWDataGridOptimizer, en fusionnant avec NEWKWDataGridVNSOptimizer - rapatriement des variables de classe - rapatriement de l'implementation de DisplayOptimizationDetails et IsOptimizationTimeElapsed - déplacement des TaskProgression::Begin|EndTask de OptimizeGranularizedDataGrid vers OptimizeDataGrid --- .../NEWKWDataGridOptimizer.cpp | 100 ++++++++++++++++-- .../NEWKWDataGridOptimizer.h | 23 +++- .../MODL_Coclustering/CCDeploymentSpec.cpp | 3 +- .../MODL_Coclustering/MODL_Coclustering.cpp | 4 +- 4 files changed, 111 insertions(+), 19 deletions(-) diff --git a/src/Learning/KWDataPreparation/NEWKWDataGridOptimizer.cpp b/src/Learning/KWDataPreparation/NEWKWDataGridOptimizer.cpp index bf6ed1c1e..ff2c96229 100644 --- a/src/Learning/KWDataPreparation/NEWKWDataGridOptimizer.cpp +++ b/src/Learning/KWDataPreparation/NEWKWDataGridOptimizer.cpp @@ -11,12 +11,17 @@ NEWKWDataGridOptimizer::NEWKWDataGridOptimizer() { classStats = NULL; dataGridCosts = NULL; + bSlightOptimizationMode = false; bCleanNonInformativeVariables = false; - dEpsilon = 1e-6; + nVNSIteration = 0; + nVNSLevel = 0; + nVNSMaxLevel = 0; + dVNSNeighbourhoodSize = 0; // CH IV Begin initialVarPartDataGrid = NULL; // CH IV end attributeSubsetStatsHandler = NULL; + dEpsilon = 1e-6; } NEWKWDataGridOptimizer::~NEWKWDataGridOptimizer() {} @@ -25,12 +30,17 @@ void NEWKWDataGridOptimizer::Reset() { classStats = NULL; dataGridCosts = NULL; + bSlightOptimizationMode = false; bCleanNonInformativeVariables = false; - dEpsilon = 1e-6; + nVNSIteration = 0; + nVNSLevel = 0; + nVNSMaxLevel = 0; + dVNSNeighbourhoodSize = 0; // CH IV Begin initialVarPartDataGrid = NULL; // CH IV end attributeSubsetStatsHandler = NULL; + dEpsilon = 1e-6; }
void NEWKWDataGridOptimizer::SetDataGridCosts(const KWDataGridCosts* kwdgcCosts) @@ -79,7 +89,6 @@ double NEWKWDataGridOptimizer::OptimizeDataGrid(const KWDataGrid* initialDataGri KWDataGridManager dataGridManager; double dGranularityBestCost; double dBestCost; - Timer timerOptimization; boolean bIsOptimizationNeeded; ObjectDictionary odQuantileBuilders; IntVector ivMaxPartNumbers; @@ -100,6 +109,10 @@ double NEWKWDataGridOptimizer::OptimizeDataGrid(const KWDataGrid* initialDataGri int nLastExploredGranularity; ALString sTmp; + // Debut de suivi des taches + TaskProgression::BeginTask(); + TaskProgression::DisplayMainLabel("Data Grid optimization"); + //Initialisations dGranularityBestCost = DBL_MAX; dBestMergedCost = dGranularityBestCost; @@ -458,6 +471,10 @@ double NEWKWDataGridOptimizer::OptimizeDataGrid(const KWDataGrid* initialDataGri // Nettoyage odQuantileBuilders.DeleteAll(); } + + // Fin de suivi des taches + TaskProgression::EndTask(); + ensure(optimizedDataGrid->AreAttributePartsSorted()); ensure(fabs(dBestCost - GetDataGridCosts()->ComputeDataGridTotalCost(optimizedDataGrid)) < dEpsilon); return dBestCost; @@ -573,10 +590,6 @@ double NEWKWDataGridOptimizer::OptimizeGranularizedDataGrid(const KWDataGrid* in require(initialDataGrid->Check()); require(optimizedDataGrid != NULL); - // Debut de suivi des taches - TaskProgression::BeginTask(); - TaskProgression::DisplayMainLabel("Data Grid internal optimization"); - // Ligne d'entete des messages DisplayOptimizationHeaderLine(); @@ -656,9 +669,6 @@ double NEWKWDataGridOptimizer::OptimizeGranularizedDataGrid(const KWDataGrid* in cout << " Cout meilleure grille " << dBestCost << endl; } - // Fin de suivi des taches - TaskProgression::EndTask(); - // Retour du meilleur cout de codage ensure(fabs(dataGridCosts->ComputeDataGridTotalCost(optimizedDataGrid) - dBestCost) < dEpsilon); return dBestCost; @@ -1152,16 +1162,72 @@ void NEWKWDataGridOptimizer::DisplayOptimizationHeaderLine() const if 
(optimizationParameters.GetDisplayDetails()) { // Lignes d'entete + cout << "Time\tIter\tNeigh. size\t"; cout << "Initial\t\t\t\tFinal\t\t\t\t\n"; - cout << "Att. number\tPart number\tCell number\tCost\t"; + cout << "\t\t\tAtt. number\tPart number\tCell number\tCost\t"; cout << "Att. number\tPart number\tCell number\tCost\n"; } } void NEWKWDataGridOptimizer::DisplayOptimizationDetails(const KWDataGrid* optimizedDataGrid, boolean bOptimized) const { + ALString sTmp; + int nTotalIterLevel; + int nProgressionIterLevel; + int nMaxExploredGranularity; + ALString sProgressionLabel; + double dOptimizationTime; + + // Calcul du temps d'optimisation + dOptimizationTime = timerOptimization.GetElapsedTime(); + + // Gestion de l'avancement avant chaque etape d'iteration + if (not bOptimized) + { + // Calcul de la granularite max exploree + nMaxExploredGranularity = 0; + if (optimizedDataGrid->GetGranularity() > 0) + nMaxExploredGranularity = ComputeMaxExploredGranularity(optimizedDataGrid); + + // Message d'avancement, avec prise en compte de la granularite uniquement si necessaire + if (optimizedDataGrid->GetGranularity() > 0 and + optimizedDataGrid->GetGranularity() < nMaxExploredGranularity) + sProgressionLabel = sTmp + "Granularity " + IntToString(optimizedDataGrid->GetGranularity()) + + "/" + IntToString(nMaxExploredGranularity) + " "; + if (nVNSIteration > 0) + sProgressionLabel += sTmp + " VNS " + IntToString(nVNSIteration) + " " + + IntToString(nVNSLevel) + "/" + IntToString(nVNSMaxLevel) + " (" + + DoubleToString((int)(10000 * dVNSNeighbourhoodSize) / 10000.0) + ")"; + TaskProgression::DisplayLabel(sProgressionLabel); + + // Niveau d'avancement avec limite de temps + if (optimizationParameters.GetOptimizationTime() > 0) + { + if (dOptimizationTime > optimizationParameters.GetOptimizationTime()) + TaskProgression::DisplayProgression(100); + else + TaskProgression::DisplayProgression( + (int)((dOptimizationTime * 100) / optimizationParameters.GetOptimizationTime())); + 
} + // Cas sans limite de temps + else + { + //DDD TODO: a revoir: calculer le nombre max total d'iterations, en combinant granularite et VNS + nTotalIterLevel = (int)pow(2.0, 2 + optimizationParameters.GetOptimizationLevel()); + nProgressionIterLevel = nVNSIteration; + while (nTotalIterLevel < nProgressionIterLevel) + nTotalIterLevel = nTotalIterLevel + (1 + nTotalIterLevel) / 2; + TaskProgression::DisplayProgression((int)(nProgressionIterLevel * 100.0 / nTotalIterLevel)); + } + } + + // Affichage des details d'optimisation if (optimizationParameters.GetDisplayDetails()) { + // Affichage de l'iteration + if (not bOptimized) + cout << dOptimizationTime << "\t" << nVNSIteration << "\t" << dVNSNeighbourhoodSize << "\t"; + // Affichage des caracteristiques de la grille terminale cout << optimizedDataGrid->GetAttributeNumber() << "\t" << optimizedDataGrid->GetTotalPartNumber() << "\t" << optimizedDataGrid->GetCellNumber() << "\t" @@ -1174,6 +1240,18 @@ void NEWKWDataGridOptimizer::DisplayOptimizationDetails(const KWDataGrid* optimi } } +boolean NEWKWDataGridOptimizer::IsOptimizationTimeElapsed() const +{ + require(timerOptimization.IsStarted()); + + // Pas de limite par defaut + if (optimizationParameters.GetOptimizationTime() == 0) + return false; + // Test de depassement sinon + else + return timerOptimization.GetElapsedTime() >= optimizationParameters.GetOptimizationTime(); +} + Profiler NEWKWDataGridOptimizer::profiler; ////////////////////////////////////////////////////////////////////////////////// diff --git a/src/Learning/KWDataPreparation/NEWKWDataGridOptimizer.h b/src/Learning/KWDataPreparation/NEWKWDataGridOptimizer.h index d0b4e99fe..28d5bf27e 100644 --- a/src/Learning/KWDataPreparation/NEWKWDataGridOptimizer.h +++ b/src/Learning/KWDataPreparation/NEWKWDataGridOptimizer.h @@ -148,16 +148,19 @@ class NEWKWDataGridOptimizer : public Object double VNSOptimize(const KWDataGrid* initialDataGrid, KWDataGrid* optimizedDataGrid, boolean bIsLastGranularity) const; -
// Affichage des caracteristiques d'optimisation (selon le niveau d'affichage demande) + // Affichage des caracteristiques d'optimisation si demande dans les parametres d'optimisation + // Cette methode gere egalement les messages et le niveau d'avancement de la barre de progression void DisplayOptimizationHeaderLine() const; void DisplayOptimizationDetails(const KWDataGrid* optimizedDataGrid, boolean bOptimized) const; + // Test si le temps d'optimisation est depasse (si le parametre correspondant est actif (non nul)) + boolean IsOptimizationTimeElapsed() const; + // Parametrage de la structure des couts const KWDataGridCosts* dataGridCosts; // Parametres d'optimisation - // Ajout mutable pour pouvoir modifier optimizationTime - mutable KWDataGridOptimizerParameters optimizationParameters; + KWDataGridOptimizerParameters optimizationParameters; // Attribut de statistiques KWClassStats* classStats; @@ -167,17 +170,27 @@ class NEWKWDataGridOptimizer : public Object KWDataGrid* initialVarPartDataGrid; // CH IV End + // Mode d'optimisation legere dans le cas d'une granularite intermediaire en co-clustering non supervise + boolean bSlightOptimizationMode; + // Nettoyage des attribut non informatifs boolean bCleanNonInformativeVariables; - // Epsilon d'optimisation - double dEpsilon; + // Informations d'indexage des solutions evaluees durant l'optimisation + mutable Timer timerOptimization; + mutable int nVNSIteration; + mutable int nVNSLevel; + mutable int nVNSMaxLevel; + mutable double dVNSNeighbourhoodSize; // Contexte de gestion de la partie anytime de l'optimisation const KWAttributeSubsetStats* attributeSubsetStatsHandler; // Profiler static Profiler profiler; + + // Epsilon d'optimisation + double dEpsilon; }; ////////////////////////////////////////////////////////////////////////////////// diff --git a/src/Learning/MODL_Coclustering/CCDeploymentSpec.cpp b/src/Learning/MODL_Coclustering/CCDeploymentSpec.cpp index 5646d46cd..dfd6d3e03 100644 ---
a/src/Learning/MODL_Coclustering/CCDeploymentSpec.cpp +++ b/src/Learning/MODL_Coclustering/CCDeploymentSpec.cpp @@ -69,7 +69,8 @@ void CCDeploymentSpec::Write(ostream& ost) const ost << "Input table variable\t" << GetInputObjectArrayAttributeName() << "\n"; ost << "Coclustering deployed variable\t" << GetDeployedAttributeName() << "\n"; ost << "Build predicted cluster variable\t" << BooleanToString(GetBuildPredictedClusterAttribute()) << "\n"; - ost << "Build inter-cluster distance variables\t" << BooleanToString(GetBuildClusterDistanceAttributes()) << "\n"; + ost << "Build inter-cluster distance variables\t" << BooleanToString(GetBuildClusterDistanceAttributes()) + << "\n"; ost << "Build frequency recoding variables\t" << BooleanToString(GetBuildFrequencyRecodingAttributes()) << "\n"; ost << "Output variables prefix\t" << GetOutputAttributesPrefix() << "\n"; } diff --git a/src/Learning/MODL_Coclustering/MODL_Coclustering.cpp b/src/Learning/MODL_Coclustering/MODL_Coclustering.cpp index 56bb29fcb..9aac75bde 100644 --- a/src/Learning/MODL_Coclustering/MODL_Coclustering.cpp +++ b/src/Learning/MODL_Coclustering/MODL_Coclustering.cpp @@ -34,9 +34,9 @@ int main(int argc, char** argv) Global::ActivateSignalErrorManagement(); // Choix du repertoire de lancement pour le debugage sous Windows (a commenter apres fin du debug) - //SetWindowsDebugDir("y_CoclusteringIV_Standard", "IrisLight"); + SetWindowsDebugDir("y_CoclusteringIV_Standard", "IrisLight"); //SetWindowsDebugDir("y_CoclusteringIV_Standard", "Iris"); - SetWindowsDebugDir("Standard", "Iris"); + //SetWindowsDebugDir("Standard", "Iris"); // Point d'arret sur l'allocation d'un bloc memoire // MemSetAllocIndexExit(77);