From 419c124d8b15588a488c44a1e2dcc15e1c86e899 Mon Sep 17 00:00:00 2001 From: Damon McDougall Date: Thu, 5 Feb 2015 12:17:39 -0600 Subject: [PATCH 1/4] Fix bug in MLE compute and add test All that was happening was a NULL pointer dereference when a particular chain didn't contain a state that maximised the likelihood (probability 1 event). Now unifiedPositionsOfMaximum will only fill up a variable on process zero containing all the states that attain maxima. This variable on other processes will be a vector of length zero, hence the removal of the checks on subSequenceSize == 0. Also added a test for the unifiedPositionsOfMaximum method in SequenceOfVectors. --- .gitignore | 2 + configure.ac | 7 ++ src/basic/src/VectorSequence.C | 5 ++ src/stats/src/MetropolisHastingsSG.C | 48 +++++++------ test/Makefile.am | 5 ++ .../test_unifiedPositionsOfMaximum.C | 71 +++++++++++++++++++ .../test_unifiedPositionsOfMaximum.sh.in | 7 ++ 7 files changed, 122 insertions(+), 23 deletions(-) create mode 100644 test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.C create mode 100644 test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh.in diff --git a/.gitignore b/.gitignore index 8c85666c7..670ecd724 100644 --- a/.gitignore +++ b/.gitignore @@ -161,3 +161,5 @@ test/test_Regression/test_jeffreys_samples_diff.sh test/test_adaptedcov_output/ test/test_gpmsa_cobra_output/ test/test_logitadaptedcov +test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh +test/test_unifiedPositionsOfMaximum diff --git a/configure.ac b/configure.ac index 6f39f8bb0..c8cd52d3f 100644 --- a/configure.ac +++ b/configure.ac @@ -221,6 +221,13 @@ AC_CONFIG_FILES([ chmod +x test/test_Regression/test_jeffreys_samples_diff.sh ]) +AC_CONFIG_FILES([ + test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh +], +[ + chmod +x test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh +]) + dnl ---------------------------------------------- dnl Collect files for licence header stamping here dnl ---------------------------------------------- diff --git a/src/basic/src/VectorSequence.C b/src/basic/src/VectorSequence.C index 4773ca93a..465ab50ff 100644 --- a/src/basic/src/VectorSequence.C +++ b/src/basic/src/VectorSequence.C @@ -569,6 +569,11 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 unifiedPositionsOfMaximum.setPositionValues(i,tmpVec); } } + else { + // Process zero has all the states that attain maxima, so let's nuke the + // others rather than letting them contain NULL pointers + unifiedPositionsOfMaximum.resizeSequence(0); + } if (m_env.subDisplayFile()) { *m_env.subDisplayFile() << "Leaving BaseVectorSequence::unifiedPositionsOfMaximum()" diff --git a/src/stats/src/MetropolisHastingsSG.C b/src/stats/src/MetropolisHastingsSG.C index 9f42c2c6c..b7ce925ba 100644 --- a/src/stats/src/MetropolisHastingsSG.C +++ b/src/stats/src/MetropolisHastingsSG.C @@ -1222,23 +1222,25 @@ MetropolisHastingsSG::generateSequence( // Compute raw unified MLE if (workingLogLikelihoodValues) { SequenceOfVectors rawUnifiedMLEpositions(m_vectorSpace,0,m_optionsObj->m_prefix+"rawUnifiedMLEseq"); + double rawUnifiedMLEvalue = workingChain.unifiedPositionsOfMaximum(*workingLogLikelihoodValues, rawUnifiedMLEpositions); - UQ_FATAL_TEST_MACRO(rawUnifiedMLEpositions.subSequenceSize() == 0, - m_env.worldRank(), - "MetropolisHastingsSG::generateSequence()", - "rawUnifiedMLEpositions.subSequenceSize() = 0"); if ((m_env.subDisplayFile() ) && (m_optionsObj->m_ov.m_totallyMute == false)) { P_V tmpVec(m_vectorSpace.zeroVector()); - rawUnifiedMLEpositions.getPositionValues(0,tmpVec); - *m_env.subDisplayFile() << "In MetropolisHastingsSG::generateSequence()" - << ": just computed MLE" - << ", rawUnifiedMLEvalue = " << rawUnifiedMLEvalue - << ", rawUnifiedMLEpositions.subSequenceSize() = " << rawUnifiedMLEpositions.subSequenceSize() - << ", rawUnifiedMLEpositions[0] = " << tmpVec - << std::endl; + + // Make sure the positions vector (which only contains stuff on process + // zero) actually contains positions + if (rawUnifiedMLEpositions.subSequenceSize() > 0) { + rawUnifiedMLEpositions.getPositionValues(0,tmpVec); + *m_env.subDisplayFile() << "In MetropolisHastingsSG::generateSequence()" + << ": just computed MLE" + << ", rawUnifiedMLEvalue = " << rawUnifiedMLEvalue + << ", rawUnifiedMLEpositions.subSequenceSize() = " << rawUnifiedMLEpositions.subSequenceSize() + << ", rawUnifiedMLEpositions[0] = " << tmpVec + << std::endl; + } } } @@ -1248,21 +1250,21 @@ MetropolisHastingsSG::generateSequence( double rawUnifiedMAPvalue = workingChain.unifiedPositionsOfMaximum(*workingLogTargetValues, rawUnifiedMAPpositions); - UQ_FATAL_TEST_MACRO(rawUnifiedMAPpositions.subSequenceSize() == 0, - m_env.worldRank(), - "MetropolisHastingsSG::generateSequence()", - "rawUnifiedMAPpositions.subSequenceSize() = 0"); - if ((m_env.subDisplayFile() ) && (m_optionsObj->m_ov.m_totallyMute == false)) { P_V tmpVec(m_vectorSpace.zeroVector()); - rawUnifiedMAPpositions.getPositionValues(0,tmpVec); - *m_env.subDisplayFile() << "In MetropolisHastingsSG::generateSequence()" - << ": just computed MAP" - << ", rawUnifiedMAPvalue = " << rawUnifiedMAPvalue - << ", rawUnifiedMAPpositions.subSequenceSize() = " << rawUnifiedMAPpositions.subSequenceSize() - << ", rawUnifiedMAPpositions[0] = " << tmpVec - << std::endl; + + // Make sure the positions vector (which only contains stuff on process + // zero) actually contains positions + if (rawUnifiedMAPpositions.subSequenceSize() > 0) { + rawUnifiedMAPpositions.getPositionValues(0,tmpVec); + *m_env.subDisplayFile() << "In MetropolisHastingsSG::generateSequence()" + << ": just computed MAP" + << ", rawUnifiedMAPvalue = " << rawUnifiedMAPvalue + << ", rawUnifiedMAPpositions.subSequenceSize() = " << rawUnifiedMAPpositions.subSequenceSize() + << ", rawUnifiedMAPpositions[0] = " << tmpVec + << std::endl; + } } } } diff --git a/test/Makefile.am b/test/Makefile.am index 89e31702d..c2d590025 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -35,6 +35,7 @@ check_PROGRAMS += test_gsloptimizer check_PROGRAMS += test_seedwithmap check_PROGRAMS += test_seedwithmap_fd check_PROGRAMS += test_logitadaptedcov +check_PROGRAMS += test_unifiedPositionsOfMaximum LDADD = $(top_builddir)/src/libqueso.la @@ -95,6 +96,7 @@ test_gsloptimizer_SOURCES = test_optimizer/test_gsloptimizer.C test_seedwithmap_SOURCES = test_optimizer/test_seedwithmap.C test_seedwithmap_fd_SOURCES = test_optimizer/test_seedwithmap_fd.C test_logitadaptedcov_SOURCES = test_Regression/test_logitadaptedcov.C +test_unifiedPositionsOfMaximum_SOURCES = test_SequenceOfVectors/test_unifiedPositionsOfMaximum.C # Files to freedom stamp srcstamp = @@ -127,6 +129,7 @@ srcstamp += $(test_gsloptimizer_SOURCES) srcstamp += $(test_seedwithmap_SOURCES) srcstamp += $(test_seedwithmap_fd_SOURCES) srcstamp += $(test_logitadaptedcov_SOURCES) +srcstamp += $(test_unifiedPositionsOfMaximum_SOURCES) TESTS = TESTS += $(top_builddir)/test/test_uqEnvironmentCopy @@ -159,6 +162,7 @@ TESTS += $(top_builddir)/test/test_gsloptimizer TESTS += $(top_builddir)/test/test_seedwithmap TESTS += $(top_builddir)/test/test_seedwithmap_fd TESTS += $(top_builddir)/test/test_logitadaptedcov +TESTS += $(top_builddir)/test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh XFAIL_TESTS = $(top_builddir)/test/test_SequenceOfVectorsErase @@ -183,6 +187,7 @@ EXTRA_DIST += test_Regression/jeffreys_input.txt.in EXTRA_DIST += test_Regression/test_jeffreys_samples_diff.sh.in EXTRA_DIST += test_Regression/test_jeffreys_samples.m.in EXTRA_DIST += test_Regression/adaptedcov_input.txt.in +EXTRA_DIST += test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh.in DISTCLEANFILES = DISTCLEANFILES += test_gpmsa_cobra_output/display_sub0.txt diff --git a/test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.C b/test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.C new file mode 100644 index 000000000..671d59508 --- /dev/null +++ b/test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.C @@ -0,0 +1,71 @@ +#include + +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) { + MPI_Init(&argc, &argv); + + QUESO::EnvOptionsValues options; + options.m_numSubEnvironments = 2; + options.m_subDisplayFileName = "outputData/test_SequenceOfVectorsMax"; + options.m_subDisplayAllowAll = 1; + options.m_seed = 1.0; + options.m_checkingLevel = 1; + options.m_displayVerbosity = 55; + + QUESO::FullEnvironment env(MPI_COMM_WORLD, "", "", &options); + + // Create a vector space + QUESO::VectorSpace paramSpace(env, "", 1, + NULL); + + QUESO::SequenceOfVectors pretendChain( + paramSpace, 3, "pretendChain"); + + QUESO::GslVector v(paramSpace.zeroVector()); + v[0] = 0.0; + + // Create a scalar sequence on each processor + std::string name = "name"; + QUESO::ScalarSequence scalarSequence(env, 3, name); + pretendChain.setPositionValues(0, v); + pretendChain.setPositionValues(1, v); + pretendChain.setPositionValues(2, v); + + if (env.inter0Rank() == 0) { + scalarSequence[0] = 10.0; + scalarSequence[1] = 11.0; + scalarSequence[2] = 12.0; + } + else if (env.inter0Rank() == 1) { + scalarSequence[0] = 0.0; + scalarSequence[1] = 1.0; + scalarSequence[2] = 2.0; + } + else { + queso_error_msg("Test should not get here!"); + } + + // Create a sequence of vectors + QUESO::SequenceOfVectors maxs(paramSpace, + 0, "name2"); + + pretendChain.unifiedPositionsOfMaximum(scalarSequence, maxs); + + // Should not fail + QUESO::GslVector tmpVec(paramSpace.zeroVector()); + + // The loop should only actually do anything on process zero + for (unsigned int i = 0; i < maxs.subSequenceSize(); i++) { + maxs.getPositionValues(0, tmpVec); + } + + MPI_Finalize(); + + return 0; +} diff --git a/test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh.in b/test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh.in new file mode 100644 index 000000000..cec7cf6f5 --- /dev/null +++ b/test/test_SequenceOfVectors/test_unifiedPositionsOfMaximum.sh.in @@ -0,0 +1,7 @@ +#!/bin/bash +set -eu +set -o pipefail + +PROG="./test_unifiedPositionsOfMaximum" + +mpirun -np 2 $PROG From 69e53c7222f42f65bf16d95c8fc43ce881fbd680 Mon Sep 17 00:00:00 2001 From: Damon McDougall Date: Tue, 3 Mar 2015 17:27:46 -0600 Subject: [PATCH 2/4] Remove trailing whitespace --- src/basic/src/VectorSequence.C | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/basic/src/VectorSequence.C b/src/basic/src/VectorSequence.C index 465ab50ff..e7dfa4c70 100644 --- a/src/basic/src/VectorSequence.C +++ b/src/basic/src/VectorSequence.C @@ -1,6 +1,6 @@ //-----------------------------------------------------------------------Bl- //-------------------------------------------------------------------------- -// +// // QUESO - a library to support the Quantification of Uncertainty // for Estimation, Simulation and Optimization // @@ -17,7 +17,7 @@ // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software -// Foundation, Inc. 51 Franklin Street, Fifth Floor, +// Foundation, Inc. 51 Franklin Street, Fifth Floor, // Boston, MA 02110-1301 USA // //-----------------------------------------------------------------------el- @@ -745,7 +745,7 @@ BaseVectorSequence::computeStatistics( #ifdef QUESO_COMPUTES_EXTRA_POST_PROCESSING_STATISTICS if ((statisticalOptions.bmmRun() ) && (initialPosForStatistics.size() > 0) && - (statisticalOptions.bmmLengths().size() > 0)) { + (statisticalOptions.bmmLengths().size() > 0)) { this->computeBMM(statisticalOptions, initialPosForStatistics, passedOfs); @@ -779,7 +779,7 @@ BaseVectorSequence::computeStatistics( #ifdef QUESO_COMPUTES_EXTRA_POST_PROCESSING_STATISTICS if ((statisticalOptions.psdAtZeroCompute() ) && (initialPosForStatistics.size() > 0) && - (statisticalOptions.psdAtZeroNumBlocks().size() > 0)) { + (statisticalOptions.psdAtZeroNumBlocks().size() > 0)) { this->computePSDAtZero(statisticalOptions, initialPosForStatistics, passedOfs); @@ -818,7 +818,7 @@ BaseVectorSequence::computeStatistics( //**************************************************** if ((statisticalOptions.autoCorrComputeViaDef()) && (initialPosForStatistics.size() > 0 ) && - (lagsForCorrs.size() > 0 )) { + (lagsForCorrs.size() > 0 )) { this->computeAutoCorrViaDef(statisticalOptions, initialPosForStatistics, lagsForCorrs, @@ -830,7 +830,7 @@ BaseVectorSequence::computeStatistics( //**************************************************** if ((statisticalOptions.autoCorrComputeViaFft()) && (initialPosForStatistics.size() > 0 ) && - (lagsForCorrs.size() > 0 )) { + (lagsForCorrs.size() > 0 )) { this->computeAutoCorrViaFFT(statisticalOptions, initialPosForStatistics, lagsForCorrs, @@ -1195,7 +1195,7 @@ BaseVectorSequence::computeAutoCorrViaDef( } } } - } + } return; } @@ -1367,7 +1367,7 @@ BaseVectorSequence::computeAutoCorrViaFFT( } } } - } + } return; } @@ -2470,7 +2470,7 @@ BaseVectorSequence::computePSDAtZero( tmpRunTime += MiscGetEllapsedSeconds(&timevalTmp); if (m_env.subDisplayFile()) { - *m_env.subDisplayFile() << "Chain PSD at frequency zero took " << tmpRunTime + *m_env.subDisplayFile() << "Chain PSD at frequency zero took " << tmpRunTime << " seconds" << std::endl; } @@ -2505,7 +2505,7 @@ BaseVectorSequence::computePSDAtZero( } } } - } + } return; } From 7f53022efac03e30d4978a18ad79d8085080a271 Mon Sep 17 00:00:00 2001 From: Damon McDougall Date: Tue, 3 Mar 2015 17:28:07 -0600 Subject: [PATCH 3/4] Add helpful comments to unifiedPositionsOfMaximum These comments explain some of the logic of what is happening across several processes. --- src/basic/src/VectorSequence.C | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/basic/src/VectorSequence.C b/src/basic/src/VectorSequence.C index e7dfa4c70..2f4eb6fbf 100644 --- a/src/basic/src/VectorSequence.C +++ b/src/basic/src/VectorSequence.C @@ -446,6 +446,7 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 "BaseVectorSequence::unifiedPositionsOfMaximum()", "invalid input"); + // Compute the max on each process double subMaxValue = subCorrespondingScalarValues.subMaxPlain(); //****************************************************************** @@ -460,6 +461,8 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 "BaseVectorSequence::unifiedPositionsOfMaximum()", "failed MPI.Allreduce() for max"); + // Find number of elements (subNumPos) on each process that attain the + // global max. This could be zero. unsigned int iMax = subCorrespondingScalarValues.subSequenceSize(); int subNumPos = 0; // Yes, 'int', due to MPI to be used soon for (unsigned int i = 0; i < iMax; ++i) { @@ -468,10 +471,13 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 } } + // Fill up unifiedPositionsOfMaximum with the states that attain maxima + // (if they exist) V tmpVec(this->vectorSpace().zeroVector()); - unifiedPositionsOfMaximum.resizeSequence(subNumPos); + unifiedPositionsOfMaximum.resizeSequence(subNumPos); // subNumPos could be 0 unsigned int j = 0; for (unsigned int i = 0; i < iMax; ++i) { + // This 'if' statement is false if subNumPos == 0 if (subCorrespondingScalarValues[i] == unifiedMaxValue) { this->getPositionValues (i,tmpVec); unifiedPositionsOfMaximum.setPositionValues(j,tmpVec); @@ -479,17 +485,21 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 } } + // Compute total (over all processes) number of elements that attain maxima std::vector auxBuf(1,0); int unifiedNumPos = 0; // Yes, 'int', due to MPI to be used soon auxBuf[0] = subNumPos; m_env.inter0Comm().Allreduce((void *) &auxBuf[0], (void *) &unifiedNumPos, (int) auxBuf.size(), RawValue_MPI_INT, RawValue_MPI_SUM, "BaseVectorSequence::unifiedPositionsOfMaximum()", "failed MPI.Allreduce() for sum"); + + // Resize positions to hold all elements that attain maxima unifiedPositionsOfMaximum.resizeSequence(unifiedNumPos); //****************************************************************** // Use MPI_Gatherv for number of positions //****************************************************************** + // Gather up *number* of maxima on each chain and store in recvcntsPos unsigned int Np = (unsigned int) m_env.inter0Comm().NumProc(); std::vector recvcntsPos(Np,0); // '0' is NOT the correct value for recvcntsPos[0] @@ -503,6 +513,7 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 "failed MPI.Gather() result at proc 0 (recvcntsPos[0])"); } + // Construct offset indices based on number of maxima std::vector displsPos(Np,0); for (unsigned int nodeId = 1; nodeId < Np; ++nodeId) { // Yes, from '1' on displsPos[nodeId] = displsPos[nodeId-1] + recvcntsPos[nodeId-1]; @@ -517,6 +528,11 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 //****************************************************************** // Use MPI_Gatherv for number of doubles //****************************************************************** + // Gather up number of states that attain maxima multiplied by the size of + // the state (on each process). Store this in recvcntsDbs. + // So recvcntsDbs[i] is the number of states that attain maxima on chain i + // multiplied by the size of the state (a state could be a vector of length + // > 1). unsigned int dimSize = m_vectorSpace.dimLocal(); int subNumDbs = subNumPos * dimSize; // Yes, 'int', due to MPI to be used soon std::vector recvcntsDbs(Np,0); // '0' is NOT the correct value for recvcntsDbs[0] @@ -530,6 +546,8 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 "failed MPI.Gather() result at proc 0 (recvcntsDbs[0])"); } + // Now gather up the offsets based on the number of states (mulitplied by the + // size of the state) std::vector displsDbs(Np,0); for (unsigned int nodeId = 1; nodeId < Np; ++nodeId) { // Yes, from '1' on displsDbs[nodeId] = displsDbs[nodeId-1] + recvcntsDbs[nodeId-1]; @@ -544,6 +562,8 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 //****************************************************************** // Prepare counters and buffers for gatherv of maximum positions //****************************************************************** + // Take all the states on all chains that attain maxima, and put them in a + // send buffer ready for MPI std::vector sendbufDbs(subNumDbs,0.); for (unsigned int i = 0; i < (unsigned int) subNumPos; ++i) { unifiedPositionsOfMaximum.getPositionValues(i,tmpVec); @@ -554,6 +574,7 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 std::vector recvbufDbs(unifiedNumPos * dimSize); + // Gather up all states that attain maxima and store then in recvbufDbs m_env.inter0Comm().Gatherv((void *) &sendbufDbs[0], (int) subNumDbs, RawValue_MPI_DOUBLE, (void *) &recvbufDbs[0], (int *) &recvcntsDbs[0], (int *) &displsDbs[0], RawValue_MPI_DOUBLE, 0, "BaseVectorSequence::unifiedPositionsOfMaximum()", "failed MPI.Gatherv()"); @@ -561,6 +582,8 @@ BaseVectorSequence::unifiedPositionsOfMaximum( // rr0 //****************************************************************** // Transfer data from 'recvbuf' to 'unifiedPositionsOfMaximum' //****************************************************************** + // Copy over all the gathered states to the unifiedPositionsOfMaximum + // variable on process 0. Process 0 now contains everything. if (m_env.inter0Rank() == (int) 0) { for (unsigned int i = 0; i < (unsigned int) unifiedNumPos; ++i) { for (unsigned int j = 0; j < dimSize; ++j) { From 7ab35ba3a7aaf59b8422db17c2f6e1d567874f89 Mon Sep 17 00:00:00 2001 From: Damon McDougall Date: Tue, 3 Mar 2015 17:42:18 -0600 Subject: [PATCH 4/4] Add docs to unifiedPositionsOfMaximum --- src/basic/inc/VectorSequence.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/basic/inc/VectorSequence.h b/src/basic/inc/VectorSequence.h index 1e5735a75..9040ccd65 100644 --- a/src/basic/inc/VectorSequence.h +++ b/src/basic/inc/VectorSequence.h @@ -146,6 +146,16 @@ class BaseVectorSequence BaseVectorSequence& subPositionsOfMaximum); //! Finds the positions where the maximum element occurs in the unified sequence. + /*! + * \param this The underlying sequence of vectors + * \param subCorrespondingScalarValues For a given process, a scalar sequence + * where element \c i corresponds to element \c i of \c this. + * \param unifiedPositionsOfMaximum Upon returning, on process 0 on chain 0, + * this will contain states that correspond to the unique maximum value in + * \c subCorrespondingScalarValues over *all* processes. + * \return Maximum element of \c subCorrespondingScalarValues over all + * processes. + */ double unifiedPositionsOfMaximum (const ScalarSequence& subCorrespondingScalarValues, BaseVectorSequence& unifiedPositionsOfMaximum);