renamed n_iter_mean n_iter_reinflate

jtwhite79 · Nov 12, 2024 · e884d0d · e884d0d
1 parent 6d03be2
commit e884d0d
Show file tree

Hide file tree

Showing 5 changed files with 76 additions and 53 deletions.
diff --git a/benchmarks/basic_tests.py b/benchmarks/basic_tests.py
@@ -1499,16 +1499,18 @@ def tenpar_collapse_invest():
     print(pst.model_command)
 
     obs = pst.observation_data
-    obs.loc[obs.obsnme.str.startswith("h01"),"weight"] = 10.0
+    obs.loc[:,"weight"] = 0.0
+
+    nzoname = obs.obsnme.str.startswith("h01").index[3]
+    obs.loc[nzoname,"weight"] = 10.0
 
-    obs.loc[obs.obsnme.str.startswith("h01"),"standard_deviation"] = 0.1
+    obs.loc[nzoname,"standard_deviation"] = 0.1
 
     pst.parameter_data.loc[:,"partrans"] = "log"
     pst.pestpp_options["ies_multimodal_alpha"] = 0.99
 
-
     # set noptmax
-    num_reals = [10,30,400]
+    num_reals = [10,30,50,100,400,1000]
     pst.control_data.noptmax = 10
     for num_real in num_reals:
 
@@ -1524,7 +1526,7 @@ def tenpar_collapse_invest():
                                worker_root=model_d,port=port,verbose=True)
 
     pst.pestpp_options = {}
-    pst.pestpp_options["ies_num_reals"] = 50000
+    pst.pestpp_options["ies_num_reals"] = 100000
     pst.control_data.noptmax = -1
     pst.write(os.path.join(new_d, "pest.pst"))
 
@@ -1535,8 +1537,8 @@ def tenpar_collapse_invest():
                            worker_root=model_d,port=port,verbose=True)
 
 
-    pst.observation_data.loc[pst.nnz_obs_names[3],"obsval"] += 1.5
-    pst.observation_data.loc[pst.nnz_obs_names[5],"obsval"] -= 1.5
+    pst.observation_data.loc[nzoname,"obsval"] += 8
+    #pst.observation_data.loc[pst.nnz_obs_names[5],"obsval"] -= 1.5
 
     # set noptmax
     pst.control_data.noptmax = 10
@@ -1554,7 +1556,7 @@ def tenpar_collapse_invest():
                                worker_root=model_d,port=port,verbose=True)
 
     pst.pestpp_options = {}
-    pst.pestpp_options["ies_num_reals"] = 50000
+    pst.pestpp_options["ies_num_reals"] = 100000
     pst.control_data.noptmax = -1
     pst.write(os.path.join(new_d, "pest.pst"))
 
@@ -1570,6 +1572,7 @@ def plot_collapse_invest():
 
     pes, oes, phidfs = {},{},{}
     names = []
+    name_dict = {}
     min_phi = 1e300
     for m_d in m_ds:
         phidf = pd.read_csv(os.path.join(m_d,"pest.phi.actual.csv"))
@@ -1595,45 +1598,61 @@ def plot_collapse_invest():
 
         #    o.append(o[0])#.loc[realkeep.index,:].copy())
 
-        name = "{0}, {1} realizations".format(m_d.split("_")[-2],m_d.split("_")[-1].replace("reals",""))
+        if phidf.iteration.max() == 0:
+            nreals = oe.shape[0]
+            name = "{0}, {1} realizations".format(m_d.split("_")[-2],nreals)
+        else:
+
+            name = "{0}, {1} realizations".format(m_d.split("_")[-2],m_d.split("_")[-1].replace("reals",""))
+            nreals = int(m_d.split("_")[-1].replace("reals",""))
+        if nreals not in name_dict:
+            name_dict[nreals] = []
+        name_dict[nreals].append(name)
         names.append(name)
         pes[name] = p
         oes[name] = o
         phidfs[name] = phidf
 
     #print(min_phi)
+    reals = list(name_dict.keys())
+    reals.sort()
 
     # now filter
     #thresh = min_phi * 5
     thresh = 10
-    corrupt_thresh = min_phi * 2
-    for m_d in names:
-        p = pes[m_d]
-        o = oes[m_d]
-        phidf = phidfs[m_d]
-        #print(m_d,len(p))
-        assert len(p) == 2
-        assert len(o) == 2
-        ppost = p[-1]
-        opost = o[-1]
-        phipost = phidf.iloc[-1,:].copy()
-        phipost = phipost.iloc[6:]
-        #print(phipost)
-        if "corrupt" in m_d:
-            phipost = phipost.loc[phipost<=corrupt_thresh]
-        else:    
-            phipost = phipost.loc[phipost<=thresh]
-        print(m_d,phipost.shape)
-        pes[m_d][-1] = ppost.loc[phipost.index.values,:].copy()
-        oes[m_d][-1] = opost.loc[phipost.index.values,:].copy()
-
+    corrupt_thresh = min_phi * 1.075
+    names = []
+    for nreals in reals:
+        m_ds = name_dict[nreals]
+        m_ds.sort()
+        for m_d in m_ds:
+            p = pes[m_d]
+            o = oes[m_d]
+            phidf = phidfs[m_d]
+            #print(m_d,len(p))
+            assert len(p) == 2
+            assert len(o) == 2
+            ppost = p[-1]
+            opost = o[-1]
+            phipost = phidf.iloc[-1,:].copy()
+            phipost = phipost.iloc[6:]
+            #print(phipost)
+            if "corrupt" in m_d:
+                phipost = phipost.loc[phipost<=corrupt_thresh]
+            else:    
+                phipost = phipost.loc[phipost<=thresh]
+            print(m_d,phipost.shape)
+            pes[m_d][-1] = ppost.loc[phipost.index.values,:].copy()
+            oes[m_d][-1] = opost.loc[phipost.index.values,:].copy()
+            names.append(m_d)   
+
 
     import matplotlib.pyplot as plt
     from matplotlib.backends.backend_pdf import PdfPages
-    names.sort()
+
     with PdfPages("collapse_compare.pdf") as pdf:
         for par in pst.adj_par_names:
-            fig,axes = plt.subplots(len(m_ds),1,figsize=(8.5,8.5))
+            fig,axes = plt.subplots(len(names),1,figsize=(8.5,8.5))
             mn,mx = 1e30,-1e30
             for m_d,ax in zip(names,axes):
                 p = pes[m_d]
@@ -1643,18 +1662,23 @@ def plot_collapse_invest():
                 colors = ["0.5","b"]
                 labels = ["prior","posterior"]
                 for pp,oo,c,l in zip(p,o,colors,labels):
+                    if "stage" not in par:
+                        pp.loc[:,par] = np.log10(pp.loc[:,par].values)
                     pp.loc[:,par].plot(ax=ax,kind="hist",color=c,alpha=0.5,label=l,density=True)
-                ax.set_title("{0}, {1}, {2} posterior realizations".format(par,m_d,pp.shape[0]),loc="left")
+                ax.set_title("{0}, {1} {2} posterior realizations".format(par,m_d,pp.shape[0]),loc="left")
                 ax.set_yticks([])
                 mn = min(mn,ax.get_xlim()[0])
                 mx = max(mn,ax.get_xlim()[1])
+                print(m_d)
+
             for ax in axes:
                 ax.set_xlim(mn,mx)
             plt.tight_layout()
             pdf.savefig()
             plt.close(fig)
+
         for obs in pst.obs_names:
-            fig,axes = plt.subplots(len(m_ds),1,figsize=(8.5,8.5))
+            fig,axes = plt.subplots(len(names),1,figsize=(8.5,8.5))
             mn,mx = 1e30,-1e30
             for m_d,ax in zip(names,axes):
                 p = pes[m_d]
@@ -1679,7 +1703,7 @@ def plot_collapse_invest():
     print(m_ds)
 
 if __name__ == "__main__":
-    #tenpar_collapse_invest()
+    tenpar_collapse_invest()
     plot_collapse_invest()
     #run()
     #mf6_v5_ies_test()

diff --git a/documentation/pestpp_users_guide_v5.2.13.docx b/documentation/pestpp_users_guide_v5.2.13.docx
diff --git a/documentation/pestpp_users_manual.md b/documentation/pestpp_users_manual.md
@@ -1,13 +1,13 @@
 
  <img src="./media/image1.png" style="width:6.26806in;height:1.68194in" alt="A close up of a purple sign Description automatically generated" />
 
-# <a id='s1' />Version 5.2.13
+# <a id='s1' />Version 5.2.14
 
 <img src="./media/image2.png" style="width:6.26806in;height:3.05972in" />
 
 PEST++ Development Team
 
-October 2024
+November 2024
 
 # <a id='s2' />Acknowledgements
 
@@ -70,7 +70,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 
 # Table of Contents
 
-- [Version 5.2.13](#s1)
+- [Version 5.2.14](#s1)
 - [Acknowledgements](#s2)
 - [Preface](#s3)
 - [License](#s4)
@@ -241,7 +241,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
         - [9.1.12 Use of observation noise covariance matrices](#s13-1-12)
         - [9.1.13 Detecting and resolving prior-data conflict](#s13-1-13)
         - [9.1.14 Multi-modal solution process](#s13-1-14)
-        - [9.1.15 Mean-Update Iterations](#s13-1-15)
+        - [9.1.15 Covariance Reinflation](#s13-1-15)
     - [9.2 Using PESTPP-IES](#s13-2)
         - [9.2.1 General](#s13-2-1)
         - [9.2.2 Initial Realizations](#s13-2-2)
@@ -3610,15 +3610,15 @@ Closely related to the multimodal solution process is the use of a “weights”
 
 Figure 9.2 – A demonstration of the multi-modal solution process using a weight ensemble on the ZDT1 benchmark problem. The standard solution process using single weight vector drives the posterior towards a single point, while the multi-modal upgrade process uses unique weights on each of the two objectives (observations in the control file) such that each realization targets a different point on the trade-off between the two objectives.
 
-### <a id='s13-1-15' />9.1.15 Mean-Update Iterations
+### <a id='s13-1-15' />9.1.15 Covariance Reinflation
 
 In highly nonlinear problems, the gradient between parameters and simulated equivalents to observations can change (drastically) between iterations of PESTPP-IES. This can drive the need for several iterations in order to fully assimilate data. However, during each iteration, due to the solution equations, both the mean and (co)variance of the parameter ensemble can change, with the latter usually decreasing substantially each iteration, meaning that across multiple iterations, the variance of the parameter ensemble reduces, sometime dramatically, this is especially true in highly nonlinear problems where changing gradients can condition different parameters (and/or parameter combinations) each iteration, and in cases where the prior simulated ensemble has a consistent bias compared to observed counterparts. Spurious correlations exacerbate this problem.
 
-To combat the variance reduction that occurs across multiple iterations, it might be desirable in some settings to “reset” the variance of the current parameter ensemble to the prior state; this can also be labelled “reinflation. Mechanically, this is done by subtracting the mean from the prior ensemble (forming the so-called deviations) and then adding the mean from the current parameter ensemble, thereby translating the prior ensemble across parameter space but (more or less) preserving the variance of the prior ensemble.
+To combat the variance reduction that occurs across multiple iterations, it might be desirable in some settings to “reset” the variance of the current parameter ensemble to the prior state; this can also be labelled “reinflation. Mechanically, this is done by subtracting the mean from the prior ensemble (forming the so-called deviations or “anomalies”) and then adding the mean from the current parameter ensemble, thereby translating the prior ensemble across parameter space but (more or less) preserving the variance of the prior ensemble.
 
-This option is implemented in PESTPP-IES via the *ies_n_iter_mean* option. As the name implies, the argument is the number of iterations to undertake before resetting the variance to the prior ensemble state. Some other behavioural characteristics of using mean-update iterations:
+This option is implemented in PESTPP-IES via the *ies_n_iter_reinflation* option. As the name implies, the argument is the number of iterations to undertake before resetting the variance to the prior ensemble state. Some other behavioural characteristics of using parameter covariance reinflation:
 
-- The phi reduction each iteration may not satisfy the requirements for a “successful” iteration. However, PESTPP-IES will continue iterating anyway and will also undertake partial upgrades of realizations that do show phi improvement; essentially PESTPP-IES will use all of it regular tricks to seek a good fit for iterations less than *ies_n_iter_mean*.
+- The phi reduction each iteration may not satisfy the requirements for a “successful” iteration. However, PESTPP-IES will continue iterating anyway and will also undertake partial upgrades of realizations that do show phi improvement; essentially PESTPP-IES will use all of it regular tricks to seek a good fit for iterations less than *ies_n_iter_reinflation*.
 
 - The iteration count in PESTPP-IES will be incremented during the reinflation. This is so the results of the reinflation can be tracked in the output files.
 
@@ -4159,9 +4159,9 @@ Note also that the number of control variables may change with time. Refer to th
 <td>A flag to use internal weight balancing for each realization. This option should be used in conjunction with the multi-modal solution process.</td>
 </tr>
 <tr class="odd">
-<td><em>ies_n_iter_mean</em></td>
+<td><em>ies_n_iter_reinflation</em></td>
 <td>int</td>
-<td>The number of mean-shift iterations to use. Default is 0, which is indicates not to do the prior-mean-shifting process</td>
+<td>The number of between covariance re-inflation. Default is 0, which is indicates not re-inflate parameter covariance</td>
 </tr>
 <tr class="even">
 <td><em>ies_update_by_reals</em></td>

diff --git a/src/libs/pestpp_common/EnsembleMethodUtils.cpp b/src/libs/pestpp_common/EnsembleMethodUtils.cpp
@@ -4501,10 +4501,7 @@ void EnsembleMethod::sanity_checks()
     {
         errors.push_back("multimodal alpha < 0.001");
     }
-    if (ppo->get_ies_n_iter_mean() > 0)
-    {
-        warnings.push_back("mean-shifting iterations is a new concept and subject to change - experimental at best");
-    }
+
 
     if (warnings.size() > 0)
     {
@@ -5162,7 +5159,7 @@ void EnsembleMethod::initialize(int cycle, bool run, bool use_existing)
     reinflate_to_minphi_real = false;
     if (pest_scenario.get_pestpp_options().get_ies_n_iter_mean() < 0)
     {
-        message(2,"n_iter_mean < 0, using min-phi real for re-inflation, resetting n_iter_mean to positive");
+        message(2,"n_iter_mean < 0, using min-phi real for re-inflation, resetting n_iter_reinflate to positive");
         reinflate_to_minphi_real = true;
         pest_scenario.get_pestpp_options_ptr()->set_ies_n_iter_mean(-1 * pest_scenario.get_pestpp_options().get_ies_n_iter_mean());
     }
@@ -7758,7 +7755,7 @@ void EnsembleMethod::reset_par_ensemble_to_prior_mean(){
     last_best_lam = pest_scenario.get_pestpp_options().get_ies_init_lam();
     double phi_lam = get_lambda();
     last_best_lam = phi_lam;
-    message(1,"iter = ies_n_iter_mean, resetting lambda to ",last_best_lam);
+    message(1,"iter = ies_n_iter_reinflate, resetting lambda to ",last_best_lam);
     consec_bad_lambda_cycles = 0;
 
 }

diff --git a/src/libs/pestpp_common/pest_data_structs.cpp b/src/libs/pestpp_common/pest_data_structs.cpp
@@ -1181,8 +1181,10 @@ bool PestppOptions::assign_ies_value_by_key(const string& key, const string& val
         ies_phi_factors_by_real = pest_utils::parse_string_arg_to_bool(value);
         return true;
     }
-    else if (key == "IES_N_ITER_MEAN")
+    else if ((key == "IES_N_ITER_MEAN") || (key == "IES_N_ITER_REINFLATE"))
     {
+        passed_args.insert("IES_N_ITER_MEAN");
+        passed_args.insert("IES_N_ITER_REINFLATE");
         convert_ip(value,ies_n_iter_mean);
         return true;
     }
@@ -1828,7 +1830,7 @@ void PestppOptions::summary(ostream& os) const
 	os << "ies_localizer_forgive_extra: " << ies_localizer_forgive_missing << endl;
 	os << "ies_phi_factors_file: " << ies_phi_fractions_file << endl;
     os << "ies_phi_factors_by_real: " << ies_phi_factors_by_real << endl;
-    os << "ies_n_iter_mean: " << ies_n_iter_mean << endl;
+    os << "ies_n_iter_reinflate: " << ies_n_iter_mean << endl;
     os << "ies_updatebyreals: " << ies_updatebyreals << endl;