This repository has been archived by the owner on Jul 19, 2021. It is now read-only.

Apply row_scaling in the smoother update
joakim-hove authored and ManInFez committed Feb 17, 2021
1 parent c1cc5f5 commit 77c3e45
Showing 17 changed files with 540 additions and 48 deletions.
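For context: the smoother update computes an update matrix X from the observations and applies it to the prior parameter ensemble A. With row scaling, each parameter row additionally gets a tapering factor alpha in [0, 1] which blends the fully updated row with the prior row. The sketch below is a minimal numpy illustration of the semantics assumed for row_scaling_multiply(row_scaling, A, X) in this commit; it is an editorial illustration, and the names alpha and row_scaling_multiply are used loosely here, not taken from the C++ implementation.

    import numpy as np

    # Row-scaled smoother update: each row of A becomes a blend of the fully
    # updated row (A @ X) and the prior row, weighted by that row's alpha.
    #   alpha: (n_params,) tapering factors; 1.0 = full update, 0.0 = no update
    #   A:     (n_params, n_ens) prior parameter ensemble
    #   X:     (n_ens, n_ens) update matrix from the analysis module
    def row_scaling_multiply(alpha, A, X):
        a = alpha[:, None]                  # broadcast over ensemble members
        return a * (A @ X) + (1.0 - a) * A

    # Smoke test: alpha = 0 keeps the prior row, alpha = 1 applies the full update.
    rng = np.random.default_rng(0)
    A = rng.normal(size=(3, 4))
    X = rng.normal(size=(4, 4))
    alpha = np.array([0.0, 0.5, 1.0])
    A_new = row_scaling_multiply(alpha, A, X)
    assert np.allclose(A_new[0], A[0])
    assert np.allclose(A_new[2], (A @ X)[2])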
71 changes: 31 additions & 40 deletions lib/enkf/enkf_main.cpp
@@ -758,7 +758,6 @@ static module_info_type * enkf_main_module_info_alloc( const local_ministep_type
     stringlist_free( update_keys );
   }
 
-
   { /* Init obs blocks in module_info */
     module_obs_block_vector_type * module_obs_block_vector = module_info_get_obs_block_vector ( module_info );
     int current_row = 0;
@@ -965,7 +964,6 @@ static void enkf_main_update__(enkf_main_type * enkf_main, const int_vector_type
     res_log_ferror("No active observations/parameters for MINISTEP: %s.",
                    local_ministep_get_name(ministep));
   }
-
   enkf_main_inflate(enkf_main, source_fs, target_fs, current_step, use_count);
   hash_free(use_count);
 }
@@ -1096,62 +1094,58 @@ static void enkf_main_analysis_update( enkf_main_type * enkf_main ,
     if (localA == NULL)
       analysis_module_initX( module , X , NULL , S , R , dObs , E , D, enkf_main->shared_rng);
 
-
     while (!hash_iter_is_complete( dataset_iter )) {
       const char * dataset_name = hash_iter_get_next_key( dataset_iter );
       const local_dataset_type * dataset = local_ministep_get_dataset( ministep , dataset_name );
       if (local_dataset_get_size( dataset )) {
         local_obsdata_type * local_obsdata = local_ministep_get_obsdata( ministep );
+        const auto& unscaled_keys = local_dataset_unscaled_keys(dataset);
+        if (unscaled_keys.size()) {
 
-          enkf_main_serialize_dataset(enkf_main_get_ensemble_config(enkf_main), dataset , step2 , use_count , tp , serialize_info);
-          module_info_type * module_info = enkf_main_module_info_alloc(ministep, obs_data, dataset, local_obsdata, serialize_info->active_size.data() , serialize_info->row_offset.data());
           /*
             The update for one local_dataset instance consists of two main chunks:
 
             1. The first chunk updates all the parameters which don't have row
                scaling attached. These parameters are serialized together to the A
                matrix and all the parameters are updated in one go.
 
            2. The second chunk is a loop over all the parameters which have row
               scaling attached. These parameters are updated one at a time.
           */
 
           // Part 1: Parameters which do not have row scaling attached.
           enkf_main_serialize_dataset(enkf_main_get_ensemble_config(enkf_main), dataset , step2 , use_count , tp , serialize_info);
+          module_info_type * module_info = enkf_main_module_info_alloc(ministep, obs_data, dataset, local_obsdata, serialize_info->active_size.data() , serialize_info->row_offset.data());
 
           if (analysis_module_check_option( module , ANALYSIS_UPDATE_A)){
             if (analysis_module_check_option( module , ANALYSIS_ITERABLE)){
               analysis_module_updateA( module , localA , S , R , dObs , E , D , module_info, enkf_main->shared_rng);
             }
             else
               analysis_module_updateA( module , localA , S , R , dObs , E , D , module_info, enkf_main->shared_rng);
           } else {
             if (analysis_module_check_option( module , ANALYSIS_USE_A)){
               analysis_module_initX( module , X , localA , S , R , dObs , E , D, enkf_main->shared_rng);
             }
-
             matrix_inplace_matmul_mt2( A , X , tp );
           }
 
           enkf_main_deserialize_dataset( enkf_main_get_ensemble_config( enkf_main ) , dataset , serialize_info , tp);
+          enkf_main_module_info_free( module_info );
+        }
 
-
-
         // Part 2: Parameters with row scaling attached - to support distance based localization.
         {
           const auto& scaled_keys = local_dataset_scaled_keys(dataset);
           if (scaled_keys.size()) {
-            throw std::logic_error("Sorry - the use of row scaling for distance based localization is not yet implemented");
+
+            if (analysis_module_check_option( module , ANALYSIS_UPDATE_A))
+              throw std::logic_error("Sorry - row scaling for distance based localization can not be combined with analysis modules which update the A matrix");
 
            for (int ikw=0; ikw < scaled_keys.size(); ikw++) {
              const auto& key = scaled_keys[ikw];
              const active_list_type * active_list = local_dataset_get_node_active_list(dataset, key.c_str());
-              for (int iens = serialize_info->iens1; iens < serialize_info->iens2; iens++) {
+              for (int iens = 0; iens < bool_vector_size(ens_mask); iens++) {
                int column = int_vector_iget( serialize_info->iens_active_index , iens);
                if (column >= 0) {
                  serialize_node(serialize_info->src_fs,
@@ -1165,24 +1159,23 @@ static void enkf_main_analysis_update( enkf_main_type * enkf_main ,
                                 A);
                }
              }
+              matrix_shrink_header( A , local_dataset_get_row_scaling(dataset, key.c_str())->size(), matrix_get_columns(A) );
 
              if (analysis_module_check_option( module , ANALYSIS_USE_A))
                analysis_module_initX( module , X , localA , S , R , dObs , E , D, enkf_main->shared_rng);
 
-              /*
-              row_scaling_type * row_scaling = local_dataset_get_row_scaling( local_dataset, key );
-              row_scaling_multiply(row_scaling, X, A );
-              */
+              const row_scaling_type * row_scaling = local_dataset_get_row_scaling( dataset, key.c_str() );
+              row_scaling_multiply(row_scaling, A, X);
 
-              for (int iens = serialize_info->iens1; iens < serialize_info->iens2; iens++) {
+              for (int iens = 0; iens < bool_vector_size(ens_mask); iens++) {
                int column = int_vector_iget( serialize_info->iens_active_index , iens);
                if (column >= 0) {
                  deserialize_node(serialize_info->target_fs,
                                   serialize_info->src_fs,
                                   serialize_info->ensemble_config,
                                   key.c_str(),
                                   iens,
-                                   serialize_info->report_step,
+                                   serialize_info->target_step,
                                   0,
                                   column,
                                   active_list,
@@ -1192,15 +1185,13 @@ static void enkf_main_analysis_update( enkf_main_type * enkf_main ,
                }
              }
            }
-            enkf_main_module_info_free( module_info );
          }
        }
        hash_iter_free( dataset_iter );
        serialize_info_free( serialize_info );
      }
      analysis_module_complete_update( module );
 
-
   /*****************************************************************/
 
   int_vector_free(iens_active_index);
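Reading the hunks above as a whole: each local_dataset is now split into unscaled keys, which are serialized together and updated in one matrix product (part 1), and scaled keys, which are serialized and updated one key at a time (part 2). Below is a rough, self-contained Python rendering of that control flow, assuming the blend semantics sketched earlier; serialize, deserialize and row_scaling are hypothetical stand-ins for the C helpers in the diff, not the real API.

    import numpy as np

    def smoother_update_dataset(unscaled_keys, scaled_keys,
                                serialize, deserialize, row_scaling, X):
        # Part 1: all parameters without row scaling, updated in one matmul.
        # serialize(keys) -> (n_rows, n_ens) matrix for those keys;
        # deserialize(keys, A) writes the updated rows back to storage.
        if unscaled_keys:
            A = serialize(unscaled_keys)
            deserialize(unscaled_keys, A @ X)

        # Part 2: parameters with row scaling, one key at a time (the per-key
        # serialize mirrors the matrix_shrink_header() call in the diff).
        for key in scaled_keys:
            A = serialize([key])
            alpha = row_scaling[key][:, None]   # one tapering factor per row
            deserialize([key], alpha * (A @ X) + (1.0 - alpha) * A)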
7 changes: 4 additions & 3 deletions lib/enkf/local_dataset.cpp
@@ -112,7 +112,7 @@ void local_dataset_clear( local_dataset_type * dataset) {
   hash_clear( dataset->active_size );
 }
 
-row_scaling * local_dataset_get_row_scaling(local_dataset_type * dataset, const char * key) {
+const row_scaling * local_dataset_get_row_scaling(const local_dataset_type * dataset, const char * key) {
   auto scaling_iter = dataset->scaling.find( key );
   if (scaling_iter != dataset->scaling.end())
     return &scaling_iter->second;
@@ -134,12 +134,13 @@ row_scaling_type * local_dataset_get_or_create_row_scaling(local_dataset_type *
 
     dataset->scaling.emplace( key, row_scaling{});
   }
-  return local_dataset_get_row_scaling( dataset, key );
+  return &dataset->scaling[key];
 }
 
 
 active_list_type * local_dataset_get_node_active_list(const local_dataset_type * dataset , const char * node_key ) {
-  return (active_list_type *) hash_get( dataset->active_size , node_key ); /* Fails hard if you do not have the key ... */
+  active_list_type * al = (active_list_type *) hash_get( dataset->active_size , node_key ); /* Fails hard if you do not have the key ... */
+  return al;
 }
 
 stringlist_type * local_dataset_alloc_keys( const local_dataset_type * dataset ) {
2 changes: 1 addition & 1 deletion lib/include/ert/enkf/local_dataset.hpp
@@ -50,7 +50,7 @@ int local_dataset_get_size( const local_dataset_type * dataset );
 void local_dataset_del_node( local_dataset_type * dataset , const char * node_key);
 PY_USED bool local_dataset_has_key(const local_dataset_type * dataset, const char * key);
 hash_iter_type * local_dataset_alloc_iter(const local_dataset_type * dataset);
-row_scaling_type * local_dataset_get_row_scaling(const local_dataset_type * dataset, const char * key);
+const row_scaling_type * local_dataset_get_row_scaling(const local_dataset_type * dataset, const char * key);
 row_scaling_type * local_dataset_get_or_create_row_scaling(local_dataset_type * dataset, const char * key);
 bool local_dataset_has_row_scaling(const local_dataset_type * dataset, const char * key);
3 changes: 1 addition & 2 deletions python/res/enkf/data/__init__.py
@@ -3,6 +3,5 @@
 from .gen_kw import GenKw
 from .ext_param import ExtParam
 from .enkf_node import EnkfNode
-from .summary import Summary
 
-__all__ = ["Field", "Summary", "GenKw", "GenData", "ExtParam", "EnkfNode"]
+__all__ = ["Field", "GenKw", "GenData", "ExtParam", "EnkfNode"]
1 change: 1 addition & 0 deletions python/res/enkf/data/enkf_node.py
@@ -19,6 +19,7 @@
 from res import ResPrototype
 from res.enkf import EnkfFs, NodeId
 from res.enkf.data import GenKw, GenData, Field, ExtParam
+from res.enkf.data.summary import Summary
 
 
 class EnkfNode(BaseCClass):
52 changes: 52 additions & 0 deletions python/res/enkf/row_scaling.py
@@ -51,3 +51,55 @@ def __getitem__(self, index):
 
     def clamp(self, value):
         return self._clamp(value)
+
+    def assign(self, target_size, func):
+        """Assign tapering value for all elements.
+
+        The assign() method is the main function used to assign a row scaling
+        value to be used as tapering in the update. The first argument is the
+        number of elements in the target parameter, and the second argument is
+        a callable which will be called with the element index as argument.
+
+        In the example below we assume that the tapering is for a field
+        variable and we scale the update with the function exp( -r/r0 ),
+        where r is the distance from some point and r0 is the length scale:
+
+            def sqr(x):
+                return x*x
+
+            def exp_decay(grid, pos, r0, data_index):
+                x,y,z = grid.get_xyz( active_index = data_index)
+                r = math.sqrt( sqr(pos[0] - x) + sqr(pos[1] - y) + sqr(pos[2] - z))
+                return math.exp( -r/r0 )
+
+            ens_config = ert.ensembleConfig()
+            config_node = ens_config["PORO"]
+            field_config = config_node.getFieldModelConfig()
+            grid = ert.eclConfig().getGrid()
+            pos = grid.get_xyz(ijk=(10,10,1))
+            r0 = 10
+
+            if grid.get_num_active() != field_config.get_data_size():
+                raise ValueError("Fatal error - inconsistent field size for: {}".format(config_node.getKey()))
+
+            # Some local configuration boilerplate has been skipped here.
+            local_config = main.getLocalConfig()
+            local_data = local_config.createDataset("LOCAL")
+            row_scaling = local_data.row_scaling("PORO")
+
+            row_scaling.assign( field_config.get_data_size(), functools.partial(exp_decay, grid, pos, r0))
+
+        In the example above functools.partial() is used to create a callable
+        which has access to the necessary context; another alternative would be
+        to use a class instance which implements the __call__() method.
+
+        It is an important point that the assign() method does not have any
+        context for error checking, i.e. if you call it with an incorrect value
+        for the size argument things will silently pass initially, but might
+        blow up in the subsequent update step.
+        """
+
+        for index in range(target_size):
+            self[index] = func(index)
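A note on the contract documented above: assign() only needs func(index) to return a value and self[index] = ... to store it. The snippet below demonstrates that contract standalone; DummyRowScaling, coords and the observation position are invented for the example and only mimic the real res.enkf.RowScaling behaviour.

    import math
    from functools import partial

    class DummyRowScaling:
        # Stand-in for RowScaling: one factor per element, clamped to [0, 1].
        def __init__(self):
            self.values = {}

        def __setitem__(self, index, value):
            self.values[index] = max(0.0, min(1.0, value))

        def assign(self, target_size, func):
            for index in range(target_size):
                self[index] = func(index)

    def exp_decay(coords, pos, r0, data_index):
        x, y, z = coords[data_index]
        r = math.sqrt((pos[0] - x)**2 + (pos[1] - y)**2 + (pos[2] - z)**2)
        return math.exp(-r / r0)

    coords = [(i * 1.0, 0.0, 0.0) for i in range(5)]   # toy 1D "grid"
    scaling = DummyRowScaling()
    scaling.assign(len(coords), partial(exp_decay, coords, (0.0, 0.0, 0.0), 2.0))
    print(scaling.values)   # 1.0 at the observation point, decaying with distance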
Binary file added test-data/local/row_scaling/CASE.EGRID
Binary file not shown.
11 changes: 11 additions & 0 deletions test-data/local/row_scaling/config
@@ -0,0 +1,11 @@
+NUM_REALIZATIONS 10
+GRID CASE.EGRID
+FIELD PORO PARAMETER poro.grdecl INIT_FILES:fields/poro%d.grdecl
+SUMMARY WBHP
+OBS_CONFIG observations.txt
+
+
+LOAD_WORKFLOW_JOB workflows/ROW_SCALING_JOB1
+LOAD_WORKFLOW workflows/ROW_SCALING_WORKFLOW1
+
+TIME_MAP timemap.txt
6 changes: 6 additions & 0 deletions test-data/local/row_scaling/observations.txt
@@ -0,0 +1,6 @@
+SUMMARY_OBSERVATION WBHP0 {
+    VALUE   = 125;
+    ERROR   = 25;
+    RESTART = 1;
+    KEY     = WBHP;
+};
2 changes: 2 additions & 0 deletions test-data/local/row_scaling/timemap.txt
@@ -0,0 +1,2 @@
+01/01/2020
+01/02/2020
1 change: 1 addition & 0 deletions test-data/local/row_scaling/workflows/ROW_SCALING1
@@ -0,0 +1 @@
+SCRIPT script/row_scaling1.py
2 changes: 2 additions & 0 deletions test-data/local/row_scaling/workflows/ROW_SCALING_JOB1
@@ -0,0 +1,2 @@
+INTERNAL True
+SCRIPT scripts/row_scaling_job1.py
1 change: 1 addition & 0 deletions test-data/local/row_scaling/workflows/ROW_SCALING_WORKFLOW1
@@ -0,0 +1 @@
+ROW_SCALING_JOB1
49 changes: 49 additions & 0 deletions test-data/local/row_scaling/workflows/scripts/row_scaling_job1.py
@@ -0,0 +1,49 @@
+from res.enkf import ErtScript
+from functools import partial
+import math
+
+def gaussian_decay(obs_pos, length_scale, grid, data_index):
+    x,y,z = grid.get_xyz( active_index = data_index)
+    dx = (obs_pos[0] - x)/length_scale[0]
+    dy = (obs_pos[1] - y)/length_scale[1]
+    dz = (obs_pos[2] - z)/length_scale[2]
+
+    exp_arg = -0.5* (dx*dx + dy*dy + dz*dz)
+    return math.exp( exp_arg )
+
+
+
+
+class RowScalingJob1(ErtScript):
+
+    def run(self):
+        main = self.ert()
+        local_config = main.getLocalConfig()
+        local_config.clear()
+
+
+        local_data = local_config.createDataset("LOCAL")
+        local_data.addNode("PORO")
+        row_scaling = local_data.row_scaling("PORO")
+
+        ens_config = main.ensembleConfig()
+        poro_config = ens_config["PORO"]
+        field_config = poro_config.getFieldModelConfig()
+
+        #-------------------------------------------------------------------------------------
+        grid = main.eclConfig().getGrid()
+        obs_pos = grid.get_xyz( ijk=(5,5,1) )
+        length_scale = (2, 1, 0.50)
+        gaussian = partial( gaussian_decay, obs_pos, length_scale, grid)
+        row_scaling.assign( field_config.get_data_size(), gaussian)
+        #-------------------------------------------------------------------------------------
+
+
+        obs = local_config.createObsdata("OBSSET_LOCAL")
+        obs.addNode("WBHP0")
+        ministep = local_config.createMinistep("MINISTEP_LOCAL")
+        ministep.attachDataset(local_data)
+        ministep.attachObsset(obs)
+        updatestep = local_config.getUpdatestep()
+        updatestep.attachMinistep(ministep)
+
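As a quick sanity check on gaussian_decay above: at a distance of one length scale along a single axis the taper is exp(-0.5), roughly 0.61, and at two length scales exp(-2), roughly 0.14. The standalone check below uses an invented StubGrid; only get_xyz(active_index=...) mimics the real grid API.

    import math

    def gaussian_decay(obs_pos, length_scale, grid, data_index):
        x, y, z = grid.get_xyz(active_index=data_index)
        dx = (obs_pos[0] - x) / length_scale[0]
        dy = (obs_pos[1] - y) / length_scale[1]
        dz = (obs_pos[2] - z) / length_scale[2]
        return math.exp(-0.5 * (dx*dx + dy*dy + dz*dz))

    class StubGrid:
        # Minimal stand-in for the ecl grid: one cell per unit along the x axis.
        def get_xyz(self, active_index):
            return (float(active_index), 0.0, 0.0)

    grid = StubGrid()
    obs_pos, length_scale = (0.0, 0.0, 0.0), (2.0, 1.0, 0.5)
    print(gaussian_decay(obs_pos, length_scale, grid, 2))   # one length scale: ~0.607
    print(gaussian_decay(obs_pos, length_scale, grid, 4))   # two length scales: ~0.135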
2 changes: 1 addition & 1 deletion tests/res/enkf/data/test_summary.py
@@ -2,7 +2,7 @@
 import json
 
 from ecl.util.test.ecl_mock import createEclSum
-from res.enkf.data import Summary
+from res.enkf.data.summary import Summary
 from res.enkf.config import SummaryConfig
 from ecl.util.test import TestAreaContext
 from tests import ResTest