built user-facing wrap_model function

harmslab · harmsm · Aug 27, 2024 · Aug 6, 2024 · Aug 6, 2024 · Aug 7, 2024
commit 09a4a65302bc05cf2b32000b881a33394b3c4ea8
diff --git a/src/dataprob/__init__.py b/src/dataprob/__init__.py
@@ -1,13 +1,12 @@
 __description__ = \
 """
-Fitters for doing fits with likelihood functions.
+Key public functions and methods for dataprob library.
 """
-__author__ = "Michael J. Harms"
-__date__ = "2017-05-09"
-
-from .fitters import MLFitter
-from .fitters import BootstrapFitter
-from .fitters import BayesianFitter
-from .model_wrapper.model_wrapper import ModelWrapper
-from .model_wrapper.vector_model_wrapper import VectorModelWrapper
-from .fit_param import FitParameter
+
+from .model_wrapper.wrap_function import wrap_function
+
+from .fitters.ml import MLFitter
+from .fitters.bootstrap import BootstrapFitter
+from .fitters.bayesian import BayesianFitter
+
+
diff --git a/src/dataprob/fitters/__init__.py b/src/dataprob/fitters/__init__.py
@@ -1,4 +0,0 @@
-
-from .ml import MLFitter
-from .bootstrap import BootstrapFitter
-from .bayesian import BayesianFitter

diff --git a/src/dataprob/model_wrapper/model_wrapper.py b/src/dataprob/model_wrapper/model_wrapper.py
@@ -214,6 +214,58 @@ def load_fit_result(self,fitter):
         for i, p in enumerate(self._position_to_param):
             self.fit_parameters[p].load_fit_result(fitter,i)
 
+    def load_param_dict(self,params_to_load):
+        """
+        Load parameter guesses, fixed-ness, bounds, and priors from a
+        dictionary.
+
+        Parameters
+        ----------
+        params_to_load : dict
+            Dictionary keys should be the names of parameters loaded into the
+            model_wrapper. Values are themselves dictionaries keying attributes
+            to their appropriate value. For example, the following argument:
+                `params_to_load['K'] = {'fixed':True,'guess':5}`
+            would fix parameter 'K' and set its guess to 5. Not all parameters
+            and attributes need to be in the dictionary. Parameters not seen in
+            the model will cause an error, even when their attribute
+            dictionary is empty.
+
+        Raises
+        ------
+        ValueError
+            If params_to_load is not a dictionary, or if one of its keys is
+            not in self.fit_parameters.
+
+        Note
+        ----
+        Allowed attributes:
+
+        |----------+--------------------------------------------------------------------------|
+        | key      | value                                                                    |
+        |----------+--------------------------------------------------------------------------|
+        | 'guess'  | single float value (must be within bounds, if specified)                 |
+        | 'fixed'  | True or False                                                            |
+        | 'bounds' | (lower,upper) as floats (-np.inf,np.inf) allowed                         |
+        | 'prior'  | (mean,stdev) as floats (np.nan,np.nan) allowed, meaning uniform prior    |
+        |----------+--------------------------------------------------------------------------|
+
+        """
+
+        # Make sure it's a dictionary (dict subclasses are accepted too)
+        if not isinstance(params_to_load,dict):
+            err = "params_to_load should be a dictionary keying parameter names\n"
+            err += "to dictionaries of attribute values.\n"
+            raise ValueError(err)
+
+        # Set fit parameter attributes from the dictionary values
+        for p, attributes in params_to_load.items():
+
+            # Check the name once per parameter, before looking at its
+            # attributes, so an unknown parameter with an empty attribute
+            # dictionary is rejected rather than silently ignored.
+            if p not in self.fit_parameters:
+                err = f"parameter '{p}' is not in this model\n"
+                raise ValueError(err)
+
+            for field in attributes:
+                setattr(self.fit_parameters[p],field,attributes[field])
+
+        # Update parameters with new information.
+        self._update_parameter_map()
+
+
+
     def load_param_spreadsheet(self,spreadsheet):
         """
         Load parameter guesses, fixed-ness, bounds, and priors from a
@@ -229,7 +281,21 @@ def load_param_spreadsheet(self,spreadsheet):
 
         Notes
         -----
-
+
+        Allowable columns:
+
+        |---------------+---------------------------------------------------------------------|
+        | key           | value                                                               |
+        |---------------+---------------------------------------------------------------------|
+        | 'param'       | string name of the parameter                                        |
+        | 'guess'       | guess as single float value (must be within bounds, if specified)   |
+        | 'fixed'       | True or False                                                       | 
+        | 'lower_bound' | single float value; -np.inf allowed                                 | 
+        | 'upper_bound' | single float value; np.inf allowed                                  | 
+        | 'prior_mean'  | single float value; np.nan allowed                                  |
+        | 'prior_std'   | single float value; np.nan allowed                                  |
+        |---------------+---------------------------------------------------------------------| 
+
         + The 'param' column is required. All parameters in the spreadsheet must
           match parameters in the model; however, not all parameters in the
           model must be in the spreadsheet. Parameters not in the spreadsheet 
@@ -263,21 +329,11 @@ def load_param_spreadsheet(self,spreadsheet):
           parameter. 
         """
 
-        # Load spreadsheet
+        # Load spreadsheet into a dictionary
         params_to_load = load_param_spreadsheet(spreadsheet=spreadsheet)
 
-        # Set fit parameter attributes from the spreadsheet values
-        for p in params_to_load:
-            for field in params_to_load[p]:
-
-                if p not in self.fit_parameters:
-                    err = f"parameter '{p}' is not in this model\n"
-                    raise ValueError(err)
-
-                setattr(self.fit_parameters[p],field,params_to_load[p][field])
-
-        # Update parameters with new information. 
-        self._update_parameter_map()
+        # Load via load_param_dict
+        self.load_param_dict(params_to_load=params_to_load)
 
     @property
     def model(self):

diff --git a/src/dataprob/model_wrapper/read_spreadsheet.py b/src/dataprob/model_wrapper/read_spreadsheet.py
@@ -6,7 +6,7 @@
 from dataprob.check import check_bool
 from dataprob.check import check_float
 
-def _read_spreadsheet(spreadsheet):
+def read_spreadsheet(spreadsheet):
     """
     Read a spreadsheet. Use pandas to read files of various types or, if 
     spreadsheet is already a dataframe, return a copy of the dataframe. 
@@ -229,7 +229,7 @@ def load_param_spreadsheet(spreadsheet):
     """
 
     # read spreadsheet
-    df = _read_spreadsheet(spreadsheet=spreadsheet)
+    df = read_spreadsheet(spreadsheet=spreadsheet)
 
     # Make sure 'param' is present
     if "param" not in df.columns:

diff --git a/src/dataprob/model_wrapper/vector_model_wrapper.py b/src/dataprob/model_wrapper/vector_model_wrapper.py
@@ -45,9 +45,9 @@ def _mw_load_model(self,model_to_fit,fittable_params):
             if num_param < 1:
                 raise ValueError
         except Exception as e:
-                err = f"fittable_params must be a list or dictionary with at least one\n"
-                err += "fittable parameter\n"
-                raise ValueError(err) from e
+            err = f"fittable_params must be a list or dictionary with at least one\n"
+            err += "fittable parameter\n"
+            raise ValueError(err) from e
 
         # Make sure fittable param names do not conflict with argument param
         # names

diff --git a/src/dataprob/model_wrapper/wrap_function.py b/src/dataprob/model_wrapper/wrap_function.py
@@ -0,0 +1,201 @@
+
+from dataprob.model_wrapper.model_wrapper import ModelWrapper
+from dataprob.model_wrapper.vector_model_wrapper import VectorModelWrapper
+from dataprob.model_wrapper.read_spreadsheet import read_spreadsheet
+
+from dataprob.check import check_bool
+
+import pandas as pd
+
+def wrap_function(some_function,
+                  fit_parameters=None,
+                  vector_first_arg=False):
+    """
+    Wrap a function for regression or Bayesian sampling.
+
+    Parameters
+    ----------
+    some_function : callable
+        A function that takes at least one argument and returns a float value
+        or float numpy array. Fitter objects will compare the outputs of this
+        function against y_obs.
+    fit_parameters : list, dict, str, pandas.DataFrame; optional
+        fit_parameters lets the user specify information about the parameters
+        in the fit. See Note below for details.
+    vector_first_arg : bool, default=False
+        If True, the first argument of the function is taken as a vector of
+        parameters to fit. All other arguments to some_function are treated as
+        non-fittable parameters. `fit_parameters` must then specify the names
+        of each vector element.
+
+    Returns
+    -------
+    mw : ModelWrapper
+        ModelWrapper instance can be fed directly into a Fitter.fit method. The
+        user can also manipulate fit parameters prior to the analysis.
+
+    Raises
+    ------
+    ValueError
+        If `fit_parameters` is not None, a list, a dict, a pandas.DataFrame,
+        or a str, or if the spreadsheet/dataframe it refers to has no 'param'
+        column.
+
+    Note
+    ----
+    There are two classes of parameters to each model. Fittable parameters are
+    visible to Fitter instances (such as the ML fitter or Bayesian sampler) and
+    are thus regressed/sampled. Non-fittable parameters are fixed and passed
+    into `some_function` whenever it is called, but are invisible to the Fitter.
+
+    The software uses the signature of `some_function`, `fit_parameters`, and
+    `vector_first_arg` to figure out what fit parameters to use.
+
+    In the simplest case (`fit_parameters is None`, `vector_first_arg is False`),
+    the software will infer the fittable and non-fittable parameters from the
+    `some_function` signature. It will grab the first N arguments with no
+    default or whose default can be coerced to a float. The remaining arguments
+    are treated as non-fittable parameters. Consider the example:
+
+        `some_function == my_func(a,b=1,c="test",d=1)`
+
+    The software will find the fittable parameters `a` and `b`, setting the
+    guesses to `a = 0` and `b = 1`. The `c` and `d` parameters will be set as
+    non-fittable.
+
+    If `fit_parameters` is defined, it can override this default. For
+    example, if `fit_parameters = ['a','d']`, `a` and `d` will be fittable
+    parameters and `b` and `c` will be non-fittable parameters. Except for two
+    special cases described below, the parameters in `fit_parameters` must match
+    the parameters in the function signature. The parameters `a`, `b`, and `d`
+    can be specified as fittable; the parameter `c` cannot because its default
+    argument is a string.
+
+    NOTE: `fit_parameters` is treated as an exhaustive list of fittable
+    parameters. If specified, *only* the parameters in the list will be
+    fittable.
+
+    `fit_parameters` can differ from the parameters in the signature of
+    `some_function` in two cases:
+
+    1)  If the signature of `some_function` contains `**kwargs`, `fit_parameters`
+        can be used to specify parameters to pass into some_function that are
+        not explicitly delineated in the function signature. For example:
+
+            `some_function == my_func(a,**kwargs)`
+
+        would allow `fit_parameters = ['a','b','c']`. The `b` and `c` parameters
+        would be passed in as keyword arguments. (The code does not check
+        whether `my_func` can take those keyword arguments; that is the user's
+        responsibility.)
+
+    2)  If `vector_first_arg` is `True`, `fit_parameters` defines the parameters
+        to pass in as a numpy.ndarray as the first function argument. If
+        `vector_first_arg` is `True`, `fit_parameters` is required. All
+        function arguments besides this vector are treated as non-fittable
+        parameters.
+
+    Finally, `fit_parameters` can be used to pass in other information about
+    the fit parameters. This includes the parameter guess, whether or not it is
+    fixed during the regression, its bounds, and the mean and standard deviation
+    of a gaussian prior to apply to that fit parameter (Bayesian sampling only).
+    This information can either be passed in via a dictionary or dataframe.
+
+    If `fit_parameters` comes in as a dictionary, the keys should be the
+    parameter names (just like the entries to a `fit_parameters` list). The
+    values should be dictionaries keying parameter attributes to their values.
+    For example:
+
+        `fit_parameters = {"K":{"guess":1,"bounds":(-np.inf,0)}}`
+
+    would indicate that parameter "K" should have a guess of 1 and bounds from
+    negative infinity to zero.
+
+    The allowed keys are:
+
+    |----------+--------------------------------------------------------------------------|
+    | key      | value                                                                    |
+    |----------+--------------------------------------------------------------------------|
+    | 'guess'  | single float value (must be within bounds, if specified)                 |
+    | 'fixed'  | True or False                                                            |
+    | 'bounds' | (lower,upper) as floats (-np.inf,np.inf) allowed                         |
+    | 'prior'  | (mean,stdev) as floats (np.nan,np.nan) allowed, meaning uniform prior    |
+    |----------+--------------------------------------------------------------------------|
+
+    If `fit_parameters` comes in as a dataframe, the dataframe can have the
+    following columns.
+
+    |---------------+---------------------------------------------------------------------|
+    | key           | value                                                               |
+    |---------------+---------------------------------------------------------------------|
+    | 'param'       | string name of the parameter                                        |
+    | 'guess'       | guess as single float value (must be within bounds, if specified)   |
+    | 'fixed'       | True or False                                                       |
+    | 'lower_bound' | single float value; -np.inf allowed                                 |
+    | 'upper_bound' | single float value; np.inf allowed                                  |
+    | 'prior_mean'  | single float value; np.nan allowed                                  |
+    | 'prior_std'   | single float value; np.nan allowed                                  |
+    |---------------+---------------------------------------------------------------------|
+
+    If `fit_parameters` comes in as a string, this function will treat it as
+    the name of a spreadsheet file to read into a dataframe.
+    """
+
+    vector_first_arg = check_bool(value=vector_first_arg,
+                                  variable_name="vector_first_arg")
+
+    # Select the appropriate ModelWrapper class to use
+    mw_class = VectorModelWrapper if vector_first_arg else ModelWrapper
+
+    # Work out the list of fittable parameter names from the type of
+    # fit_parameters. At most one of param_dict / param_df is set; it holds
+    # the extra parameter attributes to load once the wrapper exists.
+    param_dict = None
+    param_df = None
+
+    # None --> not specified. Use ModelWrapper default scheme
+    if fit_parameters is None:
+        fit_param_list = None
+
+    # list --> send in as the list of fit parameters
+    elif isinstance(fit_parameters,list):
+        fit_param_list = fit_parameters
+
+    # dict --> send in keys as the list of fit parameters; the attribute
+    # values are loaded below via the load_param_dict method.
+    elif isinstance(fit_parameters,dict):
+        fit_param_list = list(fit_parameters)
+        param_dict = fit_parameters
+
+    # pd.DataFrame or str --> treat as a spreadsheet.
+    elif isinstance(fit_parameters,(pd.DataFrame,str)):
+
+        # Read fit_parameters spreadsheet (or get copy of dataframe)
+        param_df = read_spreadsheet(fit_parameters)
+
+        # Get list of fit parameters
+        if "param" not in param_df.columns:
+            err = "fit_parameters DataFrame must have a 'param' column\n"
+            raise ValueError(err)
+        fit_param_list = list(param_df["param"])
+
+    else:
+
+        err = "fit_parameters not recognized. If specified, fit_parameters\n"
+        err += "must be a list, dictionary, pandas DataFrame, or filename\n"
+        err += "pointing to a spreadsheet. See the wrap_function docstring\n"
+        err += "for details.\n"
+        raise ValueError(err)
+
+    # Build the wrapper once, whatever form fit_parameters took
+    mw = mw_class(model_to_fit=some_function,
+                  fittable_params=fit_param_list)
+
+    # Load guesses, bounds, priors, etc. if they were provided
+    if param_dict is not None:
+        mw.load_param_dict(param_dict)
+    elif param_df is not None:
+        mw.load_param_spreadsheet(param_df)
+
+    return mw
+
+
+