Data Reconciliation

The optional argument return_values in theta_est can be used for data reconciliation or to return model values based on the specified objective.

For data reconciliation, the m.unknown_parameters is empty and the objective function is defined to minimize measurement to model error. Note that the model used for data reconciliation may differ from the model used for parameter estimation.

The functions grouped_boxplot or grouped_violinplot can be used to visually compare the original and reconciled data.

The following example from the reactor design subdirectory returns reconciled values for experiment outputs (ca, cb, cc, and cd) and then uses those values in parameter estimation (k1, k2, and k3).

#  ___________________________________________________________________________
#
#  Pyomo: Python Optimization Modeling Objects
#  Copyright (c) 2008-2024
#  National Technology and Engineering Solutions of Sandia, LLC
#  Under the terms of Contract DE-NA0003525 with National Technology and
#  Engineering Solutions of Sandia, LLC, the U.S. Government retains certain
#  rights in this software.
#  This software is distributed under the 3-clause BSD License.
#  ___________________________________________________________________________

import pyomo.environ as pyo
from pyomo.common.dependencies import numpy as np, pandas as pd
import pyomo.contrib.parmest.parmest as parmest
from pyomo.contrib.parmest.examples.reactor_design.reactor_design import (
    reactor_design_model,
    ReactorDesignExperiment,
)

np.random.seed(1234)


class ReactorDesignExperimentDataRec(ReactorDesignExperiment):

    def __init__(self, data, data_std, experiment_number):

        super().__init__(data, experiment_number)
        self.data_std = data_std

    def create_model(self):

        self.model = m = reactor_design_model()
        m.caf.fixed = False

        return m

    def label_model(self):

        m = self.model

        # experiment outputs
        m.experiment_outputs = pyo.Suffix(direction=pyo.Suffix.LOCAL)
        m.experiment_outputs.update(
            [
                (m.ca, self.data_i['ca']),
                (m.cb, self.data_i['cb']),
                (m.cc, self.data_i['cc']),
                (m.cd, self.data_i['cd']),
            ]
        )

        # experiment standard deviations
        m.experiment_outputs_std = pyo.Suffix(direction=pyo.Suffix.LOCAL)
        m.experiment_outputs_std.update(
            [
                (m.ca, self.data_std['ca']),
                (m.cb, self.data_std['cb']),
                (m.cc, self.data_std['cc']),
                (m.cd, self.data_std['cd']),
            ]
        )

        # no unknowns (theta names)
        m.unknown_parameters = pyo.Suffix(direction=pyo.Suffix.LOCAL)

        return m


class ReactorDesignExperimentPostDataRec(ReactorDesignExperiment):

    def __init__(self, data, data_std, experiment_number):

        super().__init__(data, experiment_number)
        self.data_std = data_std

    def label_model(self):

        m = super().label_model()

        # add experiment standard deviations
        m.experiment_outputs_std = pyo.Suffix(direction=pyo.Suffix.LOCAL)
        m.experiment_outputs_std.update(
            [
                (m.ca, self.data_std['ca']),
                (m.cb, self.data_std['cb']),
                (m.cc, self.data_std['cc']),
                (m.cd, self.data_std['cd']),
            ]
        )

        return m


def generate_data():

    ### Generate data based on real sv, caf, ca, cb, cc, and cd
    sv_real = 1.05
    caf_real = 10000
    ca_real = 3458.4
    cb_real = 1060.8
    cc_real = 1683.9
    cd_real = 1898.5

    data = pd.DataFrame()
    ndata = 200
    # Normal distribution, mean = 3400, std = 500
    data["ca"] = 500 * np.random.randn(ndata) + 3400
    # Random distribution between 500 and 1500
    data["cb"] = np.random.rand(ndata) * 1000 + 500
    # Lognormal distribution
    data["cc"] = np.random.lognormal(np.log(1600), 0.25, ndata)
    # Triangular distribution between 1000 and 2000
    data["cd"] = np.random.triangular(1000, 1800, 3000, size=ndata)

    data["sv"] = sv_real
    data["caf"] = caf_real

    return data


def main():

    # Generate data
    data = generate_data()
    data_std = data.std()

    # Create an experiment list
    exp_list = []
    for i in range(data.shape[0]):
        exp_list.append(ReactorDesignExperimentDataRec(data, data_std, i))

    # Define sum of squared error objective function for data rec
    def SSE_with_std(model):
        expr = sum(
            ((y - y_hat) / model.experiment_outputs_std[y]) ** 2
            for y, y_hat in model.experiment_outputs.items()
        )
        return expr

    ### Data reconciliation
    pest = parmest.Estimator(exp_list, obj_function=SSE_with_std)

    obj, theta, data_rec = pest.theta_est(return_values=["ca", "cb", "cc", "cd", "caf"])
    print(obj)
    print(theta)

    parmest.graphics.grouped_boxplot(
        data[["ca", "cb", "cc", "cd"]],
        data_rec[["ca", "cb", "cc", "cd"]],
        group_names=["Data", "Data Rec"],
    )

    ### Parameter estimation using reconciled data
    data_rec["sv"] = data["sv"]

    # make a new list of experiments using reconciled data
    exp_list = []
    for i in range(data_rec.shape[0]):
        exp_list.append(ReactorDesignExperimentPostDataRec(data_rec, data_std, i))

    pest = parmest.Estimator(exp_list, obj_function=SSE_with_std)
    obj, theta = pest.theta_est()
    print(obj)
    print(theta)

    theta_real = {"k1": 5.0 / 6.0, "k2": 5.0 / 3.0, "k3": 1.0 / 6000.0}
    print(theta_real)


if __name__ == "__main__":
    main()

The following example returns model values from a Pyomo Expression.

>>> import pandas as pd
>>> import pyomo.contrib.parmest.parmest as parmest
>>> from pyomo.contrib.parmest.examples.rooney_biegler.rooney_biegler import RooneyBieglerExperiment

>>> # Generate data
>>> data = pd.DataFrame(data=[[1,8.3],[2,10.3],[3,19.0],
...                           [4,16.0],[5,15.6],[7,19.8]],
...                     columns=['hour', 'y'])

>>> # Create an experiment list
>>> exp_list = []
>>> for i in range(data.shape[0]):
...     exp_list.append(RooneyBieglerExperiment(data.loc[i, :]))

>>> # Define objective
>>> def SSE(model):
...     expr = (model.experiment_outputs[model.y]
...             - model.response_function[model.experiment_outputs[model.hour]]
...            ) ** 2
...     return expr

>>> pest = parmest.Estimator(exp_list, obj_function=SSE, solver_options=None)
>>> obj, theta, var_values = pest.theta_est(return_values=['response_function'])
>>> #print(var_values)