Skip to content

Problem

Problem

Source code in opti/problem.py
class Problem:
    """Container for an optimization problem.

    Bundles the input / output parameters, objectives, input constraints,
    output constraints, models, experimental data and known optima, and
    validates them against each other on construction.
    """

    def __init__(
        self,
        inputs: ParametersLike,
        outputs: ParametersLike,
        objectives: Optional[ObjectivesLike] = None,
        constraints: Optional[ConstraintsLike] = None,
        output_constraints: Optional[ObjectivesLike] = None,
        f: Optional[Callable] = None,
        models: Optional[ModelsLike] = None,
        data: Optional[DataFrameLike] = None,
        optima: Optional[DataFrameLike] = None,
        name: Optional[str] = None,
        **kwargs,
    ):
        """An optimization problem.

        Args:
            inputs: Input parameters.
            outputs: Output parameters.
            objectives: Optimization objectives. Defaults to minimization.
            constraints: Constraints on the inputs.
            output_constraints: Constraints on the outputs.
            f: Function to evaluate the outputs for given inputs.
                Must have the signature: f(x: pd.DataFrame) -> pd.DataFrame
            models: Models for the outputs.
            data: Experimental data.
            optima: Pareto optima.
            name: Name of the problem.
            **kwargs: Accepted and ignored, so that configuration dicts with
                additional keys can be passed via `Problem(**config)`.

        Raises:
            ValueError: If the parameters, objectives, models, data or optima
                are inconsistent with each other.
        """
        # Local import: only needed to parse dict-encoded dataframes below.
        from io import StringIO

        self.name = name if name is not None else "Problem"
        self.inputs = inputs if isinstance(inputs, Parameters) else Parameters(inputs)
        self.outputs = (
            outputs if isinstance(outputs, Parameters) else Parameters(outputs)
        )

        if objectives is None:
            self.objectives = Objectives([Minimize(m) for m in self.outputs.names])
        elif isinstance(objectives, Objectives):
            self.objectives = objectives
        else:
            self.objectives = Objectives(objectives)

        if isinstance(constraints, Constraints):
            pass
        elif not constraints:
            constraints = None
        else:
            constraints = Constraints(constraints)
            if len(constraints) == 0:  # no valid constraints
                constraints = None
        self.constraints = constraints

        if isinstance(output_constraints, Objectives) or output_constraints is None:
            self.output_constraints = output_constraints
        else:
            self.output_constraints = Objectives(output_constraints)

        if isinstance(models, Models) or models is None:
            self.models = models
        else:
            self.models = Models(models)

        # `f` is only set when given so that a subclass-level `f` is not shadowed.
        if f is not None:
            self.f = f

        # Dict-encoded frames use the "split" orientation written by `to_config`.
        # The JSON text is wrapped in StringIO because passing a literal JSON
        # string to `pd.read_json` is deprecated in recent pandas versions.
        if isinstance(data, dict):
            data = pd.read_json(StringIO(json.dumps(data)), orient="split")

        if isinstance(optima, dict):
            optima = pd.read_json(StringIO(json.dumps(optima)), orient="split")

        self.set_data(data)
        self.set_optima(optima)
        self.check_problem()
        self.check_models()

    @property
    def n_inputs(self) -> int:
        """Number of input parameters."""
        return len(self.inputs)

    @property
    def n_outputs(self) -> int:
        """Number of output parameters."""
        return len(self.outputs)

    @property
    def n_objectives(self) -> int:
        """Number of objectives."""
        return len(self.objectives)

    @property
    def n_constraints(self) -> int:
        """Number of input constraints (0 if there are none)."""
        return 0 if self.constraints is None else len(self.constraints)

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        """Return a multi-line summary; optional parts are listed only if set."""
        parts = [
            "Problem(\n",
            f"name={self.name},\n",
            f"inputs={self.inputs},\n",
            f"outputs={self.outputs},\n",
            f"objectives={self.objectives},\n",
        ]
        if self.output_constraints is not None:
            parts.append(f"output_constraints={self.output_constraints},\n")
        if self.constraints is not None:
            parts.append(f"constraints={self.constraints},\n")
        if self.models is not None:
            parts.append(f"models={self.models},\n")
        if self.data is not None:
            parts.append(f"data=\n{self.data.head()}\n")
        if self.optima is not None:
            parts.append(f"optima=\n{self.optima.head()}\n")
        parts.append(")")
        return "".join(parts)

    @staticmethod
    def from_config(config: dict) -> "Problem":
        """Create a Problem instance from a configuration dict."""
        return Problem(**config)

    def to_config(self) -> dict:
        """Return json-serializable configuration dict.

        Optional parts (output constraints, constraints, models, data, optima)
        are only included when they are set. Dataframes are stored in the
        "split" orientation with NaN replaced by None.
        """
        config = {
            "name": self.name,
            "inputs": self.inputs.to_config(),
            "outputs": self.outputs.to_config(),
            "objectives": self.objectives.to_config(),
        }
        if self.output_constraints is not None:
            config["output_constraints"] = self.output_constraints.to_config()
        if self.constraints is not None:
            config["constraints"] = self.constraints.to_config()
        if self.models is not None:
            config["models"] = self.models.to_config()
        if self.data is not None:
            config["data"] = self.data.replace({np.nan: None}).to_dict("split")
        if self.optima is not None:
            config["optima"] = self.optima.replace({np.nan: None}).to_dict("split")
        return config

    @staticmethod
    def from_json(fname: PathLike) -> "Problem":
        """Read a problem from a JSON file."""
        with open(fname, "rb") as infile:
            config = json.loads(infile.read())
        return Problem(**config)

    def to_json(self, fname: PathLike) -> None:
        """Save a problem to a JSON file (compact, UTF-8 encoded)."""
        with open(fname, "wb") as outfile:
            b = json.dumps(self.to_config(), ensure_ascii=False, separators=(",", ":"))
            outfile.write(b.encode("utf-8"))

    def check_problem(self) -> None:
        """Check if input and output parameters are consistent.

        Raises:
            ValueError: If a name is used for both an input and an output, or
                an objective refers to a name that is not an output.
        """
        # check for duplicate names
        duplicates = set(self.inputs.names).intersection(self.outputs.names)
        if duplicates:
            raise ValueError(f"Parameter name in both inputs and outputs: {duplicates}")

        # check if all objectives refer to an output
        for obj in self.objectives:
            if obj.name not in self.outputs.names:
                raise ValueError(f"Objective refers to unknown parameter: {obj.name}")

    def check_data(self, data: pd.DataFrame) -> None:
        """Check if data is consistent with input and output parameters.

        Args:
            data: Dataframe to validate.

        Raises:
            ValueError: If a parameter column is missing, a continuous /
                discrete column is non-numeric (and not entirely null), a
                categorical column contains an unspecified level, an input
                column has missing values, or an output column is entirely null.
        """
        for p in self.inputs + self.outputs:
            # data must contain all parameters
            if p.name not in data.columns:
                raise ValueError(
                    f"Parameter {p.name} is missing. Data must contain all parameters."
                )

            # data for continuous / discrete parameters must be numeric
            if isinstance(p, (Continuous, Discrete)):
                ok = is_numeric_dtype(data[p.name]) or data[p.name].isnull().all()
                if not ok:
                    raise ValueError(
                        f"Parameter {p.name} contains non-numeric values. Data for continuous / discrete parameters must be numeric."
                    )

            # categorical levels in data must be specified
            elif isinstance(p, Categorical):
                ok = p.contains(data[p.name]) | data[p.name].isna()
                if not ok.all():
                    unknowns = data[p.name][~ok].unique().tolist()
                    raise ValueError(
                        f"Data for parameter {p.name} contains unknown values: {unknowns}. All categorical levels must be specified."
                    )

        # inputs must be complete
        for p in self.inputs:
            if data[p.name].isnull().any():
                raise ValueError(
                    f"Input parameter {p.name} has missing data. Inputs must be complete."
                )

        # outputs must have at least one observation
        for p in self.outputs:
            if data[p.name].isnull().all():
                raise ValueError(
                    f"Output parameter {p.name} has no data. Outputs must have at least one observation."
                )

    def check_models(self) -> None:
        """Check if the models are well defined.

        Raises:
            ValueError: If a model refers to a name that is not an output, or a
                linear model's coefficient count differs from the number of inputs.
        """
        if self.models is None:
            return

        for model in self.models:
            # models need to refer to output parameters
            for n in model.names:
                if n not in self.outputs.names:
                    raise ValueError(f"Model {model} refers to unknown outputs")

            if isinstance(model, LinearModel):
                if len(model.coefficients) != self.n_inputs:
                    raise ValueError(f"Model {model} has wrong number of coefficients.")

    def set_data(self, data: Optional[pd.DataFrame]) -> None:
        """Set the data.

        Values of categorical inputs are converted to strings (nulls are kept).
        When such a conversion is needed it is done on a copy, so the dataframe
        passed by the caller is left untouched.

        Args:
            data: Dataframe containing all inputs and outputs, or None to clear.

        Raises:
            ValueError: If the data fails `check_data`.
        """
        if data is not None:
            categorical_names = [
                p.name for p in self.inputs if isinstance(p, Categorical)
            ]
            if categorical_names:
                # Categorical levels are required to be strings. Ensure that the
                # corresponding data is as well, without mutating the caller's frame.
                data = data.copy()
                for col in categorical_names:
                    nulls = data[col].isna()
                    data[col] = data[col].astype(str).mask(nulls, np.nan)

            self.check_data(data)

        self.data = data

    def get_data(self) -> pd.DataFrame:
        """Return `self.data` if it exists or an empty dataframe."""
        if self.data is None:
            return pd.DataFrame(columns=self.inputs.names + self.outputs.names)
        return self.data

    def add_data(self, data: pd.DataFrame) -> None:
        """Add a number of data points.

        The new rows are validated with `check_data` and appended to the
        existing data; row indices are kept as they are.
        """
        self.check_data(data)
        self.data = pd.concat([self.data, data], axis=0)

    def set_optima(self, optima: Optional[pd.DataFrame]) -> None:
        """Set the optima / Pareto front."""
        if optima is not None:
            self.check_data(optima)
        self.optima = optima

    def get_X(self, data: Optional[pd.DataFrame] = None) -> np.ndarray:
        """Return the input values in `data` or `self.data`."""
        if data is not None:
            return data[self.inputs.names].values
        return self.get_data()[self.inputs.names].values

    def get_Y(self, data: Optional[pd.DataFrame] = None) -> np.ndarray:
        """Return the output values in `data` or `self.data`."""
        if data is not None:
            return data[self.outputs.names].values
        return self.get_data()[self.outputs.names].values

    def get_XY(
        self,
        outputs: Optional[List[str]] = None,
        data: Optional[pd.DataFrame] = None,
        continuous: str = "none",
        discrete: str = "none",
        categorical: str = "none",
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Return the input and output values as numpy arrays.

        Rows with missing output values will be dropped.
        Input values are assumed to be complete.
        Inputs are passed through `self.inputs.transform` with the given
        `continuous`, `discrete` and `categorical` settings; output values are
        returned as stored in the dataframe.

        Args:
            outputs (optional): Subset of the outputs to consider.
            data (optional): Dataframe to consider instead of problem.data
            continuous: Forwarded to `self.inputs.transform`.
            discrete: Forwarded to `self.inputs.transform`.
            categorical: Forwarded to `self.inputs.transform`.

        Returns:
            Tuple `(X, Y)` of transformed inputs and selected outputs.
        """
        if outputs is None:
            outputs = self.outputs.names
        if data is None:
            data = self.get_data()
        notna = data[outputs].notna().all(axis=1)
        X = self.inputs.transform(
            data, continuous=continuous, discrete=discrete, categorical=categorical
        )[notna].values
        Y = data[outputs][notna].values
        return X, Y

    def get_X_bounds(self) -> Tuple[np.ndarray, np.ndarray]:
        """Return the lower and upper data bounds."""
        X = self.get_X()
        xlo = X.min(axis=0)
        xhi = X.max(axis=0)
        b = xlo == xhi
        xhi[b] = xlo[b] + 1  # prevent division by zero when dividing by (xhi - xlo)
        return xlo, xhi

    def sample_inputs(self, n_samples=10) -> pd.DataFrame:
        """Uniformly sample points from the input space subject to the constraints."""
        if self.constraints is None:
            return sobol_sampling(n_samples, self.inputs)
        return constrained_sampling(n_samples, self.inputs, self.constraints)

    def create_initial_data(self, n_samples: int = 10) -> None:
        """Create an initial data set for benchmark problems by sampling uniformly from the input space and evaluating f(x) at the sampled inputs.

        Args:
            n_samples: Number of input points to sample.

        Raises:
            NotImplementedError: If the problem has no function `f`.
        """
        # `self.f` only exists if it was passed to __init__ (or is defined by a
        # subclass), so look it up defensively instead of raising AttributeError.
        f = getattr(self, "f", None)
        if f is None:
            raise NotImplementedError("problem.f is not implemented for the problem.")
        X = self.sample_inputs(n_samples)
        Y = f(X)
        self.data = pd.concat([X, Y], axis=1)

__init__(self, inputs, outputs, objectives=None, constraints=None, output_constraints=None, f=None, models=None, data=None, optima=None, name=None, **kwargs) special

An optimization problem.

Parameters:

Name Type Description Default
inputs Union[opti.parameter.Parameters, List[opti.parameter.Parameter], List[Dict]]

Input parameters.

required
outputs Union[opti.parameter.Parameters, List[opti.parameter.Parameter], List[Dict]]

Output parameters.

required
objectives Union[opti.objective.Objectives, List[opti.objective.Objective], List[Dict]]

Optimization objectives. Defaults to minimization.

None
constraints Union[opti.constraint.Constraints, List[opti.constraint.Constraint], List[Dict]]

Constraints on the inputs.

None
output_constraints Union[opti.objective.Objectives, List[opti.objective.Objective], List[Dict]]

Constraints on the outputs.

None
f Optional[Callable]

Function to evaluate the outputs for given inputs. Must have the signature: f(x: pd.DataFrame) -> pd.DataFrame

None
data Union[pandas.core.frame.DataFrame, Dict]

Experimental data.

None
optima Union[pandas.core.frame.DataFrame, Dict]

Pareto optima.

None
name Optional[str]

Name of the problem.

None
Source code in opti/problem.py
def __init__(
    self,
    inputs: ParametersLike,
    outputs: ParametersLike,
    objectives: Optional[ObjectivesLike] = None,
    constraints: Optional[ConstraintsLike] = None,
    output_constraints: Optional[ObjectivesLike] = None,
    f: Optional[Callable] = None,
    models: Optional[ModelsLike] = None,
    data: Optional[DataFrameLike] = None,
    optima: Optional[DataFrameLike] = None,
    name: Optional[str] = None,
    **kwargs,
):
    """An optimization problem.

    Args:
        inputs: Input parameters.
        outputs: Output parameters.
        objectives: Optimization objectives. Defaults to minimization.
        constraints: Constraints on the inputs.
        output_constraints: Constraints on the outputs.
        f: Function to evaluate the outputs for given inputs.
            Must have the signature: f(x: pd.DataFrame) -> pd.DataFrame
        models: Models for the outputs.
        data: Experimental data.
        optima: Pareto optima.
        name: Name of the problem.
        **kwargs: Accepted and ignored, so that configuration dicts with
            additional keys can be passed via `Problem(**config)`.
    """
    # Local import: only needed to parse dict-encoded dataframes below.
    from io import StringIO

    self.name = name if name is not None else "Problem"
    self.inputs = inputs if isinstance(inputs, Parameters) else Parameters(inputs)
    self.outputs = (
        outputs if isinstance(outputs, Parameters) else Parameters(outputs)
    )

    if objectives is None:
        self.objectives = Objectives([Minimize(m) for m in self.outputs.names])
    elif isinstance(objectives, Objectives):
        self.objectives = objectives
    else:
        self.objectives = Objectives(objectives)

    if isinstance(constraints, Constraints):
        pass
    elif not constraints:
        constraints = None
    else:
        constraints = Constraints(constraints)
        if len(constraints) == 0:  # no valid constraints
            constraints = None
    self.constraints = constraints

    if isinstance(output_constraints, Objectives) or output_constraints is None:
        self.output_constraints = output_constraints
    else:
        self.output_constraints = Objectives(output_constraints)

    if isinstance(models, Models) or models is None:
        self.models = models
    else:
        self.models = Models(models)

    # `f` is only set when given; nothing is assigned otherwise.
    if f is not None:
        self.f = f

    # Dict-encoded frames are parsed with the "split" orientation. The JSON text
    # is wrapped in StringIO because passing a literal JSON string to
    # `pd.read_json` is deprecated in recent pandas versions.
    if isinstance(data, dict):
        data = pd.read_json(StringIO(json.dumps(data)), orient="split")

    if isinstance(optima, dict):
        optima = pd.read_json(StringIO(json.dumps(optima)), orient="split")

    self.set_data(data)
    self.set_optima(optima)
    self.check_problem()
    self.check_models()

add_data(self, data)

Add a number of data points.

Source code in opti/problem.py
def add_data(self, data: pd.DataFrame) -> None:
    """Validate new data points and append them to the stored data.

    Args:
        data: Rows to add; they are checked with `self.check_data` first.
    """
    self.check_data(data)
    frames = [self.data, data]
    self.data = pd.concat(frames, axis=0)

check_data(self, data)

Check if data is consistent with input and output parameters.

Source code in opti/problem.py
def check_data(self, data: pd.DataFrame) -> None:
    """Validate a dataframe against the input and output parameters.

    Args:
        data: Dataframe to validate.

    Raises:
        ValueError: If a parameter column is missing, a continuous / discrete
            column is non-numeric (and not entirely null), a categorical column
            contains a value rejected by the parameter, an input column has
            missing values, or an output column is entirely null.
    """
    for param in self.inputs + self.outputs:
        label = param.name

        # every parameter needs a column
        if label not in data.columns:
            raise ValueError(
                f"Parameter {label} is missing. Data must contain all parameters."
            )
        column = data[label]

        if isinstance(param, (Continuous, Discrete)):
            # numeric dtype required, unless the column holds no values at all
            if not is_numeric_dtype(column) and not column.isnull().all():
                raise ValueError(
                    f"Parameter {label} contains non-numeric values. Data for continuous / discrete parameters must be numeric."
                )
        elif isinstance(param, Categorical):
            # each non-null value has to be a declared level
            known = param.contains(column) | column.isna()
            if not known.all():
                unknowns = column[~known].unique().tolist()
                raise ValueError(
                    f"Data for parameter {label} contains unknown values: {unknowns}. All categorical levels must be specified."
                )

    # no gaps allowed in the inputs
    for param in self.inputs:
        if data[param.name].isnull().any():
            raise ValueError(
                f"Input parameter {param.name} has missing data. Inputs must be complete."
            )

    # each output needs one or more observed values
    for param in self.outputs:
        if data[param.name].isnull().all():
            raise ValueError(
                f"Output parameter {param.name} has no data. Outputs must have at least one observation."
            )

check_models(self)

Check if the models are well defined

Source code in opti/problem.py
def check_models(self) -> None:
    """Validate the models against the problem's outputs and inputs.

    Does nothing when no models are set.

    Raises:
        ValueError: If a model refers to a name that is not an output, or a
            linear model's coefficient count differs from the number of inputs.
    """
    if self.models is None:
        return

    for model in self.models:
        # every name a model predicts has to be an output parameter
        if any(n not in self.outputs.names for n in model.names):
            raise ValueError(f"Model {model} refers to unknown outputs")

        # linear models carry one coefficient per input
        if isinstance(model, LinearModel) and (
            len(model.coefficients) != self.n_inputs
        ):
            raise ValueError(f"Model {model} has wrong number of coefficients.")

check_problem(self)

Check if input and output parameters are consistent.

Source code in opti/problem.py
def check_problem(self) -> None:
    """Validate that inputs, outputs and objectives fit together.

    Raises:
        ValueError: If a name is used for both an input and an output, or an
            objective refers to a name that is not an output.
    """
    output_names = self.outputs.names

    # a name may not be both an input and an output
    shared = set(self.inputs.names).intersection(output_names)
    if shared:
        raise ValueError(f"Parameter name in both inputs and outputs: {shared}")

    # each objective has to point at an output
    for objective in self.objectives:
        target = objective.name
        if target not in output_names:
            raise ValueError(f"Objective refers to unknown parameter: {target}")

create_initial_data(self, n_samples=10)

Create an initial data set for benchmark problems by sampling uniformly from the input space and evaluating f(x) at the sampled inputs.

Source code in opti/problem.py
def create_initial_data(self, n_samples: int = 10) -> None:
    """Create an initial data set for benchmark problems by sampling uniformly from the input space and evaluating f(x) at the sampled inputs.

    Args:
        n_samples: Number of input points to sample.

    Raises:
        NotImplementedError: If the problem has no function `f`.
    """
    # The attribute `f` may not exist at all on the instance, so look it up
    # defensively; a plain `self.f` would raise AttributeError instead of the
    # intended NotImplementedError.
    f = getattr(self, "f", None)
    if f is None:
        raise NotImplementedError("problem.f is not implemented for the problem.")
    X = self.sample_inputs(n_samples)
    Y = f(X)
    # inputs and evaluated outputs side by side, one row per sample
    self.data = pd.concat([X, Y], axis=1)

from_config(config) staticmethod

Create a Problem instance from a configuration dict.

Source code in opti/problem.py
@staticmethod
def from_config(config: dict) -> "Problem":
    """Build a Problem by passing the entries of `config` as keyword arguments."""
    problem = Problem(**config)
    return problem

from_json(fname) staticmethod

Read a problem from a JSON file.

Source code in opti/problem.py
@staticmethod
def from_json(fname: PathLike) -> "Problem":
    """Load a problem from the JSON configuration stored in file `fname`."""
    with open(fname, "rb") as infile:
        raw = infile.read()
    config = json.loads(raw)
    return Problem(**config)

get_X(self, data=None)

Return the input values in data or self.data.

Source code in opti/problem.py
def get_X(self, data: Optional[pd.DataFrame] = None) -> np.ndarray:
    """Return the input columns of `data` (or of the stored data) as an array.

    Args:
        data (optional): Dataframe to read from instead of the problem's data.
    """
    frame = data if data is not None else self.get_data()
    return frame[self.inputs.names].values

get_XY(self, outputs=None, data=None, continuous='none', discrete='none', categorical='none')

Return the input and output values as numeric numpy arrays.

Rows with missing output values will be dropped. Input values are assumed to be complete. Inputs are transformed according to the continuous, discrete and categorical arguments; output values are returned as stored.

Parameters:

Name Type Description Default
outputs optional

Subset of the outputs to consider.

None
data optional

Dataframe to consider instead of problem.data

None
Source code in opti/problem.py
def get_XY(
    self,
    outputs: Optional[List[str]] = None,
    data: Optional[pd.DataFrame] = None,
    continuous: str = "none",
    discrete: str = "none",
    categorical: str = "none",
) -> Tuple[np.ndarray, np.ndarray]:
    """Return transformed inputs and selected outputs as numpy arrays.

    Rows in which any of the selected outputs is missing are dropped.
    Input values are assumed to be complete.
    Inputs are passed through `self.inputs.transform` with the given
    `continuous`, `discrete` and `categorical` settings; output values are
    returned as stored in the dataframe.

    Args:
        outputs (optional): Subset of the outputs to consider.
        data (optional): Dataframe to consider instead of problem.data
        continuous: Forwarded to `self.inputs.transform`.
        discrete: Forwarded to `self.inputs.transform`.
        categorical: Forwarded to `self.inputs.transform`.

    Returns:
        Tuple `(X, Y)` of input and output arrays.
    """
    selected = self.outputs.names if outputs is None else outputs
    frame = self.get_data() if data is None else data

    # keep only the rows where every selected output has a value
    targets = frame[selected]
    complete = targets.notna().all(axis=1)

    encoded = self.inputs.transform(
        frame, continuous=continuous, discrete=discrete, categorical=categorical
    )
    X = encoded[complete].values
    Y = targets[complete].values
    return X, Y

get_X_bounds(self)

Return the lower and upper data bounds.

Source code in opti/problem.py
def get_X_bounds(self) -> Tuple[np.ndarray, np.ndarray]:
    """Return the column-wise minimum and maximum of the stored input values.

    Where minimum and maximum coincide, the upper bound is raised by one so
    that `xhi - xlo` is never zero.
    """
    inputs = self.get_X()
    xlo, xhi = inputs.min(axis=0), inputs.max(axis=0)
    degenerate = xhi == xlo
    # widen zero-width ranges so a later division by (xhi - xlo) is safe
    xhi[degenerate] = xhi[degenerate] + 1
    return xlo, xhi

get_Y(self, data=None)

Return the output values in data or self.data.

Source code in opti/problem.py
def get_Y(self, data: Optional[pd.DataFrame] = None) -> np.ndarray:
    """Return the output columns of `data` (or of the stored data) as an array.

    Args:
        data (optional): Dataframe to read from instead of the problem's data.
    """
    frame = data if data is not None else self.get_data()
    return frame[self.outputs.names].values

get_data(self)

Return self.data if it exists or an empty dataframe.

Source code in opti/problem.py
def get_data(self) -> pd.DataFrame:
    """Return the stored data, or an empty dataframe with all parameter columns."""
    if self.data is not None:
        return self.data
    columns = self.inputs.names + self.outputs.names
    return pd.DataFrame(columns=columns)

sample_inputs(self, n_samples=10)

Uniformly sample points from the input space subject to the constraints.

Source code in opti/problem.py
def sample_inputs(self, n_samples=10) -> pd.DataFrame:
    """Sample `n_samples` points from the input space, honoring any constraints.

    Uses `constrained_sampling` when constraints are set and `sobol_sampling`
    otherwise.
    """
    if self.constraints is not None:
        return constrained_sampling(n_samples, self.inputs, self.constraints)
    return sobol_sampling(n_samples, self.inputs)

set_data(self, data)

Set the data.

Source code in opti/problem.py
def set_data(self, data: Optional[pd.DataFrame]) -> None:
    """Set the data.

    Values of categorical inputs are converted to strings (nulls are kept).
    When such a conversion is needed it is done on a copy, so the dataframe
    passed by the caller is left untouched.

    Args:
        data: Dataframe containing all inputs and outputs, or None to clear.

    Raises:
        ValueError: If the data fails `self.check_data`.
    """
    if data is not None:
        categorical_names = [
            p.name for p in self.inputs if isinstance(p, Categorical)
        ]
        if categorical_names:
            # Categorical levels are required to be strings. Ensure that the
            # corresponding data is as well, without mutating the caller's frame.
            data = data.copy()
            for col in categorical_names:
                nulls = data[col].isna()
                data[col] = data[col].astype(str).mask(nulls, np.nan)

        self.check_data(data)

    self.data = data

set_optima(self, optima)

Set the optima / Pareto front.

Source code in opti/problem.py
def set_optima(self, optima: Optional[pd.DataFrame]) -> None:
    """Store the optima / Pareto front after validating it.

    Args:
        optima: Dataframe checked with `self.check_data`, or None to clear.
    """
    has_optima = optima is not None
    if has_optima:
        self.check_data(optima)
    self.optima = optima

to_config(self)

Return json-serializable configuration dict.

Source code in opti/problem.py
def to_config(self) -> dict:
    """Return the problem as a json-serializable configuration dict.

    Name, inputs, outputs and objectives are always present. Output
    constraints, constraints, models, data and optima are added only when
    set; dataframes are stored in "split" orientation with NaN replaced by None.
    """
    config = {
        "name": self.name,
        "inputs": self.inputs.to_config(),
        "outputs": self.outputs.to_config(),
        "objectives": self.objectives.to_config(),
    }

    # optional components that know how to serialize themselves
    for key in ("output_constraints", "constraints", "models"):
        component = getattr(self, key)
        if component is not None:
            config[key] = component.to_config()

    # optional dataframes
    for key in ("data", "optima"):
        frame = getattr(self, key)
        if frame is not None:
            config[key] = frame.replace({np.nan: None}).to_dict("split")

    return config

to_json(self, fname)

Save a problem to a JSON file.

Source code in opti/problem.py
def to_json(self, fname: PathLike) -> None:
    """Save the problem's configuration to a JSON file.

    The output is compact (no whitespace after separators) and UTF-8 encoded,
    with non-ASCII characters written as-is.
    """
    with open(fname, "wb") as outfile:
        text = json.dumps(
            self.to_config(),
            ensure_ascii=False,
            separators=(",", ":"),
        )
        payload = text.encode("utf-8")
        outfile.write(payload)

read_json(filepath)

Read a problem specification from a JSON file.

Source code in opti/problem.py
def read_json(filepath: PathLike) -> Problem:
    """Load a problem specification from the JSON file at `filepath`."""
    problem = Problem.from_json(filepath)
    return problem