Source code for pymc.func_utils

#   Copyright 2023 The PyMC Developers
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
from typing import Callable, Dict, Optional, Union

import numpy as np
import pytensor.tensor as pt

from pytensor.gradient import NullTypeGradError
from scipy import optimize

import pymc as pm

__all__ = ["find_constrained_prior"]


def find_constrained_prior(
    distribution: pm.Distribution,
    lower: float,
    upper: float,
    init_guess: Dict[str, float],
    mass: float = 0.95,
    fixed_params: Optional[Dict[str, float]] = None,
    mass_below_lower: Optional[float] = None,
    **kwargs,
) -> Dict[str, float]:
    """
    Find optimal parameters of a :ref:`distribution <api_distributions>` that
    place a probability mass of ``mass`` between ``lower`` and ``upper``.

    Note: this only works for one- and two-parameter distributions, as there
    are exactly two constraints. Fix some combination of parameters
    if you want to use it on distributions with three or more parameters.

    Parameters
    ----------
    distribution : Distribution
        PyMC distribution you want to set a prior on.
        Needs to have a ``logcdf`` method implemented in PyMC.
    lower : float
        Lower bound of the interval that should contain ``mass`` of the
        probability of ``distribution``.
    upper : float
        Upper bound of the interval that should contain ``mass`` of the
        probability of ``distribution``.
    init_guess : dict of {str : float}
        Initial guess for ``scipy.optimize.minimize`` to find the
        optimal parameters of ``distribution`` fitting the interval constraint.
        Must be a dictionary with the name of the PyMC distribution's
        parameter as keys and the initial guess as values.
    mass : float, default 0.95
        Share of the probability mass we want between ``lower`` and ``upper``.
        Defaults to 0.95 (95%).
    fixed_params : dict of {str : float}, optional, default None
        Only used when ``distribution`` has at least three parameters.
        Dictionary of fixed parameters, so that there are only two
        left to optimize. For instance, for a StudentT, you fix ``nu``
        to a constant and get the optimized ``mu`` and ``sigma``.
    mass_below_lower : float, optional, default None
        The probability mass below the ``lower`` bound. If ``None``,
        defaults to ``(1 - mass) / 2``, which implies that the probability
        mass below the ``lower`` value will be equal to the probability
        mass above the ``upper`` value.

    Returns
    -------
    opt_params : dict
        The optimized distribution parameters as a dictionary.
        Dictionary keys are the parameter names and
        dictionary values are the optimized parameter values.

    Notes
    -----
    Any optional keyword arguments passed to ``find_constrained_prior`` are
    delivered to the underlying call to
    :external:py:func:`scipy.optimize.minimize`.

    Examples
    --------
    .. code-block:: python

        # get parameters obeying constraints
        opt_params = pm.find_constrained_prior(
            pm.Gamma, lower=0.1, upper=0.4, mass=0.75, init_guess={"alpha": 1, "beta": 10}
        )

        # use these parameters to draw random samples
        samples = pm.Gamma.dist(**opt_params, size=100).eval()

        # use these parameters in a model
        with pm.Model():
            x = pm.Gamma("x", **opt_params)

        # specify fixed values before optimization
        opt_params = pm.find_constrained_prior(
            pm.StudentT,
            lower=0,
            upper=1,
            init_guess={"mu": 5, "sigma": 2},
            fixed_params={"nu": 7},
        )

    Under some circumstances, you might not want to have the same cumulative
    probability below the ``lower`` threshold and above the ``upper`` threshold.
    For example, you might want to constrain an Exponential distribution to
    find the parameter that yields 90% of the mass below the ``upper`` bound,
    and have zero mass below ``lower``. You can do that with the following call
    to ``find_constrained_prior``:

    .. code-block:: python

        opt_params = pm.find_constrained_prior(
            pm.Exponential,
            lower=0,
            upper=3.0,
            mass=0.9,
            init_guess={"lam": 1},
            mass_below_lower=0,
        )
    """
    assert 0.01 <= mass <= 0.99, (
        "This function optimizes the mass of the given distribution +/- "
        f"1%, so `mass` has to be between 0.01 and 0.99. You provided {mass}."
    )
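
    # Overview: writing F for the CDF of the candidate distribution, the code
    # below minimizes (F(lower) - mass_below_lower) ** 2 subject to the
    # equality constraint F(upper) - F(lower) == mass, which places the
    # requested probability mass inside [lower, upper].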

    if mass_below_lower is None:
        mass_below_lower = (1 - mass) / 2

    # exit when any parameter is not scalar:
    if np.any(np.asarray(distribution.rv_op.ndims_params) != 0):
        raise NotImplementedError(
            "`pm.find_constrained_prior` does not work with non-scalar parameters yet.\n"
            "Feel free to open a pull request on PyMC repo if you really need this feature."
        )

    dist_params = pt.vector("dist_params")
    params_to_optim = {
        arg_name: dist_params[i]
        for arg_name, i in zip(init_guess.keys(), range(len(init_guess)))
    }
    if fixed_params is not None:
        params_to_optim.update(fixed_params)

    dist = distribution.dist(**params_to_optim)

    try:
        logcdf_lower = pm.logcdf(dist, pm.floatX(lower))
        logcdf_upper = pm.logcdf(dist, pm.floatX(upper))
    except AttributeError:
        raise AttributeError(
            f"You cannot use `find_constrained_prior` with {distribution} -- it doesn't have a logcdf "
            "method yet.\nOpen an issue or, even better, a pull request on PyMC repo if you really "
            "need it."
        )

    target = (pt.exp(logcdf_lower) - mass_below_lower) ** 2
    target_fn = pm.pytensorf.compile_pymc([dist_params], target, allow_input_downcast=True)
    constraint = pt.exp(logcdf_upper) - pt.exp(logcdf_lower)
    constraint_fn = pm.pytensorf.compile_pymc([dist_params], constraint, allow_input_downcast=True)

    jac: Union[str, Callable]
    constraint_jac: Union[str, Callable]
    try:
        pytensor_jac = pm.gradient(target, [dist_params])
        jac = pm.pytensorf.compile_pymc([dist_params], pytensor_jac, allow_input_downcast=True)
        pytensor_constraint_jac = pm.gradient(constraint, [dist_params])
        constraint_jac = pm.pytensorf.compile_pymc(
            [dist_params], pytensor_constraint_jac, allow_input_downcast=True
        )
    # when PyMC cannot compute the gradient
    except (NotImplementedError, NullTypeGradError):
        jac = "2-point"
        constraint_jac = "2-point"

    cons = optimize.NonlinearConstraint(constraint_fn, lb=mass, ub=mass, jac=constraint_jac)
    opt = optimize.minimize(
        target_fn, x0=list(init_guess.values()), jac=jac, constraints=cons, **kwargs
    )
    if not opt.success:
        raise ValueError(
            f"Optimization of parameters failed.\nOptimization termination details:\n{opt}"
        )

    # save optimal parameters
    opt_params = {
        param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
    }
    if fixed_params is not None:
        opt_params.update(fixed_params)
    return opt_params
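
A quick illustration of the keyword pass-through described in the Notes above
(a usage sketch, not part of the module): any extra keyword arguments given to
``find_constrained_prior`` are forwarded to ``scipy.optimize.minimize``, so
standard scipy options such as ``method`` and ``tol`` can be set from the call
site.

.. code-block:: python

    import pymc as pm

    # Same Gamma example as in the docstring, but forwarding solver options;
    # "SLSQP" handles the nonlinear equality constraint built above and is
    # typically what scipy picks by default for constrained problems anyway.
    opt_params = pm.find_constrained_prior(
        pm.Gamma,
        lower=0.1,
        upper=0.4,
        mass=0.75,
        init_guess={"alpha": 1, "beta": 10},
        method="SLSQP",  # passed through to scipy.optimize.minimize
        tol=1e-8,  # passed through to scipy.optimize.minimize
    )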