# Copyright 2024 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import numpy as np
import pytensor.tensor as pt
from pytensor.tensor import TensorConstant
from pytensor.tensor.random.basic import (
ScipyRandomVariable,
bernoulli,
betabinom,
binomial,
categorical,
geometric,
hypergeometric,
nbinom,
poisson,
uniform,
)
from pytensor.tensor.random.utils import normalize_size_param
from scipy import stats
import pymc as pm
from pymc.distributions.dist_math import (
betaln,
binomln,
check_icdf_parameters,
check_icdf_value,
check_parameters,
factln,
log_diff_normal_cdf,
logpow,
normal_lccdf,
normal_lcdf,
)
from pymc.distributions.distribution import Discrete, SymbolicRandomVariable
from pymc.distributions.shape_utils import implicit_size_from_params, rv_size_is_none
from pymc.logprob.basic import logcdf, logp
from pymc.math import sigmoid
from pymc.pytensorf import normalize_rng_param

__all__ = [
    "Binomial",
    "BetaBinomial",
    "Bernoulli",
    "DiscreteWeibull",
    "Poisson",
    "NegativeBinomial",
    "DiscreteUniform",
    "Geometric",
    "HyperGeometric",
    "Categorical",
    "OrderedLogistic",
    "OrderedProbit",
]
class Binomial(Discrete):
R"""
Binomial log-likelihood.
The discrete probability distribution of the number of successes
in a sequence of n independent yes/no experiments, each of which
yields success with probability p.
The pmf of this distribution is
.. math:: f(x \mid n, p) = \binom{n}{x} p^x (1-p)^{n-x}
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import arviz as az
plt.style.use('arviz-darkgrid')
x = np.arange(0, 22)
ns = [10, 17]
ps = [0.5, 0.7]
for n, p in zip(ns, ps):
pmf = st.binom.pmf(x, n, p)
plt.plot(x, pmf, '-o', label='n = {}, p = {}'.format(n, p))
plt.xlabel('x', fontsize=14)
plt.ylabel('f(x)', fontsize=14)
plt.legend(loc=1)
plt.show()
======== ==========================================
Support :math:`x \in \{0, 1, \ldots, n\}`
Mean :math:`n p`
Variance :math:`n p (1 - p)`
======== ==========================================
Parameters
----------
n : tensor_like of int
Number of Bernoulli trials (n >= 0).
p : tensor_like of float
Probability of success in each trial (0 < p < 1).
logit_p : tensor_like of float
Alternative log odds for the probability of success.
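    Examples
    --------
    A minimal usage sketch (the parameter values are illustrative):

    .. code-block:: python

        import pymc as pm

        with pm.Model():
            x = pm.Binomial("x", n=10, p=0.5)
            y = pm.Binomial("y", n=10, logit_p=0.0)  # logit_p=0.0 corresponds to p=0.5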
"""
rv_op = binomial
@classmethod
def dist(cls, n, p=None, logit_p=None, *args, **kwargs):
if p is not None and logit_p is not None:
raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.")
elif p is None and logit_p is None:
raise ValueError("Incompatible parametrization. Must specify either p or logit_p.")
if logit_p is not None:
p = pt.sigmoid(logit_p)
n = pt.as_tensor_variable(n, dtype=int)
p = pt.as_tensor_variable(p)
return super().dist([n, p], **kwargs)
def support_point(rv, size, n, p):
mean = pt.round(n * p)
if not rv_size_is_none(size):
mean = pt.full(size, mean)
return mean
def logp(value, n, p):
res = pt.switch(
pt.or_(pt.lt(value, 0), pt.gt(value, n)),
-np.inf,
binomln(n, value) + logpow(p, value) + logpow(1 - p, n - value),
)
return check_parameters(
res,
n >= 0,
0 <= p,
p <= 1,
msg="n >= 0, 0 <= p <= 1",
)
def logcdf(value, n, p):
value = pt.floor(value)
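        # Binomial CDF via the regularized incomplete beta function:
        # P(X <= k) = I_{1 - p}(n - k, k + 1), computed by pt.betainc below.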
res = pt.switch(
pt.lt(value, 0),
-np.inf,
pt.switch(
pt.lt(value, n),
pt.log(pt.betainc(n - value, value + 1, 1 - p)),
0,
),
)
return check_parameters(
res,
n >= 0,
0 <= p,
p <= 1,
msg="n >= 0, 0 <= p <= 1",
)
class BetaBinomial(Discrete):
R"""
Beta-binomial log-likelihood.
Equivalent to binomial random variable with success probability
drawn from a beta distribution.
The pmf of this distribution is
.. math::
f(x \mid \alpha, \beta, n) =
\binom{n}{x}
\frac{B(x + \alpha, n - x + \beta)}{B(\alpha, \beta)}
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
from scipy import special
import arviz as az
plt.style.use('arviz-darkgrid')
def BetaBinom(a, b, n, x):
pmf = special.binom(n, x) * (special.beta(x+a, n-x+b) / special.beta(a, b))
return pmf
x = np.arange(0, 11)
alphas = [0.5, 1, 2.3]
betas = [0.5, 1, 2]
n = 10
for a, b in zip(alphas, betas):
pmf = BetaBinom(a, b, n, x)
plt.plot(x, pmf, '-o', label=r'$\alpha$ = {}, $\beta$ = {}, n = {}'.format(a, b, n))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.ylim(0)
plt.legend(loc=9)
plt.show()
======== =================================================================
Support :math:`x \in \{0, 1, \ldots, n\}`
Mean :math:`n \dfrac{\alpha}{\alpha + \beta}`
Variance :math:`\dfrac{n \alpha \beta (\alpha+\beta+n)}{(\alpha+\beta)^2 (\alpha+\beta+1)}`
======== =================================================================
Parameters
----------
n : tensor_like of int
Number of Bernoulli trials (n >= 0).
alpha : tensor_like of float
alpha > 0.
beta : tensor_like of float
beta > 0.
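    Examples
    --------
    A minimal usage sketch (the parameter values are illustrative):

    .. code-block:: python

        import pymc as pm

        with pm.Model():
            x = pm.BetaBinomial("x", alpha=2.0, beta=2.0, n=10)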
"""
rv_op = betabinom
@classmethod
def dist(cls, alpha, beta, n, *args, **kwargs):
alpha = pt.as_tensor_variable(alpha)
beta = pt.as_tensor_variable(beta)
n = pt.as_tensor_variable(n, dtype=int)
return super().dist([n, alpha, beta], **kwargs)
def support_point(rv, size, n, alpha, beta):
mean = pt.round((n * alpha) / (alpha + beta))
if not rv_size_is_none(size):
mean = pt.full(size, mean)
return mean
def logp(value, n, alpha, beta):
res = pt.switch(
pt.or_(pt.lt(value, 0), pt.gt(value, n)),
-np.inf,
binomln(n, value) + betaln(value + alpha, n - value + beta) - betaln(alpha, beta),
)
return check_parameters(
res,
n >= 0,
alpha > 0,
beta > 0,
msg="n >= 0, alpha > 0, beta > 0",
)
def logcdf(value, n, alpha, beta):
# logcdf can only handle scalar values at the moment
if np.ndim(value):
raise TypeError(
f"BetaBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object."
)
safe_lower = pt.switch(pt.lt(value, 0), value, 0)
res = pt.switch(
pt.lt(value, 0),
-np.inf,
pt.switch(
pt.lt(value, n),
pt.logsumexp(
logp(
BetaBinomial.dist(alpha=alpha, beta=beta, n=n),
pt.arange(safe_lower, value + 1),
),
keepdims=False,
),
0,
),
)
return check_parameters(
res,
n >= 0,
alpha > 0,
beta > 0,
msg="n >= 0, alpha > 0, beta > 0",
)
class Bernoulli(Discrete):
R"""Bernoulli log-likelihood.
The Bernoulli distribution describes the probability of successes
(x=1) and failures (x=0).
The pmf of this distribution is
.. math:: f(x \mid p) = p^{x} (1-p)^{1-x}
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import arviz as az
plt.style.use('arviz-darkgrid')
x = [0, 1]
for p in [0, 0.5, 0.8]:
pmf = st.bernoulli.pmf(x, p)
plt.plot(x, pmf, '-o', label='p = {}'.format(p))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.ylim(0)
plt.legend(loc=9)
plt.show()
======== ======================
Support :math:`x \in \{0, 1\}`
Mean :math:`p`
Variance :math:`p (1 - p)`
======== ======================
    The Bernoulli distribution can be parametrized either in terms of p or logit_p.
    The link between the parametrizations is given by
    .. math:: \operatorname{logit}(p) = \ln\left(\frac{p}{1-p}\right)
Parameters
----------
p : tensor_like of float
Probability of success (0 < p < 1).
logit_p : tensor_like of float
Alternative log odds for the probability of success.
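    Examples
    --------
    A minimal usage sketch (the parameter values are illustrative):

    .. code-block:: python

        import pymc as pm

        with pm.Model():
            x = pm.Bernoulli("x", p=0.7)
            y = pm.Bernoulli("y", logit_p=0.85)  # roughly equivalent to p ~= 0.7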
"""
rv_op = bernoulli
@classmethod
def dist(cls, p=None, logit_p=None, *args, **kwargs):
if p is not None and logit_p is not None:
raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.")
elif p is None and logit_p is None:
raise ValueError("Incompatible parametrization. Must specify either p or logit_p.")
if logit_p is not None:
p = pt.sigmoid(logit_p)
p = pt.as_tensor_variable(p)
return super().dist([p], **kwargs)
def support_point(rv, size, p):
if not rv_size_is_none(size):
p = pt.full(size, p)
return pt.switch(p < 0.5, 0, 1)
def logp(value, p):
res = pt.switch(
pt.or_(pt.lt(value, 0), pt.gt(value, 1)),
-np.inf,
pt.switch(value, pt.log(p), pt.log1p(-p)),
)
return check_parameters(
res,
0 <= p,
p <= 1,
msg="0 <= p <= 1",
)
def logcdf(value, p):
res = pt.switch(
pt.lt(value, 0),
-np.inf,
pt.switch(
pt.lt(value, 1),
pt.log1p(-p),
0,
),
)
return check_parameters(
res,
0 <= p,
p <= 1,
msg="0 <= p <= 1",
)
class DiscreteWeibullRV(SymbolicRandomVariable):
name = "discrete_weibull"
extended_signature = "[rng],[size],(),()->[rng],()"
_print_name = ("dWeibull", "\\operatorname{dWeibull}")
@classmethod
def rv_op(cls, q, beta, *, size=None, rng=None):
q = pt.as_tensor(q)
beta = pt.as_tensor(beta)
rng = normalize_rng_param(rng)
size = normalize_size_param(size)
if rv_size_is_none(size):
size = implicit_size_from_params(q, beta, ndims_params=cls.ndims_params)
next_rng, p = uniform(size=size, rng=rng).owner.outputs
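        # Inverse-CDF sampling: with F(x) = 1 - q**((x + 1)**beta) and u ~ Uniform(0, 1),
        # the draw is ceil((log(1 - u) / log(q))**(1 / beta)) - 1.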
draws = pt.ceil(pt.power(pt.log(1 - p) / pt.log(q), 1.0 / beta)) - 1
draws = draws.astype("int64")
return cls(inputs=[rng, size, q, beta], outputs=[next_rng, draws])(rng, size, q, beta)
class DiscreteWeibull(Discrete):
R"""Discrete Weibull log-likelihood.
The discrete Weibull distribution is a flexible model of count data that
can handle both over- and under-dispersion.
The pmf of this distribution is
.. math:: f(x \mid q, \beta) = q^{x^{\beta}} - q^{(x + 1)^{\beta}}
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
from scipy import special
import arviz as az
plt.style.use('arviz-darkgrid')
def DiscreteWeibull(q, b, x):
return q**(x**b) - q**((x + 1)**b)
x = np.arange(0, 10)
qs = [0.1, 0.9, 0.9]
betas = [0.3, 1.3, 3]
for q, b in zip(qs, betas):
pmf = DiscreteWeibull(q, b, x)
plt.plot(x, pmf, '-o', label=r'q = {}, $\beta$ = {}'.format(q, b))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.ylim(0)
plt.legend(loc=1)
plt.show()
======== ======================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`\mu = \sum_{x = 1}^{\infty} q^{x^{\beta}}`
Variance :math:`2 \sum_{x = 1}^{\infty} x q^{x^{\beta}} - \mu - \mu^2`
======== ======================
Parameters
----------
q : tensor_like of float
Shape parameter (0 < q < 1).
beta : tensor_like of float
Shape parameter (beta > 0).
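    Examples
    --------
    A minimal usage sketch (the parameter values are illustrative):

    .. code-block:: python

        import pymc as pm

        with pm.Model():
            x = pm.DiscreteWeibull("x", q=0.7, beta=1.5)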
"""
rv_type = DiscreteWeibullRV
rv_op = DiscreteWeibullRV.rv_op
@classmethod
def dist(cls, q, beta, *args, **kwargs):
return super().dist([q, beta], **kwargs)
def support_point(rv, size, q, beta):
median = pt.power(pt.log(0.5) / pt.log(q), 1 / beta) - 1
if not rv_size_is_none(size):
median = pt.full(size, median)
return median
def logp(value, q, beta):
res = pt.switch(
pt.lt(value, 0),
-np.inf,
pt.log(pt.power(q, pt.power(value, beta)) - pt.power(q, pt.power(value + 1, beta))),
)
return check_parameters(
res,
0 < q,
q < 1,
beta > 0,
msg="0 < q < 1, beta > 0",
)
def logcdf(value, q, beta):
res = pt.switch(
pt.lt(value, 0),
-np.inf,
pt.log1p(-pt.power(q, pt.power(value + 1, beta))),
)
return check_parameters(
res,
0 < q,
q < 1,
beta > 0,
msg="0 < q < 1, beta > 0",
)
class Poisson(Discrete):
R"""
Poisson log-likelihood.
Often used to model the number of events occurring in a fixed period
of time when the times at which events occur are independent.
The pmf of this distribution is
.. math:: f(x \mid \mu) = \frac{e^{-\mu}\mu^x}{x!}
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import arviz as az
plt.style.use('arviz-darkgrid')
x = np.arange(0, 15)
for m in [0.5, 3, 8]:
pmf = st.poisson.pmf(x, m)
            plt.plot(x, pmf, '-o', label=r'$\mu$ = {}'.format(m))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.ylim(0)
plt.legend(loc=1)
plt.show()
======== ==========================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`\mu`
Variance :math:`\mu`
======== ==========================
Parameters
----------
mu : tensor_like of float
Expected number of occurrences during the given interval
(mu >= 0).
Notes
-----
The Poisson distribution can be derived as a limiting case of the
binomial distribution.
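    Examples
    --------
    A minimal usage sketch (the parameter value is illustrative):

    .. code-block:: python

        import pymc as pm

        with pm.Model():
            x = pm.Poisson("x", mu=3.0)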
"""
rv_op = poisson
@classmethod
def dist(cls, mu, *args, **kwargs):
mu = pt.as_tensor_variable(mu)
return super().dist([mu], *args, **kwargs)
def support_point(rv, size, mu):
mu = pt.floor(mu)
if not rv_size_is_none(size):
mu = pt.full(size, mu)
return mu
def logp(value, mu):
res = pt.switch(
pt.lt(value, 0),
-np.inf,
logpow(mu, value) - factln(value) - mu,
)
# Return zero when mu and value are both zero
res = pt.switch(
pt.eq(mu, 0) * pt.eq(value, 0),
0,
res,
)
return check_parameters(
res,
mu >= 0,
msg="mu >= 0",
)
def logcdf(value, mu):
value = pt.floor(value)
# Avoid C-assertion when the gammaincc function is called with invalid values (#4340)
safe_mu = pt.switch(pt.lt(mu, 0), 0, mu)
safe_value = pt.switch(pt.lt(value, 0), 0, value)
res = pt.switch(
pt.lt(value, 0),
-np.inf,
pt.log(pt.gammaincc(safe_value + 1, safe_mu)),
)
return check_parameters(
res,
mu >= 0,
msg="mu >= 0",
)
class NegativeBinomial(Discrete):
R"""
Negative binomial log-likelihood.
The negative binomial distribution describes a Poisson random variable
whose rate parameter is gamma distributed.
Its pmf, parametrized by the parameters alpha and mu of the gamma distribution, is
.. math::
f(x \mid \mu, \alpha) =
\binom{x + \alpha - 1}{x}
(\alpha/(\mu+\alpha))^\alpha (\mu/(\mu+\alpha))^x
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
from scipy import special
import arviz as az
plt.style.use('arviz-darkgrid')
def NegBinom(a, m, x):
pmf = special.binom(x + a - 1, x) * (a / (m + a))**a * (m / (m + a))**x
return pmf
x = np.arange(0, 22)
alphas = [0.9, 2, 4]
mus = [1, 2, 8]
for a, m in zip(alphas, mus):
pmf = NegBinom(a, m, x)
plt.plot(x, pmf, '-o', label=r'$\alpha$ = {}, $\mu$ = {}'.format(a, m))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.legend(loc=1)
plt.show()
======== ==================================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`\mu`
Variance :math:`\frac{\mu^2}{\alpha} + \mu`
======== ==================================
The negative binomial distribution can be parametrized either in terms of mu or p,
and either in terms of alpha or n. The link between the parametrizations is given by
.. math::
p &= \frac{\alpha}{\mu + \alpha} \\
n &= \alpha
    If it is parametrized in terms of n and p, the negative binomial describes the probability of observing x failures
    before the n-th success, given a probability p of success in each trial. Its pmf is
.. math::
f(x \mid n, p) =
\binom{x + n - 1}{x}
(p)^n (1 - p)^x
Parameters
----------
alpha : tensor_like of float
Gamma distribution shape parameter (alpha > 0).
mu : tensor_like of float
Gamma distribution mean (mu > 0).
p : tensor_like of float
Alternative probability of success in each trial (0 < p < 1).
    n : tensor_like of float
        Alternative number of target success trials (n > 0).
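    Examples
    --------
    A minimal sketch of the two parametrizations (the parameter values are illustrative):

    .. code-block:: python

        import pymc as pm

        with pm.Model():
            x = pm.NegativeBinomial("x", mu=2.0, alpha=1.5)
            y = pm.NegativeBinomial("y", n=5, p=0.4)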
"""
rv_op = nbinom
@classmethod
def dist(cls, mu=None, alpha=None, p=None, n=None, *args, **kwargs):
n, p = cls.get_n_p(mu=mu, alpha=alpha, p=p, n=n)
n = pt.as_tensor_variable(n)
p = pt.as_tensor_variable(p)
return super().dist([n, p], *args, **kwargs)
@classmethod
def get_n_p(cls, mu=None, alpha=None, p=None, n=None):
if n is None:
if alpha is not None:
n = alpha
else:
raise ValueError("Incompatible parametrization. Must specify either alpha or n.")
elif alpha is not None:
raise ValueError("Incompatible parametrization. Can't specify both alpha and n.")
if p is None:
if mu is not None:
p = n / (mu + n)
else:
raise ValueError("Incompatible parametrization. Must specify either mu or p.")
elif mu is not None:
raise ValueError("Incompatible parametrization. Can't specify both mu and p.")
return n, p
def support_point(rv, size, n, p):
mu = pt.floor(n * (1 - p) / p)
if not rv_size_is_none(size):
mu = pt.full(size, mu)
return mu
def logp(value, n, p):
alpha = n
mu = alpha * (1 - p) / p
res = pt.switch(
pt.lt(value, 0),
-np.inf,
(
binomln(value + alpha - 1, value)
+ logpow(mu / (mu + alpha), value)
+ logpow(alpha / (mu + alpha), alpha)
),
)
negbinom = check_parameters(
res,
mu > 0,
alpha > 0,
msg="mu > 0, alpha > 0",
)
# Return Poisson when alpha gets very large.
return pt.switch(pt.gt(alpha, 1e10), logp(Poisson.dist(mu=mu), value), negbinom)
def logcdf(value, n, p):
res = pt.switch(
pt.lt(value, 0),
-np.inf,
pt.log(pt.betainc(n, pt.floor(value) + 1, p)),
)
return check_parameters(
res,
n > 0,
0 <= p,
p <= 1,
msg="n > 0, 0 <= p <= 1",
)
class Geometric(Discrete):
R"""
Geometric log-likelihood.
The probability that the first success in a sequence of Bernoulli
trials occurs on the x'th trial.
The pmf of this distribution is
.. math:: f(x \mid p) = p(1-p)^{x-1}
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import arviz as az
plt.style.use('arviz-darkgrid')
x = np.arange(1, 11)
for p in [0.1, 0.25, 0.75]:
pmf = st.geom.pmf(x, p)
plt.plot(x, pmf, '-o', label='p = {}'.format(p))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.legend(loc=1)
plt.show()
======== =============================
Support :math:`x \in \mathbb{N}_{>0}`
Mean :math:`\dfrac{1}{p}`
Variance :math:`\dfrac{1 - p}{p^2}`
======== =============================
Parameters
----------
p : tensor_like of float
Probability of success on an individual trial (0 < p <= 1).
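    Examples
    --------
    A minimal usage sketch (the parameter value is illustrative):

    .. code-block:: python

        import pymc as pm

        with pm.Model():
            x = pm.Geometric("x", p=0.3)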
"""
rv_op = geometric
@classmethod
def dist(cls, p, *args, **kwargs):
p = pt.as_tensor_variable(p)
return super().dist([p], *args, **kwargs)
def support_point(rv, size, p):
mean = pt.round(1.0 / p)
if not rv_size_is_none(size):
mean = pt.full(size, mean)
return mean
def logp(value, p):
res = pt.switch(
pt.lt(value, 1),
-np.inf,
pt.log(p) + logpow(1 - p, value - 1),
)
return check_parameters(
res,
0 <= p,
p <= 1,
msg="0 <= p <= 1",
)
def logcdf(value, p):
res = pt.switch(
pt.lt(value, 0),
-np.inf,
pt.log1mexp(pt.log1p(-p) * value),
)
return check_parameters(
res,
0 <= p,
p <= 1,
msg="0 <= p <= 1",
)
def icdf(value, p):
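        # Closed-form inverse CDF: the smallest integer k with 1 - (1 - p)**k >= value,
        # i.e. ceil(log(1 - value) / log(1 - p)); the switch below backs off by one step
        # when floating-point rounding makes the smaller integer already reach the target.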
res = pt.ceil(pt.log1p(-value) / pt.log1p(-p)).astype("int64")
res_1m = pt.maximum(res - 1, 0)
dist = pm.Geometric.dist(p=p)
value_1m = pt.exp(logcdf(dist, res_1m))
res = pt.switch(value_1m >= value, res_1m, res)
res = check_icdf_value(res, value)
return check_icdf_parameters(
res,
0 <= p,
p <= 1,
msg="0 <= p <= 1",
)
class HyperGeometric(Discrete):
R"""
Discrete hypergeometric distribution.
    The probability of :math:`x` successes in a sequence of :math:`n` Bernoulli
    trials taken without replacement from a population of :math:`N` objects,
    containing :math:`k` good (or successful or Type I) objects.
The pmf of this distribution is
.. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import arviz as az
plt.style.use('arviz-darkgrid')
x = np.arange(1, 15)
N = 50
k = 10
for n in [20, 25]:
pmf = st.hypergeom.pmf(x, N, k, n)
plt.plot(x, pmf, '-o', label='N = {}, k = {}, n = {}'.format(N, k, n))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.legend(loc=1)
plt.show()
======== =============================
Support :math:`x \in \left[\max(0, n - N + k), \min(k, n)\right]`
Mean :math:`\dfrac{nk}{N}`
Variance :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}`
======== =============================
Parameters
----------
N : tensor_like of int
Total size of the population (N > 0)
k : tensor_like of int
Number of successful individuals in the population (0 <= k <= N)
n : tensor_like of int
Number of samples drawn from the population (0 <= n <= N)
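    Examples
    --------
    A minimal usage sketch (the parameter values are illustrative):

    .. code-block:: python

        import pymc as pm

        with pm.Model():
            # 10 successes in a population of 50, from which 20 items are drawn
            x = pm.HyperGeometric("x", N=50, k=10, n=20)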
"""
rv_op = hypergeometric
@classmethod
def dist(cls, N, k, n, *args, **kwargs):
good = pt.as_tensor_variable(k, dtype=int)
bad = pt.as_tensor_variable(N - k, dtype=int)
n = pt.as_tensor_variable(n, dtype=int)
return super().dist([good, bad, n], *args, **kwargs)
def support_point(rv, size, good, bad, n):
N, k = good + bad, good
mode = pt.floor((n + 1) * (k + 1) / (N + 2))
if not rv_size_is_none(size):
mode = pt.full(size, mode)
return mode
def logp(value, good, bad, n):
tot = good + bad
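        # Log-pmf written in terms of log-beta functions, using
        # log(binom(n, k)) = -log(n + 1) - betaln(n - k + 1, k + 1)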
result = (
betaln(good + 1, 1)
+ betaln(bad + 1, 1)
+ betaln(tot - n + 1, n + 1)
- betaln(value + 1, good - value + 1)
- betaln(n - value + 1, bad - n + value + 1)
- betaln(tot + 1, 1)
)
# value in [max(0, n - N + k), min(k, n)]
lower = pt.switch(pt.gt(n - tot + good, 0), n - tot + good, 0)
upper = pt.switch(pt.lt(good, n), good, n)
res = pt.switch(
pt.lt(value, lower),
-np.inf,
pt.switch(
pt.le(value, upper),
result,
-np.inf,
),
)
return check_parameters(
res,
lower <= upper,
msg="lower <= upper",
)
def logcdf(value, good, bad, n):
# logcdf can only handle scalar values at the moment
if np.ndim(value):
raise TypeError(
f"HyperGeometric.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object."
)
N = good + bad
        # TODO: Use the lower and upper bounds in logcdf for a smarter logsumexp?
safe_lower = pt.switch(pt.lt(value, 0), value, 0)
res = pt.switch(
pt.lt(value, 0),
-np.inf,
pt.switch(
pt.lt(value, n),
pt.logsumexp(
HyperGeometric.logp(pt.arange(safe_lower, value + 1), good, bad, n),
keepdims=False,
),
0,
),
)
return check_parameters(
res,
N > 0,
0 <= good,
good <= N,
0 <= n,
n <= N,
msg="N > 0, 0 <= good <= N, 0 <= n <= N",
)
class DiscreteUniformRV(ScipyRandomVariable):
name = "discrete_uniform"
signature = "(),()->()"
dtype = "int64"
_print_name = ("DiscreteUniform", "\\operatorname{DiscreteUniform}")
@classmethod
def rng_fn_scipy(cls, rng, lower, upper, size=None):
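        # scipy's randint has an exclusive upper bound, so shift by one to make `upper` inclusive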
return stats.randint.rvs(lower, upper + 1, size=size, random_state=rng)
discrete_uniform = DiscreteUniformRV()
class Categorical(Discrete):
R"""
Categorical log-likelihood.
The most general discrete distribution. The pmf of this distribution is
.. math:: f(x \mid p) = p_x
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import arviz as az
plt.style.use('arviz-darkgrid')
ps = [[0.1, 0.6, 0.3], [0.3, 0.1, 0.1, 0.5]]
for p in ps:
x = range(len(p))
plt.plot(x, p, '-o', label='p = {}'.format(p))
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
plt.ylim(0)
plt.legend(loc=1)
plt.show()
======== ===================================
Support :math:`x \in \{0, 1, \ldots, |p|-1\}`
======== ===================================
Parameters
----------
    p : tensor_like of float
        Probabilities of each category (p > 0 and the elements of p must sum to 1).
    logit_p : tensor_like of float
        Alternative unnormalized log probabilities for each category (a softmax over the last axis is applied).
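    Examples
    --------
    A minimal usage sketch (the probabilities are illustrative):

    .. code-block:: python

        import pymc as pm

        with pm.Model():
            x = pm.Categorical("x", p=[0.1, 0.6, 0.3])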
"""
rv_op = categorical
@classmethod
def dist(cls, p=None, logit_p=None, **kwargs):
if p is not None and logit_p is not None:
raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.")
elif p is None and logit_p is None:
raise ValueError("Incompatible parametrization. Must specify either p or logit_p.")
if logit_p is not None:
p = pm.math.softmax(logit_p, axis=-1)
p = pt.as_tensor_variable(p)
if isinstance(p, TensorConstant):
p_ = np.asarray(p.data)
if np.any(p_ < 0):
raise ValueError(f"Negative `p` parameters are not valid, got: {p_}")
p_sum_ = np.sum([p_], axis=-1)
if not np.all(np.isclose(p_sum_, 1.0)):
warnings.warn(
f"`p` parameters sum to {p_sum_}, instead of 1.0. "
"They will be automatically rescaled. "
"You can rescale them directly to get rid of this warning.",
UserWarning,
)
p_ = p_ / pt.sum(p_, axis=-1, keepdims=True)
p = pt.as_tensor_variable(p_)
return super().dist([p], **kwargs)
def support_point(rv, size, p):
mode = pt.argmax(p, axis=-1)
if not rv_size_is_none(size):
mode = pt.full(size, mode)
return mode
def logp(value, p):
k = pt.shape(p)[-1]
value_clip = pt.clip(value, 0, k - 1)
# In the standard case p has one more dimension than value
dim_diff = p.type.ndim - value.type.ndim
if dim_diff > 1:
            # p broadcasts implicitly beyond value
value_clip = pt.shape_padleft(value_clip, dim_diff - 1)
elif dim_diff < 1:
# value broadcasts implicitly beyond p
p = pt.shape_padleft(p, 1 - dim_diff)
a = pt.log(pt.take_along_axis(p, value_clip[..., None], axis=-1).squeeze(-1))
res = pt.switch(
pt.or_(pt.lt(value, 0), pt.gt(value, k - 1)),
-np.inf,
a,
)
return check_parameters(
res,
0 <= p,
p <= 1,
pt.isclose(pt.sum(p, axis=-1), 1),
msg="0 <= p <=1, sum(p) = 1",
)
class OrderedLogistic:
R"""Ordered Logistic distribution.
    Useful for regression on ordinal data whose values range
    from 1 to K as a function of some predictor, :math:`\eta`. The
cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
mapped to which of the K observed dependent variables. The number
of cutpoints is K - 1. It is recommended that the cutpoints are
constrained to be ordered.
.. math::
f(k \mid \eta, c) = \left\{
\begin{array}{l}
1 - \text{logit}^{-1}(\eta - c_1)
\,, \text{if } k = 0 \\
\text{logit}^{-1}(\eta - c_{k - 1}) -
\text{logit}^{-1}(\eta - c_{k})
\,, \text{if } 0 < k < K \\
\text{logit}^{-1}(\eta - c_{K - 1})
\,, \text{if } k = K \\
\end{array}
\right.
Parameters
----------
eta : tensor_like of float
The predictor.
    cutpoints : tensor_like of float
        The length K - 1 array of cutpoints which break :math:`\eta` into
        ranges. Do not explicitly set the first and last elements of
        :math:`c` to negative and positive infinity.
    compute_p : bool, default True
        Whether to compute and store in the trace the inferred probability of each category,
        based on the cutpoints' values.
        Disabling it can be useful when memory usage is a concern.
Examples
--------
.. code-block:: python
# Generate data for a simple 1 dimensional example problem
n1_c = 300; n2_c = 300; n3_c = 300
cluster1 = np.random.randn(n1_c) + -1
cluster2 = np.random.randn(n2_c) + 0
cluster3 = np.random.randn(n3_c) + 2
x = np.concatenate((cluster1, cluster2, cluster3))
y = np.concatenate((1*np.ones(n1_c),
2*np.ones(n2_c),
3*np.ones(n3_c))) - 1
# Ordered logistic regression
with pm.Model() as model:
cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
transform=pm.distributions.transforms.ordered)
y_ = pm.OrderedLogistic("y", cutpoints=cutpoints, eta=x, observed=y)
idata = pm.sample()
# Plot the results
plt.hist(cluster1, 30, alpha=0.5);
plt.hist(cluster2, 30, alpha=0.5);
plt.hist(cluster3, 30, alpha=0.5);
posterior = idata.posterior.stack(sample=("chain", "draw"))
plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k');
plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k');
"""
def __new__(cls, name, eta, cutpoints, compute_p=True, **kwargs):
p = cls.compute_p(eta, cutpoints)
if compute_p:
p = pm.Deterministic(f"{name}_probs", p)
out_rv = Categorical(name, p=p, **kwargs)
return out_rv
@classmethod
def dist(cls, eta, cutpoints, **kwargs):
p = cls.compute_p(eta, cutpoints)
return Categorical.dist(p=p, **kwargs)
@classmethod
def compute_p(cls, eta, cutpoints):
eta = pt.as_tensor_variable(eta)
cutpoints = pt.as_tensor_variable(cutpoints)
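        # Cumulative probabilities P(Y <= k) = sigmoid(c_k - eta); padding with 0 and 1
        # and taking adjacent differences yields the individual category probabilities.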
pa = sigmoid(cutpoints - pt.shape_padright(eta))
p_cum = pt.concatenate(
[
pt.zeros_like(pt.shape_padright(pa[..., 0])),
pa,
pt.ones_like(pt.shape_padright(pa[..., 0])),
],
axis=-1,
)
p = p_cum[..., 1:] - p_cum[..., :-1]
return p
class OrderedProbit:
R"""
    Ordered Probit distribution.
    Useful for regression on ordinal data whose values range
    from 1 to K as a function of some predictor, :math:`\eta`. The
cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
mapped to which of the K observed dependent variables. The number
of cutpoints is K - 1. It is recommended that the cutpoints are
constrained to be ordered.
    In order to stabilize the computation, the log-likelihood is computed
    in log space using the scaled complementary error function `erfcx`.
.. math::
f(k \mid \eta, c) = \left\{
\begin{array}{l}
1 - \text{normal_cdf}(0, \sigma, \eta - c_1)
\,, \text{if } k = 0 \\
\text{normal_cdf}(0, \sigma, \eta - c_{k - 1}) -
\text{normal_cdf}(0, \sigma, \eta - c_{k})
\,, \text{if } 0 < k < K \\
\text{normal_cdf}(0, \sigma, \eta - c_{K - 1})
\,, \text{if } k = K \\
\end{array}
\right.
Parameters
----------
eta : tensor_like of float
The predictor.
    cutpoints : tensor_like of float
The length K - 1 array of cutpoints which break :math:`\eta` into
ranges. Do not explicitly set the first and last elements of
:math:`c` to negative and positive infinity.
sigma : tensor_like of float, default 1.0
Standard deviation of the probit function.
    compute_p : bool, default True
        Whether to compute and store in the trace the inferred probability of each category,
        based on the cutpoints' values.
        Disabling it can be useful when memory usage is a concern.
Examples
--------
.. code:: python
# Generate data for a simple 1 dimensional example problem
n1_c = 300; n2_c = 300; n3_c = 300
cluster1 = np.random.randn(n1_c) + -1
cluster2 = np.random.randn(n2_c) + 0
cluster3 = np.random.randn(n3_c) + 2
x = np.concatenate((cluster1, cluster2, cluster3))
y = np.concatenate((1*np.ones(n1_c),
2*np.ones(n2_c),
3*np.ones(n3_c))) - 1
# Ordered probit regression
with pm.Model() as model:
cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
transform=pm.distributions.transforms.ordered)
y_ = pm.OrderedProbit("y", cutpoints=cutpoints, eta=x, observed=y)
idata = pm.sample()
# Plot the results
plt.hist(cluster1, 30, alpha=0.5);
plt.hist(cluster2, 30, alpha=0.5);
plt.hist(cluster3, 30, alpha=0.5);
posterior = idata.posterior.stack(sample=("chain", "draw"))
plt.hist(posterior["cutpoints"][0], 80, alpha=0.2, color='k');
plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k');
"""
def __new__(cls, name, eta, cutpoints, sigma=1, compute_p=True, **kwargs):
p = cls.compute_p(eta, cutpoints, sigma)
if compute_p:
p = pm.Deterministic(f"{name}_probs", p)
out_rv = Categorical(name, p=p, **kwargs)
return out_rv
@classmethod
def dist(cls, eta, cutpoints, sigma=1, **kwargs):
p = cls.compute_p(eta, cutpoints, sigma)
return Categorical.dist(p=p, **kwargs)
@classmethod
def compute_p(cls, eta, cutpoints, sigma):
eta = pt.as_tensor_variable(eta)
cutpoints = pt.as_tensor_variable(cutpoints)
probits = pt.shape_padright(eta) - cutpoints
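        # Category log-probabilities assembled in log space for numerical stability:
        # normal log-survival for the first category, log-differences of normal CDFs
        # for the middle categories, and the normal log-CDF for the last category.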
log_p = pt.concatenate(
[
pt.shape_padright(normal_lccdf(0, sigma, probits[..., 0])),
log_diff_normal_cdf(
0, pt.shape_padright(sigma), probits[..., :-1], probits[..., 1:]
),
pt.shape_padright(normal_lcdf(0, sigma, probits[..., -1])),
],
axis=-1,
)
p = pt.exp(log_p)
return p