Sampling
base
apply_nchoosek(samples, constraint)
Apply an n-choose-k constraint in-place
Source code in opti/sampling/base.py
```python
def apply_nchoosek(samples: pd.DataFrame, constraint: NChooseK):
    """Apply an n-choose-k constraint in-place"""
    n_zeros = len(constraint.names) - constraint.max_active
    for i in samples.index:
        s = np.random.choice(constraint.names, size=n_zeros, replace=False)
        samples.loc[i, s] = 0
```
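Example (illustrative sketch; the `opti.NChooseK(names=..., max_active=...)` constructor arguments are assumed from the attributes the function reads above):

```python
import numpy as np
import pandas as pd
import opti

# Hypothetical constraint: 4 ingredients, at most 2 may be non-zero per sample.
constraint = opti.NChooseK(names=["x1", "x2", "x3", "x4"], max_active=2)

samples = pd.DataFrame(
    np.random.uniform(size=(5, 4)), columns=["x1", "x2", "x3", "x4"]
)
apply_nchoosek(samples, constraint)  # modifies `samples` in-place

# Each row now has at most 2 non-zero entries among x1..x4.
assert (samples.gt(0).sum(axis=1) <= 2).all()
```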
constrained_sampling(n_samples, parameters, constraints)
Uniform sampling from a constrained space.
Source code in opti/sampling/base.py
```python
def constrained_sampling(
    n_samples: int, parameters: Parameters, constraints: Constraints
) -> pd.DataFrame:
    """Uniform sampling from a constrained space."""
    nchoosek_constraints, other_constraints = split_nchoosek(constraints)
    try:
        samples = rejection_sampling(n_samples, parameters, other_constraints)
    except Exception:
        samples = polytope_sampling(n_samples, parameters, other_constraints)

    if len(nchoosek_constraints) == 0:
        return samples

    for c in nchoosek_constraints:
        apply_nchoosek(samples, c)

    # check if other constraints are still satisfied
    if not constraints.satisfied(samples).all():
        raise Exception(
            "Applying the n-choose-k constraint(s) violated another constraint."
        )
    return samples
```
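Example (illustrative; the `opti.Continuous`, `opti.LinearInequality`, `opti.NChooseK`, `opti.Parameters`, and `opti.Constraints` constructor signatures are assumed and may differ between versions):

```python
import opti

# Hypothetical problem: three variables in [0, 1] with a linear inequality
# and an n-choose-k constraint.
parameters = opti.Parameters(
    [opti.Continuous(f"x{i}", domain=[0, 1]) for i in range(1, 4)]
)
constraints = opti.Constraints(
    [
        opti.LinearInequality(["x1", "x2", "x3"], lhs=[1, 1, 1], rhs=1),  # x1 + x2 + x3 <= 1
        opti.NChooseK(names=["x1", "x2", "x3"], max_active=2),
    ]
)

samples = constrained_sampling(10, parameters, constraints)
assert constraints.satisfied(samples).all()
```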
rejection_sampling(n_samples, parameters, constraints, max_iters=1000)
Uniformly distributed samples from a constrained space via rejection sampling.
Source code in opti/sampling/base.py
```python
def rejection_sampling(
    n_samples: int,
    parameters: Parameters,
    constraints: Constraints,
    max_iters: int = 1000,
) -> pd.DataFrame:
    """Uniformly distributed samples from a constrained space via rejection sampling."""
    if constraints is None:
        return parameters.sample(n_samples)

    # check for equality constraints in combination with continuous parameters
    for c in constraints:
        if isinstance(c, (LinearEquality, NonlinearEquality)):
            for p in parameters:
                if isinstance(p, Continuous):
                    raise Exception(
                        "Rejection sampling doesn't work for equality constraints over continuous variables."
                    )

    n_iters = 0
    n_found = 0
    points_found = []
    while n_found < n_samples:
        n_iters += 1
        if n_iters > max_iters:
            raise Exception("Maximum iterations exceeded in rejection sampling")
        points = parameters.sample(10000)
        valid = constraints.satisfied(points)
        n_found += np.sum(valid)
        points_found.append(points[valid])

    return pd.concat(points_found, ignore_index=True).iloc[:n_samples]
```
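Example (illustrative; constructor signatures for `opti.Continuous`, `opti.LinearInequality`, `opti.Parameters`, and `opti.Constraints` are assumed):

```python
import opti

# Two continuous variables with a single linear inequality constraint.
parameters = opti.Parameters(
    [opti.Continuous("x1", domain=[0, 1]), opti.Continuous("x2", domain=[0, 1])]
)
constraints = opti.Constraints(
    [opti.LinearInequality(["x1", "x2"], lhs=[1, 1], rhs=1)]  # x1 + x2 <= 1
)

samples = rejection_sampling(100, parameters, constraints)
assert len(samples) == 100
assert constraints.satisfied(samples).all()
```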
sobol_sampling(n_samples, parameters)
Super-uniform sampling from an unconstrained space using a Sobol sequence.
Source code in opti/sampling/base.py
```python
def sobol_sampling(n_samples: int, parameters: Parameters) -> pd.DataFrame:
    """Super-uniform sampling from an unconstrained space using a Sobol sequence."""
    d = len(parameters)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        X = Sobol(d).random(n_samples)
    res = []
    for i, p in enumerate(parameters):
        if isinstance(p, Continuous):
            x = p.from_unit_range(X[:, i])
        else:
            bins = np.linspace(0, 1, len(p.domain) + 1)
            idx = np.digitize(X[:, i], bins) - 1
            x = np.array(p.domain)[idx]
        res.append(pd.Series(x, name=p.name))
    return pd.concat(res, axis=1)
```
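Example (illustrative; it assumes `opti.Continuous(name, domain=...)` and a discrete parameter type `opti.Discrete(name, domain=...)` with a list-valued domain, which is what the binning branch above suggests):

```python
import opti

parameters = opti.Parameters(
    [
        opti.Continuous("x1", domain=[0, 10]),
        opti.Discrete("x2", domain=[1, 2, 4, 8]),  # mapped through the binning branch
    ]
)

X = sobol_sampling(16, parameters)
assert X["x1"].between(0, 10).all()
assert X["x2"].isin([1, 2, 4, 8]).all()
```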
split_nchoosek(constraints)
Split constraints into n-choose-k constraints and all other constraints.
Source code in opti/sampling/base.py
```python
def split_nchoosek(
    constraints: Optional[Constraints],
) -> Tuple[Constraints, Constraints]:
    """Split constraints into n-choose-k constraints and all other constraints."""
    if constraints is None:
        return Constraints([]), Constraints([])
    nchoosek_constraints = Constraints(
        [c for c in constraints if isinstance(c, NChooseK)]
    )
    other_constraints = Constraints(
        [c for c in constraints if not isinstance(c, NChooseK)]
    )
    return nchoosek_constraints, other_constraints
```
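Example (illustrative; the `opti` constructor signatures are assumed):

```python
import opti

constraints = opti.Constraints(
    [
        opti.LinearInequality(["x1", "x2", "x3"], lhs=[1, 1, 1], rhs=1),
        opti.NChooseK(names=["x1", "x2", "x3"], max_active=2),
    ]
)

nck, other = split_nchoosek(constraints)
# nck holds only the NChooseK constraint, other holds everything else.
assert len(nck) == 1 and len(other) == 1
```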
polytope
This module provides functions to uniformly sample points subject to a system of linear inequality constraints, `Ax <= b` (convex polytope), and linear equality constraints, `Ax = b` (affine projection).

A comparison of MCMC algorithms to generate uniform samples over a convex polytope is given in [Chen2018]. Here, we use the Hit & Run algorithm described in [Smith1984]. The R package [hitandrun](https://cran.r-project.org/web/packages/hitandrun/index.html) provides similar functionality to this module.
References

[Chen2018] Chen, Y., Dwivedi, R., Wainwright, M., Yu, B. (2018). Fast MCMC Sampling Algorithms on Polytopes. JMLR, 19(55):1-86. https://arxiv.org/abs/1710.08165

[Smith1984] Smith, R. (1984). Efficient Monte Carlo Procedures for Generating Points Uniformly Distributed Over Bounded Regions. Operations Research, 32(6), 1296-1308. https://www.jstor.org/stable/170949
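For intuition, here is a minimal self-contained sketch of a single Hit & Run step on a polytope `Ax <= b`. It is an illustration only, not the module's internal `_hitandrun` implementation, which additionally handles equality constraints via the affine projection mentioned above:

```python
import numpy as np

def hitandrun_step(A: np.ndarray, b: np.ndarray, x: np.ndarray) -> np.ndarray:
    """One Hit & Run step: pick a random direction, find the feasible segment
    along it, and jump to a uniformly random point on that segment."""
    d = np.random.normal(size=len(x))
    d /= np.linalg.norm(d)
    # Feasible step sizes t must satisfy A @ (x + t * d) <= b.
    ad = A @ d
    slack = b - A @ x
    with np.errstate(divide="ignore"):
        ratios = slack / ad
    t_max = ratios[ad > 0].min()  # largest step forward
    t_min = ratios[ad < 0].max()  # largest step backward (negative)
    t = np.random.uniform(t_min, t_max)
    return x + t * d

# Example polytope: the unit square 0 <= x1, x2 <= 1 written as A x <= b.
A = np.array([[1, 0], [-1, 0], [0, 1], [0, -1]], dtype=float)
b = np.array([1, 0, 1, 0], dtype=float)
x = np.array([0.5, 0.5])
for _ in range(1000):
    x = hitandrun_step(A, b, x)
assert (A @ x <= b + 1e-9).all()
```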
polytope_sampling(n_samples, parameters, constraints, thin=100)
Hit-and-run method to sample uniformly under linear constraints.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| n_samples | int | Number of samples. | required |
| parameters | opti.Parameters | Parameter space. | required |
| constraints | opti.Constraints | Constraints on the parameters. | required |
| thin | int | Thinning factor of the generated samples. | 100 |

Returns:

| Type | Description |
|---|---|
| array, shape=(n_samples, dimension) | Randomly sampled points. |
Source code in opti/sampling/polytope.py
```python
def polytope_sampling(
    n_samples: int, parameters: Parameters, constraints: Constraints, thin: int = 100
) -> pd.DataFrame:
    """Hit-and-run method to sample uniformly under linear constraints.

    Args:
        n_samples (int): Number of samples.
        parameters (opti.Parameters): Parameter space.
        constraints (opti.Constraints): Constraints on the parameters.
        thin (int, optional): Thinning factor of the generated samples.

    Returns:
        array, shape=(n_samples, dimension): Randomly sampled points.
    """
    for c in constraints:
        if not isinstance(c, (LinearEquality, LinearInequality)):
            raise Exception("Polytope sampling only works for linear constraints.")

    At, bt, N, xp = _get_AbNx(parameters, constraints)

    # hit & run sampling
    x0 = _chebyshev_center(At, bt)
    sampler = _hitandrun(At, bt, x0)
    X = np.empty((n_samples, At.shape[1]))
    for i in range(n_samples):
        for _ in range(thin - 1):
            next(sampler)
        X[i] = next(sampler)

    # project back
    X = X @ N.T + xp
    return pd.DataFrame(columns=parameters.names, data=X)
```
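Example (illustrative; the `opti.Continuous`, `opti.LinearEquality`, `opti.Parameters`, and `opti.Constraints` constructor signatures are assumed):

```python
import numpy as np
import opti

# Mixture-type problem: three fractions in [0, 1] that must sum to 1.
parameters = opti.Parameters(
    [opti.Continuous(f"x{i}", domain=[0, 1]) for i in range(1, 4)]
)
constraints = opti.Constraints(
    [opti.LinearEquality(["x1", "x2", "x3"], lhs=[1, 1, 1], rhs=1)]
)

X = polytope_sampling(50, parameters, constraints, thin=100)
assert np.allclose(X.sum(axis=1), 1)  # samples satisfy the equality constraint
```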
simplex
grid(dimension, levels)
Construct a regular grid on the unit simplex.
The number of grid points is L = (n+m-1)!/(n!*(m-1)!) where m is the dimension and n+1 is the number of levels.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| dimension | int | Number of variables. | required |
| levels | int | Number of levels for each variable. | required |

Returns:

| Type | Description |
|---|---|
| array | Regularly spaced points on the unit simplex. |
Examples:

```python
>>> grid(3, 3)
array([
    [0. , 0. , 1. ],
    [0. , 0.5, 0.5],
    [0. , 1. , 0. ],
    [0.5, 0. , 0.5],
    [0.5, 0.5, 0. ],
    [1. , 0. , 0. ]
])
```
References
Nijenhuis and Wilf, Combinatorial Algorithms, Chapter 5, Academic Press, 1978.
Source code in opti/sampling/simplex.py
```python
def grid(dimension: int, levels: int) -> np.ndarray:
    """Construct a regular grid on the unit simplex.

    The number of grid points is L = (n+m-1)!/(n!*(m-1)!)
    where m is the dimension and n+1 is the number of levels.

    Args:
        dimension (int): Number of variables.
        levels (int): Number of levels for each variable.

    Returns:
        array: Regularly spaced points on the unit simplex.

    Examples:
        >>> grid(3, 3)
        array([
            [0. , 0. , 1. ],
            [0. , 0.5, 0.5],
            [0. , 1. , 0. ],
            [0.5, 0. , 0.5],
            [0.5, 0.5, 0. ],
            [1. , 0. , 0. ]
        ])

    References:
        Nijenhuis and Wilf, Combinatorial Algorithms, Chapter 5, Academic Press, 1978.
    """
    m = dimension
    n = levels - 1
    L = int(comb(dimension - 1 + levels - 1, dimension - 1, exact=True))

    x = np.zeros(m, dtype=int)
    x[-1] = n
    out = np.empty((L, m), dtype=int)
    out[0] = x

    h = m
    for i in range(1, L):
        h -= 1
        val = x[h]
        x[h] = 0
        x[h - 1] += 1
        x[-1] = val - 1
        if val != 1:
            h = m
        out[i] = x

    return out / n
```
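A quick check of the point-count formula: with dimension m = 4 and levels = 5 (so n = 4), the grid has (n+m-1)!/(n!\*(m-1)!) = 35 points, and every point lies on the unit simplex.

```python
import numpy as np
from scipy.special import comb

X = grid(dimension=4, levels=5)
assert X.shape == (int(comb(7, 3, exact=True)), 4)  # 35 grid points
assert np.allclose(X.sum(axis=1), 1)                # each point sums to 1
```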
sample(dimension, n_samples=1)
Sample uniformly from the unit simplex.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| dimension | int | Number of dimensions. | required |
| n_samples | int | Number of samples to draw. | 1 |

Returns:

| Type | Description |
|---|---|
| array, shape=(n_samples, dimension) | Random samples from the unit simplex. |
Source code in opti/sampling/simplex.py
```python
def sample(dimension: int, n_samples: int = 1) -> np.ndarray:
    """Sample uniformly from the unit simplex.

    Args:
        dimension (int): Number of dimensions.
        n_samples (int): Number of samples to draw.

    Returns:
        array, shape=(n_samples, dimension): Random samples from the unit simplex.
    """
    s = np.random.standard_exponential((n_samples, dimension))
    return (s.T / s.sum(axis=1)).T
```
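Example: normalized standard-exponential draws give non-negative coordinates that sum to 1, i.e. points on the unit simplex.

```python
import numpy as np

X = sample(dimension=5, n_samples=1000)
assert X.shape == (1000, 5)
assert np.allclose(X.sum(axis=1), 1)  # each sample lies on the unit simplex
assert (X >= 0).all()                 # coordinates are non-negative
```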
sphere
sample(dimension, n_samples=1, positive=False)
Sample uniformly from the unit hypersphere.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| dimension | int | Number of dimensions. | required |
| n_samples | int | Number of samples to draw. | 1 |
| positive | bool | Sample only from the non-negative part of the hypersphere. | False |

Returns:

| Type | Description |
|---|---|
| array, shape=(n_samples, dimension) | Random samples from the unit hypersphere. |
Source code in opti/sampling/sphere.py
```python
def sample(dimension: int, n_samples: int = 1, positive: bool = False) -> np.ndarray:
    """Sample uniformly from the unit hypersphere.

    Args:
        dimension (int): Number of dimensions.
        n_samples (int): Number of samples to draw.
        positive (bool): Sample only from the non-negative part of the hypersphere.

    Returns:
        array, shape=(n_samples, dimension): Random samples from the unit hypersphere.
    """
    x = np.random.normal(0, 1, size=(n_samples, dimension))
    x = x / np.sum(x**2, axis=1, keepdims=True) ** 0.5
    if positive:
        x *= -2 * (x < 0) + 1
    return x
```
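Example: normalized Gaussian draws have unit norm; with `positive=True`, negative coordinates are reflected into the non-negative orthant.

```python
import numpy as np

X = sample(dimension=3, n_samples=1000)
assert np.allclose((X**2).sum(axis=1), 1)   # unit norm

Xp = sample(dimension=3, n_samples=1000, positive=True)
assert (Xp >= 0).all()                      # non-negative orthant only
assert np.allclose((Xp**2).sum(axis=1), 1)
```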