Source code for optbayesexpt.particlepdf

import numpy as np
import warnings

# Assume numba is available; fall back to plain numpy if the import fails.
GOT_NUMBA = True
try:
    from numba import njit, float64
except ImportError:
    GOT_NUMBA = False


class ParticlePDF:
    """A probability distribution function.

    A probability distribution :math:`P(\\theta_0, \\theta_1, \\ldots,
    \\theta_{n\\_dims})` over parameter variables :math:`\\theta_i` is
    represented by a large-ish number of samples from the distribution,
    each with a weight value. The distribution can be visualized as a
    cloud of particles in parameter space, with each particle
    corresponding to a weighted random draw from the distribution. The
    methods implemented here largely follow the algorithms published in
    Christopher E Granade et al. 2012 *New J. Phys.* **14** 103013.

    Warnings:
        The number of samples (i.e. particles) required for good
        performance will depend on the application. Too many samples
        will slow down the calculations, but too few samples can
        produce incorrect results. With too few samples, the
        probability distribution can become overly narrow, and it may
        not include the "true" parameter values. See the ``resample()``
        method documentation for details.

    Arguments:
        prior (:obj:`2D array-like`): The Bayesian *prior*, which
            initializes the :obj:`ParticlePDF` distribution. Each of
            ``n_dims`` sub-arrays contains ``n_particles`` values of a
            single parameter, so that the *j*\\ _th elements of the
            sub-arrays determine the coordinates of a point in
            parameter space. Users are encouraged to experiment with
            different ``n_particles`` sizes to assure consistent
            results.

    Keyword Args:
        a_param (:obj:`float`): In resampling, determines the scale of
            random diffusion relative to the distribution covariance.
            After weighted sampling, some parameter values may have
            been chosen multiple times. To make the new distribution
            smoother, the parameters are given small 'nudges', random
            displacements much smaller than the overall parameter
            distribution, but with the same shape as the overall
            distribution. More precisely, the covariance of the nudge
            distribution is :code:`(1 - a_param ** 2)` times the
            covariance of the parameter distribution. Default ``0.98``.

        scale (:obj:`bool`): Determines whether resampling includes a
            contraction of the parameter distribution toward the
            distribution mean. The idea of this contraction is to
            compensate for the overall expansion of the distribution
            that is a by-product of random displacements. If ``True``,
            parameter samples (particles) move a fraction ``a_param``
            of the distance to the distribution mean. Default is
            ``True``, but ``False`` is recommended.

        resample_threshold (:obj:`float`): Sets a threshold for
            automatic resampling. Resampling is triggered when the
            effective fraction of particles,
            :math:`1 / (N\\sum_i^N w_i^2)`, is smaller than
            ``resample_threshold``. Default ``0.5``.

        auto_resample (:obj:`bool`): Determines whether threshold
            testing and resampling are performed when
            ``bayesian_update()`` is called. Default ``True``.

        use_jit (:obj:`bool`): Allows precompilation of some methods
            for a modest increase in speed. Only effective on systems
            where ``numba`` is installed. Default ``True``.

    **Attributes:**
    """

    def __init__(self, prior, a_param=0.98, resample_threshold=0.5,
                 auto_resample=True, scale=True, use_jit=True):
        #: dict: A package of parameters affecting the resampling
        #: algorithm
        #:
        #: - ``'a_param'`` (:obj:`float`): Initially, the value of the
        #:   ``a_param`` keyword argument. Default ``0.98``
        #:
        #: - ``'scale'`` (:obj:`bool`): Initially, the value of the
        #:   ``scale`` keyword argument. Default ``True``
        #:
        #: - ``'resample_threshold'`` (:obj:`float`): Initially,
        #:   the value of the ``resample_threshold`` keyword argument.
        #:   Default ``0.5``.
        #:
        #: - ``'auto_resample'`` (:obj:`bool`): Initially, the value of
        #:   the ``auto_resample`` keyword argument. Default ``True``.
        self.tuning_parameters = {'a_param': a_param,
                                  'resample_threshold': resample_threshold,
                                  'auto_resample': auto_resample,
                                  'scale': scale}

        #: ``n_dims x n_particles ndarray`` of ``float64``: Together
        #: with ``particle_weights``, these ``n_particles`` points
        #: represent the parameter probability distribution.
        #: Initialized by the ``prior`` argument.
        self.particles = np.asarray(prior)

        #: ``int``: The number of parameter samples representing the
        #: probability distribution. Determined from the trailing
        #: dimension of ``prior``.
        self.n_particles = self.particles.shape[-1]

        #: ``int``: The number of parameters, i.e. the dimensionality
        #: of parameter space. Determined from the leading dimension
        #: of ``prior``.
        self.n_dims = self.particles.shape[0]

        #: ``ndarray`` of ``int``: Indices into the particle arrays.
        self._particle_indices = np.arange(self.n_particles, dtype='int')

        #: ``ndarray`` of ``float64``: Array of probability weights
        #: corresponding to the particles.
        self.particle_weights = np.ones(self.n_particles) \
            / self.n_particles

        #: ``bool``: A flag set by the ``resample_test()`` function.
        #: ``True`` if the last ``bayesian_update()`` resulted in
        #: resampling, else ``False``.
        self.just_resampled = False

        # Precompile numerically intensive functions for speed
        # and overwrite the _normalized_product() method.
        if GOT_NUMBA and use_jit:
            @njit([float64[:](float64[:], float64[:])], cache=True)
            def _proto_normalized_product(wgts, lkl):
                tmp = wgts * lkl
                return tmp / np.sum(tmp)
        else:
            def _proto_normalized_product(wgts, lkl):
                tmp = np.nan_to_num(wgts * lkl)
                result = np.nan_to_num(tmp / np.sum(tmp))
                return result
        self._normalized_product = _proto_normalized_product

        # np.random.default_rng() requires numpy >= 1.17; fall back
        # to the legacy np.random module on older versions.
        try:
            self.rng = np.random.default_rng()
        except AttributeError:
            self.rng = np.random
    def set_pdf(self, samples, weights=None):
        """Re-initializes the probability distribution.

        Also resets ``n_particles`` and ``n_dims``, deduced from the
        dimensions of ``samples``.

        Args:
            samples (array-like): A representation of the new
                distribution comprising ``n_dims`` sub-arrays of
                ``n_particles`` samples of each parameter.
            weights (ndarray): If ``None``, weights will be assigned
                uniform probability. Otherwise, an array of length
                ``n_particles``.
        """
        self.particles = np.asarray(samples)
        self.n_particles = self.particles.shape[-1]
        self.n_dims = self.particles.shape[0]
        if weights is None:
            self.particle_weights = np.ones(self.n_particles) \
                / self.n_particles
        else:
            if len(weights) != self.n_particles:
                raise ValueError('Length of weights does not match the '
                                 'number of particles.')
            else:
                self.particle_weights = weights / np.sum(weights)
    def mean(self):
        """Calculates the mean of the probability distribution.

        The weighted mean of the parameter distribution. See also
        :obj:`std()` and :obj:`covariance()`.

        Returns:
            Size ``n_dims`` array.
        """
        return np.average(self.particles, axis=1,
                          weights=self.particle_weights)
    def covariance(self):
        """Calculates the covariance of the probability distribution.

        See also :obj:`mean()` and :obj:`std()`.

        Returns:
            The covariance of the parameter distribution as an
            ``n_dims`` x ``n_dims`` array.
        """
        raw_covariance = np.cov(self.particles,
                                aweights=self.particle_weights)
        if self.n_dims == 1:
            return raw_covariance.reshape((1, 1))
        else:
            return raw_covariance
    def std(self):
        """Calculates the standard deviation of the distribution.

        Calculates the square root of the diagonal elements of the
        covariance matrix. See also :obj:`covariance()` and
        :obj:`mean()`.

        Returns:
            The standard deviation as an ``n_dims`` array.
        """
        var = np.zeros(self.n_dims)
        for i, p in enumerate(self.particles):
            mean = np.dot(p, self.particle_weights)
            msq = np.dot(p * p, self.particle_weights)
            var[i] = msq - mean ** 2
        return np.sqrt(var)
    def bayesian_update(self, likelihood):
        """Performs a Bayesian update on the probability distribution.

        Multiplies ``particle_weights`` by the ``likelihood`` and
        renormalizes the probability distribution. After the update,
        the distribution is tested for resampling depending on
        ``self.tuning_parameters['auto_resample']``.

        Args:
            likelihood (:obj:`ndarray`): An ``n_particles``-sized
                array describing the Bayesian likelihood of a
                measurement result, calculated for each parameter
                combination.
        """
        self.particle_weights = self._normalized_product(
            self.particle_weights, likelihood)
        if self.tuning_parameters['auto_resample']:
            self.resample_test()
    def resample_test(self):
        """Tests the distribution and performs a resampling if required.

        If the effective number of particles falls below
        ``self.tuning_parameters['resample_threshold'] * n_particles``,
        performs a resampling. Sets the ``just_resampled`` flag.
        """
        wsquared = np.nan_to_num(self.particle_weights
                                 * self.particle_weights)
        n_eff = 1 / np.sum(wsquared)
        if n_eff < 0.1 * self.n_particles:
            warnings.warn("\nParticle filter rejected > 90 % of particles. "
                          f"N_eff = {n_eff:.2f}. "
                          "Particle impoverishment may lead to errors.",
                          RuntimeWarning)
            self.resample()
            self.just_resampled = True
        elif n_eff / self.n_particles < \
                self.tuning_parameters['resample_threshold']:
            self.resample()
            self.just_resampled = True
        else:
            self.just_resampled = False
    def resample(self):
        """Performs a resampling of the distribution.

        Resampling refreshes the random draws that represent the
        probability distribution. As Bayesian updates are made, the
        weights of low-probability particles can become very small.
        These particles consume memory and computation time, and they
        contribute little to values that are determined from the
        distribution. Resampling abandons some low-probability
        particles while allowing high-probability particles to
        multiply in higher-probability regions.

        *Sample impoverishment* can occur if there are too few
        particles. In this phenomenon, a resampling step fails to
        sample particles from an important, but low-probability
        region, effectively removing that region from future
        consideration. The symptoms of this *sample impoverishment*
        phenomenon include:

        - Inconsistent results from repeated runs. Standard deviations
          from individual final distributions will be too small to
          explain the spread of individual mean values.

        - Sudden changes in the standard deviations or other measures
          of the distribution on resampling. The resampling is not
          *supposed* to change the distribution, just refresh its
          representation.
        """
        # coords is an n_dims x n_particles array
        coords = self.randdraw(self.n_particles)
        origin = np.zeros(self.n_dims)
        covar = self.covariance()
        old_center = self.mean().reshape((self.n_dims, 1))
        # a_param is typically close to, but less than, 1
        a_param = self.tuning_parameters['a_param']
        # newcovar is a shrunken version of covar that determines the
        # size of the nudge.
        newcovar = (1 - a_param ** 2) * covar
        # multivariate_normal returns an n_particles x n_dims array;
        # ".T" transposes it to match the coords shape.
        nudged = coords + self.rng.multivariate_normal(
            origin, newcovar, self.n_particles).T
        if self.tuning_parameters['scale']:
            scaled = nudged * a_param + old_center * (1 - a_param)
            self.particles = scaled
        else:
            self.particles = nudged
        self.particle_weights = np.full_like(self.particle_weights,
                                             1.0 / self.n_particles)
    def randdraw(self, n_draws=1):
        """Provides random parameter draws from the distribution.

        Particles are selected randomly with probabilities given by
        ``self.particle_weights``.

        Args:
            n_draws (:obj:`int`): The number of draws requested.
                Default ``1``.

        Returns:
            An ``n_dims`` x ``n_draws`` :obj:`ndarray` of parameter
            draws.
        """
        draws = np.zeros((self.n_dims, n_draws))
        try:
            indices = self.rng.choice(self._particle_indices,
                                      size=n_draws,
                                      p=self.particle_weights)
        except ValueError:
            # rng.choice raises ValueError if the weights contain
            # invalid values or do not sum to 1. Report diagnostics
            # and re-raise rather than continue with undefined indices.
            print('weights: ', self.particle_weights)
            print('weight sum = ', np.sum(self.particle_weights))
            raise
        for i, param in enumerate(self.particles):
            draws[i] = param[indices]
        return draws
    @staticmethod
    def _normalized_product(weight_array, likelihood_array):
        """Multiplies two arrays and normalizes the result.

        Functionality is added by overwriting this placeholder in
        ``__init__()``. Precompiled by numba if available and if
        ``use_jit=True``.

        Args:
            weight_array (``ndarray``): typically particle weights
            likelihood_array (``ndarray``): typically likelihoods

        Returns:
            a probability ``ndarray`` with sum = 1.
        """
        pass
# end ParticlePDF definition
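

# ---------------------------------------------------------------------
# Demonstration code. This is a minimal usage sketch, not part of the
# library: the Gaussian prior, the "true" parameter value, the noise
# level sigma, and the particle count are arbitrary choices made for
# illustration only. It exercises the bayesian_update() / resample()
# cycle described in the class docstring above.
if __name__ == '__main__':

    def run_inference(n_particles, n_measurements=20,
                      true_value=0.3, sigma=0.5):
        """Runs a toy 1-parameter inference; returns (mean, std)."""
        rng = np.random.default_rng()
        # A 1-parameter Gaussian prior: 1 x n_particles array
        prior = rng.normal(0.0, 1.0, (1, n_particles))
        pdf = ParticlePDF(prior)
        for _ in range(n_measurements):
            measurement = true_value + rng.normal(0.0, sigma)
            # Gaussian likelihood of this measurement, evaluated for
            # every particle
            likelihood = np.exp(-0.5 * ((measurement - pdf.particles[0])
                                        / sigma) ** 2)
            # Reweights the particles; resampling is triggered
            # automatically when the effective particle fraction
            # drops below resample_threshold.
            pdf.bayesian_update(likelihood)
        return pdf.mean()[0], pdf.std()[0]

    mean, std = run_inference(n_particles=10000)
    print(f'posterior mean = {mean:.4f}, std = {std:.4f}')

    # A rough check for the sample impoverishment symptoms described
    # in resample(): repeat the inference and compare the scatter of
    # the final means with the typical reported standard deviation.
    # If the means scatter much more widely than the reported std,
    # n_particles is probably too small.
    results = np.array([run_inference(n_particles=10000)
                        for _ in range(10)])
    print('scatter of means = {:.4f}, typical reported std = {:.4f}'
          .format(np.std(results[:, 0]), np.mean(results[:, 1])))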