Source code for optbayesexpt.obe_utils

import numpy as np

    rng = np.random.default_rng()
except AttributeError:
    rng = np.random
class MeasurementSimulator:
    """Provides simulated measurement data.

    Evaluates the model function and adds normally distributed noise.

    Args:
        model_function (func): Generally the same as the function used by
            OptBayesExpt. Called as ``model_function(setting, params, cons)``.
        true_params (tuple): Parameter values, typically the "true values"
            of the simulated experiment.
        cons (tuple): The constants.
        noise_level (float): standard deviation of the added noise.
    """

    def __init__(self, model_function, true_params, cons, noise_level):
        self.model_function = model_function
        self.params = true_params
        self.cons = cons
        self.noise_level = noise_level

    def simdata(self, setting, params=None, noise_level=None):
        """Simulate a measurement.

        Args:
            setting (tuple of floats): The setting values.
            params (tuple of floats): if not ``None``, used for this call
                instead of the values stored at construction. (opt)
            noise_level (float): if not ``None``, used for this call
                instead of the value stored at construction. (opt)

        Returns:
            Simulated measurement value(s): the model output plus noise,
            as a numpy array with the shape of the model output.
        """
        active_params = self.params if params is None else params
        sigma = self.noise_level if noise_level is None else noise_level

        clean = np.array(self.model_function(setting, active_params, self.cons))
        # ``rng`` is the module-level random source; the unit-variance draw
        # is scaled by the requested standard deviation.
        return clean + rng.standard_normal(clean.shape) * sigma
def trace_sort(settings, measurements):
    """Combine measurements made at identical setting values.

    Analyzes input arrays of settings and corresponding measurement values
    where setting values may repeat, i.e. more than one measurement was done
    at some of the settings. The measurements are binned by setting value
    and some statistics are calculated for the measurements in each bin.

    Args:
        settings: (ndarray) Setting values
        measurements: (ndarray) measurement values

    Returns:
        A tuple, (sorted_settings, m_average, m_std, n_of_m)
            - sorted_settings (list): setting values (sorted, none repeated)
            - m_average (list): average measurement value at each setting
            - m_std (list): standard error of the mean at each setting,
              i.e. ``np.std(values) / sqrt(n)`` with numpy's default
              (population) standard deviation
            - n_of_m (list): number of measurements at each setting.

        Four empty lists are returned when ``settings`` is empty.
    """
    setting_arr = np.asarray(settings)
    # Nothing to bin; without this guard the code below would fail on
    # empty input.
    if setting_arr.size == 0:
        return [], [], [], []

    # Sort both arrays by the setting values.
    order = np.argsort(setting_arr)
    sarr = setting_arr[order]
    marr = np.asarray(measurements)[order]

    # A batch starts wherever the sorted setting differs from its
    # predecessor; index 0 always starts the first batch.
    starts = np.flatnonzero(sarr[1:] != sarr[:-1]) + 1
    batches = np.split(marr, starts)

    sorted_settings = list(sarr[np.concatenate(([0], starts))])
    m_average = [np.mean(batch) for batch in batches]
    m_std = [np.std(batch) / np.sqrt(len(batch)) for batch in batches]
    n_of_m = [len(batch) for batch in batches]

    return sorted_settings, m_average, m_std, n_of_m
def differential_entropy(values, window_length=None, base=None, axis=0,
                         method='auto'):
    r"""Given a sample of a distribution, estimate the differential entropy.

    This code is copied from scipy.stats with reformatted docstrings.
    When the module is loaded, the package attempts to import
    ``differential_entropy()`` from scipy.stats, and falls back to this
    version if an ``ImportError`` is raised.

    Several estimation methods are available using the `method` parameter.
    By default, a method is selected based on the size of the sample.

    Args:
        values (:obj:`sequence`): Samples from a continuous distribution.
        window_length (:obj:`int`, optional): Window length for computing
            the Vasicek estimate. Must be an integer between 1 and half of
            the sample size. If ``None`` (the default), it uses the
            heuristic value

            .. math::
                \left \lfloor \sqrt{n} + 0.5 \right \rfloor

            where :math:`n` is the sample size. This heuristic was
            originally proposed in [2]_ and has become common in the
            literature.
        base (:obj:`float`, optional): The logarithmic base to use,
            defaults to ``e`` (natural logarithm).
        axis (:obj:`int`, optional): The axis along which the differential
            entropy is calculated. Default is 0.
        method : {'vasicek', 'van es', 'ebrahimi', 'correa', 'auto'}, optional
            The method used to estimate the differential entropy from the
            sample. Matched case-insensitively. Default is ``'auto'``.
            See Notes for more information.

    Returns:
        entropy (:obj:`float`): The calculated differential entropy.

    Raises:
        ValueError: if ``window_length`` is not positive or is not less
            than half the sample size, if ``base`` is not positive, or if
            ``method`` is not one of the recognized names.

    Notes:
        This function will converge to the true differential entropy in the
        limit

        .. math::
            n \to \infty, \quad m \to \infty, \quad \frac{m}{n} \to 0

        The optimal choice of ``window_length`` for a given sample size
        depends on the (unknown) distribution. Typically, the smoother the
        density of the distribution, the larger the optimal value of
        ``window_length`` [1]_.

        The following options are available for the `method` parameter.

        * ``'vasicek'`` uses the estimator presented in [1]_. This is
          one of the first and most influential estimators of differential
          entropy.
        * ``'van es'`` uses the bias-corrected estimator presented in [3]_,
          which is not only consistent but, under some conditions,
          asymptotically normal.
        * ``'ebrahimi'`` uses an estimator presented in [4]_, which was
          shown in simulation to have smaller bias and mean squared error
          than the Vasicek estimator.
        * ``'correa'`` uses the estimator presented in [5]_ based on local
          linear regression. In a simulation study, it had consistently
          smaller mean square error than the Vasicek estimator, but it is
          more expensive to compute.
        * ``'auto'`` selects the method automatically (default). Currently,
          this selects ``'van es'`` for very small samples (<=10),
          ``'ebrahimi'`` for moderate sample sizes (11-1000), and
          ``'vasicek'`` for larger samples, but this behavior is subject
          to change in future versions.

        All estimators are implemented as described in [6]_.

    References:
        .. [1] Vasicek, O. (1976). A test for normality based on sample
               entropy. Journal of the Royal Statistical Society:
               Series B (Methodological), 38(1), 54-59.
        .. [2] Grzegorzewski, P., & Wieczorkowski, R. (1999). Entropy-based
               goodness-of-fit test for exponentiality. Communications in
               Statistics-Theory and Methods, 28(5), 1183-1202.
        .. [3] Van Es, B. (1992). Estimating functionals related to a
               density by a class of statistics based on spacings.
               Scandinavian Journal of Statistics, 61-72.
        .. [4] Ebrahimi, N., Pflughoeft, K., & Soofi, E. S. (1994). Two
               measures of sample entropy. Statistics & Probability
               Letters, 20(3), 225-234.
        .. [5] Correa, J. C. (1995). A new estimator of entropy.
               Communications in Statistics-Theory and Methods, 24(10),
               2439-2449.
        .. [6] Noughabi, H. A. (2015). Entropy Estimation Using Numerical
               Methods. Annals of Data Science, 2(2), 231-241.
    """
    values = np.asarray(values)
    # Work along the last axis so the estimators can use ``[..., k]`` slices.
    values = np.moveaxis(values, axis, -1)
    n = values.shape[-1]  # number of observations

    if window_length is None:
        window_length = int(np.floor(np.sqrt(n) + 0.5))

    if not 2 <= 2 * window_length < n:
        raise ValueError(
            f"Window length ({window_length}) must be positive and less "
            f"than half the sample size ({n}).",
        )

    if base is not None and base <= 0:
        raise ValueError("`base` must be a positive number or `None`.")

    sorted_data = np.sort(values, axis=-1)

    # "auto" maps to a placeholder here only so that it passes the
    # membership check; it is resolved to a concrete method below.
    methods = {"vasicek": _vasicek_entropy,
               "van es": _van_es_entropy,
               "correa": _correa_entropy,
               "ebrahimi": _ebrahimi_entropy,
               "auto": _vasicek_entropy}
    method = method.lower()
    if method not in methods:
        message = f"`method` must be one of {set(methods)}"
        raise ValueError(message)

    if method == "auto":
        if n <= 10:
            method = 'van es'
        elif n <= 1000:
            method = 'ebrahimi'
        else:
            method = 'vasicek'

    res = methods[method](sorted_data, window_length)

    # The estimators work in natural-log units; convert if a base is given.
    if base is not None:
        res /= np.log(base)

    return res
def _pad_along_last_axis(X, m):
    """Pad ``X`` with ``m`` copies of its first and last values (last axis).

    The padded array lets the rolling-window differences in the estimators
    be taken with plain slices.
    """
    pad_shape = (*X.shape[:-1], m)
    # Index with a list ([0] / [-1]) rather than a scalar so the last axis
    # is kept and the edge values can be broadcast to width m.
    left = np.broadcast_to(X[..., [0]], pad_shape)
    right = np.broadcast_to(X[..., [-1]], pad_shape)
    return np.concatenate((left, X, right), axis=-1)


def _vasicek_entropy(X, m):
    """Vasicek estimator; ``X`` is sorted along the last axis, ``m`` the window.

    NOTE(review): the original comment cites "[7] Eq. 1.3", while the
    reference list visible in ``differential_entropy`` ends at [6]
    (Noughabi, 2015) -- confirm the intended reference.
    """
    n = X.shape[-1]
    padded = _pad_along_last_axis(X, m)
    spacings = padded[..., 2 * m:] - padded[..., :-2 * m]
    return np.mean(np.log(n / (2 * m) * spacings), axis=-1)


def _van_es_entropy(X, m):
    """van Es estimator (original comment: "referred to as HVE_mn").

    The original comment notes that the published formula has a typo:
    there should be a log within the summation, as implemented here.
    """
    n = X.shape[-1]
    spacings = X[..., m:] - X[..., :-m]
    log_sum = np.sum(np.log((n + 1) / m * spacings), axis=-1)
    spacing_term = 1 / (n - m) * log_sum
    harmonic = np.sum(1 / np.arange(m, n + 1))
    return spacing_term + harmonic + np.log(m) - np.log(n + 1)


def _ebrahimi_entropy(X, m):
    """Ebrahimi estimator (original comment: "referred to as HE_mn")."""
    n = X.shape[-1]
    padded = _pad_along_last_axis(X, m)
    spacings = padded[..., 2 * m:] - padded[..., :-2 * m]

    idx = np.arange(1, n + 1).astype(float)
    # Weight is 2 in the interior and tapers towards 1 within m points of
    # either end; the upper-end rule takes precedence if both apply.
    weights = np.where(
        idx >= n - m + 1,
        1 + (n - idx) / m,
        np.where(idx <= m, 1 + (idx - 1) / m, 2.0),
    )

    return np.mean(np.log(n * spacings / (weights * m)), axis=-1)


def _correa_entropy(X, m):
    """Correa estimator (original comment: "referred to as HC_mn")."""
    n = X.shape[-1]
    padded = _pad_along_last_axis(X, m)

    centers = np.arange(1, n + 1)
    offsets = np.arange(-m, m + 1)[:, None]
    # 1-based window positions converted to 0-based indices into the
    # padded array.
    window_idx = centers + offsets + m - 1

    window = padded[..., window_idx]
    deviations = window - np.mean(window, axis=-2, keepdims=True)
    numerator = np.sum(deviations * offsets, axis=-2)
    denominator = n * np.sum(deviations ** 2, axis=-2)
    return -np.mean(np.log(numerator / denominator), axis=-1)