Temperature independent negative logarithm of average observable

This type of extrapolation is useful for hard-sphere chemical potentials extrapolated in temperature or volume, and more generally for any quantity involving the logarithm of a probability (e.g., potentials of mean force, PMFs).

This is in fact easier than Case 2 because we do not need to augment the data in any way; we just set a flag indicating that the quantity to extrapolate is the negative logarithm of an ensemble average, i.e., post_func = "minus_log". Note, though, that the -log calculation is handled in the definition of the derivatives (even at zeroth order), so we pass the raw data, not the -log of the data. Also note that post_func can be any function built from sympy expressions that modifies the average observable we want to extrapolate, so passing post_func = lambda x: -sympy.log(x) would be equivalent. If instead you want to apply the function to the observable itself before averaging, that is simply a different observable and none of the machinery in this notebook is needed: it is the difference between \(\ln \langle x \rangle\) and \(\langle \ln x \rangle\).
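To make that distinction concrete, here is a minimal sketch using toy uniform samples (hypothetical data, separate from the ideal gas workflow below) showing that the two quantities genuinely differ; only the first is what post_func="minus_log" targets.

# Toy illustration (hypothetical data, not part of the workflow below)
import numpy as np

x_toy = np.random.default_rng(0).uniform(0.1, 1.0, size=1000)
print(-np.log(x_toy.mean()))  # -ln <x>: a function of the average; what post_func="minus_log" computes
print(-np.log(x_toy).mean())  # <-ln x>: the average of a transformed observable, i.e., a different observable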

%matplotlib inline

import cmomy
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# Import the main package and the idealgas test module
import thermoextrap as xtrap
from thermoextrap import idealgas

rng = cmomy.random.default_rng(seed=0)

# Define test betas and reference beta
betas = np.arange(0.1, 10.0, 0.5)
beta_ref = betas[11]
vol = 1.0

# Define orders to extrapolate to
orders = [1, 2, 4, 6]
order = orders[-1]

npart = 1000  # Number of particles (in single configuration)
nconfig = 100_000  # Number of configurations

# Generate all the data we could want
xdata, udata = idealgas.generate_data((nconfig, npart), beta_ref, vol)
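Before building the model, a quick sanity check (a sketch; xdata holds per-configuration observable values and udata the matching potential energies) confirms the arrays line up along the configuration dimension.

# Sketch: confirm the data arrays share the configuration dimension
print(xdata.shape, udata.shape)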
# Create and train extrapolation model
xem_log = xtrap.beta.factory_extrapmodel(
    beta=beta_ref,
    post_func="minus_log",
    data=xtrap.DataCentralMomentsVals.from_vals(
        order=orders[-1], xv=xdata, uv=udata, deriv_dim=None, central=True
    ),
)

# Check the derivatives
print("Model parameters (derivatives):")
print(xem_log.derivs(norm=False))
print("\n")

# Finally, look at predictions
print("Model predictions:")
print(xem_log.predict(betas[:4], order=2))
print("\n")
Model parameters (derivatives):
<xarray.DataArray 'x' (order: 7)> Size: 56B
array([ 1.7438,  0.1615, -0.0186,  0.015 ,  0.612 , -1.9426, -1.9077])
Dimensions without coordinates: order


Model predictions:
<xarray.DataArray (beta: 4)> Size: 32B
array([0.5741, 0.7037, 0.8286, 0.9489])
Coordinates:
  * beta     (beta) float64 32B 0.1 0.6 1.1 1.6
    dalpha   (beta) float64 32B -5.5 -5.0 -4.5 -4.0
    beta0    float64 8B 5.6
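Because the ideal gas model is analytically solvable, we can sanity-check these predictions against the exact result (a short sketch using idealgas.x_ave, the same function used for the reference curves below).

# Exact -ln<x> at the same state points, for comparison with the order-2 predictions above
print(-np.log(idealgas.x_ave(betas[:4], vol)))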
# And bootstrapped uncertainties
print("Bootstrapped uncertainties in predictions:")
print(xem_log.resample(nrep=100).predict(betas[:4], order=3).std("rep"))
Bootstrapped uncertainties in predictions:
<xarray.DataArray (beta: 4)> Size: 32B
array([2.0496, 1.54  , 1.1227, 0.7887])
Coordinates:
  * beta     (beta) float64 32B 0.1 0.6 1.1 1.6
    dalpha   (beta) float64 32B -5.5 -5.0 -4.5 -4.0
    beta0    float64 8B 5.6
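The bootstrap estimate is itself noisy for small nrep; a quick stability check (a sketch reusing the same resample and predict calls) is to increase the number of replicates and confirm the uncertainties barely change.

# Sketch: rerun the bootstrap with more replicates and compare to the output above
print(xem_log.resample(nrep=500).predict(betas[:4], order=3).std("rep"))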
# Repeat comparison of results, but for -ln<x> instead of <x>

fig, ax = plt.subplots(len(orders), sharex=True, sharey=True)

nsampvals = 10 ** np.arange(1, 6)  # 10, 100, ..., 100000 configurations
nsampcolors = plt.cm.viridis(np.linspace(0.0, 1.0, len(nsampvals), endpoint=False))

# First plot the analytical result
for a in ax:
    a.plot(betas, -np.log(idealgas.x_ave(betas, vol)), "k--", linewidth=2.0)

# Next look at extrapolation with an infinite number of samples
# This is possible in the ideal gas model in both temperature and volume
for j, o in enumerate(orders):
    true_extrap, true_derivs = idealgas.x_beta_extrap_minuslog(o, beta_ref, betas, vol)
    ax[j].plot(betas, true_extrap, "r-", zorder=0)
    if j == len(orders) - 1:
        print(f"True extrapolation coefficients: {true_derivs}")

for i, n in enumerate(nsampvals):
    thisinds = rng.choice(len(xdata), size=n, replace=False)

    # Train an extrapolation model on this subsample; its parameters are the derivatives
    xem_log = xtrap.beta.factory_extrapmodel(
        beta=beta_ref,
        post_func="minus_log",
        data=xtrap.DataCentralMomentsVals.from_vals(
            order=orders[-1], uv=udata[thisinds], xv=xdata[thisinds], central=True
        ),
    )
    out = xem_log.predict(betas, cumsum=True)
    # Flatten because the observable is 1-D
    print(f"\t With N_configs = {n:6d}: {xem_log.derivs(norm=False).values.flatten()}")
    for j, o in enumerate(orders):
        out.sel(order=o).plot(
            marker="s",
            ms=4,
            color=nsampcolors[i],
            ls="None",
            label=f"N={n}",
            ax=ax[j],
        )

ax[2].set_ylabel(r"$-\ln \langle x \rangle$")
ax[-1].set_xlabel(r"$\beta$")

for j, o in enumerate(orders):
    ax[j].annotate(
        f"O({o}) Extrapolation", xy=(0.4, 0.7), xycoords="axes fraction", fontsize=9
    )

ax[0].set_ylim((0.0, 3.0))
ax[-1].yaxis.set_major_locator(mpl.ticker.MaxNLocator(nbins=4, prune="both"))

fig.tight_layout()
fig.subplots_adjust(hspace=0.0)

for a in ax:
    a.set_title(None)

plt.show()
True extrapolation coefficients: [ 1.7438e+00  1.6106e-01 -1.7727e-02  3.6676e-04  2.1114e-03 -1.5377e-03
  3.9927e-04]
	 With N_configs =     10: [ 1.7469e+00  1.1364e-01  1.3435e-01 -8.7672e-01 -1.8012e+01 -1.0596e+02
  3.2007e+03]
	 With N_configs =    100: [ 1.7423e+00  1.3459e-01 -1.9823e-01  1.2182e+00 -1.0755e+01 -6.0855e+01
  3.2539e+03]
	 With N_configs =   1000: [ 1.7426  0.1588 -0.0696 -0.1739  0.8373 46.7624 56.4464]
	 With N_configs =  10000: [  1.7436   0.161   -0.0531  -0.2102   4.1724 -23.4454  22.6547]
	 With N_configs = 100000: [ 1.7438  0.1615 -0.0186  0.015   0.612  -1.9426 -1.9077]
[Figure: one panel per extrapolation order O(1), O(2), O(4), O(6), plotting -ln<x> versus beta; the analytical result (black dashed), the infinite-sampling extrapolation (red), and finite-sample estimates colored by number of configurations.]
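As a final check, the last pass through the loop leaves xem_log trained on all 100,000 configurations, so its derivative estimates can be compared directly against the exact coefficients (a sketch reusing true_derivs from the cell above).

# Sketch: absolute error of the full-data derivative estimates
est_derivs = xem_log.derivs(norm=False).values.flatten()
print(np.abs(est_derivs - true_derivs))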