Temperature independent negative logarithm of average observable

This type of extrapolation is useful for hard-sphere chemical potentials extrapolated in temperature or volume, and more generally for any quantity involving the logarithm of a probability (e.g., potentials of mean force, PMFs).

This is in fact easier than Case 2 because we do not need to augment the data in any way; we just set a flag indicating that the quantity to extrapolate is the negative logarithm of an ensemble average, i.e., post_func = "minus_log". Note, though, that the -log calculation is handled in the definition of the derivatives (even at zeroth order), so we pass the raw data, not the -log of the data. Also note that post_func can be any function built from sympy expressions that modifies the average observable we want to extrapolate, so passing post_func = lambda x: -sympy.log(x) would be equivalent. If instead you want to apply the function to the observable itself before averaging, that is simply a different observable and none of the machinery in this notebook is needed: it is the difference between \(\ln \langle x \rangle\) and \(\langle \ln x \rangle\).
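To make that distinction concrete, here is a minimal sketch using toy uniform samples (hypothetical data, separate from the ideal gas workflow below) showing that the two quantities genuinely differ; only the first is what post_func="minus_log" targets.

# Toy illustration (hypothetical data, not part of the workflow below)
import numpy as np

x_toy = np.random.default_rng(0).uniform(0.1, 1.0, size=1000)
print(-np.log(x_toy.mean()))  # -ln <x>: a function of the average; what post_func="minus_log" computes
print(-np.log(x_toy).mean())  # <-ln x>: the average of a transformed observable, i.e., a different observable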

%matplotlib inline

import cmomy
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# Import the main package and the idealgas test module
import thermoextrap as xtrap
from thermoextrap import idealgas

rng = cmomy.random.default_rng(seed=0)

# Define test betas and reference beta
betas = np.arange(0.1, 10.0, 0.5)
beta_ref = betas[11]
vol = 1.0

# Define orders to extrapolate to
orders = [1, 2, 4, 6]
order = orders[-1]

npart = 1000  # Number of particles (in single configuration)
nconfig = 100_000  # Number of configurations

# Generate all the data we could want
xdata, udata = idealgas.generate_data((nconfig, npart), beta_ref, vol)
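Before building the model, a quick sanity check (a sketch; xdata holds per-configuration observable values and udata the matching potential energies) confirms the arrays line up along the configuration dimension.

# Sketch: confirm the data arrays share the configuration dimension
print(xdata.shape, udata.shape)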
# Create and train extrapolation model
xem_log = xtrap.beta.factory_extrapmodel(
    beta=beta_ref,
    post_func="minus_log",
    data=xtrap.DataCentralMomentsVals.from_vals(
        order=orders[-1], xv=xdata, uv=udata, deriv_dim=None, central=True
    ),
)

# Check the derivatives
print("Model parameters (derivatives):")
print(xem_log.derivs(norm=False))
print("\n")

# Finally, look at predictions
print("Model predictions:")
print(xem_log.predict(betas[:4], order=2))
print("\n")
Model parameters (derivatives):
<xarray.DataArray 'x' (order: 7)> Size: 56B
array([ 1.7438,  0.1615, -0.0186,  0.015 ,  0.612 , -1.9426, -1.9077])
Dimensions without coordinates: order


Model predictions:
<xarray.DataArray (beta: 4)> Size: 32B
array([0.5741, 0.7037, 0.8286, 0.9489])
Coordinates:
  * beta     (beta) float64 32B 0.1 0.6 1.1 1.6
    dalpha   (beta) float64 32B -5.5 -5.0 -4.5 -4.0
    beta0    float64 8B 5.6
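Because the ideal gas model is analytically solvable, we can sanity-check these predictions against the exact result (a short sketch using idealgas.x_ave, the same function used for the reference curves below).

# Exact -ln<x> at the same state points, for comparison with the order-2 predictions above
print(-np.log(idealgas.x_ave(betas[:4], vol)))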
# And bootstrapped uncertainties
print("Bootstrapped uncertainties in predictions:")
print(xem_log.resample(nrep=100).predict(betas[:4], order=3).std("rep"))
Bootstrapped uncertainties in predictions:
<xarray.DataArray (beta: 4)> Size: 32B
array([2.0496, 1.54  , 1.1227, 0.7887])
Coordinates:
  * beta     (beta) float64 32B 0.1 0.6 1.1 1.6
    dalpha   (beta) float64 32B -5.5 -5.0 -4.5 -4.0
    beta0    float64 8B 5.6
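The bootstrap estimate is itself noisy for small nrep; a quick stability check (a sketch reusing the same resample and predict calls) is to increase the number of replicates and confirm the uncertainties barely change.

# Sketch: rerun the bootstrap with more replicates and compare to the output above
print(xem_log.resample(nrep=500).predict(betas[:4], order=3).std("rep"))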
# Repeat comparison of results, but for -ln<x> instead of <x>

fig, ax = plt.subplots(len(orders), sharex=True, sharey=True)

nsampvals = 10 ** np.arange(1, 6)  # 10, 100, ..., 100000 configurations
nsampcolors = plt.cm.viridis(np.linspace(0.0, 1.0, len(nsampvals), endpoint=False))

# First plot the analytical result
for a in ax:
    a.plot(betas, -np.log(idealgas.x_ave(betas, vol)), "k--", linewidth=2.0)

# Next look at extrapolation with an infinite number of samples
# This is possible in the ideal gas model in both temperature and volume
for j, o in enumerate(orders):
    true_extrap, true_derivs = idealgas.x_beta_extrap_minuslog(o, beta_ref, betas, vol)
    ax[j].plot(betas, true_extrap, "r-", zorder=0)
    if j == len(orders) - 1:
        print(f"True extrapolation coefficients: {true_derivs}")

for i, n in enumerate(nsampvals):
    thisinds = rng.choice(len(xdata), size=n, replace=False)

    # Train an extrapolation model on this subsample; its parameters are the derivatives
    xem_log = xtrap.beta.factory_extrapmodel(
        beta=beta_ref,
        post_func="minus_log",
        data=xtrap.DataCentralMomentsVals.from_vals(
            order=orders[-1], uv=udata[thisinds], xv=xdata[thisinds], central=True
        ),
    )
    out = xem_log.predict(betas, cumsum=True)
    # Flatten because the observable is 1-D
    print(f"\t With N_configs = {n:6d}: {xem_log.derivs(norm=False).values.flatten()}")
    for j, o in enumerate(orders):
        out.sel(order=o).plot(
            marker="s",
            ms=4,
            color=nsampcolors[i],
            ls="None",
            label=f"N={n}",
            ax=ax[j],
        )

ax[2].set_ylabel(r"$-\ln \langle x \rangle$")
ax[-1].set_xlabel(r"$\beta$")

for j, o in enumerate(orders):
    ax[j].annotate(
        f"O({o}) Extrapolation", xy=(0.4, 0.7), xycoords="axes fraction", fontsize=9
    )

ax[0].set_ylim((0.0, 3.0))
ax[-1].yaxis.set_major_locator(mpl.ticker.MaxNLocator(nbins=4, prune="both"))

fig.tight_layout()
fig.subplots_adjust(hspace=0.0)

for a in ax:
    a.set_title(None)

plt.show()
True extrapolation coefficients: [ 1.7438e+00  1.6106e-01 -1.7727e-02  3.6676e-04  2.1114e-03 -1.5377e-03
  3.9927e-04]
	 With N_configs =     10: [ 1.7469e+00  1.1364e-01  1.3435e-01 -8.7672e-01 -1.8012e+01 -1.0596e+02
  3.2007e+03]
	 With N_configs =    100: [ 1.7423e+00  1.3459e-01 -1.9823e-01  1.2182e+00 -1.0755e+01 -6.0855e+01
  3.2539e+03]
	 With N_configs =   1000: [ 1.7426  0.1588 -0.0696 -0.1739  0.8373 46.7624 56.4464]
	 With N_configs =  10000: [  1.7436   0.161   -0.0531  -0.2102   4.1724 -23.4454  22.6547]
	 With N_configs = 100000: [ 1.7438  0.1615 -0.0186  0.015   0.612  -1.9426 -1.9077]
[Figure: one panel per extrapolation order O(1), O(2), O(4), O(6), plotting -ln<x> versus beta; the analytical result (black dashed), the infinite-sampling extrapolation (red), and finite-sample estimates colored by number of configurations.]
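As a final check, the last pass through the loop leaves xem_log trained on all 100,000 configurations, so its derivative estimates can be compared directly against the exact coefficients (a sketch reusing true_derivs from the cell above).

# Sketch: absolute error of the full-data derivative estimates
est_derivs = xem_log.derivs(norm=False).values.flatten()
print(np.abs(est_derivs - true_derivs))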