# Source code for multistate_accumulator

"""
This module analyzes and manipulates multistate accumulators.
An example of a multistate accumulator is the canonical ensemble average energy
for a given number of particles.
"""

from pathlib import Path
import pandas as pd


# [docs]
def splice_by_max_column(prefix, suffix, column='moment0', crit_prefix=None, crit_suffix=None):
    """
    Combine all files with matching prefix and suffix, each with the same number of states,
    by assigning rows based on the maximum in the given column.

    Files are always processed in sorted order.
    If crit_prefix is None, the first file is the starting point and each row of a later
    file replaces the current row only when its value in column is strictly larger.
    If crit_prefix is given, column is not used.
    Instead, the first line of each criteria file supplies soft_min and soft_max, and
    only the rows for those states (inclusive) are taken from the corresponding data file.
    The selected rows are written to prefix+suffix+'_agg.csv', which is then read back
    using only its first five columns.

    :param str prefix: Find all files beginning with this prefix.
    :param str suffix: Find all files ending with this suffix.
    :param str column:
        Use the row with the maximum column. For example, moment0 is the highest number of samples.
    :param str crit_prefix: Use criteria to read only specific macrostates, if given.
    :param str crit_suffix: Use criteria to read only specific macrostates, if given.
    :return:
        The spliced pandas DataFrame, or None if crit_prefix is None and no files match.

    >>> import pandas as pd
    >>> from pyfeasst import multistate_accumulator
    >>> spliced = multistate_accumulator.splice_by_max_column(prefix="../../tests/lj_enn0s",
    ...                                                       suffix='.txt')
    >>> en0 = pd.read_csv('../../tests/lj_enn0s00.txt')
    >>> en1 = pd.read_csv('../../tests/lj_enn0s01.txt')
    >>> assert spliced['average'][136] == en0['average'][136]
    >>> assert spliced['average'][136] != en1['average'][136]
    >>> assert spliced['average'][137] == en0['average'][137]
    >>> assert spliced['average'][137] == en1['average'][137]
    >>> assert spliced['average'][138] != en0['average'][138]
    >>> assert spliced['average'][138] == en1['average'][138]
    """
    if crit_prefix is None:
        spliced = None
        # Sorted so that ties (equal values in column) are always resolved in favor of
        # the same file, regardless of the order the filesystem lists them in.
        for filename in sorted(Path('.').glob(prefix + '*' + suffix)):
            addition = pd.read_csv(filename)
            if spliced is None:
                spliced = addition
                continue
            newer = addition[addition[column] > spliced[column]]
            if not newer.empty:
                # Assign by the exact row labels of the newer rows. A first:last slice
                # would also cover rows in between that are not newer, and because the
                # assignment aligns on the index those rows would become NaN.
                spliced.loc[newer.index] = newer
        return spliced

    rows = []
    for filename in sorted(Path('.').glob(crit_prefix + '*' + crit_suffix)):
        with open(filename, 'r') as file1:
            header = file1.readline()
        # NOTE(security): exec runs the criteria header as Python code, so criteria
        # files must come from a trusted source. A private namespace is used so that
        # the parsed dictionary does not leak into this module's globals.
        namespace = {}
        exec('iprm={' + header[1:] + '}', namespace)
        iprm = namespace['iprm']
        rows.append((iprm['soft_min'], iprm['soft_max']))

    agg_name = prefix + suffix + '_agg.csv'
    # The aggregate file of a previous call can itself match prefix*suffix
    # (e.g., when suffix is '.csv'); it is an output, never an input.
    filenames = [filename for filename in sorted(Path('.').glob(prefix + '*' + suffix))
                 if filename != Path(agg_name)]
    lines = []
    for ifile, filename in enumerate(filenames):
        soft_min, soft_max = rows[ifile]
        with open(filename, 'r') as file1:
            tlines = file1.readlines()
        if ifile == 0:
            lines.append(tlines[0])  # keep the header line of the first file only
        # The +1 offset skips the header, and soft_max is inclusive.
        lines.extend(tlines[soft_min + 1:soft_max + 2])
    with open(agg_name, 'w') as file1:
        file1.writelines(lines)
    return pd.read_csv(agg_name, usecols=range(0, 5))



# [docs]
def splice_by_node(prefix, suffix, num_nodes, extra_overlap=0):
    """
    Use splice_by_max_column for each node, with prefix=prefix+node, then join the nodes.

    For every node after the first, the first 1+extra_overlap rows are dropped and the
    last state of the previous node is added to the 'state' column, so that the states
    continue from the previous node.
    The per-node results are then concatenated and the index is reset, which keeps the
    previous per-node index as a column named 'index'.

    :param str prefix: The node number is appended to this prefix to find the files of each node.
    :param str suffix: Find all files ending with this suffix.
    :param int num_nodes: The number of nodes to splice.
    :param int extra_overlap:
        Number of rows, in addition to the first, to drop from the beginning of each node
        after the first.
    :return: The concatenated pandas DataFrame.

    >>> import pandas as pd
    >>> from pyfeasst import multistate_accumulator
    >>> from pyfeasst import macrostate_distribution
    >>> spliced = splice_by_node(prefix='../../tests/lj_enn', suffix='.txt', num_nodes=2)
    >>> round(float(spliced['average'][375]), 8)
    -2001.76687973
    >>> round(float(spliced['average'][376]), 8)
    -2012.46764871
    >>> len(spliced)
    476
    >>> spliced.to_csv('spliced.csv')
    >>> lnpi = macrostate_distribution.splice_files(prefix='../../tests/lj_lnpin', suffix='.txt')
    >>> lnpi.concat_dataframe(spliced, add_prefix='e_')
    >>> round(float(lnpi.equilibrium()), 8)
    -0.31402411
    >>> vapor, liquid = lnpi.split()
    >>> round(float(-vapor.ln_prob()[0]*0.7/8**3), 8)  # pressure
    0.00136904
    >>> round(float(vapor.ensemble_average('e_average')/vapor.average_macrostate()), 8)
    -0.02500369
    >>> round(float(liquid.ensemble_average('e_average')/liquid.average_macrostate()), 8)
    -6.09838831
    """
    node_data = []
    for node in range(num_nodes):
        dat = splice_by_max_column(prefix=prefix+str(node), suffix=suffix)
        if node > 0:
            # Remove the rows that overlap with the end of the previous node.
            dat.drop(dat.head(1 + extra_overlap).index, inplace=True)
            # The previous node was already shifted, so its last state is the offset.
            dat['state'] += node_data[-1]['state'].values[-1]
        node_data.append(dat)
    data = pd.concat(node_data)
    data.reset_index(inplace=True)
    return data



# [docs]
def splice(prefix, suffix, extra_overlap=0):
    """
    Concatenate the pandas csv files that match prefix + * + suffix, in sorted order.
    If there are more than ten files, they must contain leading zeros or they will
    be out of order.
    The first 1+extra_overlap rows of every file after the first are dropped.
    The index of the result is reset, which keeps the previous per-file index as a
    column named 'index'.

    :param str prefix: Find all files beginning with this prefix.
    :param str suffix: Find all files ending with this suffix.
    :param int extra_overlap:
        Number of rows, in addition to the first, to drop from the beginning of each file
        after the first.

    >>> import numpy as np
    >>> from pyfeasst import multistate_accumulator
    >>> spliced = multistate_accumulator.splice(prefix='../../tests/ljn0s', suffix='_en.csv')
    >>> round(float(np.average(spliced['average'])), 8)
    -2.91222716
    """
    pieces = []
    matches = sorted(Path('.').glob(prefix + '*' + suffix))
    for position, path in enumerate(matches):
        piece = pd.read_csv(path)
        if position > 0:
            overlap = piece.head(1 + extra_overlap)
            piece.drop(overlap.index, inplace=True)
        pieces.append(piece)
    merged = pd.concat(pieces)
    merged.reset_index(inplace=True)
    return merged


if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module's docstrings.
    from doctest import testmod
    testmod()