"""
Comparison of the efficiency of 1- vs 2-bin flat histogram simulations, as described in
https://doi.org/10.1021/acs.jpcb.3c00613
"""
import sys
import subprocess
import argparse
import random
import unittest
import pathlib
import numpy as np
from multiprocessing import Pool
# define parameters of a pure-component, grand canonical, flat histogram LJ simulation
params = {
"cubic_side_length": 8, "fstprt": "/feasst/particle/lj.fstprt", "beta": 1/0.7, "mu": -4.1603632,
#"cubic_side_length": 8, "fstprt": "/feasst/particle/lj.fstprt", "beta": 1/1.5, "mu": -2.352321,
"min_particles": 100, "min_sweeps": 1e9,
"trials_per": 1e6, "hours_per_adjust": 0.01, "hours_per_checkpoint": 1, "seed": random.randrange(1e9), "num_hours": 0.5,
"equilibration": 1e6, "num_nodes": 1, "procs_per_sim": 1, "procs_per_node": 4, "script": __file__,
"dir": str(pathlib.Path(__file__).parent.resolve())}
params["max_particles"] = params["min_particles"] + 1
params["num_minutes"] = round(params["num_hours"]*60)
params["hours_per_adjust"] = params["hours_per_adjust"]*params["procs_per_sim"]
params["hours_per_checkpoint"] = params["hours_per_checkpoint"]*params["procs_per_sim"]
params["num_hours_terminate"] = 0.95*params["num_hours"]*params["procs_per_sim"]
params["mu_init"] = params["mu"] + 1
# write fst script to run a single simulation
def mc_lj(params=params, script_file="launch.txt"):
with open(script_file, "w") as myfile: myfile.write("""
# first, initialize multiple clones into windows
CollectionMatrixSplice hours_per {hours_per_adjust} ln_prob_file lj_lnpin{min_particles}s{sim}.txt ln_prob_file_append true bounds_file lj_boundsn{min_particles}s{sim}.txt num_adjust_per_write 1 min_window_size 1
{window_custom}
Checkpoint checkpoint_file lj_checkpointn{min_particles}s{sim}.fst num_hours {hours_per_checkpoint} num_hours_terminate {num_hours_terminate}
# begin description of each MC clone
RandomMT19937 seed {seed}
Configuration cubic_side_length {cubic_side_length} particle_type0 {fstprt}
Potential Model LennardJones
Potential VisitModel LongRangeCorrections
ThermoParams beta {beta} chemical_potential {mu_init}
Metropolis
TrialTranslate weight 1 tunable_param 0.2 tunable_target_acceptance 0.25
Log trials_per_write {trials_per} output_file ljn{min_particles}s{sim}_[sim_index].txt
Tune
CheckEnergy trials_per_update {trials_per} decimal_places 8
# gcmc initialization and nvt equilibration
TrialAdd particle_type 0
Run until_num_particles [soft_macro_min]
RemoveTrial name TrialAdd
Run num_trials {equilibration}
RemoveModify name Tune
# gcmc tm production
ThermoParams beta {beta} chemical_potential {mu}
FlatHistogram Macrostate MacrostateNumParticles width 1 max {max_particles} min {min_particles} soft_macro_max [soft_macro_max] soft_macro_min [soft_macro_min] \
Bias TransitionMatrix min_sweeps {min_sweeps} new_sweep 1
#Bias WLTM min_sweeps {min_sweeps} new_sweep 1 min_flatness 25 collect_flatness 20 min_collect_sweeps 20
TrialTransfer weight 2 particle_type 0
#TrialTransfer weight 2 particle_type 0 reference_index 0 num_steps 4
Movie trials_per_write {trials_per} output_file ljn{min_particles}s{sim}_[sim_index].xyz
Tune trials_per_write {trials_per} output_file lj_tunen{min_particles}s{sim}_[sim_index].txt multistate true
Energy trials_per_write {trials_per} output_file lj_enn{min_particles}s{sim}_[sim_index].txt multistate true append true
CPUTime trials_per_write {trials_per} output_file lj_cpun{min_particles}s{sim}_[sim_index].txt append true
CriteriaWriter trials_per_write {trials_per} output_file lj_critn{min_particles}s{sim}_[sim_index].txt
CriteriaUpdater trials_per_update {trials_per}
#Run until_criteria_complete true
""".format(**params))
# write slurm script to fill nodes with simulations
def slurm_queue(min_particles):
params["min_particles"] = min_particles
with open("slurm.txt", "w") as myfile: myfile.write("""#!/bin/bash
#SBATCH -n {procs_per_node} -N {num_nodes} -t {num_minutes}:00 -o hostname_%j.out -e hostname_%j.out
echo "Running {script} ID $SLURM_JOB_ID on $(hostname) at $(date) in $PWD"
original_dir=$PWD; echo $original_dir
wrk=/wrk/$LOGNAME/$SLURM_JOB_ID/; mkdir -p $wrk; cd $wrk; echo "wrk:$wrk"
rsync -au $original_dir/* .; rm hostname_*
ls
export OMP_NUM_THREADS={procs_per_node}
python {script} --run_type 0 --dir $original_dir --min_particles {min_particles}
pyexit=$?
#python {script} --run_type 0 --task $SLURM_ARRAY_TASK_ID --dir $original_dir --min_particles {min_particles}
echo "sync"
rsync -au . $original_dir/
if [ $pyexit == 0 ]; then
echo "Job is done"
scancel $SLURM_ARRAY_JOB_ID
else
echo "Job is terminating, to be restarted again"
fi
echo "Time is $(date)"
""".format(**params))
# parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('--run_type', '-r', type=int, default=0, help="0: run, 1: submit to queue, 2: analyze only")
#parser.add_argument('--task', type=int, default=0, help="input by slurm scheduler. If >0, restart from checkpoint.")
parser.add_argument('--dir', type=str, default="", help="original working directory from which the script was launched")
parser.add_argument('--min_particles', type=int, default=100, help="minimum number of particles")
args = parser.parse_args()
params['min_particles'] = args.min_particles
params["max_particles"] = params["min_particles"] + 1
if args.dir:
    params['dir'] = args.dir
def dists_to_time_std(dists):
    """Return the elapsed CPU hours and the standard deviation of delta_ln_prob
    for each appended macrostate distribution."""
    time = list()
    std = list()
    for dist in dists:
        time.append(dist[1]['cpu_hours'])
        std.append(dist[0].dataframe()['delta_ln_prob_stdev'].values[1])
    return time, std
def linear_fit(x, b):
    """Linear fit in log-log space with the slope fixed at -1/2, the expected
    convergence rate of the standard deviation with simulation time; only the
    intercept, b, is fit."""
    return -0.5*x + b
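# With the slope fixed, std(t) ~ exp(b)/sqrt(t), so the CPU time needed to reach a
# target standard deviation scales as exp(2*b). For example, an intercept difference
# of ln(2) between two methods implies a factor of exp(2*ln(2)) = 4 in CPU time.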
# after the simulation is complete, perform some tests
class TestFlatHistogramLJ(unittest.TestCase):
def test(self):
# analyze grand canonical ensemble average number of particles
import pandas as pd
from pyfeasst import macrostate_distribution
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
show_plot = False
#show_plot = True
        intercepts = list()  # fitted intercepts, b, at fixed slope -1/2
        for s in range(3):  # sims 0 and 1 are 2-bin; sim 2 is 1-bin (see run() below)
            dists = macrostate_distribution.read_appended(file_name='lj_lnpin'+str(params['min_particles'])+'s'+str(s)+'.txt', num_states=2)
#print(dists)
time, std = dists_to_time_std(dists)
            logt = np.log(time[10:])  # skip the early transient
            logs = np.log(std[10:])
            popt, pcov = curve_fit(linear_fit, logt, logs)
            intercepts.append(popt[0])
#print(s, popt[0])
if show_plot:
label = '2bin'
if s == 2:
label = '1bin'
plt.plot(np.log(time), np.log(std), label=label)
                plt.plot(logt, linear_fit(logt, popt[0]), label=label + ' fit, intercept: ' + str(round(popt[0], 2)))
        z12 = np.exp(2*(intercepts[2] - np.average(intercepts[0:2])))
print('efficiency of a double macrostate simulation compared to two single macrostate simulations', z12)
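        # Because the required CPU time scales as exp(2*b), z12 is the ratio of the
        # 1-bin CPU time to the average 2-bin CPU time: z12 > 1 favors the 2-bin simulation.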
if show_plot:
plt.xlabel('log(time)', fontsize=16)
plt.ylabel('log(std_of_average)', fontsize=16)
#plt.title(r'$z_{12}=$'+str(round(z12, 2)), fontsize=16)
plt.title('efficiency of a double macrostate simulation\n compared to two single macrostate simulations\n'+r'$z_{12}=$'+str(round(z12, 2)), fontsize=16)
plt.legend()
plt.show()
# run the simulation and, if complete, analyze.
def run(sim):
params['sim'] = sim
params["seed"] = random.randrange(1e9)
# if args.task == 0:
    # sims 0 and 1: a single window that spans both macrostates (2-bin)
    params['window_custom'] = """WindowCustom min0 {min_particles} max {max_particles} overlap 0""".format(**params)
    params['procs_per_sim'] = 1
    if sim >= 2*params['num_1bin_batch']:
        # remaining sims: two single-macrostate windows spliced together (1-bin)
        params['window_custom'] = """WindowCustom min0 {min_particles} min1 {max_particles} max {max_particles} overlap 0""".format(**params)
        params['procs_per_sim'] = 1
file_name = "lj_launchn"+str(params['min_particles'])+"s"+str(sim)+".txt"
mc_lj(params, script_file=file_name)
    # NOTE: hard-coded path to the FEASST binary; adjust for your installation
    syscode = subprocess.call("~/binning_efficiency/feasst/build/bin/fst < " + file_name + " > lj_launchn"+str(params['min_particles'])+"s"+str(sim)+".log", shell=True, executable='/bin/bash')
# else:
# syscode = subprocess.call("~/binning_efficiency/feasst/build/bin/rst lj_checkpointn"+str(params['min_particles'])+"s"+str(sim)+".fst", shell=True, executable='/bin/bash')
if syscode == 0:
unittest.main(argv=[''], verbosity=2, exit=False)
return syscode
if __name__ == "__main__":
if args.run_type == 0:
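        # each batch of 4 processors runs two 2-bin sims (1 proc each) and one 1-bin
        # sim (2 procs, one per single-macrostate window)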
params['num_1bin_batch'] = int(params['procs_per_node']/4)
        with Pool(3*params['num_1bin_batch']) as pool:
            codes = pool.map(run, range(3*params['num_1bin_batch']))
if np.count_nonzero(codes) > 0:
sys.exit(1)
elif args.run_type == 1:
for min_particles in [200]:
slurm_queue(min_particles=min_particles)
subprocess.call("sbatch slurm.txt | awk '{print $4}' >> launch_ids.txt", shell=True, executable='/bin/bash')
elif args.run_type == 2:
unittest.main(argv=[''], verbosity=2, exit=False)
else:
        assert False, "unrecognized run_type: " + str(args.run_type)