CG simulation of a protein using anisotropic precomputed potentials
"""
Generate and simulate a coarse-grained potential of a protein.
This is done in a number of steps.
In the first step, generate an orientation file.
This file is used to determine if the orientation is identical to a previous orientation.
For example, when the spherical polar angle is zero, the orientation is the same for all azimuthal
angles.
Orientation files may be included with TabulateTwoRigidBody3D::input_orientation_file.
For each orientation, a value of -1 identifies a unique orientation and a value of zero or more
identifies an earlier orientation that is identical.
These orientation files only need to be generated once per num_orientations_per_pi value.
After this step, the next script after_1_contact is called in post_process.
"""
import copy
import os.path
import argparse
import subprocess
import pandas as pd
from pyfeasst import fstio
def parse():
""" Parse arguments for this and following scripts """
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--feasst_install', type=str, default='../../../build/',
help='FEASST install directory (e.g., the path to build)')
parser.add_argument('--num_orientations_per_pi', type=int, default=2,
help='maximum number of orientations per 180 degrees')
parser.add_argument('--run_type', '-r', type=int, default=0,
help='0: run, 1: submit to queue, 2: post-process')
parser.add_argument('--hours_terminate', type=float, default=14*24,
help='hours until termination')
parser.add_argument('--num_nodes', type=int, default=1, help='Number of nodes in queue')
parser.add_argument('--procs_per_node', type=int, default=16, help='Number processors in a node')
parser.add_argument('--scratch', type=str, default=None,
help='Optionally write scheduled job to scratch/logname/jobid.')
parser.add_argument('--queue_flags', type=str, default="",
help='extra flags for queue (e.g., for slurm, "-p queue")')
parser.add_argument('--node', type=int, default=0, help='node ID')
parser.add_argument('--queue_id', type=int, default=-1, help='If != -1, read args from file')
parser.add_argument('--queue_task', type=int, default=0, help='If > 0, restart from checkpoint')
# additional in contact step (or more)
parser.add_argument('--pH', type=float, default=6, help='pH')
parser.add_argument('--domain1', type=str, default='4lyt', help='fstprt file')
parser.add_argument('--domain2', type=str, default='4lyt', help='fstprt file')
parser.add_argument('--contact_xyz_file', type=str, default='',
help='If not empty, print contact configuration for each orientation')
parser.add_argument('--contact_xyz_index', type=int, default=-1,
help='If not -1, print contact configuration for one orientation')
# additional in energy step (or more)
parser.add_argument('--num_z', type=int, default=7, help='num of distances per orientation')
parser.add_argument('--gamma', type=float, default=-4, help='stretching exponent for table')
parser.add_argument('--temperature', type=float, default=298.15, help='temperature in Kelvin')
parser.add_argument('--ionic_strength', type=float, default=0.15, help='formulation ionic strength of NaCl in Molar units')
parser.add_argument('--smoothing_distance', type=float, default=2, help='distance from cutoff to smooth to zero')
# additional in b2 step
parser.add_argument('--table_file', type=str, default='energy.txt', help='file describe cg table potential.')
parser.add_argument('--cubic_side_length', type=float, default=1e4, help='cubic side length')
parser.add_argument('--molecular_weight', type=float, default=14315.03534, help='molecular weight of protein in g/mol')
parser.add_argument('--reference_sigma', type=float, default=30, help='size of hard sphere on COM of rigid domain')
parser.add_argument('--ignore_energy', type=str, default='false', help='true if interaction is excluded volume only.')
parser.add_argument('--ignore_intra_energy', type=str, default='false', help='true if intra interaction is excluded volume only.')
parser.add_argument('--num_beta_taylor', type=int, default=10, help='number of Tayler series derivatives')
parser.add_argument('--show_plot', type=int, default=0, help='show extrapolation plot if != 0')
parser.add_argument('--trials_per', type=int, default=int(1e5), help='number of trials per iteration')
parser.add_argument('--equilibration', type=int, default=int(2e1), help='number of iterations in equilibration')
parser.add_argument('--production', type=int, default=int(2e1), help='number of iterations in production')
parser.add_argument('--seed', type=int, default=-1,
help='Random number generator seed. If -1, assign random seed to each sim.')
parser.add_argument('--fstprt', type=str, default='/feasst/plugin/aniso/particle/aniso_tabular.fstprt', help='fstprt file')
return parser
def write_feasst_script(params, script_file):
""" Write fst script for a single simulation with keys of params {} enclosed. """
with open(script_file, 'w', encoding='utf-8') as myfile:
myfile.write("""
MonteCarlo
Configuration cubic_side_length 2e2 particle_type0 /feasst/particle/spce.fstprt particle_type1 /feasst/particle/spce.fstprt \
add_particles_of_type0 1 add_particles_of_type1 1 \
group0 fixed fixed_particle_type 0 group1 mobile mobile_particle_type 1
Potential Model ModelEmpty
TabulateTwoRigidBody3D num_orientations_per_pi {num_orientations_per_pi} output_orientation_file {prefix}{num_orientations_per_pi}.txt
MonteCarlo
Configuration cubic_side_length 2e2 particle_type0 /feasst/particle/spce.fstprt particle_type1 /feasst/particle/propane.fstprt \
add_particles_of_type0 1 add_particles_of_type1 1 \
group0 fixed fixed_particle_type 0 group1 mobile mobile_particle_type 1
Potential Model ModelEmpty
TabulateTwoRigidBody3D num_orientations_per_pi {num_orientations_per_pi} output_orientation_file {prefix}{num_orientations_per_pi}_ij.txt
""".format(**params))
def post_process(params):
""" Check the final file length and then launch the next step. """
nk = params['num_orientations_per_pi']
for ij in [True, False]:
extra = ''
expected = (2*nk+1)**2 * (nk+1)**3
if ij:
expected = (2*nk+1)**3 * (nk+1)**2
extra = '_ij'
#print('expected number of orientations:', expected)
df = pd.read_csv('''{prefix}{num_orientations_per_pi}'''.format(**params)+extra+'.txt', skiprows=1, sep=r'\s+')
assert expected == len(df.columns)
print('launching after_1_1_contact.py')
subprocess.check_call('python after_1_1_contact.py '+fstio.dict_to_argparse(params['original_args']),
shell=True, executable='/bin/bash')
if __name__ == '__main__':
prsr = parse()
args, unknown_args = prsr.parse_known_args()
assert len(unknown_args) == 0, 'An unknown argument was included: '+str(unknown_args)
prms = vars(args)
prms['original_args'] = copy.deepcopy(prms)
prms['script'] = __file__
prms['prefix'] = 'orientations'
prms['sim_id_file'] = prms['prefix'] + '_sim_ids.txt'
prms['minutes'] = int(prms['hours_terminate']*60) # minutes allocated on queue
prms['hours_terminate'] = 0.99*prms['hours_terminate'] - 0.0333 # terminate before queue
prms['procs_per_sim'] = prms['procs_per_node']
prms['num_nodes'] = 1
prms['num_sims'] = prms['num_nodes']
orient_file_prefix = '''{prefix}{num_orientations_per_pi}'''.format(**prms)
if os.path.isfile(orient_file_prefix+'.txt') and \
os.path.isfile(orient_file_prefix+'_ij.txt'):
print('using existing:', orient_file_prefix)
post_process(params=prms)
else:
fstio.run_simulations(params=prms,
sim_node_dependent_params=None,
write_feasst_script=write_feasst_script,
post_process=post_process,
queue_function=fstio.slurm_single_node,
args=args)
"""
Generate a contact table of a protein, by default described by the provided 4lyt.pdb file.
Each atom is modeled as a hard sphere and the contact distance is computed over a number of
orientations.
"""
import copy
import sys
import os.path
import subprocess
from pyfeasst import fstio
from pyfeasst import coarse_grain_pdb
from launch_1_cg_protein import parse
def write_feasst_script(params, script_file):
""" Write fst script for a single simulation with keys of params {} enclosed. """
with open(script_file, 'w', encoding='utf-8') as myfile:
myfile.write("""
MonteCarlo
Configuration cubic_side_length 2e2 particle_type0 {domain1}.fstprt particle_type1 {domain2}.fstprt \
add_particles_of_type0 1 add_particles_of_type1 1 cutoff 0 set_cutoff_min_to_sigma true {vis_extra}
Potential Model HardSphere VisitModel VisitModelCell min_length max_sigma energy_cutoff 1e100
TabulateTwoRigidBody3D proc {sim} num_proc {num_sims} input_orientation_file {orientation_file} num_z -1 output_table_file {prefix}{sim}.txt {contact_xyz_file} contact_xyz_index {contact_xyz_index}
""".format(**params))
def post_process(params):
""" Combining table files, checking length and then launching the next step """
subprocess.check_call(['sleep', '5']) # without this command, combine doesn't read all tables?
if not os.path.isfile('''{prefix}.txt'''.format(**params)):
fstio.combine_tables_two_rigid_body(prefix=params['prefix'], suffix='.txt',
num_procs=params['num_sims'])
if params['num_orientations_per_pi'] == 6 and params['pH'] == 6 and \
params['domain1'] == '4lyt' and params['domain2'] == params['domain1']:
with open("""{prefix}.txt""".format(**params), 'r', encoding="utf-8") as file1:
lines = file1.readlines()
#print(len(lines))
assert len(lines) == 681
assert lines[0] == 'site_types 1 0\n'
assert lines[-1] == '-1 160\n'
print('launching after_1_2_energy.py')
subprocess.check_call('python after_1_2_energy.py '+fstio.dict_to_argparse(params['original_args']), shell=True, executable='/bin/bash')
if __name__ == '__main__':
parser = parse()
args, unknown_args = parser.parse_known_args()
assert len(unknown_args) == 0, 'An unknown argument was included: '+str(unknown_args)
prms = vars(args)
prms['original_args'] = copy.deepcopy(prms)
prms['prefix'] = 'contact'
if os.path.isfile('''{prefix}.txt'''.format(**prms)):
print('using existing:', '''{prefix}.txt'''.format(**prms))
post_process(prms)
sys.exit()
prms['script'] = __file__
prms['sim_id_file'] = prms['prefix'] + '_sim_ids.txt'
prms['minutes'] = int(prms['hours_terminate']*60) # minutes allocated on queue
prms['hours_terminate'] = 0.99*prms['hours_terminate'] - 0.0333 # terminate before queue
prms['orientation_file'] = 'orientations' + str(prms['num_orientations_per_pi'])
if prms['domain1'] != prms['domain2']:
prms['orientation_file'] += '_ij'
prms['orientation_file'] += '.txt'
prms['procs_per_sim'] = 1
prms['num_sims'] = prms['num_nodes']*prms['procs_per_node']
if prms['contact_xyz_file'] != '':
prms['contact_xyz_file'] = 'contact_xyz_file ' + prms['contact_xyz_file']
prms['vis_extra'] = 'group0 fixed fixed_particle_type 0 group1 mobile mobile_particle_type 1'
else:
prms['vis_extra'] = ''
# convert pdb to pqr to fstprt
if prms['run_type'] != 2:
for domain in [prms['domain1'], prms['domain2']]:
# example of how pdb is converted to pqr
#subprocess.check_call('pdb2pqr30 --titration-state-method=propka --with-ph='+str(prms['pH'])+' --ff=PARSE '+domain+'.pdb '+domain+'.pqr --drop-water > '+domain+'.log 2>&1', shell=True, executable='/bin/bash')
coarse_grain_pdb.write_fstprt(domain)
fstio.run_simulations(params=prms,
sim_node_dependent_params=None,
#sim_node_dependent_params=sim_node_dependent_params,
write_feasst_script=write_feasst_script,
post_process=post_process,
queue_function=fstio.slurm_single_node,
args=args)
"""
Generate an energy table of a protein, by default described by the provided 4lyt.pdb file.
Each atom is modeled as a hard sphere with an LJ and screened charge interation computed over a number of orientations.
"""
import copy
import sys
import os.path
import subprocess
import numpy as np
from pyfeasst import fstio
from pyfeasst import physical_constants
from launch_1_cg_protein import parse
def write_feasst_script(params, script_file):
""" Write fst script for a single simulation with keys of params {} enclosed. """
with open(script_file, 'w', encoding='utf-8') as myfile:
myfile.write("""
MonteCarlo
Configuration cubic_side_length {initial_box} particle_type0 {domain1}.fstprt particle_type1 {domain2}.fstprt \
add_particles_of_type0 1 add_particles_of_type1 1 cutoff {cutoff} \
group0 fixed fixed_particle_type 0 group1 mobile mobile_particle_type 1
Potential Model ModelTwoBodyFactory \
model0 HardSphere \
model1 LennardJones \
model2 DebyeHuckel kappa {kappa} dielectric {dielectric_water} smoothing_distance {smoothing_distance} \
VisitModel VisitModelCell min_length max_cutoff energy_cutoff 1e100
TabulateTwoRigidBody3D proc {sim} num_proc {num_sims} input_orientation_file {orientation_file} num_z {num_z} smoothing_distance {smoothing_distance} input_table_file {contact_file} output_table_file {prefix}{sim}.txt gamma {gamma}
""".format(**params))
def post_process(params):
""" Combine tables, check their length and launch the next step """
subprocess.check_call(['sleep', '15']) # without this command, combine doesn't read all tables?
if not os.path.isfile('''{prefix}.txt'''.format(**params)):
fstio.combine_tables_two_rigid_body(prefix=params['prefix'], suffix='.txt',
num_procs=params['num_sims'])
if params['num_orientations_per_pi'] == 6 and params['domain1'] == '4lyt' and \
params['domain2'] == params['domain1']:
with open("""{prefix}.txt""".format(**params), 'r', encoding="utf-8") as file1:
lines = file1.readlines()
#print(len(lines))
assert len(lines) == 681
assert lines[0] == 'site_types 1 0\n'
assert lines[6] == '3.762260e+01 -4.069026e+00 -7.593451e-04\n'
assert lines[-1] == '-1 160\n'
print('launching after_1_3_b2.py')
subprocess.check_call('python after_1_3_b2.py '+fstio.dict_to_argparse(params['original_args']),
shell=True, executable='/bin/bash')
if __name__ == '__main__':
parser = parse()
args, unknown_args = parser.parse_known_args()
assert len(unknown_args) == 0, 'An unknown argument was included: '+str(unknown_args)
prms = vars(args)
prms['original_args'] = copy.deepcopy(prms)
prms['script'] = __file__
prms['prefix'] = 'energy'
if os.path.isfile('''{prefix}.txt'''.format(**prms)):
print('using existing:', '''{prefix}.txt'''.format(**prms))
post_process(prms)
sys.exit()
prms['sim_id_file'] = prms['prefix'] + '_sim_ids.txt'
prms['minutes'] = int(prms['hours_terminate']*60) # minutes allocated on queue
prms['hours_terminate'] = 0.99*prms['hours_terminate'] - 0.0333 # terminate before queue
prms['orientation_file'] = 'orientations' + str(prms['num_orientations_per_pi'])
if prms['domain1'] != prms['domain2']:
prms['orientation_file'] += '_ij'
prms['orientation_file'] += '.txt'
prms['contact_file'] = 'contact.txt'
prms['procs_per_sim'] = 1
prms['num_sims'] = prms['num_nodes']*prms['procs_per_node']
temp_cel = prms['temperature'] - 273.15
prms['dielectric_water'] = 87.74 - 0.40008*temp_cel + 9.398e-4*temp_cel**2 - 1.4e-6*temp_cel**3
eps_0 = physical_constants.VacuumElectricPermittivity().value()
elem_q = physical_constants.ElementaryCharge().value()
na = physical_constants.AvogadroConstant().value()
kb = physical_constants.BoltzmannConstant().value()
prms['kappa'] = np.sqrt(2*(elem_q**2)*prms['ionic_strength']*(1e3)*na/(prms['dielectric_water']*eps_0*kb*prms['temperature']*1e20))
prms['cutoff'] = 5/prms['kappa']
prms['initial_box'] = 4*prms['cutoff'] # initial box adjusted by TabulateTwoRigidBody3D
fstio.run_simulations(params=prms,
sim_node_dependent_params=None,
#sim_node_dependent_params=sim_node_dependent_params,
write_feasst_script=write_feasst_script,
post_process=post_process,
queue_function=fstio.slurm_single_node,
args=args)
"""
Compute the second virial coefficient using coarse-grained models.
"""
import subprocess
import numpy as np
import pandas as pd
from pyfeasst import accumulator
from pyfeasst import fstio
from pyfeasst import physical_constants
from launch_1_cg_protein import parse
def write_feasst_script(params, script_file):
""" Write fst script for a single simulation with keys of params {} enclosed. """
with open(script_file, 'w', encoding='utf-8') as myfile:
myfile.write("""
MonteCarlo
RandomMT19937 seed {seed}
Configuration cubic_side_length {cubic_side_length} particle_type0 {fstprt} add_particles_of_type0 2 group0 mobile mobile_particle_index 1
Potential Model TwoBodyTable VisitModelInner VisitModelInnerTable table_file {table_file} ignore_energy {ignore_energy}
RefPotential Model HardSphere sigma 0 cutoff 0 sigma0 {reference_sigma} cutoff0 {reference_sigma}
ThermoParams beta {beta}
MayerSampling num_beta_taylor {num_beta_taylor} num_trials_per_iteration {trials_per} num_iterations_to_complete {equilibration}
TrialTranslate new_only true reference_index 0 tunable_param 35 group mobile
TrialRotate new_only true reference_index 0 tunable_param 40
# tune trial parameters
CriteriaWriter trials_per_write {trials_per} output_file {prefix}_{sim}_b2_eq.txt
Log trials_per_write {trials_per} output_file {prefix}_{sim}_eq.txt
#Movie trials_per_write {trials_per} output_file {prefix}_{sim}_eq.xyz
#CPUTime trials_per_write {trials_per} output_file {prefix}_{sim}_cpu.txt append true
Tune
#Run num_trials {equilibration}
Run until_criteria_complete true
RemoveAnalyze name CriteriaWriter
RemoveAnalyze name Log
#RemoveAnalyze name Movie
RemoveModify name Tune
# production
CriteriaWriter trials_per_write {trials_per} output_file {prefix}_{sim}_b2.txt
Log trials_per_write {trials_per} output_file {prefix}_{sim}.txt
#Movie trials_per_write {trials_per} output_file {prefix}_{sim}.xyz
MayerSampling num_beta_taylor {num_beta_taylor} num_trials_per_iteration {trials_per} num_iterations_to_complete {production}
Run until_criteria_complete true
""".format(**params))
def post_process(params):
""" Compute statistics and output B2 value in appropriate units. """
subprocess.check_call(['sleep', '5']) # without this command, combine doesn't read all tables?
b2acc = accumulator.Accumulator()
ref = (2*np.pi/3)*params['reference_sigma']**3
mw = params['molecular_weight']/1e3 # kDa
ref *= 1e-26*physical_constants.AvogadroConstant().value()/mw/mw # A3 to 10^4 molml/g2
for p in range(params['procs_per_node']):
with open(params['prefix']+'_'+str(p)+"_b2.txt", 'r', encoding="utf-8") as file1:
# with open(params['prefix']+'_'+str(p)+"_b2_eq.txt", 'r') as file1:
lines = file1.readlines()
if len(lines) != 0:
#print('p', p, 'lines[0]', lines[0])
exec('iprm=' + lines[0], globals())
#print(iprm)
#print('ref(A^3)', ref)
b2_molmlg2=iprm['second_virial_ratio']*ref
#print('b2(A^3)', b2)
#print(b2_molmlg2, '10^-4 mol*ml*g^-2')
b2acc.add(b2_molmlg2)
if p == 0:
df = pd.DataFrame(data={p: iprm['beta_taylor']})
else:
df[p] = iprm['beta_taylor']
print('b2 (mol*ml/g2)', b2acc.mean(), b2acc.stdev()/np.sqrt(params['procs_per_node']))
if params['molecular_weight'] == 14315.03534:
assert np.abs(b2acc.mean() - 1.72) < 0.1
if __name__ == '__main__':
parser = parse()
args, unknown_args = parser.parse_known_args()
assert len(unknown_args) == 0, 'An unknown argument was included: '+str(unknown_args)
prms = vars(args)
prms['prefix'] = 'b2'
prms['sim_id_file'] = prms['prefix'] + '_sim_ids.txt'
prms['script'] = __file__
prms['minutes'] = int(prms['hours_terminate']*60) # minutes allocated on queue
prms['hours_terminate'] = 0.99*prms['hours_terminate'] - 0.0333 # terminate before queue
prms['procs_per_sim'] = 1
prms['num_sims'] = prms['num_nodes']*prms['procs_per_node']
prms['beta'] = 1./(prms['temperature']*physical_constants.MolarGasConstant().value()/1e3) # mol/kJ
fstio.run_simulations(params=prms,
sim_node_dependent_params=None,
#sim_node_dependent_params=sim_node_dependent_params,
write_feasst_script=write_feasst_script,
post_process=post_process,
queue_function=fstio.slurm_single_node,
args=args)