"""
Scripts to perform rate calculations using Multiwell.
The focus here is to take output from a pseudo-variational
calculation performed using CFOUR, format the results (e.g.
constants, frequencies, etc.) into ktools format
"""
import os
from subprocess import Popen, PIPE
import logging
from glob import glob
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit as cf
from matplotlib import pyplot as plt
from pyspectools.qchem import parsers
from pyspectools.routines import read_yaml
[docs]class Vtst:
"""
Class that handles a VTST calculation. This class is mainly
designed for filepath handling, since the multiwell programs
are extremely annoying about relative paths...
"""
[docs] @classmethod
def vtst_calc(cls, calc_root, reaction, ktools_yml_path):
""" Serialization method specifically for VTST calculations
using ktools.
The required inputs are:
calc_root - path to root of the calculation directory
reaction - str specification to name the reaction
ktools_yml_path - path to ktools YAML input
"""
vtst_obj = cls(calc_root, reaction, ktools_yml_path)
# Get all the filepaths to calculations
vtst_obj.logs = glob(
os.path.join(
vtst_obj.paths["calc_root"],
"*/freq*.log"
)
)
# Initialize ktools settings
vtst_obj.init_ktools()
return vtst_obj
[docs] @classmethod
def tst_calc(cls, data_dict, reaction, ktools_yml_path):
""" Serialization method for conventional TST calculations
using ktools.
The required inputs are:
data_dict - dictionary holding paths to output files, with
the keys corresponding to reactant, ts, product as well as
reaction enthalpies
reaction - str specification to name the reaction
ktools_yml_path - path to ktools YAML input
format of data_dict should be like this:
data_dict = {"reactant": {"path": , "E"}}
"""
tst_obj = cls(None, reaction, ktools_yml_path)
# Convert all paths to absolute ones
tst_obj.logs = dict()
for key in data_dict:
tst_obj.logs[key] = os.path.abspath(
data_dict[key]["path"]
)
tst_obj.data = data_dict.copy()
# Initialize ktools settings
tst_obj.init_ktools()
return tst_obj
def __init__(self, calc_root, reaction, ktools_yml_path):
""" Init method for Ktools calculation
Depending on the type of calculation - either TST or VTST -
serialize the Ktools instance with those specific methods.
"""
# Dictionary for holding all of the paths
self.paths = dict()
# Dictionary for holding all of the template strings
self.temps = dict()
"""
Set up paths management
"""
# Path to notebook directory
self.paths["nb"] = os.getcwd()
# Path to multiwell templates
self.paths["mw_temp"] = os.path.join(
self.paths["nb"],
"multiwell"
)
# Path to the calculation root. Only used for VTST
if calc_root is not None:
self.paths["calc_root"] = os.path.abspath(calc_root)
# If output folder is not present, make it
if os.path.isdir("ktools-vtst") is False:
os.mkdir("ktools-vtst")
# output_path is where the ktools logs will be stored
self.paths["output"] = os.path.join("ktools-vtst", reaction)
if os.path.isdir(self.paths["output"]) is False:
os.mkdir(self.paths["output"])
# Convert all paths into absolute paths
for key, value in self.paths.items():
self.paths[key] = os.path.abspath(value)
print(self.paths[key])
"""
Read in ktools settings
"""
self.ktools = read_yaml(ktools_yml_path)
for key, value in self.ktools.items():
print(key + ":\t\t" + str(value))
"""
Reading in the string templates
"""
print("Found the following templates:")
for temp_path in glob(os.path.join(self.paths["mw_temp"], "*.temp")):
with open(temp_path) as read_file:
self.temps[os.path.basename(temp_path)] = read_file.read()
print(temp_path)
[docs] def analyze_tst(self):
""" Analysis function for transition state RRKM
More or less the same as the VTST version, but simpler
"""
os.chdir(self.paths["output"])
mol_str = ""
for species in self.data:
sym_path = os.path.basename(self.logs[species])
try:
os.symlink(
self.logs[species],
sym_path
)
except FileExistsError:
pass
# Parse log file
parsed_data = parsers.parse_cfour(sym_path)
# Series of if cases to determine what the formatting
# should be
if species == "reactant":
species_type = "reac"
name = "Reactant"
dist = 0.
elif species == "product":
species_type = "prod"
name = "Product"
dist = 2.
elif species == "ts":
species_type = "ctst"
name = "TS"
dist = 1.
# Create a species object with all its associated
# parameters
current_species = Species(
self.temps["mominert.temp"],
name,
parsed_data,
species_type,
dist,
self.data[species]["E"]
)
mol_str += current_species.format_species() + "\n"
self.ktools["molecules"] = mol_str
self.dump_ktools()
print("Dumped ktools input file to disk")
output, error = self.call_ktools(
os.path.join(self.paths["output"], "ktools.inp")
)
os.chdir(self.paths["nb"])
return output, error
[docs] def analyze_vtst(self):
""" Main analysis function
Loops over all of the log files detected in __init__ and
produces ktools input file format strings
"""
os.chdir(self.paths["output"])
mol_str = ""
end_step = self.ktools["stepsize"] * self.ktools["steps"]
irc_values = np.arange(0, end_step, self.ktools["stepsize"])
steps = list()
energies = list()
# Loop over the log files and generate Species objects
for index in range(len(self.logs)):
# Create symlinks to the logfile
log_path = os.path.join(
self.paths["calc_root"],
str(index) + "/freq" + str(index) + ".log"
)
sym_path = os.path.basename(log_path)
try:
os.symlink(
log_path,
sym_path
)
except FileExistsError:
pass
# Parse the log file
parsed_data = parsers.parse_cfour(sym_path)
print("Step " + str(index))
# First step is always global minimum
if index == 0:
species_type = "reac"
name = "Reactant"
E0 = parsed_data["total energy"] + parsed_data["zpe"] / 2625.50
E = 0.
# If it's the last species, treat as products
elif index == len(self.logs) - 1:
species_type = "prod"
name = "Product"
current_E = parsed_data["total energy"] + parsed_data["zpe"] / 2625.5
E = (current_E - E0) * 2625.50
else:
# Everything else is trial TS
species_type = "ctst"
name = "TS-" + str(index + 1)
current_E = parsed_data["total energy"] + parsed_data["zpe"] / 2625.5
E = (current_E - E0) * 2625.50
dist = np.round(irc_values[index], decimals=2)
E = np.round(E, decimals=3)
current_species = Species(
self.temps["mominert.temp"],
name,
parsed_data,
species_type,
dist,
E
)
mol_str += current_species.format_species() + "\n"
steps.append(dist)
energies.append(E)
# Add the complete molecule specification
self.ktools["molecules"] = mol_str
# Write the ktools file to disk
self.dump_ktools()
#print("Calling ktools executable")
#output, error =self.call_ktools(
# os.path.join(self.paths["output"], "ktools.inp")
# )
os.chdir(self.paths["nb"])
return steps, energies
[docs]class Species:
# Class that handles an individual species in a ktools calculation
# The required input are:
# name: string representing name of molecule
# log_path: string path to CFOUR calculation output
# species_type: string designation to denote reactant, TS, product
def __init__(self, mominert_str, name, parsed_data, species_type, dist, delh=0.):
self.mom_temp_str = mominert_str
#self.logger = init_logger(ktools_path + "/" + name + "-ktools.log")
self.cfour_calc = parsed_data
# Format parameters for mominert calculation
geom_str = ""
# atom_counter required because dummy atoms exist
atom_counter = 0
for atom in self.cfour_calc["coordinates"]:
if atom[0] != "X":
atom_counter += 1
# Insert atom index into list
atom.insert(1, atom_counter)
# Convert float values for coordinates to string
atom = [str(item) for item in atom]
# Flatten list to string
atom_coords = " ".join(atom)
# Only take real atoms
geom_str += atom_coords + "\n"
# Convert float values for coordinates to string
self.path = name + "-coords"
self.mom_dict = {
"title": name,
"natoms": atom_counter,
"geometry": geom_str
}
# Calculate moments of inertia
self.inertia, self.rotcon = self.calc_mominert()
# Determine number of rotational degrees of freedom
if self.inertia[0] == 0.:
nrot = 1
# For all non-linear molecules, we'll have j and krotors
else:
nrot = 2
# Set up dictionary for formatting ktools input
self.species_dict = {
"type": species_type,
"name": name,
"delh": str(delh),
"dist": str(dist),
"formula": self.get_formula(),
"comment": "-".join([name, species_type, "r" + str(dist)]),
"dof": str(len(self.cfour_calc["frequencies"]) + nrot),
"Eelev": "0.0",
"gele": str(self.cfour_calc["multiplicity"]),
"modes": self.format_dof()
}
[docs] def calc_mominert(self):
# Write the mominert input to disk and run mominert
with open(self.path + ".dat", "w+") as write_file:
write_file.write(
self.mom_temp_str.format_map(self.mom_dict)
)
inertia, rotcon = self.call_mominert()
return inertia, rotcon
[docs] def call_mominert(self):
# Function that will call mominert to calculate moments of
# inertia and parse the output
with Popen(["mominert", self.path + ".dat"], stdout=PIPE) as mominert:
output, error = mominert.communicate()
log_file = self.path + ".out"
# Parse the output of mominert
inertia, rotcon = parse_mominert(log_file)
return inertia, rotcon
[docs] def determine_rot(self, mode_num):
# Function to determine what kind of model to treat rotation based
# on the output of mominert
# Only input is the mode number; make it the last mode
# Format string for ktools
rot_str = "{mode} {flag} {AAA} {BBB} {CCC}"
full_rot = ""
# Linear molecule case
if self.inertia[0] == 0.:
jrotor = rot_str.format_map(
{
"mode": str(mode_num),
"flag": "jro",
"AAA": str(self.inertia[-1]),
"BBB": "1",
"CCC": "2", # 2-fold degenerate
}
)
full_rot = jrotor
# Asymmetric rotor
else:
krotor = rot_str.format_map(
{
"mode": str(mode_num),
"flag": "kro",
"AAA": self.inertia[0], # Depends on A
"BBB": "1",
"CCC": "1", # 1-fold degenerate
}
)
jrotor = rot_str.format_map(
{
"mode": str(mode_num + 1),
"flag": "jro",
"AAA": np.average(self.inertia[1:]),
"BBB": "1",
"CCC": "2", # 2-fold degenerate
}
)
full_rot = krotor + "\n" + jrotor
return full_rot
[docs]def init_logger(log_name):
"""
Use `logging` module to provide debug data for HEAT analysis.
All of the extraneous data (e.g. extraplation coefficients, etc)
will be output with the logger.
Required input is the name of the log file including file extension.
Returns a logger object.
"""
logger = logging.getLogger(log_name)
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler("logs/" + log_name)
fh.setLevel(logging.DEBUG)
# Set up the formatter
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
fh.setFormatter(formatter)
logger.addHandler(fh)
return logger
"""
Data parsing from the Multiwell programs
"""
[docs]def parse_mominert(momout_path):
# Function to parse the output of a mominert calculation
# Ordering of the values are A, B, C
with open(momout_path) as read_file:
lines = read_file.readlines()
for line in lines:
# Read in the moments of inertia in units of amu ang**2
if "amu" in line:
split_line = line.split()
inertia = [
float(split_line[2]),
float(split_line[5]),
float(split_line[8])
]
# Read in rotational constants in MHz
if "GHz" in line:
split_line = line.split()
if "Infinity" in line:
# If molecule is linear, Ia is 0 and we will not
# parse infinity!
rotcon = [
0.,
float(split_line[5]),
float(split_line[8]),
]
else:
rotcon = [
float(split_line[2]),
float(split_line[5]),
float(split_line[8])
]
# Make sure ordering of values are descending
# Rotational constants are sorted in descending order, while
# the moments of inertia are reversed!
inertia = sorted(inertia, reverse=False)
rotcon = sorted(rotcon, reverse=True)
return inertia, rotcon
[docs]def parse_canonical(filecontents):
""" Function to parse data out of a .canonical file from ktools
Returns a dataframe containing the recommended thermal rate constants
along with the temperature and inverse temperature.
"""
read_flag = False
data = list()
read = False
for line in filecontents:
if read_flag is True:
if "------" in line and read is True:
read = False
if read is True:
read_out = []
for value in line.split()[1:]:
# Try and convert values to flots
try:
read_out.append(np.float(value))
# This will fail for extremely small numbers
# where the ktools output screws up the formatting
except ValueError:
read_out.append(0.)
data.append(np.array(read_out))
if "------" in line and read is False:
read = True
if "FINAL RECOMMENDED REACTION RATE" in line:
read_flag = True
df = pd.DataFrame(data, columns=["T", "k(forward)", "k(reverse)", "Keq"])
df["1000 / T"] = 1000. / df["T"]
return df
"""
Rate analysis routines
"""
[docs]def mod_arrhenius(T, a, b, g):
# Modified Arrhenius equation from Wakelam 2010
return a * (T / 300.)**b * np.exp(-g / T)
[docs]def fit_arrhenius(canonical_df, guess=None):
""" Function to fit a modified Arrhenius rate model to
a dataframe containing canonical rates extracted from
the parse_canonical routine
"""
# Some random initial parameters
canonical_df = canonical_df.dropna()
if guess is None:
p0 = [1e-5, 0., 1e4]
else:
p0 = guess
popt, pcov = cf(
mod_arrhenius,
canonical_df["T"],
canonical_df["k(forward)"],
p0=p0
)
results = {
"a": popt[0],
"b": popt[1],
"g": popt[2]
}
fig, ax = plt.subplots()
ax.plot(
canonical_df["T"],
canonical_df["k(forward)"],
lw=2,
label="Data"
)
ax.plot(
canonical_df["T"],
mod_arrhenius(canonical_df["T"], *p0),
linestyle="--",
alpha=0.7,
label="Initial"
)
ax.plot(
canonical_df["T"],
mod_arrhenius(canonical_df["T"], **results),
linestyle="--",
label="Fit"
)
ax.legend()
ax.set_yscale("log")
ax.set_xscale("log")
return results