Source code for pyspectools.qchem.multiwell

"""
Scripts to perform rate calculations using Multiwell.
The focus here is to take output from a pseudo-variational
calculation performed using CFOUR, format the results (e.g.
constants, frequencies, etc.) into ktools format
"""

import os
from subprocess import Popen, PIPE
import logging
from glob import glob

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit as cf
from matplotlib import pyplot as plt

from pyspectools.qchem import parsers
from pyspectools.routines import read_yaml


[docs]class Vtst:
    """
    Class that handles a VTST calculation. This class is mainly
    designed for filepath handling, since the multiwell programs
    are extremely annoying about relative paths...
    """
[docs]    @classmethod
    def vtst_calc(cls, calc_root, reaction, ktools_yml_path):
        """ Serialization method specifically for VTST calculations
        using ktools.

        The required inputs are:
        calc_root - path to root of the calculation directory
        reaction - str specification to name the reaction
        ktools_yml_path - path to ktools YAML input
        """
        vtst_obj = cls(calc_root, reaction, ktools_yml_path)
        # Get all the filepaths to calculations
        vtst_obj.logs = glob(
            os.path.join(
                vtst_obj.paths["calc_root"],
                "*/freq*.log"
                )
            )
        # Initialize ktools settings
        vtst_obj.init_ktools()
        return vtst_obj

[docs]    @classmethod
    def tst_calc(cls, data_dict, reaction, ktools_yml_path):
        """ Serialization method for conventional TST calculations
        using ktools.

        The required inputs are:
        data_dict - dictionary holding paths to output files, with
        the keys corresponding to reactant, ts, product as well as
        reaction enthalpies
        reaction - str specification to name the reaction
        ktools_yml_path - path to ktools YAML input

        format of data_dict should be like this:
        data_dict = {"reactant": {"path": , "E"}}
        """
        tst_obj = cls(None, reaction, ktools_yml_path)
        # Convert all paths to absolute ones
        tst_obj.logs = dict()
        for key in data_dict:
            tst_obj.logs[key] = os.path.abspath(
                data_dict[key]["path"]
                )
        tst_obj.data = data_dict.copy()
        # Initialize ktools settings
        tst_obj.init_ktools()
        return tst_obj


    def __init__(self, calc_root, reaction, ktools_yml_path):
        """ Init method for Ktools calculation
        Depending on the type of calculation - either TST or VTST -
        serialize the Ktools instance with those specific methods.
        """
        # Dictionary for holding all of the paths
        self.paths = dict()
        # Dictionary for holding all of the template strings
        self.temps = dict()
        """
        Set up paths management
        """
        # Path to notebook directory
        self.paths["nb"] = os.getcwd()
        # Path to multiwell templates
        self.paths["mw_temp"] = os.path.join(
            self.paths["nb"],
            "multiwell"
            )
        # Path to the calculation root. Only used for VTST
        if calc_root is not None:
            self.paths["calc_root"] = os.path.abspath(calc_root)
        # If output folder is not present, make it
        if os.path.isdir("ktools-vtst") is False:
            os.mkdir("ktools-vtst")
        # output_path is where the ktools logs will be stored
        self.paths["output"] = os.path.join("ktools-vtst", reaction)
        if os.path.isdir(self.paths["output"]) is False:
            os.mkdir(self.paths["output"])
        # Convert all paths into absolute paths
        for key, value in self.paths.items():
            self.paths[key] = os.path.abspath(value)
            print(self.paths[key])
        """
        Read in ktools settings
        """
        self.ktools = read_yaml(ktools_yml_path)
        for key, value in self.ktools.items():
            print(key + ":\t\t" + str(value))
        """
        Reading in the string templates
        """
        print("Found the following templates:")
        for temp_path in glob(os.path.join(self.paths["mw_temp"], "*.temp")):
            with open(temp_path) as read_file:
                self.temps[os.path.basename(temp_path)] = read_file.read()
                print(temp_path)

[docs]    def init_ktools(self):
        # sort the list of calculations
        self.ktools["NT"] = len(self.ktools["Tlist"])
        self.ktools["Tlist"] = [str(T) for T in self.ktools["Tlist"]]
        self.ktools["Tlist"] = " ".join(self.ktools["Tlist"])
        self.ktools["Nreact"] = "1"
        self.ktools["Nprod"] = "1"
        self.ktools["NTS"] = len(self.logs) - 2

[docs]    def analyze_tst(self):
        """ Analysis function for transition state RRKM
        More or less the same as the VTST version, but simpler
        """
        os.chdir(self.paths["output"])
        mol_str = ""
        for species in self.data:
            sym_path = os.path.basename(self.logs[species])
            try:
                os.symlink(
                    self.logs[species],
                    sym_path
                    )
            except FileExistsError:
                pass
            # Parse log file
            parsed_data = parsers.parse_cfour(sym_path)
            # Series of if cases to determine what the formatting
            # should be
            if species == "reactant":
                species_type = "reac"
                name = "Reactant"
                dist = 0.
            elif species == "product":
                species_type = "prod"
                name = "Product"
                dist = 2.
            elif species == "ts":
                species_type = "ctst"
                name = "TS"
                dist = 1.
            # Create a species object with all its associated
            # parameters
            current_species = Species(
                self.temps["mominert.temp"],
                name,
                parsed_data,
                species_type,
                dist,
                self.data[species]["E"]
                )
            mol_str += current_species.format_species() + "\n"
        self.ktools["molecules"] = mol_str
        self.dump_ktools()
        print("Dumped ktools input file to disk")
        output, error = self.call_ktools(
                os.path.join(self.paths["output"], "ktools.inp")
                )
        os.chdir(self.paths["nb"])
        return output, error

[docs]    def analyze_vtst(self):
        """ Main analysis function
        Loops over all of the log files detected in __init__ and
        produces ktools input file format strings
        """
        os.chdir(self.paths["output"])
        mol_str = ""
        end_step = self.ktools["stepsize"] * self.ktools["steps"]
        irc_values = np.arange(0, end_step, self.ktools["stepsize"])
        steps = list()
        energies = list()
        # Loop over the log files and generate Species objects
        for index in range(len(self.logs)):
            # Create symlinks to the logfile
            log_path = os.path.join(
                self.paths["calc_root"],
                str(index) + "/freq" + str(index) + ".log"
                )
            sym_path = os.path.basename(log_path)
            try:
                os.symlink(
                    log_path,
                    sym_path
                    )
            except FileExistsError:
                pass
            # Parse the log file
            parsed_data = parsers.parse_cfour(sym_path)
            print("Step " + str(index))
            # First step is always global minimum
            if index == 0:
                species_type = "reac"
                name = "Reactant"
                E0 = parsed_data["total energy"] + parsed_data["zpe"] / 2625.50
                E = 0.
            # If it's the last species, treat as products
            elif index == len(self.logs) - 1:
                species_type = "prod"
                name = "Product"
                current_E = parsed_data["total energy"] + parsed_data["zpe"] / 2625.5
                E = (current_E - E0) * 2625.50
            else:
                # Everything else is trial TS
                species_type = "ctst"
                name = "TS-" + str(index + 1)
                current_E = parsed_data["total energy"] + parsed_data["zpe"] / 2625.5
                E = (current_E - E0) * 2625.50
            dist = np.round(irc_values[index], decimals=2)
            E = np.round(E, decimals=3)
            current_species = Species(
                self.temps["mominert.temp"],
                name,
                parsed_data,
                species_type,
                dist,
                E
                )
            mol_str += current_species.format_species() + "\n"
            steps.append(dist)
            energies.append(E)
        # Add the complete molecule specification
        self.ktools["molecules"] = mol_str
        # Write the ktools file to disk
        self.dump_ktools()
        #print("Calling ktools executable")
        #output, error =self.call_ktools(
        #    os.path.join(self.paths["output"], "ktools.inp")
        #    )
        os.chdir(self.paths["nb"])
        return steps, energies

[docs]    def dump_ktools(self):
        # Method for writing the ktools input file to disk
        # Open file and write ktools input
        with open(os.path.join(self.paths["output"], "ktools.inp"), "w+") as write_file:
            write_file.write(
                self.temps["ktools.temp"].format_map(self.ktools)
                )

[docs]    def call_ktools(self, inp_path):
        # wrapper for ktools executable
        with Popen(["ktools", inp_path], stdout=PIPE) as ktools_exe:
            output, error = ktools_exe.communicate()
        print(output)
        print(error)
        return output, error


[docs]class Species:
    # Class that handles an individual species in a ktools calculation
    # The required input are:
    # name: string representing name of molecule
    # log_path: string path to CFOUR calculation output
    # species_type: string designation to denote reactant, TS, product
    def __init__(self, mominert_str, name, parsed_data, species_type, dist, delh=0.):
        self.mom_temp_str = mominert_str
        #self.logger = init_logger(ktools_path + "/" + name + "-ktools.log")
        self.cfour_calc = parsed_data
        # Format parameters for mominert calculation
        geom_str = ""
        # atom_counter required because dummy atoms exist
        atom_counter = 0
        for atom in self.cfour_calc["coordinates"]:
            if atom[0] != "X":
                atom_counter += 1
                # Insert atom index into list
                atom.insert(1, atom_counter)
                # Convert float values for coordinates to string
                atom = [str(item) for item in atom]
                # Flatten list to string
                atom_coords = " ".join(atom)
                # Only take real atoms
                geom_str += atom_coords + "\n"
            # Convert float values for coordinates to string
        self.path = name + "-coords"
        self.mom_dict = {
            "title": name,
            "natoms": atom_counter,
            "geometry": geom_str
        }
        # Calculate moments of inertia
        self.inertia, self.rotcon = self.calc_mominert()
        # Determine number of rotational degrees of freedom
        if self.inertia[0] == 0.:
            nrot = 1
        # For all non-linear molecules, we'll have j and krotors
        else:
            nrot = 2
        # Set up dictionary for formatting ktools input
        self.species_dict = {
            "type": species_type,
            "name": name,
            "delh": str(delh),
            "dist": str(dist),
            "formula": self.get_formula(),
            "comment": "-".join([name, species_type, "r" + str(dist)]),
            "dof": str(len(self.cfour_calc["frequencies"]) + nrot),
            "Eelev": "0.0",
            "gele": str(self.cfour_calc["multiplicity"]),
            "modes": self.format_dof()
            }

[docs]    def calc_mominert(self):
        # Write the mominert input to disk and run mominert
        with open(self.path + ".dat", "w+") as write_file:
            write_file.write(
                self.mom_temp_str.format_map(self.mom_dict)
                )
        inertia, rotcon = self.call_mominert()
        return inertia, rotcon

[docs]    def call_mominert(self):
        # Function that will call mominert to calculate moments of
        # inertia and parse the output
        with Popen(["mominert", self.path + ".dat"], stdout=PIPE) as mominert:
            output, error = mominert.communicate()
        log_file = self.path + ".out"
        # Parse the output of mominert
        inertia, rotcon = parse_mominert(log_file)
        return inertia, rotcon

[docs]    def determine_rot(self, mode_num):
        # Function to determine what kind of model to treat rotation based
        # on the output of mominert
        # Only input is the mode number; make it the last mode
        # Format string for ktools
        rot_str = "{mode} {flag} {AAA} {BBB} {CCC}"
        full_rot = ""
        # Linear molecule case
        if self.inertia[0] == 0.:
            jrotor = rot_str.format_map(
                    {
                    "mode": str(mode_num),
                    "flag": "jro",
                    "AAA": str(self.inertia[-1]),
                    "BBB": "1",
                    "CCC": "2",                # 2-fold degenerate
                    }
                )
            full_rot = jrotor
        # Asymmetric rotor
        else:
            krotor = rot_str.format_map(
                {
                "mode": str(mode_num),
                "flag": "kro",
                "AAA": self.inertia[0],    # Depends on A
                "BBB": "1",
                "CCC": "1",                # 1-fold degenerate
                }
                )
            jrotor = rot_str.format_map(
                {
                "mode": str(mode_num + 1),
                "flag": "jro",
                "AAA": np.average(self.inertia[1:]),
                "BBB": "1",
                "CCC": "2",                # 2-fold degenerate
                }
                )
            full_rot = krotor + "\n" + jrotor
        return full_rot

[docs]    def get_formula(self):
        # Function for determining chemical formula very roughly
        formula_str = ""
        for line in self.cfour_calc["coordinates"]:
            if line[0] != "X":
                formula_str += line[0]
        return formula_str

[docs]    def format_dof(self):
        # Function for formatting the internal degrees of freedom for this
        # species
        template_str = '''{mode} vib {frequency} 0 1\n'''
        freq_str = ""
        counter = 0
        for index, frequency in enumerate(self.cfour_calc["frequencies"]):
            freq_str += template_str.format_map(
                {"mode": str(index + 1), "frequency": str(frequency)}
                )
            counter = index
        # Tack on the rotational term
        freq_str += self.determine_rot(counter + 2)
        return freq_str

[docs]    def format_species(self):
        # Master function for yielding a complete specification for ktools
        # input
        temp_string = """{type} {name} {delh} {dist}
{formula}
1. {comment}
2.
3.
1 1 1
{Eelev} {gele}
{dof} har amua
{modes}
        """
        return temp_string.format_map(self.species_dict)



[docs]def init_logger(log_name):
    """
    Use `logging` module to provide debug data for HEAT analysis.
    All of the extraneous data (e.g. extraplation coefficients, etc)
    will be output with the logger.

    Required input is the name of the log file including file extension.
    Returns a logger object.
    """
    logger = logging.getLogger(log_name)
    logger.setLevel(logging.DEBUG)

    fh = logging.FileHandler("logs/" + log_name)
    fh.setLevel(logging.DEBUG)

    # Set up the formatter
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    return logger

""" 
    Data parsing from the Multiwell programs
"""

[docs]def parse_mominert(momout_path):
    # Function to parse the output of a mominert calculation
    # Ordering of the values are A, B, C
    with open(momout_path) as read_file:
        lines = read_file.readlines()
    for line in lines:
        # Read in the moments of inertia in units of amu ang**2
        if "amu" in line:
            split_line = line.split()
            inertia = [
                float(split_line[2]),
                float(split_line[5]),
                float(split_line[8])
                ]
        # Read in rotational constants in MHz
        if "GHz" in line:
            split_line = line.split()
            if "Infinity" in line:
                # If molecule is linear, Ia is 0 and we will not
                # parse infinity!
                rotcon = [
                    0.,
                    float(split_line[5]),
                    float(split_line[8]),
                    ]
            else:
                rotcon = [
                    float(split_line[2]),
                    float(split_line[5]),
                    float(split_line[8])
                    ]
    # Make sure ordering of values are descending
    # Rotational constants are sorted in descending order, while
    # the moments of inertia are reversed!
    inertia = sorted(inertia, reverse=False)
    rotcon = sorted(rotcon, reverse=True)
    return inertia, rotcon


[docs]def parse_ktools(ktoolslog_path):
    """ ktools_path corresponds to the logfile for the ktools
    calculation.

    The routine will parse canonical rate constants from the
    file with extension .canonical, and J-resolved microcanonical
    constants from .kej
    """
    # Get the directory where the log files exist
    basedir = os.path.dirname(
        os.path.abspath(ktoolslog_path)
        )
    filedict = dict()
    for extension in ["*.canonical", "*.kej"]:
        target = os.path.join(basedir, extension)
        filedict[extension.split(".")[-1]] = glob(target)[0]

    analysis_dict = dict()
    # loop over the list of files now
    for name, path in filedict.items():
        with open(path) as read_file:
            # Determine which parser to use
            if name == "canonical":
                func = parse_canonical
            elif name == "kej":
                func = parse_kej
            analysis_dict[name] = func(read_file)


[docs]def parse_canonical(filecontents):
    """ Function to parse data out of a .canonical file from ktools
    Returns a dataframe containing the recommended thermal rate constants
    along with the temperature and inverse temperature.
    """
    read_flag = False
    data = list()
    read = False
    for line in filecontents:
        if read_flag is True:
            if "------" in line and read is True:
                read = False
            if read is True:
                read_out = []
                for value in line.split()[1:]:
                    # Try and convert values to flots
                    try:
                        read_out.append(np.float(value))
                    # This will fail for extremely small numbers
                    # where the ktools output screws up the formatting
                    except ValueError:
                        read_out.append(0.)
                data.append(np.array(read_out))
            if "------" in line and read is False:
                read = True
        if "FINAL RECOMMENDED REACTION RATE" in line:
            read_flag = True
    df = pd.DataFrame(data, columns=["T", "k(forward)", "k(reverse)", "Keq"])
    df["1000 / T"] = 1000. / df["T"]
    return df


"""
    Rate analysis routines
"""

[docs]def mod_arrhenius(T, a, b, g):
    # Modified Arrhenius equation from Wakelam 2010
    return a * (T / 300.)**b * np.exp(-g / T)


[docs]def fit_arrhenius(canonical_df, guess=None):
    """ Function to fit a modified Arrhenius rate model to
    a dataframe containing canonical rates extracted from
    the parse_canonical routine
    """
    # Some random initial parameters
    canonical_df = canonical_df.dropna()
    if guess is None:
        p0 = [1e-5, 0., 1e4]
    else:
        p0 = guess
    popt, pcov = cf(
            mod_arrhenius,
            canonical_df["T"],
            canonical_df["k(forward)"],
            p0=p0
            )
    results = {
            "a": popt[0],
            "b": popt[1],
            "g": popt[2]
            }

    fig, ax = plt.subplots()

    ax.plot(
            canonical_df["T"],
            canonical_df["k(forward)"],
            lw=2,
            label="Data"
            )

    ax.plot(
            canonical_df["T"],
            mod_arrhenius(canonical_df["T"], *p0),
            linestyle="--",
            alpha=0.7,
            label="Initial"
            )
    ax.plot(
            canonical_df["T"],
            mod_arrhenius(canonical_df["T"], **results),
            linestyle="--",
            label="Fit"
            )

    ax.legend()
    ax.set_yscale("log")
    ax.set_xscale("log")

    return results


[docs]def format_multiwell(molecule, state="reac", comment="", delh=0.):
    """
    Convert the output of an electronic structure program into the form
    used for a lot of the Multiwell programs; specifically for `thermo`.

    Takes the parsed output as a dictionary, and the user can specify
    what state the structure is (e.g. reac, ctst, or prod).

    Parameters
    ----------
    molecule: dict
        Dictionary containing parsed output from electronic structure programs.
        The format is that generated by the CalculationResult class.
    state: str, optional
        Specify which part of the reaction this structure is.
    comment: str, optional
        Comment line for the structure
    delh: float, optional
        The 0 K heat of formation, or relative energy of this point in the units
        specified in the multiwell input file (usually kJ/mol)

    Returns
    -------
    mw_str: str
        String containing the formatted output
    """
    mw_str = """{state} {name} {delh}
{formula}
! {comment}
!
!
1 1 1
0. {multi}
{ndof} 'HAR' 'MHZ'
{vibrot}
    """
    vibrot = ""
    # Round the frequencies to the nearest wavenumber
    molecule["harm_freq"] = np.unique(np.round(molecule["harm_freq"], 0))[::-1]
    ndof = len(molecule["harm_freq"]) + 2
    for index, freq in enumerate(molecule["harm_freq"]):
        vibrot += "{:d} vib {:.0f} 0. 1\n".format(index + 1, freq)
    vibrot += "{:d} qro {:.3f} 1\n".format(index + 2, molecule["A"])
    vibrot += "{:d} qro {:.3f} 2\n".format(index + 3, molecule["B"])
    form_dict = {
        "vibrot": vibrot,
        "ndof": ndof,
        "multi": molecule["multi"],
        "comment": comment,
        "formula": molecule["formula"],
        "state": state,
        "name": molecule["filename"],
        "delh": delh
    }
    return mw_str.format(**form_dict)