Source code for pyspectools.pypickett

import os
from pathlib import Path
from glob import glob
from dataclasses import dataclass, field
import pickle
import shutil
import tempfile
from copy import deepcopy
from typing import List, Dict
from itertools import product, combinations_with_replacement
from subprocess import run, PIPE

import numpy as np
import pandas as pd
import stat
import pprint
import joblib
from tqdm.autonotebook import tqdm

from pyspectools import routines
from pyspectools.spcat import *
from pyspectools import parsers
from pyspectools.spectra.assignment import LineList


[docs]class MoleculeFit:
    """ Class for handling the top level of a Pickett simulation.
        Inspired by PGopher, the `molecule` class stores information about
        whether we're dealing with a diatomic or polyatomic, number of lines
        to fit, temperature, etc.

        All of the information is bundled into a dictionary called `properties`.

        There are two methods right now that will generate a `molecule` object;
        `from_json` and `from_yaml`, which will interpret JSON and YAML input
        files respectively.
    """

[docs]    @classmethod
    def from_json(cls, json_filepath):
        # Generate a molecule from a specified JSON file
        json_data = read_json(json_filepath)
        species = MoleculeFit(json_data)
        return species

[docs]    @classmethod
    def from_yaml(cls, yaml_filepath):
        # Generate a molecule object from a YAML file
        yaml_data = read_yaml(yaml_filepath)
        species = MoleculeFit(yaml_data)
        return species

[docs]    @classmethod
    def from_pickle(cls, picklepath):
        # Generate a molecule object from a pickle instance
        with open(picklepath) as pickle_read:
            species = pickle.load(pickle_read)
        return species

    def __init__(self, options=None):
        # Initialize the default properties of a diatomic molecule
        self.properties = {
            "name": "Molecule",
            "tag": 0,
            "parameters": dict(),  # human input of parameters
            "linear": False,
            "symmetric": False,
            "prolate": False,
            "dipole": {"A": 0.0, "B": 0.0, "C": 0.0},
            "reduction": "A",
            "spin": 1,
            "spin degeneracy": 0,
            # These are options for the simulation
            "units": "MHz",
            "temperature": 5.0,
            "intensity threshold": [-8.0, -3.0],
            "K range": [0, 0],  # range for K quantum number
            "partition function": 1e3,
            "interactions": 0,
            "quantum number range": [0, 99],
            "odd state weight": 1,
            "even state weight": 1,
            "frequency limit": 100.0,
            "vibration limit": 1,
            "vsym": 1,
            "ewt": 0,
            "statistical axis": 1,
            "number of parameters": 100,
            "number of lines": 0,
            "number of iterations": 10,
            "number of skipped lines": 0,
            "marquadrt-levenburg": 0.0000e00,
            "maximum error": 1e13,
            "fractional importance": 1.0000e00,
            "IR scaling": 1.0,
            "diagonalization": 0,
            "xopt": 1,
        }

        self.param_objects = dict()  # stores parameter objects

        self.iteration_count = 1  # running count of iterations
        self.iterations = dict()  # stores data from each iteration
        self.top_dir = os.getcwd()  # top directory always stored
        self.cwd = ""  # current working directory

        self.experimental_lines = dict()  # experimental lines that go to a .lin

        # Function that updates parameters, and re-writes the .par and .int
        # files.
        self.initialize(options)
        self.interactive = False  # Interactive plotting option

[docs]    def initialize(self, options=None):
        """ Method that will rewrite the .par and .int files, after updating
            the properties dictionary.

            Any warnings regarding common mistakes in input files should also be
            flagged here.
        """
        if options is not None:
            self.properties.update(options)

        self.generate_parameter_objects(verbose=False)
        self.check_input()

        self.setup_int()
        self.setup_par()

        print("Initialized settings for molecule " + self.properties["name"])

[docs]    def check_input(self):
        """ Method for going through the input settings and flagging common
            mistakes.
        """
        # If the K range is set to 0, but the molecule is not linear
        if self.properties["K range"][1] == 0 and self.properties["linear"] is False:
            raise UserWarning(
                "Warning: You have specified a non-linear molecule \
                               and the maximum K value is unset. Result cat file \
                               may be empty!"
            )

        # If A and C constants are given and molecule is supposed to be linear
        if ["A", "C"] in list(self.properties["parameters"].keys()):
            if self.properties["linear"] is True:
                raise UserWarning(
                    "Molecule flagged as linear, \
                                   but A and C constants specified."
                )

[docs]    def generate_parameter_objects(self, verbose=True):
        for param_key in self.properties["parameters"]:
            self.param_objects[param_key] = self.parameter(
                param_key,
                self.properties["parameters"][param_key],
                self.properties["reduction"],
                linear=self.properties["linear"],
                verbose=verbose,
            )

[docs]    def nuclear(self, delete=False):
        """ Function that cleans up Pickett files in a folder """
        filelist = [
            ".cat",
            ".var",
            ".par",
            ".int",
            "_parsedlines.csv",
            "_spectrum.pdf",
            ".fit",
            ".out",
        ]
        files = [self.properties["name"] + name for name in filelist]
        if delete is True:
            for file in files:
                try:
                    os.system("rm " + file)
                except FileNotFoundError:
                    # Ignore files that aren't in the folder
                    pass
        else:
            raise EnvironmentError("Please provide a True value to confirm deletion!")

[docs]    def toggle_interactive(self, connected=False):
        """ Method to toggle interactive plots on and off """
        self.interactive = not self.interactive
        init_notebook_mode(connected=connected)
        if self.interactive is False:
            # Pseudo-disconnect interactivity by removing JS injections
            init_notebook_mode(connected=True)

[docs]    def setup_par(self):
        """ Function that provides the correct formatting for a .par file """
        opt_line = ""
        opt_line += str(self.properties["number of parameters"]).rjust(4) + " "
        opt_line += str(self.properties["number of lines"]).rjust(5) + " "
        opt_line += str(self.properties["number of iterations"]).rjust(5) + " "
        opt_line += str(self.properties["number of skipped lines"]).rjust(5) + " "
        opt_line += str(self.properties["marquadrt-levenburg"]).rjust(12) + " "
        opt_line += str(self.properties["maximum error"]).rjust(15) + " "
        opt_line += str(self.properties["fractional importance"]).rjust(15) + " "
        opt_line += str(self.properties["IR scaling"]).rjust(13) + " "

        prop_line = ""
        prop_line += str("'" + self.properties["reduction"] + "'").rjust(3)
        # Format the spin degeneracy sign - if it's positive, we use asym top
        # quantum numbers. If negative use symmetric top.
        if self.properties["symmetric"] is True and self.properties["linear"] is False:
            prop_line += (
                str(np.negative(self.properties["spin degeneracy"])).rjust(3) + " "
            )
        elif (
            self.properties["symmetric"] is False and self.properties["linear"] is False
        ):
            prop_line += (
                str(np.absolute(self.properties["spin degeneracy"])).rjust(3) + " "
            )
        elif (
            self.properties["symmetric"] is False and self.properties["linear"] is True
        ):
            prop_line += (
                str(np.negative(self.properties["spin degeneracy"])).rjust(3) + " "
            )
        # Format the sign of vibration limit; negative treats top as oblate case
        # while positive treats the prolate case
        if self.properties["prolate"] is True and self.properties["linear"] is False:
            prop_line += (
                str(np.absolute(self.properties["vibration limit"])).rjust(3) + " "
            )
        elif self.properties["prolate"] is False and self.properties["linear"] is False:
            prop_line += (
                str(np.negative(self.properties["vibration limit"])).rjust(3) + " "
            )
        else:
            prop_line += str(self.properties["vibration limit"]).rjust(3) + " "
        prop_line += str(self.properties["K range"][0]).rjust(3) + " "
        prop_line += str(self.properties["K range"][1]).rjust(3) + " "
        prop_line += str(self.properties["interactions"]).rjust(3) + " "
        prop_line += str(self.properties["statistical axis"]).rjust(3) + " "
        prop_line += str(self.properties["even state weight"]).rjust(3) + " "
        prop_line += str(self.properties["odd state weight"]).rjust(3) + " "
        prop_line += str(self.properties["vsym"]).rjust(3)
        prop_line += str(self.properties["ewt"]).rjust(3)
        prop_line += str(self.properties["diagonalization"]).rjust(3)
        prop_line += str(self.properties["xopt"]).rjust(3)
        # may be missing EWT

        with open(self.properties["name"] + ".par", "w+") as write_file:
            write_file.write(self.properties["name"] + "\n")
            write_file.write(opt_line + "\n")
            write_file.write(prop_line + "\n")
            for parameter in self.param_objects:
                par_line = self.param_objects[parameter].format_line()
                write_file.write(par_line + "\n")

[docs]    def setup_int(self):
        """ Setup the int file. Order of things written:
            1. units
            2. molecular tag identifier
            3. partition function
            4. quantum number lower limit
            5. quantum number upper limit
            6. intensity threshold lower limit
            7. intensity threshold upper limit
            8. frequency limit in GHz
            9. vibrational quantum number limit
            10. dipole moments
        """
        settings_line = " "  # units of the sim
        if self.properties["units"] == "wavenumbers":
            settings_line += "1"
        elif self.properties["units"] == "MHz":
            settings_line += "0"
        else:
            settings_line += "0"
        settings_line += " " + str(self.properties["tag"]).rjust(6)
        settings_line += str(self.properties["partition function"]).rjust(10)
        for value in self.properties["quantum number range"]:
            settings_line += str(value).rjust(5)
        for value in self.properties["intensity threshold"]:
            settings_line += str(value).rjust(7)
        settings_line += str(self.properties["frequency limit"]).rjust(9)
        settings_line += str(self.properties["temperature"]).rjust(6)
        settings_line += str(self.properties["vibration limit"]).rjust(7)

        with open(self.properties["name"] + ".int", "w+") as write_file:
            write_file.write(self.properties["name"] + "\n")
            write_file.write(settings_line + "\n")
            for projection in self.properties["dipole"]:
                if projection == "A":
                    write_file.write(
                        " 1      " + str(self.properties["dipole"][projection]) + "\n"
                    )
                elif projection == "B":
                    write_file.write(
                        " 2      " + str(self.properties["dipole"][projection]) + "\n"
                    )
                elif projection == "C":
                    write_file.write(
                        " 3      " + str(self.properties["dipole"][projection]) + "\n"
                    )

[docs]    def fit_lines(self, verbose=True):
        os.chdir(self.top_dir)
        if os.path.isfile(self.properties["name"] + ".lin") is False:
            runinput = input("Run calbak to generate a .lin file from .cat? (Y/N)")
            if runinput.lower() == "y":
                run_calbak(self.properties["name"])
            else:
                raise FileNotFoundError(self.properties["name"] + ".lin not found!")

        # Read in the .lin file to to figure out the number of lines we're going
        # to use in the fit. Right now, the contents are saved to a lin_file
        # attribute as a list, but can potentially be used to do more...
        with open(self.properties["name"] + ".lin") as read_file:
            self.lin_file = read_file.readlines()
            # self.properties["number of lines"] = len(self.lin_file)

        # Figure out what the next iteration is
        folder_number = generate_folder()
        self.cwd = str(folder_number) + "/"
        # If this is the first iteration, backup the data before we do anything
        if folder_number == 1:
            if os.path.isdir("initial") is False:
                os.mkdir("initial")
            backup_files(self.properties["name"], "./initial/")
        # Copy files to the work directory
        backup_files(self.properties["name"], self.cwd)

        # write the settings used for the current simulation to disk
        dump_yaml(
            str(folder_number) + "/" + self.properties["name"] + ".yml", self.properties
        )
        # change directory to the new working directory
        os.chdir(self.cwd)
        # Write the .int and .par file to disk
        self.setup_int()
        self.setup_par()
        # Run SPFIT to fit the lines
        run_spfit(self.properties["name"])
        # Create an instance of a fit object, and add it to the pile
        self.iterations[self.iteration_count] = fit_output(
            self.properties["name"] + ".fit",
            verbose=verbose,
            interactive=self.interactive,
        )
        self.predict_lines()
        os.chdir(self.top_dir)

        # Update the parameters in the molecule instance!
        current_params = self.iterations[self.iteration_count].export_parameters()
        self.iteration_count += 1
        self.update_parameters(current_params, verbose=False)

        # Save the updated parameters to disk
        dump_yaml(self.cwd + self.properties["name"] + ".fit.yml", self.properties)

        print("Current parameters (MHz)")
        for parameter in current_params:
            print(
                parameter
                + "\t"
                + str(current_params[parameter]["value"])
                + "("
                + str(current_params[parameter]["uncertainty"] + ")")
            )

[docs]    def calbak(self):
        """ Run calbak to generate a .lin file from .cat """
        run_calbak(self.properties["name"])
        with open(self.properties["name"] + ".lin") as read_file:
            print(read_file.read())

[docs]    def predict_lines(self, verbose=True):
        """ Run SPCAT to predict lines based on a .var and .int file.
            This will operate in the current directory - since it does not
            need to overwrite any files, we don't actually back up anything!

            The predicted lines are also parsed into a .csv file, and plot up
            into: (A) a notebook inline plot, and (B) a PDF output.

            SPCAT is run twice - first to get the partition function, and second
            to get the correct intensities.
        """
        # First pass of SPCAT; get the partition function
        self.properties["partition function"] = run_spcat(
            self.properties["name"],
            temperature="{:.3f}".format(self.properties["temperature"]),
        )
        # Make sure the int file has the correct partition function
        self.setup_int()
        # Second pass of SPCAT with correct intensities
        run_spcat(self.properties["name"])
        # Parse the output of SPCAT
        self.cat_lines = parsers.parse_cat(self.properties["name"] + ".cat")
        print(
            "Saving the parsed lines to " + self.properties["name"] + "_parsedlines.csv"
        )
        self.cat_lines.to_csv(self.properties["name"] + "_parsedlines.csv")

        # Plot the .cat file up
        fig, ax = plot_pickett(self.cat_lines, verbose=verbose)
        os.chdir(self.top_dir)
        fig.savefig(self.properties["name"] + "_spectrum.pdf", format="pdf")

[docs]    def copy_settings(self, iteration=0):
        """ Copy settings used in a previous iteration
            If none specified, we'll take the settings from before the first fit
        """
        if iteration == 0:
            iteration = "initial"
        # current_params = self.iterations[iteration].export_parameters()
        iteration_folder = str(iteration) + "/" + self.properties["name"]
        if os.path.isfile(iteration_folder + ".fit.yml") is True:
            iteration_file = iteration_folder + ".fit.yml"
        else:
            iteration_file = iteration_folder + ".yml"
        iteration_params = read_yaml(iteration_file)
        self.properties.update(iteration_params)
        print("Settings copied from " + iteration_file)

[docs]    def update_parameters(self, parameters, verbose=True):
        """ Update the simulation parameters
            This is written with a loop in order to make sure the
            keys that aren't specified remain the same as before without
            change.
        """
        for key in parameters:
            self.properties["parameters"][key].update(parameters[key])
        self.generate_parameter_objects(verbose=verbose)

[docs]    def backup(self, comment=None):
        """ Method for backing up files for a specific reason.
            The most common use I have for this is to backup .cat and .lin files
            for whatever reason (adding/removing lines to/from fit)
        """
        if comment is None:
            raise RuntimeError("Please provide a comment for this backup!")
        if os.path.isdir("backup") is False:
            os.mkdir("backup")
        os.chdir("backup")
        folder_number = generate_folder()
        with open(str(folder_number) + "/README", "w+") as write_file:
            write_file.write(comment)
        os.chdir(self.top_dir)
        backup_files(self.properties["name"], "./backup/" + str(folder_number))

[docs]    def restore_backup(self, number):
        """ Restores backed up files from the `backup` method """
        files = glob("backup/" + str(number) + "/*")
        for file in files:
            shutil.copy2(file, self.top_dir)
        print("Restored from backup " + str(number))

[docs]    def save_progress(self):
        if os.path.isdir("instances") is False:
            os.mkdir("instances")
        counter = 0
        while os.path.exists(
            "instances/" + self.properties["name"] + ".%s.pickle" % counter
        ):
            counter += 1
        with open(
            "instances/" + self.properties["name"] + "." + str(counter) + ".pickle",
            "wb",
        ) as write_file:
            pickle.dump(self, write_file, pickle.HIGHEST_PROTOCOL)

[docs]    def set_fit(self, fit=True, params=None, verbose=False):
        """ Function to flip all parameters to fit or not to fit.
            The default behaviour, if nothing is provided, is to fit all the
            parameters.

            A list of parameters can be supplied to the params argument, which
            will specifically fit or not fit those parameters.
        """
        if params is None:
            params = list(self.properties["parameters"].keys())
        if type(params) is str:
            if params not in list(self.properties["parameters"].keys()):
                raise KeyError(
                    "Parameter " + parameter + " is not in your parameter list!"
                )
            else:
                self.properties["parameters"][parameter]["fit"] = fit
        elif type(params) is list:
            for parameter in params:
                if parameter not in list(self.properties["parameters"].keys()):
                    raise KeyError(
                        "Parameter " + parameter + " is not in your parameter list!"
                    )
                else:
                    self.properties["parameters"][parameter]["fit"] = fit
        self.generate_parameter_objects(verbose=verbose)

[docs]    def report_parameters(self):
        """ Method to return a dictionary of only the parameter values """
        param_dict = dict()
        for parameter in self.properties["parameters"]:
            param_dict[parameter] = self.properties["parameters"][parameter]["value"]
        return param_dict

[docs]    def finalize(self, iteration=None):
        """ Been hard to keep track which iteration was the final one
            sometimes, and so this function will "finalize" the fits by
            creating a separate folder for the final fits.
        """
        if iteration is None:
            # If no iteration is specified, the last iteration is used
            iteration = self.iteration_count - 1
            print("No iteration specified, using the last iteration.")
        if os.path.isdir(self.top_dir + "/final") is True:
            confirmation = input("Final folder exists. Confirm deletion? Y/N").lower()
            if confirmation == "y":
                shutil.rmtree(self.top_dir + "/final")
            else:
                raise ValueError("Final folder exists, and not deleting.")
        # Copy the files over from designated iteration
        shutil.copytree(self.top_dir + "/" + str(iteration), self.top_dir + "/final")
        # Generate a report for the final fits
        for parameter in self.properties["parameters"]:
            if "uncertainty" in list(self.properties["parameters"][parameter].keys()):
                self.properties["parameters"][parameter]["formatted"] = (
                    str(self.properties["parameters"][parameter]["value"])
                    + "("
                    + str(self.properties["parameters"][parameter]["uncertainty"])
                    + ")"
                )
            else:
                self.properties["parameters"][parameter]["formatted"] = str(
                    self.properties["parameters"][parameter]["value"]
                )
        report_df = pd.DataFrame.from_dict(self.properties["parameters"]).T
        with open(self.top_dir + "/final/parameter_report.html", "w+") as write_file:
            write_file.write(report_df.to_html())
        for file in glob(self.top_dir + "/final/*"):
            # Make the final files read-only
            os.chmod(file, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)

[docs]    def parameter_scan(self, parameter, values, relaxed=False):
        """ A method for automatically scanning parameter space to see
            what values give a minimum RMS.

            We will repeatedly call SPFIT to estimate the fitting error
            based on what the parameters are. There are two modes depending
            on the `relaxed` variable: whether or not all other values are
            allowed to relax (like a relaxed potential energy scan) or if
            all parameters are held frozen.

            Args:
            parameter - a string input name of the parameter to change
            values - a list or array of values to be scanned
            relaxed - boolean determining whether the parameters are fixed
        """
        # Make sure none of the parameters are being fit
        self.set_fit(fit=relaxed, verbose=False)
        # We won't fit the scanning parameter regardless of relaxation
        self.properties["parameters"][parameter]["fit"] = False
        # "Remember" what the parameter settings were at this point in time
        parameter_dict = self.properties
        # Loop over all the specified values
        parameter_values = list()
        for index, value in enumerate(values):
            # Reset the parameters to scan values
            self.properties.update(parameter_dict)
            self.properties["parameters"][parameter]["value"] = value
            self.generate_parameter_objects(verbose=False)
            # Silence the output
            self.fit_lines(verbose=False)
            # Record the final RMS error for this parameter
            current_params = self.report_parameters()
            last_key = max(list(self.iterations.keys()))
            current_params["rms"] = self.iterations[last_key].fit_properties[
                "final rms"
            ]
            parameter_values.append(current_params)
        scan_report = pd.DataFrame(parameter_values)

        fig, ax = plt.subplots(figsize=(14, 5.5))

        ax.scatter(x=scan_report[parameter].values, y=scan_report["rms"].values)
        ax.set_ylabel("RMS (MHz)")
        ax.set_xlabel(parameter + " value (MHz)")
        ax.set_xticks(values)
        ax.set_xticklabels(values)
        ax.set_title("Parameter scan for " + parameter)

        fig.savefig(
            "scan_" + parameter + "_" + str(values[0]) + "-" + str(values[-1]) + ".pdf",
            format="pdf",
        )
        if isnotebook() is True:
            plt.show()
        else:
            pass

        self.scan_report = scan_report

[docs]    class parameter:
        """ Class for handling parameters in Pickett.
            Each instance has a human name (i.e. A, B, C constants) which
            is to be translated to Pickett identifiers.

            A `fit` flag is used to denote whether or not the parameter
            is going to be fit, which automatically sets the uncertainty
            to a large number.

            Nuclear spin terms will have the nuclei identifier
        """

        def __init__(
            self, name, parameter_dict, reduction="A", linear=True, verbose=True
        ):
            # Initialize values for parameters
            self.variables = {
                "name": name,
                "value": 0.0,
                "uncertainty": 1e-25,
                "fit": False,
                "identifier": 0,
                "nuclei": 0,
            }
            # After initializing, put in user values
            self.variables.update(parameter_dict)
            self.fit_check()
            if verbose is True:
                # Format the parameter printing to be nicer
                pp = pprint.PrettyPrinter(indent=4)
                pp.pprint(self.variables)
            if self.variables["nuclei"] == 0 and self.variables["name"] in [
                "eQq",
                "eQq/2",
            ]:
                # warning message issued if no nucleus specified
                print("You have specificed a hyperfine parameter, but")
                print("did not specify a nucleus in your input file.")
                raise ValueError("Hyperfine parameter with no nuclei ID!")

            # Convert the human parameter name to a Pickett identifier
            self.variables["identifier"] = human2pickett(
                self.variables["name"], reduction, linear, self.variables["nuclei"] + 1
            )
            if self.variables["fit"] is True:
                # flag variable for fitting
                self.variables["uncertainty"] = 1e25

[docs]        def format_line(self):
            """ Formats a string output to match the Pickett format for a parameter """
            pickett_line = str(self.variables["identifier"]).rjust(13)
            pickett_line += str(self.variables["value"]).rjust(24)
            pickett_line += str(self.variables["uncertainty"]).rjust(16)
            pickett_line += " /" + self.variables["name"]
            return pickett_line

[docs]        def fit_check(self):
            """ A function to flip the uncertainty if we will fit a parameter """
            if self.variables["fit"] is True:
                self.variables["uncertainty"] = 1e25
            else:
                self.variables["uncertainty"] = 1e-25

[docs]    class exp_line:
        """ Class for an experimental line. Converts to and from .lin files. """

        def __init__(self, frequency, uncertainty, lower_num, upper_num, comment=None):
            self.properties = {
                "frequency": frequency,  # frequency in MHz
                "uncertainty": uncertainty,  # uncertainty in MHz
                "lower quantum nos.": lower_num,  # list of quantum numbers
                "upper quantum nos.": upper_num,  # for lower and upper states
                "comment": comment,
            }

[docs]        def update_line(self, line_dict):
            self.properties.update(line_dict)
            print(self.properties)

[docs]        def format_quantum_numbers(quantum_list):
            line = ""
            for value in quantum_list:
                line += str(value).rjust(3)  # each quantum number is right
            return line  # justified by 3 positions...

[docs]        def format_line(self):
            # Formatting for a .lin line
            line = ""
            line += self.format_quantum_numbers(self.properties["upper quantum nos."])
            line += self.format_quantum_numbers(self.properties["lower quantum nos."])
            line += str(self.properties["frequency"]).rjust(33)
            line += str(self.properties["uncertainty"]).rjust(9)
            if self.properties["comment"] is not None:
                line += " /" + self.properties["comment"]
            return line


[docs]@dataclass
class QuantumNumber:
    """
    Class for a Quantum Number.
    """

    value: int
    upper: bool = False
    frozen: bool = False
    j: bool = True
    center: int = 1
    width: int = 1
    delta: None = None

    def __post_init__(self):
        if self.center is None:
            self.center = 1
        if self.width is None:
            self.width = 1

    def __repr__(self):
        return str(self.value)

    def __str__(self):
        return str(self.value)

[docs]    def roll(self):
        """
        Generate a random value for the current quantum number.
        """
        self.value = int(np.random.normal(self.center, self.width, 1))

[docs]    def spawn_upper(self, center=0, width=1):
        """
        Create a new QuantumNumber instance for the corresponding upper state.
        Will also shift the quantum number to a new value, +/-2,1,0.

        Returns
        -------
        QuantumNumber: object
            A deep copy of the current QuantumNumber instance, but also shifts the
            quantum number by a random integer.
        """
        # If the user specified did not give a value of delta to enforce, generate it
        if self.delta is None:
            delta = int(np.random.normal(center, width, 1))
        else:
            delta = self.delta
        new_qno = deepcopy(self)
        if new_qno.value + delta < 0:
            new_qno.value = 0
        else:
            new_qno.value += delta
        new_qno.upper = True
        return new_qno


[docs]@dataclass
class Transition:
    frequency: float
    n_numbers: int
    quantum_numbers: List = field(default_factory=list)
    j: List = field(default_factory=list)
    lower_state: List = field(default_factory=list)
    upper_state: List = field(default_factory=list)
    centers: List = field(default_factory=list)
    widths: List = field(default_factory=list)
    deltas: List = field(default_factory=list)
    uncertainty: float = 0.005

[docs]    @classmethod
    def from_list(cls, frequency, qnos, uncertainty=None):
        """
        Class method to spawn a Transition from a list of quantum numbers.
        This takes a flat iterable (qnos), and splits it into two; the first
        half is used for the lower state quantum numbers, and the second half
        the upper state.

        Parameters
        ----------
        frequency: float
            Frequency of the transition in MHz
        qnos: list
            Flat list of quantum numbers, includes both lower and upper states
        uncertainty: float or None, optional
            Optional frequency uncertainty of the transition in MHz. Defaults to None, which uses
            the default value of 0.005 MHz.

        Returns
        -------
        Transition: object
            Transition object initialized with the specifications
        """
        n_numbers = len(qnos)
        half = int(n_numbers / 2)
        trans = cls(frequency=frequency, n_numbers=n_numbers, uncertainty=uncertainty)
        lower_state = qnos[:half]
        upper_state = qnos[half:]
        trans.quantum_numbers = [
            [str(lower) for lower in lower_state],
            [str(upper) for upper in upper_state],
        ]
        return trans

    def __post_init__(self):
        if self.centers is None:
            self.centers = [1 for i in range(self.n_numbers)]
        if self.widths is None:
            self.widths = [1 for i in range(self.n_numbers)]
        if self.uncertainty is None:
            self.uncertainty = 0.005
        # Initialize the quantum numbers
        self.lower_state = [
            QuantumNumber(0, center=center, width=width)
            for center, width in zip(self.centers, self.widths)
        ]
        if len(self.j) != 0:
            for qno, j in zip(self.lower_state, self.j):
                qno.j = j

    def __repr__(self):
        return str(self)

    def __str__(self):
        """
        Method to format the quantum numbers into lin file format.

        Returns
        -------
        line: str
            Upper and lower state quantum numbers formatted into lin
            format.
        """
        line = "  {upper}  {lower}                 {frequency}     {uncertainty}   1."
        format_dict = {
            "upper": "  ".join(self.quantum_numbers[1]),
            "lower": "  ".join(self.quantum_numbers[0]),
            "frequency": self.frequency,
            "uncertainty": self.uncertainty,
        }
        return line.format(**format_dict)

[docs]    def random_quantum_numbers(self):
        """
        Create a random set of quantum numbers for a transition. First, the lower state is generated based on
        lower and upper bounds specified by the user. A deepcopy is made for each lower state quantum number,
        and the associated upper state number is created by shifting the lower value by a random integer between
        -2 and 2.
        Returns
        -------

        """
        for qno in self.lower_state:
            qno.roll()
        self.upper_state = [qno.spawn_upper() for qno in self.lower_state]
        self.quantum_numbers = [
            [str(qno) for qno in self.lower_state],
            [str(qno) for qno in self.upper_state],
        ]
        return self.quantum_numbers


[docs]@dataclass
class AutoFitSession:
    filename: str
    n_numbers: int
    frequencies: List = field(default_factory=list)
    uncertainties: List = field(default_factory=list)
    centers: List = field(default_factory=list)
    widths: List = field(default_factory=list)
    deltas: List = field(default_factory=list)
    j: List = field(default_factory=list)
    method: str = "mc"
    rms_target: float = 1.0
    rms: float = 1e9
    nfit: int = 0
    niter: int = 10000
    nprocesses: int = 1
    verbose: int = 0
    debug: bool = False
    clean: bool = True

[docs]    @classmethod
    def from_yml(cls, filepath):
        """
        Class method to read the settings for an AutoFitSession via YAML file.
        Parameters
        ----------
        filepath: str
            Filepath to the YAML settings file

        Returns
        -------
        AutoFitSession object
        """
        session = cls(**routines.read_yaml(filepath))
        return session

[docs]    @classmethod
    def from_pkl(cls, filepath):
        """
        Function to load a previously saved Pickle instance of AutoFitSession.

        TODO: write a check to make sure we're reading in an AutoFitSession pickle!
        Parameters
        ----------
        filepath: str
            Filepath to the pickle file to load.

        Returns
        -------
        Loaded
        """
        cls = routines.read_obj(filepath)
        # if cls.__name__ != "AutoFitSession":
        #    raise Exception("Target file is not an AutoFitSession object!")
        # else:
        return cls

    def __post_init__(self):
        if self.uncertainties is None:
            self.uncertainties = [0.005 for i, _ in enumerate(self.frequencies)]
        if os.path.exists(self.filename + ".par") is False:
            raise Exception(".par file does not exist!")
        with open(self.filename + ".par") as read_file:
            self.par = read_file.readlines()
        self.wd = os.getcwd()
        if self.clean is True:
            for dir in ["fits", "yml", "lin"]:
                try:
                    os.rmdir(dir)
                except FileNotFoundError:
                    pass
        # Setup filestructure
        for folder in ["fits", "yml", "lin"]:
            if os.path.exists(folder) is False:
                os.mkdir(folder)
        if self.method not in ["mc", "bruteforce"]:
            raise Exception("Testing method {} not implemented!".format(self.method))
        else:
            if self.method == "mc":
                self._iteration = self._rng
            elif self.method == "bruteforce":
                self._iteration = self._brute

    def _brute_generator(self):
        """
        Create a generator that will brute force every possible combination of quantum numbers for every
        transition. The user sets the maximum values for each quantum number, and this function will produce a
        generator.

        The quantum number tuple that is produced from the generator is flat, and will need to be
        reshaped before feeding into a QuantumNumber object.

        Returns
        -------
        enumerate: generator
            Product generator from itertools, wrapped around an enumerate generator
        """
        # This bit of code is probably not very pythonic. The general gist of it is to generate
        # a nested list since for every transition (number of frequencies), we want to systematically
        # test every possible combination of quantum numbers.
        possible = [list(range(val + 1)) for val in self.max_values] * 2
        return enumerate(
            combinations_with_replacement(product(*possible), len(self.frequencies))
        )

    def _brute(self, iteration):
        """

        Parameters
        ----------
        iteration

        Returns
        -------

        """
        index, qnos = iteration
        # Set up the quantum numbers for this transition
        transitions = [
            Transition.from_list(frequency, qno, uncertainty)
            for frequency, uncertainty, qno in zip(
                self.frequencies, self.uncertainties, qnos
            )
        ]
        # Call SPFIT and return the fit RMS
        index, rms = self._check_spfit(index, transitions)
        return index, rms

    def _rng(self, index):
        """
        Private method to perform a single iteration of the whole process. Starts by generating random quantum
        numbers, and creates a temporary folder to run SPFIT along with the .par and .lin files.

        The output .fit file is parsed, and both the parsed data and the .fit file is copied back over to
        the working directory. An index argument will let the function be run in async while still letting
        the specific file index be known.

        Parameters
        ----------
        index: int
            Index that keeps track of the file name to save the output as

        Returns
        -------
        index, rms: 2-tuple
            The current iteration index and the RMS for this iteration
        """
        pkg = zip(self.frequencies, self.uncertainties)
        # Make sure a different seed is used to other parallel jobs
        np.random.seed()
        # Initialize the Transition object, which handles all of the quantum numbers for a given transition
        transitions = [
            Transition(
                frequency,
                uncertainty=uncertainty,
                n_numbers=self.n_numbers,
                centers=self.centers,
                widths=self.widths,
                deltas=self.deltas,
                j=self.j,
            )
            for frequency, uncertainty in pkg
        ]
        # Generate the quantum numbers for each transition
        _ = [transition.random_quantum_numbers() for transition in transitions]
        index, rms = self._check_spfit(index, transitions)
        return index, rms

    def _check_spfit(self, index, transitions):
        """
        Driver to actually run SPFIT and parse the output of the .fit file.

        Parameters
        ----------
        lines

        Returns
        -------

        """
        lines = "\n".join([str(transition) for transition in transitions])
        update = False
        with tempfile.TemporaryDirectory() as path:
            os.chdir(path)
            with open(self.filename + ".lin", "w+") as write_file:
                write_file.write(lines)
            with open(self.filename + ".par", "w+") as write_file:
                write_file.writelines(self.par)
            # Run SPFIT
            routines.run_spfit(self.filename)
            # Parse the output
            fit_dict = parsers.parse_fit(self.filename + ".fit")
            # If the RMS is improved upon, and there is more than one line fit
            if (fit_dict["rms"] < self.rms) or (len(fit_dict["o-c"]) >= self.nfit):
                if fit_dict["rms"] > 0.001:
                    with open("{}.par".format(self.filename), "r") as read_file:
                        self.par = read_file.readlines()
                    self.nfit = len(fit_dict["o-c"])
                    self.rms = fit_dict["rms"]
                    update = True
            # Dump files only if debug mode is on, or we had a successful iteration
            if self.debug is True or update is True:
                # Copy some of the data back over
                routines.dump_yaml(
                    os.path.join(self.wd, "yml/{}.yml".format(index)), fit_dict
                )
                shutil.copy2(
                    self.filename + ".fit",
                    os.path.join(self.wd, "fits/{}.fit".format(index)),
                )
                shutil.copy2(
                    self.filename + ".lin",
                    os.path.join(self.wd, "lin/{}.lin".format(index)),
                )
            # Not sure if this is necessary, but just in case
            os.chdir(self.wd)
        return index, fit_dict["rms"]

[docs]    def run(self, niter=None, nprocesses=None, headless=True, method="mc"):
        """
        Run the search for assignments. The function wraps a private method, and is parallelized with a joblib
        Pool.
        Parameters
        ----------
        niter: int or None, optional
            Number of iterations. If None, uses the class attribute, otherwise the user specified value.
        nprocesses: int or None, optional
            Number of parallel jobs to break the task into. If None, uses the class attribute.

        Returns
        -------
        results: list
            List of the final RMS values from individual SPFIT runs.
        """
        if niter is None:
            niter = self.niter
        if nprocesses is None:
            nprocesses = self.nprocesses
        pool = joblib.Parallel(n_jobs=nprocesses, verbose=self.verbose, timeout=30.0)
        if method in ["mc", "bruteforce"]:
            if method == "mc":
                iterator = range(niter)
                self._iteration = self._rng
            elif method == "bruteforce":
                iterator = self._brute_generator()
                self._iteration = self._brute
        else:
            raise Exception("Iterator not implemented. Please choose mc or bruteforce.")
        if headless is False:
            iterator = tqdm(iterator)
        # Distribute and run the quantum number testing
        results = pool(joblib.delayed(self._iteration)(i) for i in iterator)
        self.results = results
        return results

[docs]    def filter(self, nlines=1):
        """
        Reads in all of the YAML parsed fits, and reduces the number of possible fits
        a person has to look through.

        Parameters
        ----------
        nlines: int
            Integer value for the minimum number of lines seen in the observed - calc. parse

        Returns
        -------
        viable: dict
            Dictionary with indexes corresponding to the fit number, and values are the parsed
            fit dictionaries
        """
        fits = {
            index: routines.read_yaml(file)
            for index, file in enumerate(glob("yml/*.yml"))
        }
        # Find all fits where the number of actual fitted lines exceeds nlines
        viable = {
            index: fit for index, fit in fits.items() if len(fit["o-c"]) >= nlines
        }
        return viable

[docs]    def save(self, filepath=None):
        """
        Dump the current session to disk as a Pickle file.

        Parameters
        ----------
        filepath: str, optional
            Filepath to save the file to. If None, uses the filename attribute plus .pkl extension
        """
        if filepath is None:
            filepath = self.filename + ".pkl"
        routines.save_obj(self, filepath)


[docs]@dataclass
class GenerateCatalog:
    niter: int = 10
    temp_path: str = ""
    out_path: str = ""
    temp_dict: Dict = field(default_factory=dict)
    constants_dict: Dict = field(default_factory=dict)
    constants: List = field(default_factory=list)
    cwd: str = ""

    def __post_init__(self):
        self.temp_path = Path(self.temp_path)
        self.out_path = Path(self.out_path)
        if self.temp_path.exists() is False:
            raise Exception("Target template folder is missing!")
        self.cwd = os.getcwd()
        self.counter = self._count_generator()

    def _count_generator(self):
        """
        Private generator for counting the simulation number.

        Returns
        -------
        index : str
            Left zero-padded string corresponding to the index
        """
        for index in range(self.niter):
            yield f"{index:07d}"

    def _read_templates(self):
        """
        Private method for reading in the var and int file templates. Will
        perform checks to make sure that the correct number of templates are
        actually found.
        """
        self.temp_dict = dict()
        template_paths = self.temp_path.rglob("template*")
        if len(list(template_paths)) < 2:
            raise Exception(f"Template files missing; found {template_paths}")
        else:
            for path in self.temp_path.rglob("template*"):
                self.temp_dict[path.suffix] = path.read_text()

[docs]    def generate_constants(self, lower=1000.0, upper=10000.0, distortion=True):
        """
        Function to generate a random set of constants for an Asymmetric top
        based on a uniform random distribution. This also includes centrifugal
        distortion terms.

        Parameters
        ----------
        lower, upper : float, optional
            Lower and upper bounds for constants to be generated, in MHz.
        distortion : bool, optional
            If True, quartic centrifugal distortion terms are also included
            in the simulation. This option is on by default.

        Returns
        -------
        constants_dict : dict
            Dictionary with generated rotational constants
        """
        assert lower <= upper
        c, b, a = np.sort(np.random.uniform(lower, upper, 3))
        if distortion:
            # Generate random values for quartic centrifugal distortion as well
            cd = np.random.uniform(1e-6, 1e-3, 5)
            dj, djk, dk, d1, d2 = cd
        else:
            # Otherwise set CD terms to zero and not use them
            dj = djk = dk = d1 = d2 = 0.0
        # Generate dipole moment as boolean values
        dipoles = np.random.randint(0, 2, 3)
        # Make sure that we have at least one dipole moment; at the very least
        # an a-type
        if np.sum(dipoles) == 0:
            dipoles[0] = 1.0
        u_a, u_b, u_c = dipoles
        # This part ensures that there is a minimum working example
        self.constants_dict = {
            "A": 10000.0,
            "B": 5000.0,
            "C": 2000.0,
            "u_a": 1.0,
            "u_b": 0.0,
            "u_c": 0.0,
        }
        self.constants_dict.update(
            **{
                "A": a,
                "B": b,
                "C": c,
                "DJ": dj,
                "DJK": djk,
                "DK": dk,
                "d1": d1,
                "d2": d2,
                "u_a": u_a,
                "u_b": u_b,
                "u_c": u_c,
            }
        )
        return self.constants_dict

[docs]    def run_spcat(self):
        """
        Function to run SPCAT from within Python. The function does so with
        temporary folders within a context manager, such that there are no
        scratch files left behind when the process is finished.

        Returns
        -------
        catalog : LineList object
            PySpecTools LineList object based on the digested catalog
        """
        name = next(self.counter)
        # Assume that the constants dictionary actually has values in it
        assert len(self.constants_dict) != 0
        with tempfile.TemporaryDirectory() as temp_dir:
            try:
                os.chdir(temp_dir)
                with open("molecule.var", "w+") as write_file:
                    write_file.write(
                        self.temp_dict[".var"].format_map(self.constants_dict)
                    )
                with open("molecule.int", "w+") as write_file:
                    write_file.write(
                        self.temp_dict[".int"].format_map(self.constants_dict)
                    )
                # Call SPCAT
                _ = run(["spcat", "molecule"], stdout=PIPE)
                # Create LineList object from the catalog
                catalog = LineList.from_catalog(name, name, "molecule.cat")
                catalog.constants = self.constants_dict
                return catalog
            except KeyError:
                raise Exception("temp_dict is missing entries!")
            finally:
                # finally statement to make sure we go back to the original
                # working directory before finishing
                os.chdir(self.cwd)

    def _iteration(self, **kwargs):
        constants = self.generate_constants(**kwargs)
        catalog = self.run_spcat()
        # Add this to the list of constants created
        self.constants.append(constants)
        return catalog

[docs]    def run_batch(self, niter=None, progressbar=True, **kwargs):
        """
        Run a batch of simulations. This is the main function to be called by a
        user, and will automatically carry out all of the book keeping and

        Parameters
        ----------
        niter : int or None, optional
            Number of simulations to perform. This option overrides the class
            attribute, and if the user provided argument is None, will just use
            that instead.
        progressbar : bool, optional
            If True, wraps the iterator with a `tqdm` progress bar.
        kwargs
            Additional kwargs are passed into the `generate_constants`
            function, which allows for control on bounds, etc.

        Returns
        -------

        """
        # Reset the constants list
        self.constants = list()
        np.random.seed()
        if not niter:
            niter = self.niter
        # Set up an iterator for the batch
        iterator = range(niter)
        if progressbar:
            iterator = tqdm(iterator)
        # Run the loop
        results = [self._iteration(**kwargs) for _ in iterator]
        return results