Source code for pyspectools.database

"""
    database.py

    Routines for managing a spectral line database.

    TODO - set up routines for a persistent database
"""

import os
import warnings

try:
    import tables
    from tables import IsDescription, open_file
    from tables import StringCol, Int64Col, Float64Col
except ImportError:
    warnings.warn(f"PyTables is not installed correctly!")

import tinydb
from tinydb.middlewares import CachingMiddleware
from tinydb.storages import JSONStorage
import pandas as pd

from pyspectools import parsers
from pyspectools import spectra


class SpectralCatalog(tinydb.TinyDB):
    """
    Grand unified experimental catalog. Stores assignment and uline
    information across the board.

    The catalog is a ``tinydb.TinyDB`` database backed by a JSON file; every
    document is the attribute dictionary of a Transition object (or a plain
    dict supplied by the caller).
    """

    def __init__(self, dbpath=None):
        """
        Open (or create) the experimental catalog.

        :param dbpath: str, optional path to the JSON database file. If None
                       (default), ``~/.pyspectools/pyspec_experiment.db`` in
                       the user's home directory is used.
        """
        if dbpath is None:
            dbpath = os.path.expanduser("~/.pyspectools/pyspec_experiment.db")
        # sort_keys/indent/separators are handed on to TinyDB together with
        # the storage; presumably they end up as JSON formatting options of
        # the JSONStorage backend - confirm against the TinyDB docs.
        super().__init__(
            dbpath,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
            storage=CachingMiddleware(JSONStorage),
        )

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Context manager exit hook, invoked when a ``with`` block that uses
        the catalog is left (not when the object is garbage collected).
        Closes the database to make sure it is saved properly.

        :param exc_type: exception type raised inside the block, or None
        :param exc_value: exception instance raised inside the block, or None
        :param traceback: traceback of the exception, or None
        """
        self.close()

    def add_entry(self, assignment, dup_check=True):
        """
        This function adds a Transition object to an existing database. The
        method will check for duplicates before adding.

        Parameters
        ----------
        assignment - Transition object or dict
            Reference to a Transition object; its ``__dict__`` is what gets
            stored. A dict (or dict subclass) is stored as-is.
        dup_check - bool, optional
            If True (default), will check to make sure the Transition object
            doesn't already exist in the database. A warning is issued and
            nothing is inserted when an identical entry is found.
        """
        # isinstance (rather than an exact type comparison) keeps dict
        # subclasses on the "already a mapping" path instead of storing
        # their instance attributes.
        if isinstance(assignment, dict):
            new_entry = assignment
        else:
            new_entry = assignment.__dict__
        if dup_check is True and any(new_entry == entry for entry in self.all()):
            warnings.warn("Entry already exists in database.")
            return
        self.insert(new_entry)

    def add_catalog(self, catalog_path, name, formula, **kwargs):
        """
        Load a SPCAT catalog file into the database. Creates independent
        Transition objects from each line of the catalog file. Kwargs are
        passed into the Transition object, which will allow additional
        settings for the Transition object to be accessed.

        Lines whose frequency is already stored for a molecule of the same
        name are skipped.

        :param catalog_path: path to the catalog file, handed to
                             ``parsers.parse_cat``
        :param name: str, name of the molecule the catalog belongs to
        :param formula: str, formula stored alongside every line
        :param kwargs: additional keyword arguments for each Transition
        :return: None
        """
        # check if the name and formula exists already
        exist_df = self.search_molecule(name)
        cat_df = parsers.parse_cat(catalog_path)
        if exist_df is not None:
            # drop all of the entries that are already in the catalog. The
            # lines written below are stored under "catalog_frequency", so
            # that column is consulted in addition to "frequency"; either
            # column may be absent from the stored documents.
            known_freqs = set()
            for column in ("frequency", "catalog_frequency"):
                if column in exist_df.columns:
                    known_freqs.update(exist_df[column].dropna().tolist())
            cat_df = cat_df.loc[~cat_df["Frequency"].isin(list(known_freqs))]
        assign_dict = {"name": name, "formula": formula}
        assign_dict.update(**kwargs)
        # slice out only the relevant information from the dataframe
        # NOTE(review): "Lower state energy" is stored as "ustate_energy";
        # confirm this is intended rather than a lower-state field.
        select_df = cat_df[["Frequency", "Intensity", "Lower state energy"]].rename(
            columns={
                "Frequency": "catalog_frequency",
                "Intensity": "catalog_intensity",
                "Lower state energy": "ustate_energy",
            }
        )
        select_dict = select_df.to_dict(orient="records")
        # update each line with the common data entries
        assignments = [
            spectra.assignment.Transition(**line, **assign_dict).__dict__
            for line in select_dict
        ]
        # Insert all of the documents en masse
        self.insert_multiple(assignments)

    def search_frequency(self, frequency, freq_prox=0.1, freq_abs=True, dataframe=True):
        """
        Search the database for entries whose ``frequency`` or
        ``catalog_frequency`` lies within a window around a center frequency.

        :param frequency: float, center frequency to search for in the database
        :param freq_prox: float, search range tolerance. If freq_abs is True,
                          the absolute value is used (in MHz). Otherwise,
                          freq_prox is a decimal percentage of the frequency.
        :param freq_abs: bool, dictates whether the absolute value of
                         freq_prox is used.
        :param dataframe: bool, if True (default) the matches are returned as
                          a pandas dataframe, otherwise as the list of stored
                          documents.
        :return: dataframe or list of matches; None if nothing matched
        """
        frequency = float(frequency)
        if freq_abs is True:
            min_freq = frequency - freq_prox
            max_freq = frequency + freq_prox
        else:
            min_freq = frequency * (1 - freq_prox)
            max_freq = frequency * (1 + freq_prox)
        Entry = tinydb.Query()
        # An entry matches when either of its two frequency fields falls
        # inside [min_freq, max_freq]; the grouping is spelled out explicitly.
        in_measured = (Entry["frequency"] <= max_freq) & (
            min_freq <= Entry["frequency"]
        )
        in_catalog = (Entry["catalog_frequency"] <= max_freq) & (
            min_freq <= Entry["catalog_frequency"]
        )
        matches = self.search(in_measured | in_catalog)
        if not matches:
            return None
        if dataframe is True:
            return pd.DataFrame(matches)
        return matches

    def _search_field(self, field, value, dataframe=True):
        """
        Function for querying the database for a particular field and value.
        The option dataframe specifies whether the matches are returned as a
        pandas dataframe, or as a list of Transition objects.

        :param field: str, field to query
        :param value: value to compare with (exact equality)
        :param dataframe: bool, if True will return the matches as a pandas
                          dataframe.
        :return: dataframe or list of Transition objects; None if nothing
                 matched
        """
        matches = self.search(tinydb.where(field) == value)
        if not matches:
            return None
        if dataframe is True:
            return pd.DataFrame(matches)
        # Same Transition class that add_catalog uses to create the entries.
        return [spectra.assignment.Transition(**data) for data in matches]

    def search_molecule(self, name, dataframe=True):
        """
        Search for a molecule in the database based on its name (not
        formula!). Wraps the _search_field method, which will return None if
        nothing is found, or either a pandas dataframe or a list of
        Transition objects.

        :param name: str, name (not formula) of the molecule to search for
        :param dataframe: bool, if True, returns a pandas dataframe
        :return: matches: a dataframe or list of Transition objects that
                 match the search name
        """
        return self._search_field("name", name, dataframe)

    def search_experiment(self, exp_id, dataframe=True):
        """
        Search for all entries whose "experiment" field equals an experiment
        ID. Wraps the _search_field method.

        :param exp_id: experiment ID to search for
        :param dataframe: bool, if True, returns a pandas dataframe
        :return: a dataframe or list of Transition objects; None if nothing
                 matched
        """
        return self._search_field("experiment", exp_id, dataframe)

    def search_formula(self, formula, dataframe=True):
        """
        Search for all entries whose "formula" field equals the given
        formula. Wraps the _search_field method.

        :param formula: str, formula to search for
        :param dataframe: bool, if True, returns a pandas dataframe
        :return: a dataframe or list of Transition objects; None if nothing
                 matched
        """
        return self._search_field("formula", formula, dataframe)

    def _remove_field(self, field, value):
        """
        Remove every entry whose field equals the given value.

        :param field: str, field to compare
        :param value: value the field must equal for the entry to be removed
        """
        Entry = tinydb.Query()
        self.remove(Entry[field] == value)

    def remove_experiment(self, exp_id):
        """
        Remove all entries based on an experiment ID.

        :param exp_id: int, experiment ID
        """
        # search_experiment looks entries up through the "experiment" field,
        # so removal has to target that same field.
        self._remove_field("experiment", exp_id)

    def remove_molecule(self, name):
        """
        Remove all entries of a molecule based on its name (not formula).

        :param name: str, name of the molecule to remove
        """
        self._remove_field("name", name)
class TheoryCatalog(tinydb.TinyDB):
    """
    Grand unified theory catalog.

    A ``tinydb.TinyDB`` database opened with TinyDB's default settings.
    """

    def __init__(self, dbpath=None):
        """
        Open (or create) the theory catalog.

        :param dbpath: str, optional path to the database file. If None
                       (default), ``~/.pyspectools/pyspec_theory.db`` in the
                       user's home directory is used.
        """
        target = dbpath
        if target is None:
            # No explicit location given: fall back to the per-user default.
            target = os.path.expanduser("~/.pyspectools/pyspec_theory.db")
        super().__init__(target)