"""
database.py
Routines for managing a spectral line database.
TODO - set up routines for a persistent database
"""
import os
import warnings
try:
import tables
from tables import IsDescription, open_file
from tables import StringCol, Int64Col, Float64Col
except ImportError:
warnings.warn(f"PyTables is not installed correctly!")
import tinydb
from tinydb.middlewares import CachingMiddleware
from tinydb.storages import JSONStorage
import pandas as pd
from pyspectools import parsers
from pyspectools import spectra
class SpectralCatalog(tinydb.TinyDB):
    """
    Grand unified experimental catalog. Stores assignment and uline information
    across the board.

    The catalog is a TinyDB database using JSON storage wrapped in TinyDB's
    ``CachingMiddleware``. Close the database (``close()`` or a ``with``
    block) when done so that cached data is written out.
    """

    def __init__(self, dbpath=None):
        """
        Open the experimental catalog.

        :param dbpath: str, optional path to the database file. If None (default),
                       ``~/.pyspectools/pyspec_experiment.db`` is used.
        """
        if dbpath is None:
            dbpath = os.path.expanduser("~/.pyspectools/pyspec_experiment.db")
            # The default folder may not exist yet on a fresh installation;
            # create it so that opening the storage does not fail.
            os.makedirs(os.path.dirname(dbpath), exist_ok=True)
        super().__init__(
            dbpath,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
            storage=CachingMiddleware(JSONStorage),
        )

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Context manager exit hook, called when a ``with`` block using this
        catalog is left. Closes the database so that it is saved properly.
        Returns None, so an exception raised inside the block propagates.
        """
        self.close()

    def add_entry(self, assignment, dup_check=True):
        """
        Add a single entry to the database, optionally checking for duplicates
        before adding.

        :param assignment: Transition object or dict. A dict is stored as given;
                           for any other object its ``__dict__`` is stored.
        :param dup_check: bool, optional. If True (default), the entry is only
                          inserted when no identical document exists in the
                          database; otherwise a warning is issued and nothing
                          is inserted.
        """
        # isinstance (rather than an exact type comparison) so that dict
        # subclasses are stored as the mapping they are, not as their __dict__.
        if isinstance(assignment, dict):
            new_entry = assignment
        else:
            new_entry = assignment.__dict__
        # The duplicate check compares against every stored document.
        if dup_check and any(new_entry == entry for entry in self.all()):
            warnings.warn("Entry already exists in database.")
            return
        self.insert(new_entry)

    def add_catalog(self, catalog_path, name, formula, **kwargs):
        """
        Load a SPCAT catalog file into the database. Creates independent Transition objects
        from each line of the catalog file. Kwargs are passed into the Transition object,
        which will allow additional settings for the Transition object to be accessed.

        Catalog lines whose frequency matches an entry already stored under the
        same molecule name are skipped.

        :param catalog_path: path to the catalog file, handed to ``parsers.parse_cat``
        :param name: str, name of the molecule; stored on every inserted line
        :param formula: str, formula of the molecule; stored on every inserted line
        :param kwargs: additional keyword arguments passed to each Transition
        :return: None
        """
        # check if the name exists already
        exist_df = self.search_molecule(name)
        cat_df = parsers.parse_cat(catalog_path)
        if exist_df is not None:
            # Drop all of the entries that are already in the database. Lines
            # inserted by this method carry their frequency in
            # "catalog_frequency", so both frequency columns are consulted;
            # a column that is absent is simply skipped.
            known_freqs = set()
            for column in ("frequency", "catalog_frequency"):
                if column in exist_df.columns:
                    known_freqs.update(exist_df[column].dropna().tolist())
            cat_df = cat_df.loc[~cat_df["Frequency"].isin(list(known_freqs))]
        common = {"name": name, "formula": formula, **kwargs}
        # slice out only the relevant information from the dataframe
        # NOTE(review): the catalog's "Lower state energy" column is stored as
        # "ustate_energy" - confirm that this mapping is intended.
        select_df = cat_df[["Frequency", "Intensity", "Lower state energy"]].rename(
            columns={
                "Frequency": "catalog_frequency",
                "Intensity": "catalog_intensity",
                "Lower state energy": "ustate_energy",
            }
        )
        # update each line with the common data entries
        assignments = [
            spectra.assignment.Transition(**line, **common).__dict__
            for line in select_df.to_dict(orient="records")
        ]
        # Insert all of the documents en masse; nothing to do if every line
        # was filtered out.
        if assignments:
            self.insert_multiple(assignments)

    def search_frequency(self, frequency, freq_prox=0.1, freq_abs=True, dataframe=True):
        """
        Search the database for entries whose "frequency" or "catalog_frequency"
        lies within a window around the requested frequency (bounds inclusive).

        :param frequency: float, center frequency to search for in the database
        :param freq_prox: float, search range tolerance. If freq_abs is True, the absolute value is used (in MHz).
                          Otherwise, freq_prox is a decimal percentage of the frequency.
        :param freq_abs: bool, dictates whether the absolute value of freq_prox is used.
        :param dataframe: bool, if True (default) the matches are returned as a pandas
                          dataframe, otherwise as the list returned by the database search.
        :return: the matches, or None if nothing was found
        """
        frequency = float(frequency)
        if freq_abs:
            min_freq = frequency - freq_prox
            max_freq = frequency + freq_prox
        else:
            min_freq = frequency * (1 - freq_prox)
            max_freq = frequency * (1 + freq_prox)
        entry = tinydb.Query()
        # An entry matches when either of its two frequency fields falls
        # inside the window.
        measured = (entry["frequency"] >= min_freq) & (entry["frequency"] <= max_freq)
        catalog = (entry["catalog_frequency"] >= min_freq) & (
            entry["catalog_frequency"] <= max_freq
        )
        matches = self.search(measured | catalog)
        if not matches:
            return None
        if dataframe:
            return pd.DataFrame(matches)
        return matches

    def _search_field(self, field, value, dataframe=True):
        """
        Function for querying the database for a particular field and value.
        The option dataframe specifies whether the matches are returned as a
        pandas dataframe, or as a list of Transition objects.

        :param field: str field to query
        :param value: value to compare with
        :param dataframe: bool, if True will return the matches as a pandas dataframe.
        :return: a dataframe or a list of Transition objects, or None if nothing matched
        """
        matches = self.search(tinydb.where(field) == value)
        if not matches:
            return None
        if dataframe:
            return pd.DataFrame(matches)
        # Same Transition class that add_catalog uses to create the entries.
        return [spectra.assignment.Transition(**data) for data in matches]

    def search_molecule(self, name, dataframe=True):
        """
        Search for a molecule in the database based on its name (not formula!).
        Wraps the _search_field method, which will return None if nothing is found, or either a
        pandas dataframe or a list of Transition objects

        :param name: str, name (not formula) of the molecule to search for
        :param dataframe: bool, if True, returns a pandas dataframe
        :return: matches: a dataframe or list of Transition objects that match the search name
        """
        return self._search_field("name", name, dataframe)

    def search_experiment(self, exp_id, dataframe=True):
        """
        Search for all entries whose "experiment" field equals the given ID.
        Wraps the _search_field method.

        :param exp_id: experiment ID to search for
        :param dataframe: bool, if True, returns a pandas dataframe
        :return: matches: a dataframe or list of Transition objects, or None if nothing is found
        """
        return self._search_field("experiment", exp_id, dataframe)

    def _remove_field(self, field, value):
        """
        Remove every entry whose field equals the given value.

        :param field: str field to compare
        :param value: value to compare with
        """
        entry = tinydb.Query()
        self.remove(entry[field] == value)

    def remove_experiment(self, exp_id):
        """
        Remove all entries based on an experiment ID.

        :param exp_id: int, experiment ID
        """
        # Must be the same field that search_experiment queries; otherwise
        # the removal never matches an entry.
        self._remove_field("experiment", exp_id)

    def remove_molecule(self, name):
        """
        Remove all entries based on the name of a molecule.

        :param name: str, name (not formula) of the molecule
        """
        self._remove_field("name", name)
class TheoryCatalog(tinydb.TinyDB):
    """
    Grand unified theory catalog.
    """

    def __init__(self, dbpath=None):
        """
        Open the theory catalog.

        :param dbpath: str, optional path to the database file. If None (default),
                       ``~/.pyspectools/pyspec_theory.db`` is used.
        """
        if dbpath is None:
            dbpath = os.path.expanduser("~/.pyspectools/pyspec_theory.db")
            # The default folder may not exist yet on a fresh installation;
            # create it so that opening the storage does not fail.
            os.makedirs(os.path.dirname(dbpath), exist_ok=True)
        super().__init__(dbpath)