Source code for pyspectools.mmw.interpolation


import numpy as np
import pandas as pd

"""
    Routines for performing Shepard-interpolation with
    simple inverse-distance weighting.
    
    The basic usage of these routines is to feed in a list
    of Pandas dataframes, which consists of your experimental
    data. 
"""



[docs]def idw(x, xn, p=6, threshold=1.):
    """
        Inverse-distance weighting function
        For a given value of x with known data, and the interpolating
        x (xn), return the weighting value.
    """
    if x == xn:
        weight = 1e60
    else:
        weight = 1. / np.abs(x - xn)**p
        if weight < threshold:
            # Return a zero if the value is sufficiently small
            weight = 0.
    return weight
    

[docs]def vidw(x, xn, p=6, threshold=1.):
    """
        Vectorized form of the inverse-distance weighting function
    """
    func = np.vectorize(idw)
    return func(x, xn, p, threshold)


[docs]def find_nearest(dataframes, x, npoints=5):
    """
        Function that will take a list of dataframes, and
        return slices of the dataframes containing a specified
        number of closest points.
    """
    nearest = [
        df.loc[(df["Frequency"] - x).abs().argsort()[:npoints]] for df in dataframes
    ]
    return nearest


[docs]def find_nearest_nieu(dataframes, x, xrange=3., npoints=5):
    """
        New function that will return only data that is within a
        specified range, and within that the closest values.
    """
    nearest = list()
    
    for df in dataframes:
        # Find data within range
        temp_df = df.loc[(df["Frequency"] > x - xrange) & (df["Frequency"] < x + xrange)]
        # Only append if there is data inside the dataframe
        if temp_df.empty is False:
            nearest.append(temp_df.loc[(df["Frequency"] - x).abs().argsort()[:npoints]])
    return nearest


[docs]def shep_interp(dataframes, xn, col="Intensity", p=6, threshold=1.):
    """
        Calculate the Shepard interpolation of a point
    """
    # Loop over all selected data and calculate their weights
    weights = list()
    for df in dataframes:
        df["weight"] = vidw(
            df["Frequency"],
            xn,
            p,
            threshold
        )
        # Take only values that are not zero
        #df = df.loc[df["weight"] != 0.]
        weights.extend([value for value in df["weight"].values if value != 0.])
    weights = np.array(weights)
    cumint = 0.
    # Loop over data again, weighting the intensity and
    # calculating the interpolated intensity
    for df in dataframes:
        df["weighted-Intensity"] = df["weight"] * df[col]
        cumint+=df["weighted-Intensity"].sum()
    return cumint / np.sum(weights)

        
[docs]def calc_shep_interp(dataframes, xnew, col="Intensity", xrange=10., p=4., threshold=1., npoints=15):
    """
        Main function for calculating the Shepard interpolation for
        a given set of existing data, and an array corresponding
        to the new set of x data.
        
        dataframes - a list comprising pandas DataFrames, containing x/y data with
        column names ["Frequency", "Intensity"]
        xnew - the new x values to interpolate into. Use np.arange/np.linspace to
        generate.
        xrange - the frequency radius (in units of whatever frequency units are in data)
        to search for neighbours. Also defines the rate at which the neighbourlist is updated.
        p - the decay rate of the inverse distance weighting; should be even values.
        threshold - minimum weight value to be considered in the interpolation
        npoints - the maximum number of data points to use in the interpolation; smaller
        numbers mean crappy interpolation, larger means expensive.
        
        Returns the new y values
    """
    ynew = np.zeros(xnew.size)
    nele = ynew.size
    for index, x in enumerate(xnew):
        # Every 100 MHz, we will print a notification to tell you we're doing something...
        if index / nele * 100. % 10. == 0.:
            print(str(index / nele * 100.) + "% complete.")
        # Find nearest neighbours. This step will only be done every few steps as it
        # is the most computationally expensive part of the algorithm
        # The update frequency depends on the radius we look for neighbours, specified
        # with xrange.
        if index % xrange == 0.:
            neighbour_list = find_nearest_nieu(
                dataframes,
                x,
                xrange,
                npoints
            )
        # Calculate the y value based on nearest neighbours
        ynew[index]+=shep_interp(
            neighbour_list,
            x,
            col,
            p,
            threshold
        )
    return ynew