Source code for mocca.peak.purity_funcs

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 23 15:55:25 2021

@author: haascp
"""

from mocca.peak.utils import get_peak_data, is_unimodal

import numpy as np
from sklearn.decomposition import PCA


def get_trimmed_peak_data(peak):
    """
    Returns the peak data with its low-signal edges removed.

    The array returned by ``get_peak_data`` is summed along axis 0, giving one
    total per column (timepoint). Only columns whose total is strictly greater
    than 5% of the largest column total are kept, which cuts off the noisy
    edges of the peak.
    """
    data = get_peak_data(peak)
    # Total absorbance per column; computed once and reused for the cutoff.
    column_totals = np.sum(data, axis=0)
    cutoff = 0.05 * np.max(column_totals)
    keep = column_totals > cutoff
    return data[:, keep]


def get_max_loc(peak_data):
    """
    Returns the column index at which the peak data has its maximum.

    The data is summed along axis 0 and the index of the largest column sum
    is returned (the first one in case of ties, as given by ``np.argmax``).
    """
    column_totals = np.sum(peak_data, axis=0)
    return np.argmax(column_totals)


def get_noise_variance(peak):
    """
    Estimates the noise variance from the signal-free part of the dataset.

    Only the columns (timepoints) of ``peak.dataset.data`` whose maximum along
    axis 0 is below 1% of the overall maximum of the data are considered
    noise. The variance of each of these columns is taken along axis 0 and
    the mean of those variances is returned.
    """
    data = peak.dataset.data
    # Largest absorbance reached in each column.
    column_maxima = np.max(data, axis=0)
    noise_limit = 0.01 * np.max(data)
    noise_data = data[:, column_maxima < noise_limit]
    column_variances = np.var(noise_data, axis=0)
    return np.mean(column_variances)


def get_correls(peak_data, max_loc):
    """
    Returns the squared correlation of every spectrum with the reference one.

    For each column (timepoint) of ``peak_data`` the correlation coefficient
    between that column and the column at ``max_loc`` is computed and squared.
    The result is a list with one value per column, in column order.
    """
    squared_correls = []
    for col in range(peak_data.shape[1]):
        spectrum = peak_data[:, col]
        # Off-diagonal entry of the 2x2 correlation matrix.
        coeff = np.corrcoef(spectrum, peak_data[:, max_loc])[0, 1]
        squared_correls.append(coeff ** 2)
    return squared_correls


def get_agilent_thresholds(peak_data, max_loc, noise_variance, param=2.5):
    """
    Returns the thresholds calculated by the Agilent purity algorithm.

    For every column (timepoint) ``i`` of ``peak_data`` the threshold is::

        max(0, 1 - param * (noise_variance / var(column i)
                            + noise_variance / var(column max_loc))) ** 2

    Parameters
    ----------
    peak_data : numpy.ndarray
        2D array whose columns are the spectra of the peak.
    max_loc : int
        Column index of the reference spectrum.
    noise_variance : float
        Noise variance estimate used in both ratio terms.
    param : float, optional
        Scaling factor on the noise terms. Larger values lower the
        thresholds. The default is 2.5.

    Returns
    -------
    list
        One threshold per column of ``peak_data``; empty if there are no
        columns.
    """
    n_timepoints = peak_data.shape[1]
    if n_timepoints == 0:
        # Nothing to evaluate; also avoids indexing max_loc on empty data.
        return []

    # The reference term does not depend on the loop variable, so it is
    # computed once instead of once per timepoint.
    reference_term = noise_variance / np.var(peak_data[:, max_loc])

    agilent_thresholds = []
    for i in range(n_timepoints):
        spectrum_term = noise_variance / np.var(peak_data[:, i])
        # Clamp at zero before squaring so very noisy spectra give 0.
        clamped = max(0, 1 - param * (spectrum_term + reference_term))
        agilent_thresholds.append(clamped ** 2)
    return agilent_thresholds


def get_purity_value_agilent(peak_data, correls, agilent_thresholds):
    """
    Returns the fraction of timepoints passing Agilent's peak purity test.

    A timepoint passes when its value in ``correls`` is strictly greater than
    the corresponding value in ``agilent_thresholds``. The number of passing
    timepoints is divided by the number of columns of ``peak_data``. Deciding
    whether that fraction is high enough to call the peak pure is left to the
    caller.
    """
    above_threshold = np.greater(correls, agilent_thresholds)
    n_passed = np.sum(above_threshold)
    n_timepoints = peak_data.shape[1]
    return n_passed / n_timepoints


def predict_purity_unimodal(correls):
    """
    Checks the smoothed correlation vector for unimodality.

    The correlations to the maximum are smoothed with a moving average of
    window length 3 (only positions where the window fully overlaps the data
    are kept) and the result is passed to ``is_unimodal`` together with the
    value 0.999.
    https://stackoverflow.com/questions/14313510/how-to-calculate-rolling-moving-average-using-numpy-scipy
    """
    window = np.ones(3)
    # 'valid' mode shortens the output by two samples (window length - 1).
    smoothed = np.convolve(correls, window, 'valid') / 3
    # NOTE(review): the meaning of 0.999 is defined by is_unimodal, which is
    # not visible here - presumably a threshold; confirm against its docs.
    return is_unimodal(smoothed, 0.999)


def get_pca_explained_variance(peak_data):
    """
    Calculates the ratio of variance explained by the first principal
    component of the given (deconvoluted) peak data.

    A one-component PCA is fitted to ``peak_data`` and the first entry of its
    ``explained_variance_ratio_`` is returned.
    """
    # fit() returns the fitted estimator itself, so the call can be chained.
    fitted = PCA(n_components=1).fit(peak_data)
    first_component_ratio = fitted.explained_variance_ratio_[0]
    return first_component_ratio