Source code for mocca.peak.match

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec  1 12:06:57 2021

@author: haascp
"""
import numpy as np
import logging

from mocca.peak.models import PreprocessedPeak
from mocca.peak.utils import average_peak_spectrum


def get_spectrum_correl_coef(peak, component):
    """
    Returns the correlation coefficient between the averaged spectrum of the
    peak (as produced by average_peak_spectrum) and component.spectrum.

    If the averaged peak spectrum has no non-zero entry, a warning naming the
    peak index and the dataset path is logged and 0 is returned instead.
    """
    averaged_spectrum = average_peak_spectrum(peak)

    if np.any(averaged_spectrum):
        # Off-diagonal element of the 2x2 correlation matrix of both spectra.
        correlation_matrix = np.corrcoef(averaged_spectrum,
                                         component.spectrum)
        return correlation_matrix[1, 0]

    logging.warning("Peak spectrum of peak {} in dataset {} contains only "
                    "zeros.".format(peak.idx, peak.dataset.path))
    return 0
def get_relative_distance(peak, component):
    """
    Returns the absolute distance between the offset-corrected peak maximum
    (peak.maximum - peak.offset) and the component maximum, divided by the
    number of entries in the time vector of the peak's dataset.
    """
    corrected_maximum = peak.maximum - peak.offset
    absolute_distance = abs(corrected_maximum - component.maximum)
    return absolute_distance / len(peak.dataset.time)
def get_similarity_dicts(peak, component_db, relative_distance_thresh):
    """
    Returns a list of dictionaries sorted by descending spectral correlation
    coefficient. One dictionary is created for every component of the given
    database whose relative distance to the peak does not exceed
    relative_distance_thresh.

    Each dictionary holds the keys 'compound_id', 'spectrum_correl_coef',
    'distance' and 'relative_distance'.
    """
    simil_by_comp = []
    for component in component_db:
        # Computed once and reused for both the threshold check and the
        # stored value.
        relative_distance = get_relative_distance(peak, component)
        if relative_distance <= relative_distance_thresh:
            simil_by_comp.append({
                'compound_id': component.compound_id,
                'spectrum_correl_coef': get_spectrum_correl_coef(peak,
                                                                 component),
                # NOTE: unlike 'relative_distance', this value is computed
                # from the raw peak maximum without subtracting peak.offset.
                'distance': abs(peak.maximum - component.maximum),
                'relative_distance': relative_distance,
            })
    return sorted(simil_by_comp, reverse=True,
                  key=lambda dic: dic['spectrum_correl_coef'])
def get_filtered_similarity_dicts(peak, component_db,
                                  spectrum_correl_coef_thresh,
                                  relative_distance_thresh,
                                  print_out=False):
    """
    Filters the list of similarity dictionaries with regard to the given
    thresholds. Returns the possible matches, i.e. the entries whose spectral
    correlation coefficient is at least spectrum_correl_coef_thresh. The
    relative distance criterion is applied by get_similarity_dicts, which
    receives relative_distance_thresh.

    If print_out is True, the unfiltered similarity dictionaries, the result
    of both threshold comparisons for every entry, and the final matches are
    printed for debugging.
    """
    similarity_dicts = get_similarity_dicts(peak, component_db,
                                            relative_distance_thresh)
    if print_out:
        print(similarity_dicts)
        for d in similarity_dicts:
            print(d['spectrum_correl_coef'] >= spectrum_correl_coef_thresh)
            # The relative distance has to be compared against the distance
            # threshold, not against the correlation coefficient threshold.
            print(d['relative_distance'] <= relative_distance_thresh)

    matches = [d for d in similarity_dicts
               if d['spectrum_correl_coef'] >= spectrum_correl_coef_thresh]
    if print_out:
        print(matches)
    return matches
def match_peak(corrected_peak, component_db, spectrum_correl_coef_thresh,
               relative_distance_thresh, print_similarity_dicts=False):
    """
    Routine to assign possible matches to a peak and return it as a
    preprocessed peak.

    For a peak whose pure attribute is truthy, the matches are the filtered
    similarity dictionaries obtained from the component database with the
    given thresholds. For any other peak, matches is set to None. All
    remaining attributes are copied unchanged from corrected_peak.
    """
    if corrected_peak.pure:
        matches = get_filtered_similarity_dicts(corrected_peak, component_db,
                                                spectrum_correl_coef_thresh,
                                                relative_distance_thresh,
                                                print_similarity_dicts)
    else:
        # Impure peaks are not compared against the database.
        matches = None

    # update_matches performs the field-by-field copy into PreprocessedPeak,
    # so the attribute list is maintained in a single place.
    return update_matches(corrected_peak, matches)
def update_matches(peak, new_matches):
    """
    Returns a new PreprocessedPeak which carries over all attributes of the
    given peak except matches, which is set to new_matches.
    """
    carried_over_names = ('left', 'right', 'maximum', 'offset', 'dataset',
                          'idx', 'saturation', 'pure', 'integral', 'istd')
    carried_over = {name: getattr(peak, name) for name in carried_over_names}
    return PreprocessedPeak(**carried_over, matches=new_matches)