#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 1 12:06:57 2021
@author: haascp
"""
import numpy as np
import logging
from mocca.peak.models import PreprocessedPeak
from mocca.peak.utils import average_peak_spectrum
def get_spectrum_correl_coef(peak, component):
    """
    Correlates the averaged spectrum of a peak with a component spectrum.

    The averaged spectrum is obtained from average_peak_spectrum(peak). If it
    has at least one non-zero entry, the off-diagonal element of the 2x2
    correlation matrix of the averaged spectrum and component.spectrum is
    returned. If it contains only zeros, a warning naming the peak index and
    the dataset path is logged and 0 is returned instead.
    """
    averaged_spectrum = average_peak_spectrum(peak)

    if np.any(averaged_spectrum):
        # Element [1, 0] is the cross-correlation between the two inputs.
        correlation_matrix = np.corrcoef(averaged_spectrum, component.spectrum)
        return correlation_matrix[1, 0]

    # All-zero spectrum: report it and fall back to "no correlation".
    message = ("Peak spectrum of peak {} in dataset {} contains only "
               "zeros.").format(peak.idx, peak.dataset.path)
    logging.warning(message)
    return 0
def get_relative_distance(peak, component):
    """
    Computes how far the peak maximum lies from the component maximum,
    expressed as a fraction of the number of time points.

    The peak maximum is first shifted by peak.offset; the absolute difference
    to component.maximum is then divided by len(peak.dataset.time).
    """
    corrected_maximum = peak.maximum - peak.offset
    n_time_points = len(peak.dataset.time)
    return abs(corrected_maximum - component.maximum) / n_time_points
def get_similarity_dicts(peak, component_db, relative_distance_thresh):
    """
    Returns a list of similarity dictionaries for the given peak, sorted by
    descending spectral correlation coefficient.

    Only components whose relative distance to the peak (see
    get_relative_distance) is less than or equal to relative_distance_thresh
    are included. Each dictionary has the keys 'compound_id',
    'spectrum_correl_coef', 'distance' and 'relative_distance'. An empty
    component database yields an empty list.
    """
    simil_by_comp = []
    for component in component_db:
        # Computed once per component and reused for both the filter and the
        # stored value.
        relative_distance = get_relative_distance(peak, component)
        if relative_distance <= relative_distance_thresh:
            simil_by_comp.append({
                'compound_id': component.compound_id,
                'spectrum_correl_coef': get_spectrum_correl_coef(peak,
                                                                 component),
                # NOTE(review): unlike relative_distance, this value does not
                # subtract peak.offset -- confirm that this is intended.
                'distance': abs(peak.maximum - component.maximum),
                'relative_distance': relative_distance,
            })
    # list.sort is stable, so ties keep their database order.
    simil_by_comp.sort(key=lambda dic: dic['spectrum_correl_coef'],
                       reverse=True)
    return simil_by_comp
def get_filtered_similarity_dicts(peak, component_db, spectrum_correl_coef_thresh,
                                  relative_distance_thresh, print_out=False):
    """
    Filters the list of similarity dictionaries with regard to the given
    thresholds.

    The relative distance filter is applied by get_similarity_dicts, which
    only returns components whose relative distance is less than or equal to
    relative_distance_thresh. Of those, only the entries with a spectral
    correlation coefficient greater than or equal to
    spectrum_correl_coef_thresh are returned as possible matches.

    If print_out is True, the unfiltered similarity dictionaries, the outcome
    of both threshold comparisons for every entry and the final matches are
    printed for debugging.
    """
    similarity_dicts = get_similarity_dicts(peak, component_db,
                                            relative_distance_thresh)
    if print_out:
        print(similarity_dicts)
        for d in similarity_dicts:
            print(d['spectrum_correl_coef'] >= spectrum_correl_coef_thresh)
            # The relative distance must be compared to its own threshold,
            # not to the correlation coefficient threshold.
            print(d['relative_distance'] <= relative_distance_thresh)
    matches = [d for d in similarity_dicts
               if d['spectrum_correl_coef'] >= spectrum_correl_coef_thresh]
    if print_out:
        print(matches)
    return matches
def match_peak(corrected_peak, component_db, spectrum_correl_coef_thresh,
               relative_distance_thresh, print_similarity_dicts=False):
    """
    Builds a PreprocessedPeak from a corrected peak and attaches its possible
    matches.

    For a pure peak, the matches are the result of
    get_filtered_similarity_dicts with the given thresholds. For an impure
    peak, no matching is attempted and matches is set to None. All other
    fields are copied from corrected_peak.
    """
    matches = None
    if corrected_peak.pure:
        matches = get_filtered_similarity_dicts(
            corrected_peak,
            component_db,
            spectrum_correl_coef_thresh,
            relative_distance_thresh,
            print_similarity_dicts,
        )

    # Fields taken over unchanged from the corrected peak.
    copied_fields = ('left', 'right', 'maximum', 'offset', 'dataset', 'idx',
                     'saturation', 'pure', 'integral', 'istd')
    carried_over = {name: getattr(corrected_peak, name)
                    for name in copied_fields}
    return PreprocessedPeak(**carried_over, matches=matches)
def update_matches(peak, new_matches):
    """
    Returns a new PreprocessedPeak that copies every field of the given peak
    except matches, which is set to new_matches.
    """
    peak_kwargs = dict(
        left=peak.left,
        right=peak.right,
        maximum=peak.maximum,
        offset=peak.offset,
        dataset=peak.dataset,
        idx=peak.idx,
        saturation=peak.saturation,
        pure=peak.pure,
        integral=peak.integral,
        istd=peak.istd,
    )
    # Only the matches differ from the input peak.
    peak_kwargs['matches'] = new_matches
    return PreprocessedPeak(**peak_kwargs)