Source code for mocca.chromatogram.assign

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 17 18:55:41 2021

@author: haascp
"""
from operator import attrgetter

from mocca.peak.match import update_matches, match_peak
from mocca.peak.process import process_peak

from mocca.user_interaction.user_objects import Compound


def sort_peaks_by_best_match(peaks):
    """
    Orders peaks so that the peak with the strongest best match comes first.

    Peaks whose ``matches`` attribute is truthy are ranked by the
    'spectrum_correl_coef' entry of their first match, highest value first.
    Peaks with falsy ``matches`` (None or empty) are appended behind them in
    their original order.
    """
    def best_coef(peak):
        # Only the first match of a peak is looked at for the ranking.
        return peak.matches[0]['spectrum_correl_coef']

    ranked = []
    for peak in peaks:
        if peak.matches:
            ranked.append(peak)
    # list.sort is stable, also with reverse=True, so ties keep input order.
    ranked.sort(key=best_coef, reverse=True)

    trailing = []
    for peak in peaks:
        if not peak.matches:
            trailing.append(peak)
    return ranked + trailing
def get_best_match_compound_id(peak):
    """
    Looks up the 'compound_id' entry of the first match stored on the peak.
    """
    best_match = peak.matches[0]
    return best_match['compound_id']
def update_peaks_and_matches(sorted_peaks):
    """
    Triggered after peak assignment. Drops the peak which was assigned (the
    first element of sorted_peaks) and removes the consumed compound id from
    the matches of all remaining peaks.

    Parameters
    ----------
    sorted_peaks : list
        Peaks with the just-assigned peak in first position. The list itself
        is left untouched; a new list is built instead.

    Returns
    -------
    list
        One entry per remaining peak, each being the result of
        update_matches called with the peak and its filtered matches.

    Raises
    ------
    IndexError
        If sorted_peaks is empty.
    """
    compound_id = get_best_match_compound_id(sorted_peaks[0])
    # Slice instead of pop(0) so the caller's list is not mutated.
    remaining_peaks = sorted_peaks[1:]
    new_peaks = []
    for peak in remaining_peaks:
        new_matches = [match for match in peak.matches
                       if match['compound_id'] != compound_id]
        new_peaks.append(update_matches(peak, new_matches))
    return new_peaks
def assign_best_match_peak(peaks):
    """
    Picks the peak ranked first by sort_peaks_by_best_match, processes it with
    the compound id of its best match and strips that compound id from the
    matches of the other peaks.

    Returns a tuple of the processed peak and the list of updated remaining
    peaks.
    """
    ranked_peaks = sort_peaks_by_best_match(peaks)
    winner = ranked_peaks[0]
    winner_id = get_best_match_compound_id(winner)
    assigned_peak = process_peak(winner, Compound(winner_id),
                                 is_compound=False)
    remaining_peaks = update_peaks_and_matches(ranked_peaks)
    return assigned_peak, remaining_peaks
def assign_matched_peaks(peaks, assigned_peaks=None):
    """
    Assigns peaks containing matches with compound ids. In the rare case, that
    some peaks will not contain matches anymore, these are given back as
    unassigned and unmatched peaks.

    Parameters
    ----------
    peaks : list
        Peaks to assign. Peaks whose matches are None or empty are never
        assigned and end up in the residual list.
    assigned_peaks : optional
        Kept only for backward compatibility of the signature. Its value is
        ignored (as it always has been); the result list always starts empty.

    Returns
    -------
    tuple
        (assigned peaks, residual peaks). The residual peaks are those left
        over once no peak has any match anymore.
    """
    # Always start from a fresh list: never share state between calls.
    newly_assigned = []
    residual_peaks = peaks
    # Truthiness instead of len() so that matches=None is treated like "no
    # matches" rather than raising a TypeError. The loop also ends naturally
    # once residual_peaks is empty, since any() of nothing is False.
    while any(peak.matches for peak in residual_peaks):
        assigned_peak, residual_peaks = assign_best_match_peak(residual_peaks)
        newly_assigned.append(assigned_peak)
    return newly_assigned, residual_peaks
def get_next_unknown_id(peak_db):
    """
    Increments the unknown counter of the peak database and builds the id
    "unknown_<counter>" from the counter value read afterwards.
    """
    peak_db.increment_unknown_counter()
    return f"unknown_{peak_db.unknown_counter!s}"
def assign_unmatched_peaks_react(peaks, peak_db):
    """
    Processes peaks of a reaction run which could not be assigned to a known
    compound.

    The peaks are handled in ascending order of their ``maximum`` attribute,
    after peak_db.update_unknown_counter() has been called once. Per peak:
    matches of None lead to Compound(None), a negative ``idx`` leads to the
    id "unknown_parafac", and every other peak receives the next
    "unknown_<n>" id from the peak database.
    """
    ordered_peaks = sorted(peaks, key=lambda peak: peak.maximum)
    peak_db.update_unknown_counter()

    processed = []
    for peak in ordered_peaks:
        if peak.matches is None:
            processed.append(process_peak(peak, Compound(None),
                                          is_compound=False))
            continue
        if peak.idx < 0:
            # NOTE(review): negative idx presumably marks PARAFAC-generated
            # peaks -- confirm against the peak model.
            label = "unknown_parafac"
        else:
            label = get_next_unknown_id(peak_db)
        processed.append(process_peak(peak, Compound(label), False))
    return processed
def get_matched_peaks(peaks):
    """
    Collects, in input order, every peak whose ``matches`` attribute is
    truthy.
    """
    matched = []
    for peak in peaks:
        if peak.matches:
            matched.append(peak)
    return matched
def get_unmatched_peaks(peaks):
    """
    Collects, in input order, every peak whose ``matches`` attribute is falsy
    (None or empty).
    """
    unmatched = []
    for peak in peaks:
        if peak.matches:
            continue
        unmatched.append(peak)
    return unmatched
def assign_peaks_react(chromatogram, peak_db):
    """
    Assigns all peaks of a reaction run chromatogram.

    Peaks with matches go through assign_matched_peaks. Peaks without
    matches, together with the ones left over from that step, are handled by
    assign_unmatched_peaks_react. The chromatogram's peaks are replaced by all
    resulting peaks ordered by their ``maximum`` attribute, and the same
    chromatogram object is returned.
    """
    with_matches = get_matched_peaks(chromatogram.peaks)
    without_matches = get_unmatched_peaks(chromatogram.peaks)

    assigned_peaks, leftover_peaks = assign_matched_peaks(with_matches)
    unknown_peaks = assign_unmatched_peaks_react(
        without_matches + leftover_peaks, peak_db)

    all_peaks = assigned_peaks + unknown_peaks
    all_peaks.sort(key=attrgetter('maximum'))
    chromatogram.peaks = all_peaks
    return chromatogram
def get_unknown_impurity_peaks(assigned_peaks):
    """
    Returns all peaks which are a compound impurity or are unknown.

    A peak qualifies when its ``compound_id`` contains the substring
    "unknown" or "impurity". Peaks whose ``compound_id`` is None are skipped
    instead of raising a TypeError on the substring test.

    Parameters
    ----------
    assigned_peaks : iterable
        Peaks carrying a ``compound_id`` attribute (string or None).

    Returns
    -------
    list
        The qualifying peaks in input order.
    """
    markers = ("unknown", "impurity")
    return [peak for peak in assigned_peaks
            if peak.compound_id is not None
            and any(marker in peak.compound_id for marker in markers)]
def get_max_integral_peak(peaks):
    """
    Finds the peak with the largest ``integral`` attribute.

    Returns None for an empty (falsy) input. Raises an AttributeError if any
    of the given peaks lacks an ``integral`` attribute. On ties the first
    such peak in the input is returned.
    """
    if not peaks:
        return None
    for peak in peaks:
        if not hasattr(peak, 'integral'):
            raise AttributeError("All given peaks must have integral attribute.")
    return max(peaks, key=lambda peak: peak.integral)
def assign_unmatched_peaks_compound(peaks, compound_id, impurity_counter=0):
    """
    Processes the given peaks as impurities of a compound.

    The peaks are handled in ascending order of their ``maximum`` attribute.
    Each one is processed with a compound named
    "<compound_id>_impurity_<n>", where n counts upwards starting at
    impurity_counter + 1. Returns the processed peaks in that order.
    """
    ordered_peaks = sorted(peaks, key=attrgetter('maximum'))
    processed = []
    for offset, peak in enumerate(ordered_peaks, start=1):
        label = compound_id + "_impurity_" + str(impurity_counter + offset)
        processed.append(process_peak(peak, Compound(label),
                                      is_compound=False))
    return processed
def assign_peaks_compound(chromatogram, compound):
    """
    Assigns the peaks of a compound run chromatogram.

    Matched peaks are assigned via assign_matched_peaks. If one of them
    already carries the key of the given compound, it is processed as the
    compound peak. Otherwise the unmatched peak with the largest integral is
    taken as the compound peak. If no such peak exists, or if it is not pure,
    the chromatogram is flagged as bad data, gets a warning, and is returned
    with its peaks merely sorted. In the regular case the remaining unmatched
    peaks become impurities of the compound, and a warning is added when the
    compound peak has its saturation flag set.

    The same chromatogram object is returned in every case.
    """
    def by_maximum(peak):
        return peak.maximum

    candidates = get_matched_peaks(chromatogram.peaks)
    assigned_peaks, leftover_peaks = assign_matched_peaks(candidates)
    unmatched_peaks = get_unmatched_peaks(chromatogram.peaks) + leftover_peaks

    if any([peak.compound_id == compound.key for peak in assigned_peaks]):
        # Iterate over the initial list object; the name assigned_peaks is
        # rebound to a filtered copy inside the loop.
        initially_assigned = assigned_peaks
        for peak in initially_assigned:
            if peak.compound_id == compound.key:
                processed_peak = process_peak(peak, compound,
                                              is_compound=True)
                assigned_peaks = [p for p in assigned_peaks if p != peak]
    else:
        max_peak = get_max_integral_peak(unmatched_peaks)
        unmatched_peaks = [p for p in unmatched_peaks if p != max_peak]

        if not max_peak:
            chromatogram.bad_data = True
            chromatogram.warnings.append("No new peak could be found in the data "
                                         "to which the given compound could be "
                                         "assigned.")
            chromatogram.peaks = sorted(assigned_peaks + unmatched_peaks,
                                        key=by_maximum)
            return chromatogram

        if not max_peak.pure:
            chromatogram.bad_data = True
            chromatogram.warnings.append("An impure peak was found to be assigned "
                                         "in a pure compound experiment. Run is "
                                         "therefore dismissed.")
            # The candidate peak is kept, unprocessed, among the peaks.
            chromatogram.peaks = sorted(
                (assigned_peaks + unmatched_peaks + [max_peak]),
                key=by_maximum)
            return chromatogram

        processed_peak = process_peak(max_peak, compound, is_compound=True)

    impurity_peaks = assign_unmatched_peaks_compound(unmatched_peaks,
                                                     compound.key)
    if processed_peak.saturation:
        chromatogram.warnings.append("Compound was assigned to a peak possibly "
                                     "affected from saturation effect. User "
                                     "check required!")
    chromatogram.peaks = sorted([processed_peak] + assigned_peaks
                                + impurity_peaks, key=by_maximum)
    return chromatogram
def reassign_impurities(chromatogram, peak_db, quali_comp_db,
                        spectrum_correl_coef_thresh, relative_distance_thresh,
                        print_similarity_dicts=False):
    """
    Re-evaluates the impurity peaks of a compound run chromatogram.

    Every peak whose compound id contains 'impurity' is matched again with
    match_peak against quali_comp_db. Peaks which now find a match are
    assigned; the others are numbered again as impurities of the run's
    compound, continuing after the highest impurity number found for that
    compound in peak_db. The chromatogram's peaks are replaced by the result,
    sorted by ``maximum``, and the same chromatogram object is returned.

    Note from the original author: this function is only allowed to be run in
    the process_all_experiments function which has to be run everytime a new
    compound should be added to quali_comp_db.
    """
    impurity_peaks = [p for p in chromatogram if 'impurity' in p.compound_id]
    compound_peaks = [p for p in chromatogram if p not in impurity_peaks]
    # The first non-impurity peak flagged is_compound names the run's compound.
    main_peaks = [p for p in compound_peaks if p.is_compound]
    compound_id = main_peaks[0].compound_id

    # get impurity counter from peak db
    prefix = compound_id + "_impurity_"
    cur_count = 0
    for db_peak in peak_db:
        if db_peak.compound_id.startswith(prefix):
            number = int(db_peak.compound_id[len(prefix):])
            cur_count = max(cur_count, number)

    rematched_peaks = [
        match_peak(peak, quali_comp_db, spectrum_correl_coef_thresh,
                   relative_distance_thresh, print_similarity_dicts)
        for peak in impurity_peaks
    ]

    assigned_peaks, leftover_peaks = assign_matched_peaks(
        get_matched_peaks(rematched_peaks))
    still_unmatched = get_unmatched_peaks(rematched_peaks) + leftover_peaks
    renumbered_peaks = assign_unmatched_peaks_compound(
        still_unmatched, compound_id, impurity_counter=cur_count)

    merged_peaks = compound_peaks + assigned_peaks + renumbered_peaks
    merged_peaks.sort(key=lambda peak: peak.maximum)
    chromatogram.peaks = merged_peaks
    return chromatogram