# Source code for mocca.dad_data.apis.allotrope

# -*- coding: utf-8 -*-
"""
Created on Thu May 12 09:10:57 2022

@author: CPH
"""

import numpy as np
import pandas as pd

from mocca.dad_data.utils import df_to_array, apply_filter


def get_uvvis_dataset_name(path):
    """
    Queries the data description layer of the adf file to find the name of
    the dataset which is of the type 'three-dimensional ultraviolet spectrum'
    as defined by the AFO.

    Parameters
    ----------
    path : str
        Path to the .adf (HDF5/Allotrope) file.

    Returns
    -------
    rdflib term
        The RDF name of the UV-Vis dataset.

    Raises
    ------
    ValueError
        If the file contains no dataset of the expected AFO type
        (previously this surfaced as an UnboundLocalError).
    """
    from h5ld import AllotropeDF
    import h5py
    import rdflib

    # AFO class 'three-dimensional ultraviolet spectrum'
    uvvis_type = rdflib.term.URIRef(
        'http://purl.allotrope.org/ontologies/result#AFR_0001527')

    with h5py.File(path, mode="r") as f:
        g = AllotropeDF(f).get_ld()
        datasets_query = '''SELECT ?s ?p ?o WHERE { ?s ?p ?o . FILTER regex(str(?o), "DataSet") . }'''
        qres = g.query(datasets_query)
        datasets = [x[0] for x in qres]

        dataset_name = None
        for d in datasets:
            # Second triple referencing this dataset gives the describing
            # subject; NOTE(review): the [1] index is inherited from the
            # original code — presumably the first triple is the HDF mapping.
            subj = list(g.triples((None, None, d)))[1][0]
            dataset = list(g.triples((subj, None, uvvis_type)))
            if dataset:
                dataset_name = d

        if dataset_name is None:
            raise ValueError(
                "No three-dimensional ultraviolet spectrum dataset found "
                "in adf file.")
        return dataset_name
def read_adf_datacube(path):
    """
    Reads the raw data stored in the data cube layer, which are the HPLC-DAD
    absorbance values and the time scale.

    Returns a tuple of (absorbance array with shape (wavelength, time),
    time scale as a list).
    """
    import h5py

    with h5py.File(path, mode="r") as adf_file:
        cubes = adf_file['data-cubes']
        dataset_name = get_uvvis_dataset_name(path)
        # The group key inside 'data-cubes' is derived from the RDF dataset
        # name: drop the URI scheme and replace '/' with '-'.
        cube_key = str(dataset_name).split(':')[1][2:].replace("/", '-')
        uvvis_cube = cubes[cube_key]

        # First (and only expected) measure holds the absorbance matrix.
        measures = uvvis_cube['measures']
        raw = measures[next(iter(measures))][()]
        absorbance = np.swapaxes(raw, 0, 1)

        # First scale holds the time axis.
        scales = uvvis_cube['scales']
        time = scales[next(iter(scales))][()]

    return np.array(absorbance), list(time)
def get_function_paramenters(path):
    """
    Reads the parameters of the linear function which describes the
    wavelength vector out of the data description layer.

    NOTE(review): 'paramenters' is a typo in the original public name; it is
    kept unchanged so existing callers keep working.

    Returns (param1, param2); falls back to (0.0, 0.0) when no
    FunctionScaleMapping is present in the file.
    """
    from h5ld import AllotropeDF
    import h5py
    import rdflib

    uri = rdflib.term.URIRef
    RDF_TYPE = uri('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
    SCALE_MAPPING = uri('http://purl.allotrope.org/ontologies/datacube-hdf-map#scaleMapping')  # noqa: E501
    FUNCTION_SCALE_MAPPING = uri('http://purl.allotrope.org/ontologies/datacube-hdf-map#FunctionScaleMapping')  # noqa: E501
    INDEX_FUNCTION = uri('http://purl.allotrope.org/ontologies/datacube-hdf-map#indexFunction')  # noqa: E501
    PARAMETER1 = uri('http://purl.allotrope.org/ontologies/datacube-hdf-map#parameter1')  # noqa: E501
    PARAMETER2 = uri('http://purl.allotrope.org/ontologies/datacube-hdf-map#parameter2')  # noqa: E501

    with h5py.File(path, mode="r") as adf_file:
        graph = AllotropeDF(adf_file).get_ld()
        dataset = get_uvvis_dataset_name(path)

        # Step 1: subject describing the dataset, then its scaleMapping(s)
        subject = list(graph.triples((None, None, dataset)))[0][0]
        scale_mappings = list(graph.objects(subject, SCALE_MAPPING))

        # Defaults returned when no FunctionScaleMapping is found.
        param1, param2 = 0.0, 0.0

        # Assumes at most one FunctionScaleMapping exists.
        for mapping in scale_mappings:
            # Step 2: only FunctionScaleMapping nodes carry an indexFunction.
            if not list(graph.triples(
                    (mapping, RDF_TYPE, FUNCTION_SCALE_MAPPING))):
                continue
            # Steps 3-4: follow the indexFunction and read both parameters.
            index_fn = list(graph.objects(mapping, INDEX_FUNCTION))[0]
            param1 = float(list(graph.objects(index_fn, PARAMETER1))[0])
            param2 = float(list(graph.objects(index_fn, PARAMETER2))[0])
            return param1, param2

        return param1, param2
def read_adf_description(path, wl_len):
    """
    Queries the adf data description layer to extract the wavelength vector.

    For this query, the h5ld package is required which can be installed by
    editable pip install from https://github.com/laura-dirocco/h5ld. If the
    parameters cannot be read (h5ld missing or no FunctionScaleMapping in
    the file), a default axis starting at 190 nm with 1 nm spacing is used.

    Parameters
    ----------
    path : str
        Path to the .adf file.
    wl_len : int
        Number of wavelength points (first axis of the absorbance array).

    Returns
    -------
    list of float
        Wavelength axis of length wl_len.
    """
    # Defaults trigger the 190 nm fallback below. Previously these were
    # only assigned inside the try block, so the AttributeError path
    # crashed with UnboundLocalError right after printing the hint.
    wl_slope = 0.0
    wl_start = 0.0
    try:
        wl_slope, wl_start = get_function_paramenters(path)
    except AttributeError:
        print("If the h5ld package cannot be installed on your machine, "
              "you have to give the wavelength values manually for adf data.")
    if wl_start == 0 and wl_slope == 0:
        # Fallback: common DAD default, 190 nm start with 1 nm steps.
        wl_start = 190
        wl_stop = wl_start + wl_len
    else:
        wl_stop = wl_start + wl_len * wl_slope
    wavelength = list(np.linspace(wl_start, wl_stop, wl_len))
    return wavelength
def preprocess_df(df):
    """
    Preprocesses the df time column to be in line with the Chemstation API.

    The 'time' column is replaced in place by an evenly spaced axis in
    minutes: point i gets i * (max_time / n_rows) / 60 for i = 1..n_rows.
    Returns the same DataFrame object.
    """
    n_rows = len(df)
    # Sampling interval inferred from the last recorded time point.
    sampling_interval = df.time.max() / n_rows
    new_axis = pd.Series(range(1, n_rows + 1)).astype(float)
    df['time'] = new_axis * sampling_interval / 60
    return df
def read_adf(path, wl_high_pass=None, wl_low_pass=None):
    """
    Reads adf files as exported by the Agilent ADF Adapter.

    Optional wl_high_pass / wl_low_pass bounds are forwarded to
    apply_filter to trim the wavelength range. Returns the
    (data, time, wavelength) triple produced by df_to_array.
    """
    absorbance, time = read_adf_datacube(path)
    wavelength = read_adf_description(path, absorbance.shape[0])

    # Wide frame: one column per wavelength, time as leading column.
    wide = pd.DataFrame(np.swapaxes(absorbance, 0, 1), columns=wavelength)
    wide.insert(0, "time", time)
    wide = preprocess_df(wide)

    # Long format expected by the filter / array helpers.
    tidy = pd.melt(wide, id_vars='time', value_vars=wide.columns[1:],
                   var_name='wavelength', value_name='absorbance')
    tidy['wavelength'] = tidy['wavelength'].astype(float)
    tidy = apply_filter(tidy, wl_high_pass, wl_low_pass)
    return df_to_array(tidy)