# -*- coding: utf-8 -*-
"""
Created on Thu May 12 09:10:57 2022
@author: CPH
"""
import numpy as np
import pandas as pd
from mocca.dad_data.utils import df_to_array, apply_filter
[docs]def get_uvvis_dataset_name(path):
"""
Queries the data description layer of the adf file to find the name of the
dataset which is of the type 'three-dimensional ultraviolet spectrum' as
defined by the AFO.
"""
from h5ld import AllotropeDF
import h5py
import rdflib
with h5py.File(path, mode="r") as f:
g = AllotropeDF(f).get_ld()
datasets_query = '''SELECT ?s ?p ?o
WHERE { ?s ?p ?o .
FILTER regex(str(?o), "DataSet") .
}'''
qres = g.query(datasets_query)
datasets = [x[0] for x in qres]
for d in datasets:
subj = list(g.triples((None, None, d)))[1][0]
dataset = list(
g.triples((subj, None,
rdflib.term.URIRef('http://purl.allotrope.org/ontologies/result#AFR_0001527'))) # noqa: E501
)
if dataset:
dataset_name = d
return dataset_name
[docs]def read_adf_datacube(path):
"""
Reads the raw data stored in the data cube layer, which are the HPLC-DAD
absorbance values and the time scale.
"""
import h5py
with h5py.File(path, mode="r") as f:
data_cubes = f['data-cubes']
dataset_name = get_uvvis_dataset_name(path)
dataset_key = str(dataset_name).split(':')[1][2:].replace("/", '-')
uvvis_data = data_cubes[dataset_key]
measures = uvvis_data['measures']
data_idx = list(measures.keys())[0]
absorbance_idx = measures[data_idx]
absorbance = absorbance_idx[()]
absorbance = np.swapaxes(absorbance, 0, 1)
scales = uvvis_data['scales']
data_idx = list(scales.keys())[0]
time_idx = scales[data_idx]
time = time_idx[()]
return np.array(absorbance), list(time)
[docs]def get_function_paramenters(path):
"""
Reads the parameters of the linear function which describes the wavelength
vector out of the data description layer.
"""
from h5ld import AllotropeDF
import h5py
import rdflib
with h5py.File(path, mode="r") as f:
g = AllotropeDF(f).get_ld()
dataset = get_uvvis_dataset_name(path)
# Step 1: Extract scaleMapping from a Dataset
scale_mapping_id = list(g.objects(list(g.triples((None, None, dataset)))[0][0],
rdflib.term.URIRef('http://purl.allotrope.org/ontologies/datacube-hdf-map#scaleMapping'))) # noqa: E501
# In case of error, i.e., there is no FunctionScaleMapping, it returns 0, 0
param1 = 0.0
param2 = 0.0
# Code based on the assumption that there is only one FunctionScaleMapping
for mapping in scale_mapping_id:
# Step 2: Looking for a Function Scale Mapping
mapping_function = list(g.triples((mapping,
rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), # noqa: E501
rdflib.term.URIRef('http://purl.allotrope.org/ontologies/datacube-hdf-map#FunctionScaleMapping')))) # noqa: E501
# Step 3: Check the Index Function (contains type of function and parameters)
# Step 4: Extract parameters
if(mapping_function):
param1 = float(list(g.objects(
list(g.objects(mapping,
rdflib.term.URIRef('http://purl.allotrope.org/ontologies/datacube-hdf-map#indexFunction')))[0], # noqa: E501
rdflib.term.URIRef('http://purl.allotrope.org/ontologies/datacube-hdf-map#parameter1')))[0]) # noqa: E501
param2 = float(list(g.objects(
list(g.objects(mapping,
rdflib.term.URIRef('http://purl.allotrope.org/ontologies/datacube-hdf-map#indexFunction')))[0], # noqa: E501
rdflib.term.URIRef('http://purl.allotrope.org/ontologies/datacube-hdf-map#parameter2')))[0]) # noqa: E501
return param1, param2
return param1, param2
[docs]def read_adf_description(path, wl_len):
"""
Queries the adf data description layer to extract the wavlength vector.
For this query, the h5ld package is required which can be installed by
editable pip install from https://github.com/laura-dirocco/h5ld. In case there
are problems with installation, the user can give start and stop values
as set on the DAD manually.
"""
try:
wl_slope, wl_start = get_function_paramenters(path)
wl_stop = wl_start + wl_len * wl_slope
except AttributeError:
print("If the h5ld package cannot be installed on your machine, "
"you have to give the wavelength values manually for adf data.")
if wl_start == 0 and wl_slope == 0:
wl_start = 190
wl_stop = wl_start + wl_len
wavelength = list(np.linspace(wl_start, wl_stop, wl_len))
return wavelength
[docs]def preprocess_df(df):
"""
Preprocesses the df time column to be in line with the Chemstation API.
"""
acq_time = df.time.max() / len(df)
# generate new time column
time_series = pd.Series(range(1, (len(df) + 1))).astype(float) * acq_time / 60
df['time'] = time_series
return df
[docs]def read_adf(path, wl_high_pass=None, wl_low_pass=None):
"""
Reads adf files as exported by the Agilent ADF Adapter.
"""
absorbance, time = read_adf_datacube(path)
wavelength = read_adf_description(path, absorbance.shape[0])
df = pd.DataFrame(np.swapaxes(absorbance, 0, 1), columns=wavelength)
df.insert(0, "time", time)
df = preprocess_df(df)
df = pd.melt(df, id_vars='time', value_vars=df.columns[1:],
var_name='wavelength', value_name='absorbance')
df['wavelength'] = df['wavelength'].astype(float)
df = apply_filter(df, wl_high_pass, wl_low_pass)
data, time, wavelength = df_to_array(df)
return data, time, wavelength