import inspect
import numpy as np
import scipy.ndimage.filters as spfilt
#: Valid keyword arguments for feature types
VALID_FEATURE_TYPES = ["all", "binary", "continuous"]
[docs]class IndentationFeatures(object):
def __init__(self, dataset=None):
#: current dataset from which features are computed
self.dataset = dataset
@property
def is_fitted(self):
if self.is_valid:
return self.dataset.fit_properties["success"]
else:
return False
@property
def is_valid(self):
return bool(self.dataset.fit_properties)
@property
def has_contact_point(self):
if self.is_fitted:
pint = self.dataset.fit_properties["params_fitted"]
return "contact_point" in pint
else:
return False
@property
def contact_point(self):
if self.has_contact_point:
pint = self.dataset.fit_properties["params_fitted"]
return pint["contact_point"].value
else:
raise ValueError("No contact point in data!")
@property
def datafit_apr(self):
seg = ~self.dataset["segment"].values
f = self.dataset["fit"][seg].values.copy()
return f
@property
def datares_apr(self):
return self.datafit_apr - self.datay_apr
@property
def datax_apr(self):
xaxis = self.dataset.fit_properties["x_axis"]
seg = ~self.dataset["segment"].values
x = self.dataset[xaxis][seg].values.copy()
# Make sure everything is ok
assert x[0] > x[-1], "Approach from large distances towards lower"
return x
@property
def datay_apr(self):
yaxis = self.dataset.fit_properties["y_axis"]
seg = ~self.dataset["segment"].values
y = self.dataset[yaxis][seg].values.copy()
return y
@property
def meta(self):
return self.dataset.metadata
[docs] @staticmethod
def compute_features(idnt, which_type="all", names=None,
ret_names=False):
"""Compute the features for a data set
Parameters
----------
idnt: nanite.Indentation
A dataset to rate
names: list of str
The names of the rating methods to use,
e.g. ["rate_apr_bumps", "rate_apr_mon_incr"].
If None (default), all available rating methods are
used.
Notes
-----
`names` may include features that are excluded by `which_type`.
E.g. if a "bool" feature is in `names` but `which_type` is
"float", then the "bool" feature will be silently ignored.
"""
inst = IndentationFeatures(idnt)
# make sure to keep the order of `names`, i.e. only compute
# names if they are not given.
if names is None or which_type != "all":
names = IndentationFeatures.get_feature_names(
which_type=which_type, names=names)
samples = []
for nameii in names:
sii = float(getattr(inst, nameii)())
samples.append(sii)
samples = np.array(samples, dtype=float).flatten()
if ret_names:
return samples, names
else:
return samples
[docs] @classmethod
def get_feature_funcs(cls, which_type="all", names=None):
"""Return functions that compute features from a dataset
Parameters
----------
names: list of str
The names of the rating methods to use,
e.g. ["rate_apr_bumps", "rate_apr_mon_incr"].
If None (default), all available rating methods are
returned.
which_type: str
Which features to return: ["all", "bool", "float"].
Returns
-------
raters: list of tuples (name, callable)
Each item in the list consists contains the name
of the rating method and the corresponding rating
method.
"""
fnames = cls.get_feature_names(which_type=which_type, names=names)
ffuncs = [(ff, getattr(cls, ff)) for ff in fnames]
return ffuncs
[docs] @classmethod
def get_feature_names(cls, which_type="all", names=None,
ret_indices=False):
"""Get features names
Parameters
----------
which_type: str or list of str
Return only features that are of a certain type.
See `VALID_FEATURE_TYPES` for valid strings.
names: list of str
Return only features that are in this list.
ret_indices: bool
If True, also return the internal feature indices.
Returns
-------
name_list: list of str
List of feature names (callables of this class)
"""
if isinstance(which_type, (list, tuple)):
# recurse
fnames = []
for wt in which_type:
fnames += cls.get_feature_names(which_type=wt)
else:
if which_type not in VALID_FEATURE_TYPES:
msg = "`which_type` must be one of {}, got '{}'".format(
VALID_FEATURE_TYPES, which_type)
raise ValueError(msg)
if which_type == "all":
fstart = "feat_"
elif which_type == "binary":
fstart = "feat_bin_"
elif which_type == "continuous":
fstart = "feat_con_"
ffuncs = inspect.getmembers(cls, lambda a: (inspect.isroutine(a)))
fnames = [ff[0] for ff in ffuncs if ff[0].startswith(fstart)]
# keep only names requested by the user
if names:
# convenience: make sure the requested feature names all exists
refnames = cls.get_feature_names()
unknown = [nn for nn in names if nn not in refnames]
if unknown:
msg = "Unknown feature names: '{}'".format(",".join(unknown))
raise ValueError(msg)
# only use selected features
fnames = [ff for ff in fnames if ff in names]
fnames = sorted(fnames)
if ret_indices:
# return the internal index
allnames = cls.get_feature_names()
indices = []
for ii, aff in enumerate(allnames):
if aff in fnames:
indices.append(ii)
return fnames, np.array(indices, dtype=int)
else:
return fnames
# for bool features, 1 means good and 0 means bad
[docs] def feat_bin_apr_spikes_count(self):
"""spikes during IDT
Sudden spikes in indentation curve
"""
if self.has_contact_point:
cp = self.contact_point
# indentation part
indidx = self.datax_apr < cp
diff = self.datares_apr[indidx]
diff = diff[~np.isnan(diff)]
if len(diff) > 50:
# find regions with peaks
diff_smooth = spfilt.gaussian_filter1d(diff, sigma=11)
delta1 = diff - diff_smooth
diff_smooth2 = spfilt.gaussian_filter1d(diff, sigma=1)
delta2 = diff_smooth2 - diff_smooth
std = np.std(delta1)
peakarray = np.diff(np.abs(delta2) > 3*std)
npeaks = np.sum(peakarray)
value = npeaks <= 5
else:
value = np.nan
else:
value = np.nan
return value
[docs] def feat_bin_cp_position(self):
"""CP outside of data range
Contact point position outside of range
"""
if self.has_contact_point:
cp = self.contact_point
x = self.datax_apr
if cp < np.min(x) or cp > np.max(x):
value = False
else:
value = True
return value
else:
return np.nan
[docs] def feat_bin_size(self):
"""dataset too small
Number of points in indentation curve
"""
if self.is_valid:
a_ind = self.datay_apr
num = a_ind.shape[0]
if num < 600:
value = False
else:
value = True
else:
value = np.nan
return value
[docs] def feat_con_apr_flatness(self):
"""flatness of APR residuals
fraction of the positive-gradient residuals in the approach part
"""
if self.has_contact_point:
cp = self.contact_point
# baseline indices of approach curve
# (approaches from pos values)
blidx = self.datax_apr > cp
# get residuals of approach curve
r_bl = self.datares_apr[blidx]
r_bl = r_bl[~np.isnan(r_bl)]
# perform gaussian blur
sigma = max(5, (np.int(r_bl.shape[0]/120)//2)*2+1)
y = spfilt.gaussian_filter1d(r_bl, sigma=sigma)
if len(y) > 2:
grad = np.gradient(y)
pos = np.sum(grad > 0)
neg = np.sum(grad < 0)
value = pos/(pos + neg)
else:
value = np.nan
else:
value = np.nan
return value
[docs] def feat_con_apr_sum(self):
"""residuals of APR
absolute sum of the residuals in the approach part
"""
if self.has_contact_point:
cp = self.contact_point
# indentation part
indidx = self.datax_apr > cp
diff = self.datares_apr[indidx]
diff = diff[~np.isnan(diff)]
xin = self.datax_apr
yin = self.datay_apr
norm = xin.size * np.max(yin)
value = np.sum(np.abs(diff)) / norm * 100
value = np.log(1+value)
else:
value = np.nan
return value
[docs] def feat_con_apr_size(self):
"""relative APR size
length of the approach part relative to the indentation part
"""
if self.has_contact_point:
cp = self.contact_point
# baseline indices of approach curve
# (approaches from pos values)
x = self.datax_apr
aprsize = np.sum(x > cp)
tolsize = x.shape[0]
value = 1 - aprsize/tolsize
return value
else:
return np.nan
[docs] def feat_con_bln_slope(self):
"""slope of BLN
slope obtained from a linear least-squares fit to the baseline
"""
if self.has_contact_point:
cp = self.contact_point
x = self.datax_apr
y = self.datares_apr
# break point is between xmax and cp
breakp = (np.max(x) + cp) / 2
# left of break point and no nans
valid = (x > breakp) * (~np.isnan(x))
# slope range
x_sl = x[valid]
y_sl = y[valid]
if x_sl.size > 20:
# fit a line to the data
A = np.vstack([x_sl, np.ones(len(x_sl))]).T
m, _c = np.linalg.lstsq(A, y_sl, rcond=None)[0]
# normalize with maximum force
value = m / np.max(self.datay_apr)
value = np.log(1 + np.abs(value)) / 10
else:
value = np.nan
else:
value = np.nan
return value
[docs] def feat_con_bln_variation(self):
"""variation in BLN
comparison of the forces at the beginning and at the end
of the baseline
"""
if self.has_contact_point:
cp = self.contact_point
r_bl = self.datares_apr[self.datax_apr > cp]
r_bl = r_bl[~np.isnan(r_bl)]
# skip 10%
offset = int(r_bl.shape[0]*.1)
if offset:
r_bl = r_bl[:-offset]
if r_bl.shape[0] > 20:
avg1 = np.average(r_bl[:10])
avg2 = np.average(r_bl[-10:])
# normalize with maximum force and multiply by 1000
maxforce = np.max(self.datay_apr)
value = np.abs(avg1-avg2)/maxforce * 1e3
value = np.log(1 + value) / 5
else:
value = np.nan
return value
else:
return np.nan
[docs] def feat_con_cp_curvature(self):
"""curvature at CP
curvature of the force-distance data at the contact point
"""
if self.has_contact_point:
cp = self.contact_point
x = self.datax_apr
y = self.datay_apr
# 10 % of indentation is range
cpid = np.argmin(np.abs(x - cp))
maxid = np.argmax(y)
incl = np.abs(maxid - cpid) // 10
if incl > 5:
reg = y[cpid - incl:cpid + incl]
if len(reg):
lin = np.linspace(reg.min(), reg.max(), reg.size)
diff = np.sum(reg-lin)
value = diff / y.max() * 10
value = np.log(1 + np.abs(value)) * np.sign(value) / 4
else:
value = np.nan
else:
value = np.nan
else:
value = np.nan
return value
[docs] def feat_con_cp_magnitude(self):
"""residuals at CP
mean value of the residuals around the contact point
"""
if self.has_contact_point:
cp = self.contact_point
# use 10% of the indentation part as size
r_range = int(np.sum(self.datax_apr < cp) * .1)
cpidx = np.nanargmin(np.abs(self.datax_apr-cp))
rat_range = np.abs(
self.datares_apr[cpidx - r_range:cpidx + r_range])
# normalization with setpoint of curve gives us a number
if len(rat_range):
value = np.nansum(rat_range) / np.max(self.datay_apr) / 100
else:
value = np.nan
return value
else:
return np.nan
[docs] def feat_con_idt_maxima_75perc(self):
"""maxima in IDT residuals
sum of the indentation residuals' maxima in three intervals
in-between 25% and 100% relative to the maximum indentation
"""
if self.has_contact_point:
yin = self.datay_apr
fit = self.datafit_apr
xin = self.datax_apr
cp = self.contact_point
id100 = np.argmin(xin)
id000 = np.argmin(np.abs(xin - cp))
id025 = int(id000 + .25 * (id100 - id000))
idmin, idmax = min(id025, id100), max(id025, id100)
# find zeros
idcen = idmin + (idmax - idmin) // 2
smooth = spfilt.gaussian_filter1d(yin-fit, sigma=11)
idzero1 = idmin + np.argmin(np.abs(smooth[idmin:idcen]))
idzero2 = idcen + np.argmin(np.abs(smooth[idcen:idmax]))
# change of sign in 1st, 2nd, and 3rd part of indentation
ydiffs = []
if idmin != idzero1:
y1 = np.max(np.abs((yin - fit)[idmin:idzero1]))
ydiffs.append(y1)
if idzero1 != idzero2:
y2 = np.max(np.abs((yin - fit)[idzero1:idzero2]))
ydiffs.append(y2)
if idzero2 != idmax:
y3 = np.max(np.abs((yin - fit)[idzero2:idmax]))
ydiffs.append(y3)
if ydiffs:
# combine the changes (ydiff2 has different sign)
ydiff = np.sum(ydiffs)
ydiff /= yin.max()
value = np.log(1 + ydiff) * 2
else:
value = np.nan
else:
value = np.nan
return value
[docs] def feat_con_idt_monotony(self):
"""monotony of IDT
change of the gradient in the indentation part
"""
if self.has_contact_point:
cp = self.contact_point
# indentation part
indidx = self.datax_apr < cp
# get approach curve
a_ind = self.datay_apr[indidx]
# perform gaussian blur
y = spfilt.gaussian_filter1d(a_ind, sigma=2)
if len(y) > 2:
grad = np.gradient(y)
gz = np.abs(np.sum(grad[grad > 0]))
lz = np.abs(np.sum(grad[grad < 0]))
value = np.sum(indidx) * lz / gz
value = np.log(1 + value) / 10
else:
value = np.nan
else:
value = np.nan
return value
[docs] def feat_con_idt_sum(self):
"""overall IDT residuals
sum of the residuals in the indentation part
"""
if self.has_contact_point:
cp = self.contact_point
x = self.datax_apr
y = self.datay_apr
f = self.datafit_apr
idind = x < cp
diff = (y - f)[idind]
area = np.nansum(np.abs(diff))/diff.shape[0]
norm = np.abs(y.max() - y.min())/2
value = area/norm
value = np.log(1 + value) * 5
return value
else:
return np.nan
[docs] def feat_con_idt_sum_75perc(self):
"""residuals in 75% IDT
sum of the residuals in the indentation part in-between
25% and 100% relative to the maximum indentation
"""
if self.has_contact_point:
yin = self.datay_apr
fit = self.datafit_apr
xin = self.datax_apr
cp = self.contact_point
id100 = np.argmin(xin)
id000 = np.argmin(np.abs(xin - cp))
id025 = int(id000 + .25 * (id100 - id000))
idmin, idmax = min(id025, id100), max(id025, id100)
ydiff = np.sum(np.abs(yin - fit)[idmin:idmax])
xnorm = np.abs(xin[idmin] - xin[idmax])
# area under the curve (normalization with absolute x interval)
area = ydiff * xnorm
# normalize by max force and multiply by 1e6 to get values around 1
value = area / np.max(yin) * 1e6
value = np.log(1+value) / 8
else:
value = np.nan
return value
[docs] def feat_con_idt_spike_area(self):
"""area of IDT spikes
area of spikes appearing in the indentation part
"""
if self.has_contact_point:
cp = self.contact_point
# indentation part
indidx = self.datax_apr < cp
diff = self.datares_apr[indidx]
diff = diff[~np.isnan(diff)]
if len(diff) > 20:
# find regions with peaks
diff_smooth = spfilt.gaussian_filter1d(diff, sigma=11)
delta1 = diff - diff_smooth
diff_smooth2 = spfilt.gaussian_filter1d(diff, sigma=1)
delta2 = np.abs(diff_smooth2 - diff_smooth)
std = np.std(delta1)
peaks = np.sum(delta2[delta1 > 3*std])
# add SD to avoid too many zero-valued samples
value = (std + peaks) / np.max(self.datay_apr)
value = np.log(1 + value) * 20
else:
value = np.nan
else:
value = np.nan
return value