Source code for nanite.fit

import copy
import hashlib
import warnings

import lmfit
import numpy as np
import scipy.signal as spsig

from . import model


FP_DEFAULT = dict(model_key="hertz_para",
                  optimal_fit_edelta=False,
                  optimal_fit_num_samples=100,
                  params_initial=None,
                  preprocessing=[],
                  range_type="absolute",
                  range_x=[0, 0],
                  segment="approach",
                  weight_cp=1e-6,
                  x_axis="tip position",
                  y_axis="force",
                  )

FP_RESULTS = ["chi_sqr",
              "hash",
              "optimal_fit_delta_array",
              "optimal_fit_delta",
              "optimal_fit_E_array",
              "params_fitted",
              "success",
              "xmax",
              "xmin",
              ]


[docs]class FitKeyError(BaseException): pass
[docs]class FitDataError(BaseException): pass
[docs]class FitWarning(UserWarning): pass
[docs]class FitProperties(dict): """Fit property manager class Provide convenient access to fit properties as a dictionary and dynamically manage resets due to new initial parameters. Dynamic properties include: - set "params_initial" to `None` if the "model_key" changes - remove all keys except those in `FP_DEFAULT` if a key that is in `FP_DEFAULT` changes (All other keys are considered to be obsolete fitting results). Additional attributes: - "segment_bool": bool `False` for "approach" and `True` for "retract" """ def __getitem__(self, key): if key == "segment_bool": if self["segment"] == "approach": return False if self["segment"] == "retract": return True else: msg = "Unknown segment: {}".format(self["segment"]) raise FitKeyError(msg) else: return super(FitProperties, self).__getitem__(key) def __setitem__(self, key, value): if key in FP_DEFAULT: if (key in self and key == "params_initial" and self["params_initial"] is not None and value is not None): # check for changed initial parameters for pp in self["params_initial"]: s1 = self["params_initial"][pp].__getstate__() s2 = value[pp].__getstate__() if s1 != s2: self.reset() break elif key in self and self[key] == value: # nothing to do, parameters match pass else: if key == "model_key": # Other model has other parameters. self["params_initial"] = None elif key == "range_x": if ("optimal_fit_edelta" in self and self["optimal_fit_edelta"] and "range_x" in self and self["range_x"][1] == value[1]): # Ignore changes in range[0] return # Trigger `self.reset` self.reset() elif key not in FP_RESULTS: msg = "Key '{}' not in FP_DEFAULT".format(key) raise FitKeyError(msg) super(FitProperties, self).__setitem__(key, value)
[docs] def reset(self): for key in list(self.keys()): if key not in FP_DEFAULT: self.pop(key)
[docs] def restore(self, props): """update the dictionary without removing any keys""" for key in props: super(FitProperties, self).__setitem__(key, props[key])
[docs]class IndentationFitter(object): def __init__(self, idnt, **kwargs): """Fit force-distance curves Parameters ---------- idnt: nanite.indent.Indentation The dataset to fit model_key: str A key referring to a model in `nanite.model.models_available` params_initial: instance of lmfit.Parameters Parameters for fitting. If not given, default parameters are used. range_x: tuple of 2 The range for fitting, see `range_type` below. range_type: str One of: - absolute: Set the absolute fitting range in values given by the `x_axis`. - relative cp: In some cases it is desired to be able to fit a model only up until a certain indentation depth (tip position) measured from the contact point. Since the contact point is a fit parameter as well, this requires a two-pass fitting. preprocessing: list of str Preprocessing segment: str One of "approach" or "retract". weight_cp: float Weight the contact point region which shows artifacts that are difficult to model with e.g. Hertz. optimal_fit_edelta: bool Search for the optimal fit by varying the maximal indentation depth and determining a plateau in the resulting Young's modulus (fitting parameter "E"). optimal_fit_num_samples: int Number of samples to use for searching the optimal fit """ # Get initial fit parameters # IMPORTANT: # If there are new additions in the default values, # make sure to take these into account in `FP_DEFAULT`. self.fp = FitProperties(**FP_DEFAULT) # Get parameters from dataset # (sorted, such that `model_key` is set before `params_initial`) for key in sorted(idnt.fit_properties.keys()): if key in FP_DEFAULT: self.fp[key] = idnt.fit_properties[key] # Get parameters from kwargs # (sorted, such that `model_key` is set before `params_initial`) for key in sorted(self.fp.keys()): if key in kwargs: if key not in FP_DEFAULT: msg = "Key '{}' not in FP_DEFAULT".format(key) raise FitKeyError(msg) self.fp[key] = kwargs[key] # Set initial fitting parameters if self.fp["params_initial"] is None: self.fp["params_initial"] = self.get_initial_parameters( idnt=idnt, model_key=self.fp["model_key"] ) # Set arrays self.segment = (idnt["segment"] == self.fp["segment_bool"]) self.segment.setflags(write=False) self.x_axis = idnt[self.fp["x_axis"]] self.x_axis.setflags(write=False) self.y_axis = idnt[self.fp["y_axis"]] self.y_axis.setflags(write=False) self.fit_range = np.zeros_like(self.segment) self.fit_curve = np.zeros_like(self.y_axis) self.fit_residuals = np.zeros_like(self.y_axis) # Fitting parameters might be changed due to iterative fitting. # Store them in separate variables to prevent resetting the # `FitPropreties` instance `self.fp`. self.optimal_fit_edelta = self.fp["optimal_fit_edelta"] self.range_type = self.fp["range_type"] self.range_x = list(self.fp["range_x"]) self.hash = self._hash() self.fp["hash"] = self.hash # Perform sanity checks if self.fp["range_type"] not in ["absolute", "relative cp"]: msg = "`range_type` must be 'absolute' or 'relative cp'!" raise FitKeyError(msg) if len(self.fp["range_x"]) != 2: raise FitKeyError("`range_x` must have length 2!") if (np.isnan(self.fp["range_x"][0]) or np.isnan(self.fp["range_x"][1])): raise FitKeyError("`range_x` must not contain NaN!") if self.fp["segment"] not in ["approach", "retract"]: msg = "`segment` must be 'approach' or 'retract'!" raise FitKeyError(msg) if self.fp["model_key"] not in model.models_available: msg = "unknown model '{}'".format(self.fp["model_key"]) raise FitKeyError(msg) if self.fp["optimal_fit_edelta"]: # Make sure we have emodulus if "E" not in self.fp["params_initial"]: msg = "Search for optimal fit requires the parameter 'E'!" raise FitKeyError(msg) # This only works with absolute range if self.fp["range_type"] != "absolute": msg = "Only absolute range relative to contact point allowed!" raise FitKeyError(msg) md_key = self.fp["model_key"] md = model.models_available[md_key] params = md.get_parameter_defaults() for p in params: msg = "Unknown fitting parameter '{}' for model '{}'!" if p not in self.fp["params_initial"]: raise FitKeyError(msg.format(p, md_key)) if self.fp["range_x"][0] > self.fp["range_x"][1]: msg = "Fitting range is inverted: {}".format(self.fp["range_x"]) warnings.warn(msg, FitWarning)
[docs] def compute_emodulus_vs_mindelta(self, callback=None): """Compute elastic modulus vs. minimal indentation curve""" segid = self.segment xseg = self.x_axis[segid] yseg = self.y_axis[segid] xmax = np.max(self.fp["range_x"]) if np.isinf(xmax): xmax = np.max(xseg) # Disable and remember `optimal_fit_edelta` optimal_fit_edelta = self.optimal_fit_edelta self.optimal_fit_edelta = False # We are agnostic concerning the direction of indentation. # `xseg` should start at the baseline. seems_approach = np.average(yseg[:10]) < np.average(yseg[-10:]) if seems_approach and self.fp["segment"] == "approach": if xseg[0] < xseg[-1]: msg = "Unexpected trend in approach x data!" raise FitDataError(msg) elif seems_approach: msg = "Data appears to be 'approach', but is 'retract'!" raise FitDataError(msg) elif not seems_approach and self.fp["segment"] == "retract": if xseg[0] > xseg[-1]: msg = "Unexpected trend in retract x data!" raise FitDataError(msg) msg = "Unexpected trend in retract curve!" raise FitDataError(msg) elif not seems_approach: msg = "Data appears to be 'retract', but is 'approach'!" raise FitDataError(msg) # Fit the range of parameters xmin = xseg.min() if xmin >= 0: msg = "No negative values (indentation) found! " \ + "Did you correct for tip offset?" raise FitKeyError(msg) num_samp = self.fp["optimal_fit_num_samples"] indentations = np.linspace(xmin, xmin*.05, num_samp) emoduli = np.zeros_like(indentations) for ii, x0 in enumerate(indentations): # Perform a fit and record the elastic modulus self.range_x = [x0, xmax] self.fit() emoduli[ii] = self.fp["params_fitted"]["E"].value if callback and ii % 5 == 0: callback(emoduli, indentations) self.optimal_fit_edelta = optimal_fit_edelta return emoduli, indentations
[docs] @staticmethod def compute_opt_mindelta(emoduli, indentations): """Determine the plateau of an emodulus-indentation curve The following procedure is performed: 1. Smooth the emodulus data with a Butterworth filter 2. Label sequences that have similar values by binning into ten regions between the min and max. 3. Ignore sequences with emodulus that is smaller than the binning size. 4. Determine the longest sequence. """ # Perform smoothing of the curve # First, perform filtering with butterworth filter nb = 1 # Filter order wb = 0.05 # Cutoff frequency b, a = spsig.butter(nb, wb, output='ba') smooth_e = spsig.filtfilt(b, a, emoduli) # Second, determine the longest sequence of values # that have the same smoothed value. ni = 10 ivals, istep = np.linspace(smooth_e.min(), smooth_e.max(), ni, endpoint=False, retstep=True) ivals += istep/2 labelarray = np.zeros_like(smooth_e, dtype=int) valarray = np.zeros_like(smooth_e, dtype=int) # label each sequence with an individual `idx` for ii in range(smooth_e.shape[0]): valid = np.argmin(np.abs(ivals - smooth_e[ii])) if ii == 0: idx = 0 elif valid == valarray[ii-1]: pass else: idx += 1 labelarray[ii] = idx valarray[ii] = valid # Determine the longest sequence counts = list(np.bincount(labelarray)) # Ignore values that are below cutoff for ii in range(len(counts)): labmax = np.argmax(counts) labid = np.where(labelarray == labmax)[0][0] valmax = ivals[valarray[labid]] if valmax > istep: break counts.pop(labmax) else: # Nothing found: select the middle value warnings.warn("Could not find correct plateau.", FitWarning) labmax = 5 # Determine the interval in the original array indices = np.where(labelarray == labmax)[0] if len(indices) == 1: dopt = indentations[indices[0]] else: # compute optimal indentation as center dopt = np.average(indentations[indices[0]:indices[-1]]) return dopt
def _fit(self): """Fit a model to the data Notes ----- This method is private because it requires the array `self.fit_range` before the actual fitting. """ model_key = self.fp["model_key"] params_initial = self.fp["params_initial"] weight_cp = self.fp["weight_cp"] # boolean array indexing the segment segid = self.segment # x: the entire segment xseg = self.x_axis[segid] # y: the entire segment yseg = self.y_axis[segid] # x: the values being fitted x = self.x_axis[self.fit_range] # y: the values being fitted y = self.y_axis[self.fit_range] md = model.models_available[model_key] # short reference for better readability fit_cur = self.fit_curve fit_res = self.fit_residuals # reset values to nan fit_cur[:] = np.nan fit_res[:] = np.nan # Make sure that we can actually fit by comparing variable fitting # parameters and size of x. npvaried = np.sum([p[1].vary for p in list(params_initial.items())]) if npvaried < x.shape[0] - 1: # perform fit fit = lmfit.minimize( md.residual, params_initial, args=(x, y, weight_cp)) # fitted method fit_cur[segid] = md.model(fit.params, xseg) # residuals fit_res[segid] = md.residual(fit.params, xseg, yseg, weight_cp) # add fit results to fp dictionary self.fp.update({"params_fitted": fit.params, "chi_sqr": fit.chisqr, "xmin": x.min(), "xmax": x.max(), "success": True, }) else: self.fp["success"] = False
[docs] def fit(self): """Fit the approach-retract data to a model function """ range_type = self.range_type range_x = copy.copy(self.range_x) # Set to True in `self._fit` self.fp["success"] = False if self.optimal_fit_edelta: # emodulus-indentation curve: emoduli, indentations = self.compute_emodulus_vs_mindelta() # determine optimal x0 dopt = self.compute_opt_mindelta(emoduli, indentations) # add indentations/emoduli to self.fp self.fp["optimal_fit_E_array"] = emoduli self.fp["optimal_fit_delta_array"] = indentations self.fp["optimal_fit_delta"] = dopt # update fitting parameters for final fit self.range_x = [dopt, np.max(self.fp["range_x"])] # perform final fit self.optimal_fit_edelta = False self.fit() self.optimal_fit_edelta = True elif self.range_type == "absolute": # This is easy. Simply set the boolean array of fitting values # Exclude data points from other segment if range_x[0] != range_x[1]: x_data = self.x_axis.copy() range_bool = self.segment.copy() rmin, rmax = np.min(range_x), np.max(range_x) range_bool[x_data < rmin] = False range_bool[x_data > rmax] = False else: range_bool = self.segment self.fit_range[:] = range_bool self._fit() elif range_type == "relative cp": # Let's try to solve this with four passes # First, get the approximate contact point self.range_type = "absolute" # Set full range to get estimate of cp self.range_x = [0, 0] self.fit() # Do the following three-times. for _i in range(3): # get the fitted contact point cp = self.fp["params_fitted"]["contact_point"].value self.range_x = list(np.array(range_x)+cp) # Second, fit with the new contact point as range parameters self.fit() # Reset range data to original values self.range_type = range_type self.range_x = range_x
[docs] def get_initial_parameters(self, idnt=None, model_key=FP_DEFAULT["model_key"]): """Get initial fit parameters for a specific model Parameters ---------- """ if (model_key == self.fp["model_key"] and self.fp["params_initial"] is not None): params = self.fp["params_initial"] else: params = guess_initial_parameters(idnt=idnt, model_key=model_key) return params
def _hash(self): """Compute hash identifier for current fit The hash is computed without applying the fit, making it suitable for caching fits. """ hashlist = [] # preprocessing hashlist.append(self.fp["preprocessing"]) # axes data hashlist.append(self.x_axis) hashlist.append(self.y_axis) # fit parameters for key in FP_DEFAULT: if (key == "range_x" and self.fp["optimal_fit_edelta"]): # range only partly if "optimal_fit_edelta" is True hashlist.append(self.fp["range_x"][1]) elif (key == "optimal_fit_num_samples" and not self.fp["optimal_fit_edelta"]): # ignore number of samples if optimal fit is not used pass else: hashlist.append(self.fp[key]) # join and hash myhash = hashlib.md5(obj2str(hashlist)).hexdigest() return myhash
[docs]def guess_initial_parameters(idnt=None, model_key=FP_DEFAULT["model_key"], common_ancillaries=True, model_ancillaries=True): """Guess initial fitting parameters Parameters ---------- idnt: nanite.indent.Indentation The dataset to use for guessing initial fitting parameters using ancillary parameters model_key: str The model key common_ancillaries: bool Guess global ancillary parameters (such as contact point) model_ancillaries: bool Use model-related ancillary parameters """ md = model.models_available[model_key] params = md.get_parameter_defaults() if common_ancillaries and idnt is not None: # Guess initial contact point from actual tip position # (see `self.compute_tip_position`) # Depending on how the current dataset was pre-processed, # the column "tip position" might already be corrected by # an estimated contact point offset. # (see `self.compute_tip_offset`) if "tip position" in idnt: cpid = idnt.estimate_contact_point_index() cp = idnt["tip position"][cpid] params["contact_point"].set(cp) else: msg = "Cannot estimate contact point, because of missing "\ + "column 'tip position'" warnings.warn(msg, FitWarning) if model_ancillaries and idnt is not None: anc_dict = idnt.get_ancillary_parameters() # set the parameter values for anckey in anc_dict: if anckey in params: if not np.isnan(anc_dict[anckey]): # ignore nans params[anckey].set(value=anc_dict[anckey]) return params
[docs]def obj2str(obj): """String representation of an object for hashing""" if isinstance(obj, str): return obj.encode("utf-8") elif isinstance(obj, (bool, int, float)): return str(float(obj)).encode("utf-8") elif obj is None: return b"none" elif isinstance(obj, np.ndarray): return obj.tostring() elif isinstance(obj, tuple): return obj2str(list(obj)) elif isinstance(obj, list): return b"".join(obj2str(o) for o in obj) elif isinstance(obj, dict): return obj2str(list(obj.items())) elif isinstance(obj, lmfit.parameter.Parameter): return obj2str([obj.value, obj.max, obj.min, obj.vary, obj.expr, obj.name]) else: raise ValueError("No rule to convert object '{}' to string.". format(obj.__class__))