Source code for WrightTools.data._data

"""Central data class and associated."""

# --- import --------------------------------------------------------------------------------------

from __future__ import annotations
from contextlib import closing

import collections
import operator
import functools
import warnings

import numpy as np
import sys

import h5py

import scipy
from scipy.interpolate import griddata, interp1d
from typing import Generator

from .._group import Group
from .. import collection as wt_collection
from .. import exceptions as wt_exceptions
from .. import kit as wt_kit
from .. import units as wt_units
from ._axis import Axis, identifier_to_operator, operator_to_identifier
from ._channel import Channel
from ._constant import Constant
from ._variable import Variable


# --- define --------------------------------------------------------------------------------------


__all__ = ["Data"]


# --- class ---------------------------------------------------------------------------------------


[docs] class Data(Group): """Multidimensional dataset.""" class_name = "Data"
[docs] def __init__(self, *args, **kwargs): self._axes = [] self._constants = [] Group.__init__(self, *args, **kwargs) # populate axes, constants from attrs string for identifier in self.attrs.get("axes", []): if hasattr(identifier, "decode"): identifier = identifier.decode() expression, units = identifier.split("{") units = units.replace("}", "").strip() if units == "None": units = None # Should not be needed for wt5 >= 1.0.3, kept for opening older wt5 files. for i in identifier_to_operator.keys(): expression = expression.replace(i, identifier_to_operator[i]) expression = expression.replace(" ", "") # remove all whitespace axis = Axis(self, expression, units) self._axes.append(axis) for identifier in self.attrs.get("constants", []): if hasattr(identifier, "decode"): identifier = identifier.decode() expression, units = identifier.split("{") units = units.replace("}", "").strip() if units == "None": units = None for i in identifier_to_operator.keys(): expression = expression.replace(i, identifier_to_operator[i]) expression = expression.replace(" ", "") # remove all whitespace const = Constant(self, expression, units) self._constants.append(const) self._current_axis_identities_in_natural_namespace = [] if self.file.mode is not None and self.file.mode != "r": self._on_constants_updated() self._on_axes_updated() # the following are populated if not already recorded self.channel_names self.source self.variable_names
def __repr__(self) -> str: return "<WrightTools.Data '{0}' {1} at {2}>".format( self.natural_name, str(self.axis_names), "::".join([self.filepath, self.name]) ) def __dir__(self) -> list: default = object.__dir__(self) axes = [ax.natural_name for ax in self.axes] return default + axes + self._ipython_key_completions_() def _ipython_key_completions_(self) -> list: channels = [ch.natural_name for ch in self.channels] variables = [v.natural_name for v in self.variables] return channels + variables @property def axes(self) -> tuple: return tuple(self._axes) @property def axis_expressions(self) -> tuple: """Axis expressions.""" return tuple(a.expression for a in self._axes) @property def axis_names(self) -> tuple: """Axis names.""" return tuple(a.natural_name for a in self._axes) @property def constants(self) -> tuple: return tuple(self._constants) @property def constant_expressions(self) -> tuple: """Axis expressions.""" return tuple(a.expression for a in self._constants) @property def constant_names(self) -> tuple: """Axis names.""" return tuple(a.natural_name for a in self._constants) @property def channel_names(self) -> tuple: """Channel names.""" if "channel_names" not in self.attrs.keys(): self.attrs["channel_names"] = np.array([], dtype="S") return tuple(s.decode() for s in self.attrs["channel_names"]) @channel_names.setter def channel_names(self, value): """Set channel names.""" self.attrs["channel_names"] = np.array(value, dtype="S") @property def channels(self) -> tuple: """Channels.""" return tuple(self[n] for n in self.channel_names) @property def datasets(self) -> tuple: """Datasets.""" return tuple(v for _, v in self.items() if isinstance(v, h5py.Dataset)) @property def kind(self): """Kind.""" if "kind" not in self.attrs.keys(): self.attrs["kind"] = "None" value = self.attrs["kind"] return value if not value == "None" else None @property def ndim(self) -> int: """Get number of dimensions.""" try: assert self._ndim is not None except (AssertionError, AttributeError): if len(self.variables) == 0: self._ndim = 0 else: self._ndim = self.variables[0].ndim finally: return self._ndim @property def shape(self) -> tuple: """Shape.""" try: assert self._shape is not None except (AssertionError, AttributeError): self._shape = wt_kit.joint_shape(*self.variables) finally: return self._shape @property def size(self) -> int: """Size.""" return functools.reduce(operator.mul, self.shape) @property def source(self): """Source.""" if "source" not in self.attrs.keys(): self.attrs["source"] = "None" value = self.attrs["source"] return value if isinstance(value, str) and not value == "None" else None @property def units(self) -> tuple: """All axis units.""" return tuple(a.units for a in self._axes) @property def constant_units(self) -> tuple: """All constant units.""" return tuple(a.units for a in self._constants) @property def variable_names(self) -> tuple: """Variable names.""" if "variable_names" not in self.attrs.keys(): self.attrs["variable_names"] = np.array([], dtype="S") return tuple(s.decode() for s in self.attrs["variable_names"]) @variable_names.setter def variable_names(self, value): """Set variable names.""" self.attrs["variable_names"] = np.array(value, dtype="S") @property def variables(self) -> tuple: """Variables.""" try: assert self._variables is not None except (AssertionError, AttributeError): self._variables = [self[n] for n in self.variable_names] finally: return tuple(self._variables) @property def _leaf(self): return "{0} {1}".format(self.natural_name, self.shape) def _on_axes_updated(self): """Method to run when axes are changed in any way. Propagates updated axes properly. """ # update attrs self.attrs["axes"] = np.array([a.identity.encode() for a in self._axes], dtype="S") # remove old attributes while len(self._current_axis_identities_in_natural_namespace) > 0: key = self._current_axis_identities_in_natural_namespace.pop(0) try: delattr(self, key) except AttributeError: pass # already gone # populate new attributes for a in self._axes: key = a.natural_name setattr(self, key, a) self._current_axis_identities_in_natural_namespace.append(key) def _on_constants_updated(self): """Method to run when constants are changed in any way. Propagates updated constants properly. """ # update attrs self.attrs["constants"] = np.array( [a.identity.encode() for a in self._constants], dtype="S" ) def _print_branch(self, prefix, depth, verbose): def print_leaves(prefix, lis, vline=True): for i, item in enumerate(lis): if vline: a = "│ " else: a = " " if i + 1 == len(lis): b = "└── " else: b = "├── " s = prefix + a + b + "{0}: {1}".format(i, item._leaf) print(s) if verbose: # axes print(prefix + f"├── axes ({len(self.axes)})") print_leaves(prefix, self.axes) # constants print(prefix + f"├── constants ({len(self.constants)})") print_leaves(prefix, self.constants) # variables print(prefix + f"├── variables ({len(self.variables)})") print_leaves(prefix, self.variables) # channels print(prefix + f"└── channels ({len(self.channels)})") print_leaves(prefix, self.channels, vline=False) else: # axes s = f"axes ({len(self.axes)}): " s += ", ".join(["{0} ({1})".format(a.expression, a.units) for a in self.axes]) print(prefix + "├── " + s) # constants s = f"constants ({len(self.constants)}): " s += ", ".join( ["{0} ({1} {2})".format(a.expression, a.value, a.units) for a in self.constants] ) print(prefix + "├── " + s) # channels s = f"channels ({len(self.channels)}): " s += ", ".join(self.channel_names) print(prefix + "└── " + s)
[docs] def bring_to_front(self, channel): """Bring a specific channel to the zero-indexed position in channels. All other channels get pushed back but remain in order. Parameters ---------- channel : int or str Channel index or name. """ channel_index = wt_kit.get_index(self.channel_names, channel) new = list(self.channel_names) new.insert(0, new.pop(channel_index)) self.channel_names = new
def at(self, parent=None, name=None, **at) -> Data: """Return data of a subset of the data at specified axis position(s). kwargs ------ parent : WrightTools.Collection (optional) parent of new object. Default is no parent. name : string (optional) if specified, name of new data object. **at : keys are axis identifiers (Axis.natural_name), values lists of format [position, unit] if no coordinates are specified, returns a copy of the array. _specified axes must be one-dimensional in the array_ Returns ------- out : wt.Data Data that retains the dimension of unspecified axes. Example ------- ``` >>> from WrightTools import datasets >>> data = wt.open(datasets.wt5.v1p0p1_MoS2_TrEE_movie) # axes w2, w1=wm, d2 >>> probed_at_resonance = data.at(w1__e__wm=[690, "nm"]) >>> zero_delay_data = data.at(d2=[0, "fs"]) ``` Notes ----- * _most attrs are not retained in the new data object_. * some axis expressions use forbidden dictkey characters (e.g. "="). For these case, you must use the string substitution (e.g "=" -> "__e__"). Use `Data.axis_names` or `Axis.natural_name` to check the identifier form of the expression. Also consider using kit.string2identifier to filter. See Also -------- Data.chop : also reduces dimensionality, but returns a collection of one or more data objects. Kept axes can be sliced into a collection """ idx = self._at_to_slice(**at) return self._from_slice(idx, name=name, parent=parent) def squeeze(self, name=None, parent=None): """Reduce the data to the dimensionality of the (non-trivial) span of the axes. i.e. if the joint shape of the axes has an array dimension with length 1, this array dimension is squeezed. channels and variables that span beyond the axes are removed. Parameters ---------- name : string (optional) name of the new Data. parent : WrightTools Collection instance (optional) Collection to place the new "chop" collection within. Default is None (new parent). Returns ------- out : wt.Data new data object. The new data object has dimensions of the (non-trivial) span of the current axes Examples -------- >>> ... See also -------- Data.chop: Divide the dataset into its lower-dimensionality components. ... """ new = Data(name=name, parent=parent) attrs = { k: v for k, v in self.attrs.items() if k not in [ "axes", "channel_names", "constants", "name", "source", "item_names", "variable_names", ] } new.attrs.update(attrs) joint_shape = wt_kit.joint_shape(*[ai[:] for ai in self.axes]) cull_dims = [j == 1 for j in joint_shape] sl = [0 if cull else slice(None) for cull in cull_dims] matches_broadcast_axes = lambda a: all( [a.shape[i] == 1 for i in range(self.ndim) if cull_dims[i]] ) for v in filter(matches_broadcast_axes, self.variables): kwargs = v._to_dict() kwargs["values"] = v[sl] new.create_variable(**kwargs) for c in filter(matches_broadcast_axes, self.channels): kwargs = c._to_dict() kwargs["values"] = c[sl] new.create_channel(**kwargs) # inherit constants for c in self.constants: new.create_constant(c.expression) new.transform(*self.axis_expressions) return new def ichop(self, *args, at=None, autoclose=True) -> Generator: """ Similar to chop, but an iterable is produced insted of a collection. Useful for workflows where only one of the chopped children is needed at a time. As there is no collection, you can not assign the chopped elements to a parent. Parameters ---------- axis : str or int (args) Axes of the returned data objects. Strings refer to the names of axes in this object, integers refer to their index. Provide multiple axes to return multidimensional data objects. at : dict (optional) Choice of position along an axis. Keys are axis names, values are lists ``[position, input units]``. If exact position does not exist, the closest valid position is used. autoclose : bool (optional) If True, ichop will close each datafile after each iteration. Set to False when you wish to store spawned Datas beyond each iteration. Also consider using chop instead. Returns ------- Generator Generator spawning chopped elements for each `__next__` call Examples -------- loop through the select chop elements: ``` is_interesting = lambda d: d.signal[:].mean() > 0.5 for data in filter(is_interesting, data.ichop("w1", "w2")): ... ``` make a dictionary similar to a `chop` collection: ``` chop = {f"chop{i:0>3}":d for i, d in enumerate(data.ichop(..., autoclose=False))} ``` See Also -------- * Data.at make a lower-dimension dataset at specific coordinates * Data.chop Divide the dataset into a collection of lower-dimensionality components. """ removed_shape, at_axes, at_idx, transform_expression = self._chop_prep(*args, at=at) i_digits = int(np.log10(np.prod(removed_shape))) + 1 # iterate for i, idx in enumerate(np.ndindex(removed_shape)): name = f"chop{i:0>{i_digits}}" idx = np.array(idx, dtype=object) idx[np.array(removed_shape) == 1] = slice(None) idx[at_axes] = at_idx[at_axes] if autoclose: with closing(self._from_slice(idx, name=name)) as a_slice: a_slice.transform(*transform_expression) yield a_slice else: a_slice = self._from_slice(idx, name=name) a_slice.transform(*transform_expression) yield a_slice def _chop_prep(self, *args, at=None): """calculates important objects for performing chop""" # parse args args = list(args) for i, arg in enumerate(args): if isinstance(arg, int): args[i] = self._axes[arg].natural_name elif isinstance(arg, str): arg = arg.strip() args[i] = wt_kit.string2identifier(arg, replace=operator_to_identifier) transform_expression = [self._axes[self.axis_names.index(a)].expression for a in args] if at is None: at = {} # normalize the at keys to the natural name for k in list(at.keys()): k = k.strip() nk = wt_kit.string2identifier(k, replace=operator_to_identifier) if nk != k: at[nk] = at[k] at.pop(k) # distinguish looping and non-looping indices at_idx = self._at_to_slice(**at) at_axes = at_idx != slice(None) kept = args + [ak for ak in at.keys()] kept_axes = [self._axes[self.axis_names.index(a)] for a in kept] removed_axes = [a for a in self._axes if a not in kept_axes] removed_shape = wt_kit.joint_shape(*removed_axes) if removed_shape == (): removed_shape = (1,) * self.ndim removed_shape = list(removed_shape) for ax in kept_axes: if ax.shape.count(1) == ax.ndim - 1: removed_shape[ax.shape.index(ax.size)] = 1 removed_shape = tuple(removed_shape) return removed_shape, at_axes, at_idx, transform_expression
[docs] def chop(self, *args, at=None, parent=None, verbose=True) -> wt_collection.Collection: """Divide the dataset into its lower-dimensionality components. Parameters ---------- axis : str or int (args) Axes of the returned data objects. Strings refer to the names of axes in this object, integers refer to their index. Provide multiple axes to return multidimensional data objects. at : dict (optional) Choice of position along an axis. Keys are axis names, values are lists ``[position, input units]``. If exact position does not exist, the closest valid position is used. parent : WrightTools Collection instance (optional) Collection to place the new "chop" collection within. Default is None (new parent). verbose : bool (optional) Toggle talkback. Default is True. Returns ------- WrightTools Collection Collection of chopped data objects. Examples -------- >>> data.axis_names ['d2', 'w1', 'w2'] Get all w1 wigners. >>> datas = data.chop('d2', 'w1') >>> len(datas) 51 Get 2D frequency at d2=0 fs. >>> datas = data.chop('w1', 'w2', at={'d2': [0, 'fs']}) >>> len(datas) 0 >>> datas[0].axis_names ['w1', 'w2'] >>> datas[0].d2[:] 0. See Also -------- collapse Collapse the dataset along one axis. split Split the dataset while maintaining its dimensionality. ichop iterator version of chop at select a specific smaller dimension of data at specific coordinates """ removed_shape, at_axes, at_idx, transform_expression = self._chop_prep(*args, at=at) # get output collection out = wt_collection.Collection(name="chop", parent=parent) i_digits = int(np.log10(np.prod(removed_shape))) + 1 # iterate for i, idx in enumerate(np.ndindex(removed_shape)): name = f"chop{i:0>{i_digits}}" idx = np.array(idx, dtype=object) idx[np.array(removed_shape) == 1] = slice(None) idx[at_axes] = at_idx[at_axes] self._from_slice(idx, name=name, parent=out) out[name].transform(*transform_expression) out.flush() # return if verbose: print("chopped data into %d piece(s)" % len(out), "in", out[0].axis_expressions) return out
def __getitem__(self, key) -> Data: """ data[5, :3]; return new data object with those array slices """ if type(key) in [int, slice]: return self._from_slice(key) elif (type(key) == tuple) and np.all([type(ki) in [int, slice] for ki in key]): return self._from_slice(key) else: return super().__getitem__(key) def _at_to_slice(self, **at) -> np.array: """create array slice using at""" for k in at.keys(): if k not in self.axis_names: raise ValueError(f"Axis identifier {k} not in Axes: {self.axis_names}") idx = np.array([slice(None)] * len(self.shape)) for axis, point in at.items(): point, units = point destination_units = self._axes[self.axis_names.index(axis)].units point = wt_units.converter(point, units, destination_units) axis_index = self.axis_names.index(axis) axis = self._axes[axis_index] idx_index = np.array(axis.shape) > 1 if np.sum(idx_index) > 1: # we don't know how to handle a position within multiple array # dimensions raise wt_exceptions.MultidimensionalAxisError( "Data._at_to_slice", axis.natural_name ) idx_index = list(idx_index).index(True) idx[idx_index] = np.argmin(np.abs(axis[tuple(idx)] - point)) return idx def _from_slice(self, idx, name=None, parent=None) -> Data: """create self from an array slice of the parent self""" if parent is None: out = Data(name=name) else: out = parent.create_data(name=name) for v in self.variables: kwargs = v._to_dict() kwargs["values"] = v[idx] out.create_variable(**kwargs) for c in self.channels: kwargs = c._to_dict() kwargs["values"] = c[idx] out.create_channel(**kwargs) new_axes = [a.expression for a in self.axes if a[idx].size > 1] out.transform(*new_axes) constants = [a for a in self.axis_expressions if a not in new_axes] new_axis_units = [a.units for a in self.axes if a.expression in out.axis_expressions] for const in list(self.constant_expressions) + constants: out.create_constant(const, verbose=False) if const in self.constant_expressions: dest_units = self.constants[self.constant_expressions.index(const)].units else: dest_units = self.axes[self.axis_expressions.index(const)].units out.constants[out.constant_expressions.index(const)].convert(dest_units) for j, units in enumerate(new_axis_units): out.axes[j].convert(units) return out
[docs] def gradient(self, axis, *, channel=0): """ Compute the gradient along one axis. New channels have names ``<channel name>_<axis name>_gradient``. Parameters ---------- axis : int or str The axis to differentiate along. If given as an integer, the axis in the underlying array is used, and unitary spacing is assumed. If given as a string, the axis must exist, and be a 1D array-aligned axis. (i.e. have a shape with a single value which is not ``1``) The axis to collapse along is inferred from the shape of the axis. channel : int or str The channel to differentiate. Default is the first channel. """ # get axis index -------------------------------------------------------------------------- if isinstance(axis, int): axis_index = axis elif isinstance(axis, str): index = self.axis_names.index(axis) axes = [i for i in range(self.ndim) if self.axes[index].shape[i] > 1] if len(axes) > 1: raise wt_exceptions.MultidimensionalAxisError(axis, "collapse") elif len(axes) == 0: raise wt_exceptions.ValueError( "Axis '{}' is a single point, cannot compute gradient".format(axis) ) axis_index = axes[0] else: raise wt_exceptions.TypeError("axis: expected {int, str}, got %s" % type(axis)) channel_index = wt_kit.get_index(self.channel_names, channel) channel = self.channel_names[channel_index] if self[channel].shape[axis_index] == 1: raise wt_exceptions.ValueError( "Channel '{}' has a single point along Axis '{}', cannot compute gradient".format( channel, axis ) ) rtype = np.result_type(self[channel].dtype, float) new = self.create_channel( "{}_{}_gradient".format(channel, axis), values=np.empty(self[channel].shape, dtype=rtype), ) channel = self[channel] if axis == axis_index: new[:] = np.gradient(channel[:], axis=axis_index) else: new[:] = np.gradient(channel[:], self[axis].points, axis=axis_index)
[docs] def moment(self, axis, channel=0, moment=1, *, resultant=None): """Take the nth moment the dataset along one axis, adding lower rank channels. New channels have names ``<channel name>_<axis name>_moment_<moment num>``. Moment 0 is the integral of the slice. Moment 1 is the weighted average or "Center of Mass", normalized by the integral Moment 2 is the variance, the central moment about the center of mass, normalized by the integral Moments 3+ are central moments about the center of mass, normalized by the integral and by the standard deviation to the power of the moment. Moments, especially higher order moments, are susceptible to noise and baseline. It is recommended when used with real data to use :meth:`WrightTools.data.Channel.clip` in conjunction with moments to reduce effects of noise. Parameters ---------- axis : int or str The axis to take the moment along. If given as an integer, the axis with that index is used. If given as a string, the axis with that name is used. The axis must exist, and be a 1D array-aligned axis. (i.e. have a shape with a single value which is not ``1``) The collapsed axis must be monotonic to produce correct results. The axis to collapse along is inferred from the shape of the axis. channel : int or str The channel to take the moment. If given as an integer, the channel with that index is used. If given as a string, the channel with that name is used. The channel must have values along the axis (i.e. its shape must not be ``1`` in the dimension for which the axis is not ``1``) Default is 0, the first channel. moment : int or tuple of int The moments to take. One channel will be created for each number given. Default is 1, the center of mass. resultant : tuple of int The resultant shape after the moment operation. By default, it is intuited by the axis along which the moment is being taken. This default only works if that axis is 1D, so resultant is required if a multidimensional axis is passed as the first argument. The requirement of monotonicity applies on a per pixel basis. See Also -------- collapse Reduce dimensionality by some mathematical operation clip Set values above/below a threshold to a particular value WrightTools.kit.joint_shape Useful for setting `resultant` kwarg based off of axes not collapsed. """ # get axis index -------------------------------------------------------------------------- axis_index = None if resultant is not None: for i, (s, r) in enumerate(zip(wt_kit.joint_shape(*self.axes), resultant)): if s != r and r == 1 and axis_index is None: axis_index = i elif s == r: continue else: raise wt_exceptions.ValueError( f"Invalid resultant shape '{resultant}' for shape {wt_kit.joint_shape(*self.axes)}. " + "Consider using `wt.kit.joint_shape` to join non-collapsed axes." ) index = wt_kit.get_index(self.axis_names, axis) if axis_index is None: axes = [i for i in range(self.ndim) if self.axes[index].shape[i] > 1] if len(axes) > 1: raise wt_exceptions.MultidimensionalAxisError(axis, "moment") elif len(axes) == 0: raise wt_exceptions.ValueError( "Axis {} is a single point, cannot compute moment".format(axis) ) axis_index = axes[0] warnings.warn("moment", category=wt_exceptions.EntireDatasetInMemoryWarning) channel_index = wt_kit.get_index(self.channel_names, channel) channel = self.channel_names[channel_index] if self[channel].shape[axis_index] == 1: raise wt_exceptions.ValueError( "Channel '{}' has a single point along Axis '{}', cannot compute moment".format( channel, axis ) ) new_shape = list(self[channel].shape) new_shape[axis_index] = 1 channel = self[channel] axis_inp = axis axis = self.axes[index] x = axis[:] if np.any(np.isnan(x)): raise wt_exceptions.ValueError("Axis '{}' includes NaN".format(axis_inp)) y = np.nan_to_num(channel[:]) try: moments = tuple(moment) except TypeError: moments = (moment,) multiplier = 1 if 0 in moments: # May be possible to optimize, probably doesn't need the sum # only matters for integral, all others normalize by integral multiplier = np.sign( np.sum(np.diff(x, axis=axis_index), axis=axis_index, keepdims=True) ) for moment in moments: about = 0 norm = 1 if moment > 0: norm = np.trapz(y, x, axis=axis_index) norm = np.array(norm) norm.shape = new_shape if moment > 1: about = np.trapz(x * y, x, axis=axis_index) about = np.array(about) about.shape = new_shape about /= norm if moment > 2: sigma = np.trapz((x - about) ** 2 * y, x, axis=axis_index) sigma = np.array(sigma) sigma.shape = new_shape sigma /= norm sigma **= 0.5 norm *= sigma**moment values = np.trapz((x - about) ** moment * y, x, axis=axis_index) values = np.array(values) values.shape = new_shape values /= norm if moment == 0: values *= multiplier self.create_channel( "{}_{}_{}_{}".format(channel.natural_name, axis_inp, "moment", moment), values=values, )
[docs] def collapse(self, axis, method="sum"): """Collapse the dataset along one axis, adding lower rank channels. New channels have names ``<channel name>_<axis name>_<method>``. Parameters ---------- axis : int or str The axis to collapse along. If given as an integer, the axis in the underlying array is used. If given as a string, the axis must exist, and be a 1D array-aligned axis. (i.e. have a shape with a single value which is not ``1``) The axis to collapse along is inferred from the shape of the axis. method : {'average', 'sum', 'max', 'min'} (optional) The method of collapsing the given axis. Method may also be list of methods corresponding to the channels of the object. Default is sum. NaNs are ignored. Can also be a list, allowing for different treatment for varied channels. In this case, None indicates that no change to that channel should occur. See Also -------- chop Divide the dataset into its lower-dimensionality components. split Split the dataset while maintaining its dimensionality. moment Take the moment along a particular axis """ if method in ("int", "integrate"): warnings.warn( "integrate method of collapse is deprecated, use moment(moment=0) instead", wt_exceptions.VisibleDeprecationWarning, ) for channel in self.channel_names: try: self.moment(axis, channel, moment=0) self.rename_channels( **{self.channel_names[-1]: f"{channel}_{axis}_{method}"}, verbose=False ) except wt_exceptions.ValueError: pass # may have some channels which fail, do so silently return # get axis index -------------------------------------------------------------------------- if isinstance(axis, int): axis_index = axis elif isinstance(axis, str): index = self.axis_names.index(axis) axes = [i for i in range(self.ndim) if self.axes[index].shape[i] > 1] if len(axes) > 1: raise wt_exceptions.MultidimensionalAxisError(axis, "collapse") elif len(axes) == 0: raise wt_exceptions.ValueError( "Axis {} is a single point, cannot collapse".format(axis) ) axis_index = axes[0] else: raise wt_exceptions.TypeError("axis: expected {int, str}, got %s" % type(axis)) new_shape = list(self.shape) new_shape[axis_index] = 1 func = { "sum": np.nansum, "max": np.nanmax, "maximum": np.nanmax, "min": np.nanmin, "minimum": np.nanmin, "ave": np.nanmean, "average": np.nanmean, "mean": np.nanmean, } # methods --------------------------------------------------------------------------------- if isinstance(method, str): methods = [method for _ in self.channels] if isinstance(method, list): if len(method) == len(self.channels): methods = method else: raise wt_exceptions.ValueError( "method argument must have same number of elements as there are channels" ) for m in methods: if m not in func.keys(): raise wt_exceptions.ValueError("method '{}' not recognized".format(m)) warnings.warn("collapse", category=wt_exceptions.EntireDatasetInMemoryWarning) # collapse -------------------------------------------------------------------------------- for method, channel in zip(methods, self.channel_names): if method is None: continue if self[channel].shape[axis_index] == 1: continue # Cannot collapse any further, don't clutter data object new_shape = list(self[channel].shape) new_shape[axis_index] = 1 rtype = self[channel].dtype if method in ["ave", "average", "mean"]: rtype = np.result_type(self[channel].dtype, float) new = self.create_channel( "{}_{}_{}".format(channel, axis, method), values=np.empty(new_shape, dtype=rtype), units=self[channel].units, ) new[:] = func[method](self[channel], axis=axis_index, keepdims=True)
[docs] def convert(self, destination_units, *, convert_variables=False, verbose=True): """Convert all compatable axes and constants to given units. Parameters ---------- destination_units : str Destination units. convert_variables : boolean (optional) Toggle conversion of stored arrays. Default is False verbose : bool (optional) Toggle talkback. Default is True. See Also -------- Axis.convert Convert a single axis object to compatable units. Call on an axis object in data.axes. """ # apply to all compatible axes for axis in self.axes: if wt_units.is_valid_conversion(axis.units, destination_units): orig = axis.units axis.convert(destination_units, convert_variables=convert_variables) if verbose: print( "axis {} converted from {} to {}".format( axis.expression, orig, destination_units ) ) # apply to all compatible constants for constant in self.constants: if wt_units.is_valid_conversion(constant.units, destination_units): orig = constant.units constant.convert(destination_units, convert_variables=convert_variables) if verbose: print( "constant {} converted from {} to {}".format( constant.expression, orig, destination_units ) ) if convert_variables: for var in self.variables: if wt_units.is_valid_conversion(var.units, destination_units): orig = var.units var.convert(destination_units) if verbose: print( "variable {} converted from {} to {}".format( var.natural_name, orig, destination_units ) ) self._on_axes_updated() self._on_constants_updated()
[docs] def create_channel( self, name, values=None, *, shape=None, units=None, dtype=None, **kwargs ) -> Channel: """Append a new channel. Parameters ---------- name : string Unique name for this channel. values : array (optional) Array. If None, an empty array equaling the data shape is created. Default is None. shape : tuple of int Shape to use. Must broadcast with the full shape. Only used if `values` is None. Default is the full shape of self. units : string (optional) Channel units. Default is None. dtype : numpy.dtype (optional) dtype to use for dataset, default is np.float64. Only used if `values` is None. kwargs : dict Additional keyword arguments passed to Channel instantiation. Returns ------- Channel Created channel. """ if name in self.channel_names: warnings.warn(name, wt_exceptions.ObjectExistsWarning) return self[name] elif name in self.variable_names: raise wt_exceptions.NameNotUniqueError(name) require_kwargs = {"chunks": True} if values is None: if shape is None: require_kwargs["shape"] = self.shape else: require_kwargs["shape"] = shape if dtype is None: require_kwargs["dtype"] = np.dtype(np.float64) else: require_kwargs["dtype"] = dtype if require_kwargs["dtype"].kind == "f": require_kwargs["fillvalue"] = np.nan elif require_kwargs["dtype"].kind == "M": require_kwargs["fillvalue"] = np.datetime64("NaT") elif require_kwargs["dtype"].kind == "c": require_kwargs["fillvalue"] = complex(np.nan, np.nan) else: require_kwargs["fillvalue"] = 0 else: require_kwargs["data"] = values require_kwargs["shape"] = values.shape require_kwargs["dtype"] = values.dtype if np.prod(require_kwargs["shape"]) == 1: require_kwargs["chunks"] = None if "compression" in kwargs: require_kwargs["compression"] = kwargs["compression"] if "compression_opts" in kwargs: require_kwargs["compression_opts"] = kwargs["compression_opts"] if "shuffle" in kwargs: require_kwargs["shuffle"] = kwargs["shuffle"] # create dataset dataset_id = self.require_dataset(name=name, **require_kwargs).id channel = Channel(self, dataset_id, units=units, **kwargs) # finish self.attrs["channel_names"] = np.append(self.attrs["channel_names"], name.encode()) return channel
[docs] def create_variable( self, name, values=None, *, shape=None, units=None, dtype=None, **kwargs ) -> Variable: """Add new child variable. Parameters ---------- name : string Unique identifier. values : array-like (optional) Array to populate variable with. If None, an variable will be filled with NaN. Default is None. shape : tuple of int Shape to use. must broadcast with the full shape. Only used if `values` is None. Default is the full shape of self. units : string (optional) Variable units. Default is None. dtype : numpy.dtype (optional) dtype to use for dataset, default is np.float64. Only used if `values` is None. kwargs Additional kwargs to variable instantiation. Returns ------- WrightTools Variable New child variable. """ if name in self.variable_names: warnings.warn(name, wt_exceptions.ObjectExistsWarning) return self[name] elif name in self.channel_names: raise wt_exceptions.NameNotUniqueError(name) if values is None: if shape is None: shape = self.shape if dtype is None: dtype = np.dtype(np.float64) if dtype.kind in "f": fillvalue = np.nan elif dtype.kind in "M": fillvalue = np.datetime64("NaT") elif dtype.kind in "c": fillvalue = complex(np.nan, np.nan) else: fillvalue = 0 else: shape = values.shape dtype = values.dtype fillvalue = None require_kwargs = {"chunks": True} if "compression" in kwargs: require_kwargs["compression"] = kwargs["compression"] if "compression_opts" in kwargs: require_kwargs["compression_opts"] = kwargs["compression_opts"] if "shuffle" in kwargs: require_kwargs["shuffle"] = kwargs["shuffle"] if np.prod(shape) == 1: require_kwargs["chunks"] = None # create dataset id = self.require_dataset( name=name, data=values, shape=shape, dtype=dtype, fillvalue=fillvalue, **require_kwargs ).id variable = Variable(self, id, units=units, **kwargs) # finish self._variables = None self.attrs["variable_names"] = np.append(self.attrs["variable_names"], name.encode()) return variable
[docs] def get_nadir(self, channel=0) -> tuple: """Get the coordinates, in units, of the minimum in a channel. Parameters ---------- channel : int or str (optional) Channel. Default is 0. Returns ------- generator of numbers Coordinates in units for each axis. """ # get channel if isinstance(channel, int): channel_index = channel elif isinstance(channel, str): channel_index = self.channel_names.index(channel) else: raise TypeError("channel: expected {int, str}, got %s" % type(channel)) channel = self.channels[channel_index] # get indicies idx = channel.argmin() # finish return tuple(a[idx] for a in self._axes)
[docs] def get_zenith(self, channel=0) -> tuple: """Get the coordinates, in units, of the maximum in a channel. Parameters ---------- channel : int or str (optional) Channel. Default is 0. Returns ------- generator of numbers Coordinates in units for each axis. """ # get channel if isinstance(channel, int): channel_index = channel elif isinstance(channel, str): channel_index = self.channel_names.index(channel) else: raise TypeError("channel: expected {int, str}, got %s" % type(channel)) channel = self.channels[channel_index] # get indicies idx = channel.argmax() # finish return tuple(a[idx] for a in self._axes)
[docs] def heal(self, channel=0, method="linear", fill_value=np.nan, verbose=True): """ Remove nans from channel using interpolation. Parameters ---------- channel : int or str (optional) Channel to heal. Default is 0. method : {'linear', 'nearest', 'cubic'} (optional) The interpolation method. Note that cubic interpolation is only possible for 1D and 2D data. See `griddata`__ for more information. Default is linear. fill_value : number-like (optional) The value written to pixels that cannot be filled by interpolation. Default is nan. verbose : bool (optional) Toggle talkback. Default is True. __ http://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.griddata.html .. note:: Healing may take several minutes for large datasets. Interpolation time goes as nearest, linear, then cubic. """ warnings.warn("heal", category=wt_exceptions.EntireDatasetInMemoryWarning) timer = wt_kit.Timer(verbose=False) with timer: # channel if isinstance(channel, int): channel_index = channel elif isinstance(channel, str): channel_index = self.channel_names.index(channel) else: raise TypeError("channel: expected {int, str}, got %s" % type(channel)) channel = self.channels[channel_index] values = self.channels[channel_index][:] points = [axis[:] for axis in self._axes] xi = tuple(np.meshgrid(*points, indexing="ij")) # 'undo' gridding arr = np.zeros((len(self._axes) + 1, values.size)) for i in range(len(self._axes)): arr[i] = xi[i].flatten() arr[-1] = values.flatten() # remove nans arr = arr[:, ~np.isnan(arr).any(axis=0)] # grid data wants tuples tup = tuple([arr[i] for i in range(len(arr) - 1)]) # grid data out = griddata(tup, arr[-1], xi, method=method, fill_value=fill_value) self.channels[channel_index][:] = out # print if verbose: print( "channel {0} healed in {1} seconds".format( channel.name, np.around(timer.interval, decimals=3) ) )
[docs] def level(self, channel, axis, npts, *, verbose=True): """Subtract the average value of npts at the edge of a given axis. Parameters ---------- channel : int or str Channel to level. axis : int Axis to level along. npts : int Number of points to average for each slice. Positive numbers take points at leading indicies and negative numbers take points at trailing indicies. verbose : bool (optional) Toggle talkback. Default is True. """ warnings.warn("level", category=wt_exceptions.EntireDatasetInMemoryWarning) channel_index = wt_kit.get_index(self.channel_names, channel) channel = self.channels[channel_index] # verify npts not zero npts = int(npts) if npts == 0: raise wt_exceptions.ValueError("npts must not be zero") # get subtrahend ss = [slice(None)] * self.ndim if npts > 0: ss[axis] = slice(0, npts, None) else: ss[axis] = slice(npts, None, None) subtrahend = np.nanmean(channel[ss], axis=axis) if self.ndim > 1: subtrahend = np.expand_dims(subtrahend, axis=axis) # level channel -= subtrahend # finish channel._null = 0 if verbose: print("channel {0} leveled along axis {1}".format(channel.natural_name, axis))
[docs] def map_variable( self, variable, points, input_units="same", *, name=None, parent=None, verbose=True ) -> "Data": """ Map points of an axis to new points using linear interpolation. Out-of-bounds points are written nan. Non-mapped variables are kept in the data object only if they are orthogonal to the mapped variable (see `kit.orthogonal`). Parameters ---------- variable : string The variable to map onto. points : array-like or int If array, the new points. If int, new points will have the same limits, with int defining the number of evenly spaced points between. input_units : str (optional) The units of the new points. Default is same, which assumes the new points have the same units as the variable. name : string (optional) The name of the new data object. If None, generated from natural_name. Default is None. parent : WrightTools.Collection (optional) Parent of new data object. If None, data is made at root of a new temporary file. verbose : bool (optional) Toggle talkback. Default is True. Returns ------- WrightTools.Data New data object. """ # get variable index variable_index = wt_kit.get_index(self.variable_names, variable) variable = self.variables[variable_index] # get points if isinstance(points, int): points = np.linspace(variable.min(), variable.max(), points) points = np.array(points) # points dimensionality if points.ndim < variable.ndim: for i, d in enumerate(variable.shape): if d == 1: points = np.expand_dims(points, axis=i) # convert points if input_units == "same": pass else: points = wt_units.converter(points, input_units, variable.units) # construct new data object special = [ "name", "axes", "constants", "channel_names", "variable_names", "filepath", "edit_local", ] kwargs = {k: v for k, v in self.attrs.items() if k not in special} if name is None: name = "{0}_{1}_mapped".format(self.natural_name, variable.natural_name) kwargs["name"] = name kwargs["parent"] = parent out = Data(**kwargs) # mapped variable values = points out.create_variable(values=values, **variable.attrs) # orthogonal variables for v in self.variables: if wt_kit.orthogonal(v.shape, variable.shape): out.create_variable(values=v[:], **v.attrs) out.transform(*self.axis_expressions) # interpolate if self.ndim == 1: def interpolate(dataset, points): function = scipy.interpolate.interp1d(variable[:], dataset[:], bounds_error=False) return function(points) else: pts = np.array([a.full.flatten() for a in self.axes]).T out_pts = np.array([a.full.flatten() for a in out.axes]).T def interpolate(dataset, points): values = dataset.full.flatten() function = scipy.interpolate.LinearNDInterpolator(pts, values, rescale=True) new = function(out_pts) new.shape = out.shape return new for v in self.variables: if v.natural_name not in out.variable_names: out.create_variable(values=interpolate(v, points), **v.attrs) out.variable_names = self.variable_names # enforce old order out._variables = None # force regeneration of variables @property for channel in self.channels: out.create_channel(values=interpolate(channel, points), **channel.attrs) # finish if verbose: print("data mapped from {0} to {1}".format(self.shape, out.shape)) return out
[docs] def offset( self, points, offsets, along, offset_axis, units="same", offset_units="same", mode="valid", method="linear", verbose=True, ): """Offset one axis based on another axis' values. Useful for correcting instrumental artifacts such as zerotune. Parameters ---------- points : 1D array-like Points. offsets : 1D array-like Offsets. along : str or int Axis that points array lies along. offset_axis : str or int Axis to offset using offsets. units : str (optional) Units of points array. offset_units : str (optional) Units of offsets aray. mode : {'valid', 'full', 'old'} (optional) Define how far the new axis will extend. Points outside of valid interpolation range will be written nan. method : {'linear', 'nearest', 'cubic'} (optional) The interpolation method. Note that cubic interpolation is only possible for 1D and 2D data. See `griddata`__ for more information. Default is linear. verbose : bool (optional) Toggle talkback. Default is True. __ http://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.griddata.html >>> points # an array of w1 points >>> offsets # an array of d1 corrections >>> data.offset(points, offsets, 'w1', 'd1') """ raise NotImplementedError # axis ------------------------------------------------------------------------------------ if isinstance(along, int): axis_index = along elif isinstance(along, str): axis_index = self.axis_names.index(along) else: raise TypeError("along: expected {int, str}, got %s" % type(along)) axis = self._axes[axis_index] # values & points ------------------------------------------------------------------------- # get values, points, units if units == "same": input_units = axis.units else: input_units = units # check offsets is 1D or 0D if len(offsets.shape) == 1: pass else: raise RuntimeError("values must be 1D or 0D in offset!") # check if units is compatible, convert dictionary = getattr(wt_units, axis.units_kind) if input_units in dictionary.keys(): pass else: raise RuntimeError("units incompatible in offset!") points = wt_units.converter(points, input_units, axis.units) # create correction array function = interp1d(points, offsets, bounds_error=False) corrections = function(axis[:]) # remove nans finite_indicies = np.where(np.isfinite(corrections))[0] left_pad_width = finite_indicies[0] right_pad_width = len(corrections) - finite_indicies[-1] - 1 corrections = np.pad( corrections[np.isfinite(corrections)], (int(left_pad_width), int(right_pad_width)), mode="edge", ) # do correction --------------------------------------------------------------------------- # transpose so axis is last transpose_order = np.arange(len(self._axes)) transpose_order[axis_index] = len(self._axes) - 1 transpose_order[-1] = axis_index self.transpose(transpose_order, verbose=False) # get offset axis index if isinstance(offset_axis, int): offset_axis_index = offset_axis elif isinstance(offset_axis, str): offset_axis_index = self.axis_names.index(offset_axis) else: raise TypeError("offset_axis: expected {int, str}, got %s" % type(offset_axis)) # new points new_points = [a[:] for a in self._axes] old_offset_axis_points = self._axes[offset_axis_index][:] spacing = abs( (old_offset_axis_points.max() - old_offset_axis_points.min()) / float(len(old_offset_axis_points)) ) if mode == "old": new_offset_axis_points = old_offset_axis_points elif mode == "valid": _max = old_offset_axis_points.max() + corrections.min() _min = old_offset_axis_points.min() + corrections.max() n = int(abs(np.ceil((_max - _min) / spacing))) new_offset_axis_points = np.linspace(_min, _max, n) elif mode == "full": _max = old_offset_axis_points.max() + corrections.max() _min = old_offset_axis_points.min() + corrections.min() n = np.ceil((_max - _min) / spacing) new_offset_axis_points = np.linspace(_min, _max, n) new_points[offset_axis_index] = new_offset_axis_points new_xi = tuple(np.meshgrid(*new_points, indexing="ij")) xi = tuple(np.meshgrid(*[a[:] for a in self._axes], indexing="ij")) for channel in self.channels: # 'undo' gridding arr = np.zeros((len(self._axes) + 1, channel[:].size)) for i in range(len(self._axes)): arr[i] = xi[i].flatten() arr[-1] = channel[:].flatten() # do corrections corrections = list(corrections) corrections = corrections * int((len(arr[0]) / len(corrections))) arr[offset_axis_index] += corrections # grid data tup = tuple([arr[i] for i in range(len(arr) - 1)]) # note that rescale is crucial in this operation out = griddata(tup, arr[-1], new_xi, method=method, fill_value=np.nan, rescale=True) channel[:] = out self._axes[offset_axis_index][:] = new_offset_axis_points # transpose out self.transpose(transpose_order, verbose=False)
[docs] def print_tree(self, *, verbose=True): """Print a ascii-formatted tree representation of the data contents.""" print("{0} ({1}) {2}".format(self.natural_name, self.filepath, self.shape)) self._print_branch("", depth=0, verbose=verbose)
[docs] def prune(self, keep_channels=True, *, verbose=True): """Remove unused variables and (optionally) channels from the Data object. Unused variables are those that are not included in either axes or constants. Unused channels are those not specified in keep_channels, or the first channel. Parameters ---------- keep_channels : boolean or int or str or tuple If False, removes all but the first channel. If int or str, removes all but that index/name channel. If tuple, removes all channels except those in the tuple by index or name. Default is True: do not delete channels verbose : boolean Toggle talkback. Default is True. """ for v in self.variables: for var in wt_kit.flatten_list([ax.variables for ax in self._axes + self._constants]): if v == var: break else: self.remove_variable(v.natural_name, implied=False, verbose=verbose) if keep_channels is not True: try: if isinstance(keep_channels, str): raise TypeError indexes = tuple(keep_channels) except TypeError: indexes = (keep_channels,) for i, ch in enumerate(self.channels): if i not in indexes and not ch.natural_name in indexes: self.remove_channel(ch.natural_name, verbose=verbose)
[docs] def remove_channel(self, channel, *, verbose=True): """Remove channel from data. Parameters ---------- channel : int or str Channel index or name to remove. verbose : boolean (optional) Toggle talkback. Default is True. """ channel_index = wt_kit.get_index(self.channel_names, channel) new = list(self.channel_names) name = new.pop(channel_index) del self[name] self.channel_names = new if verbose: print("channel {0} removed".format(name))
[docs] def remove_variable(self, variable, *, implied=True, verbose=True): """Remove variable from data. Parameters ---------- variable : int or str Variable index or name to remove. implied : boolean (optional) Toggle deletion of other variables that start with the same name. Default is True. verbose : boolean (optional) Toggle talkback. Default is True. """ if isinstance(variable, int): variable = self.variable_names[variable] # find all of the implied variables removed = [] if implied: for n in self.variable_names: if n.startswith(variable): removed.append(n) else: removed = [variable] # check that axes will not be ruined for n in removed: for a in self._axes: if n in [v.natural_name for v in a.variables]: message = "{0} is contained in axis {1}".format(n, a.expression) raise RuntimeError(message) for c in self._constants: if n in [v.natural_name for v in c.variables]: warnings.warn( "Variable being removed used in a constant", wt_exceptions.WrightToolsWarning, ) # do removal for n in removed: variable_index = wt_kit.get_index(self.variable_names, n) new = list(self.variable_names) name = new.pop(variable_index) del self[name] self.variable_names = new self._variables = None # finish if verbose: print("{0} variable(s) removed:".format(len(removed))) for n in removed: print(" {0}".format(n))
[docs] def rename_channels(self, *, verbose=True, **kwargs): """Rename a set of channels. Parameters ---------- kwargs Keyword arguments of the form current:'new'. verbose : boolean (optional) Toggle talkback. Default is True """ # ensure that items will remain unique changed = kwargs.keys() for k, v in kwargs.items(): if v not in changed and v in self.keys(): raise wt_exceptions.NameNotUniqueError(v) # compile references to items that are changing new = {} for k, v in kwargs.items(): obj = self[k] index = self.channel_names.index(k) # rename new[v] = obj, index Group._instances.pop(obj.fullpath, None) obj.natural_name = str(v) # remove old references del self[k] # apply new references names = list(self.channel_names) for v, value in new.items(): obj, index = value self[v] = obj names[index] = v self.channel_names = names # finish if verbose: print("{0} channel(s) renamed:".format(len(kwargs))) for k, v in kwargs.items(): print(" {0} --> {1}".format(k, v))
[docs] def rename_variables(self, *, implied=True, verbose=True, **kwargs): """Rename a set of variables. Parameters ---------- kwargs Keyword arguments of the form current:'new'. implied : boolean (optional) Toggle inclusion of other variables that start with the same name. Default is True. verbose : boolean (optional) Toggle talkback. Default is True """ # find all of the implied variables kwargs = collections.OrderedDict(kwargs) if implied: new = collections.OrderedDict() for k, v in kwargs.items(): for n in self.variable_names: if n.startswith(k): new[n] = n.replace(k, v, 1) kwargs = new # ensure that items will remain unique changed = kwargs.keys() for k, v in kwargs.items(): if v not in changed and v in self.keys(): raise wt_exceptions.NameNotUniqueError(v) # compile references to items that are changing new = {} for k, v in kwargs.items(): obj = self[k] index = self.variable_names.index(k) # rename new[v] = obj, index Group._instances.pop(obj.fullpath, None) obj.natural_name = str(v) # remove old references del self[k] # apply new references names = list(self.variable_names) for v, value in new.items(): obj, index = value self[v] = obj names[index] = v self.variable_names = names units = self.units new = list(self.axis_expressions) for i, v in enumerate(kwargs.keys()): for j, n in enumerate(new): new[j] = n.replace(v, "{%i}" % i) for i, n in enumerate(new): new[i] = n.format(*kwargs.values()) self.transform(*new) for a, u in zip(self._axes, units): a.convert(u) units = self.constant_units new = list(self.constant_expressions) for i, v in enumerate(kwargs.keys()): for j, n in enumerate(new): new[j] = n.replace(v, "{%i}" % i) for i, n in enumerate(new): new[i] = n.format(*kwargs.values()) self.set_constants(*new) for c, u in zip(self._constants, units): c.convert(u) # finish if verbose: print("{0} variable(s) renamed:".format(len(kwargs))) for k, v in kwargs.items(): print(" {0} --> {1}".format(k, v))
[docs] def share_nans(self): """Share not-a-numbers between all channels. If any channel is nan at a given index, all channels will be nan at that index after this operation. Uses the share_nans method found in wt.kit. """ def f(_, s, channels): outs = wt_kit.share_nans(*[c[s] for c in channels]) for c, o in zip(channels, outs): c[s] = o self.channels[0].chunkwise(f, self.channels)
[docs] def smooth(self, factors, channel=None, verbose=True) -> "Data": """Smooth a channel using an n-dimenional kaiser window. Note, all arrays are loaded into memory. For more info see `Kaiser_window`__ wikipedia entry. __ https://en.wikipedia.org/wiki/Kaiser_window Parameters ---------- factors : int or list of int The smoothing factor. You may provide a list of smoothing factors for each axis. channel : int or str or None (optional) The channel to smooth. If None, all channels will be smoothed. Default is None. verbose : bool (optional) Toggle talkback. Default is True. """ warnings.warn("smooth", category=wt_exceptions.EntireDatasetInMemoryWarning) # get factors ----------------------------------------------------------------------------- if isinstance(factors, list): pass else: dummy = np.zeros(len(self._axes)) dummy[::] = factors factors = list(dummy) # get channels ---------------------------------------------------------------------------- if channel is None: channels = self.channels else: if isinstance(channel, int): channel_index = channel elif isinstance(channel, str): channel_index = self.channel_names.index(channel) else: raise TypeError("channel: expected {int, str}, got %s" % type(channel)) channels = [self.channels[channel_index]] # smooth ---------------------------------------------------------------------------------- for channel in channels: values = channel[:] for axis_index in range(len(factors)): factor = factors[axis_index] # transpose so the axis of interest is last transpose_order = range(len(values.shape)) # replace axis_index with zero transpose_order = [ len(values.shape) - 1 if i == axis_index else i for i in transpose_order ] transpose_order[len(values.shape) - 1] = axis_index values = values.transpose(transpose_order) # get kaiser window beta = 5.0 w = np.kaiser(2 * factor + 1, beta) # for all slices... for index in np.ndindex(values[..., 0].shape): current_slice = values[index] temp_slice = np.pad(current_slice, int(factor), mode=str("edge")) values[index] = np.convolve(temp_slice, w / w.sum(), mode=str("valid")) # transpose out values = values.transpose(transpose_order) # return array to channel object channel[:] = values if verbose: print("smoothed data")
[docs] def split( self, expression, positions, *, units=None, parent=None, verbose=True ) -> wt_collection.Collection: """ Split the data object along a given expression, in units. Parameters ---------- expression : int or str The expression to split along. If given as an integer, the axis at that index is used. positions : number-type or 1D array-type The position(s) to split at, in units. units : str (optional) The units of the given positions. Default is same, which assumes input units are identical to first variable units. parent : WrightTools.Collection (optional) The parent collection in which to place the 'split' collection. Default is a new Collection. verbose : bool (optional) Toggle talkback. Default is True. Returns ------- WrightTools.collection.Collection A Collection of data objects. The order of the objects is such that the axis points retain their original order. See Also -------- chop Divide the dataset into its lower-dimensionality components. collapse Collapse the dataset along one axis. """ # axis ------------------------------------------------------------------------------------ old_expr = self.axis_expressions old_units = self.units out = wt_collection.Collection(name="split", parent=parent) if isinstance(expression, int): if units is None: units = self._axes[expression].units expression = self._axes[expression].expression elif isinstance(expression, str): pass else: raise TypeError("expression: expected {int, str}, got %s" % type(expression)) self.transform(expression) if units: self.convert(units, verbose=False) try: positions = [-np.inf] + sorted(list(positions)) + [np.inf] except TypeError: positions = [-np.inf, positions, np.inf] values = self._axes[0].full masks = [(values >= lo) & (values < hi) for lo, hi in wt_kit.pairwise(positions)] omasks = [] cuts = [] for mask in masks: try: omasks.append(wt_kit.mask_reduce(mask)) cuts.append([i == 1 for i in omasks[-1].shape]) # Ensure at least one axis is kept if np.all(cuts[-1]): cuts[-1][0] = False except ValueError: omasks.append(None) cuts.append(None) for i in range(len(positions) - 1): out.create_data("split%03i" % i) for var in self.variables: for i, (imask, omask, cut) in enumerate(zip(masks, omasks, cuts)): if omask is None: # Zero length split continue omask = wt_kit.enforce_mask_shape(omask, var.shape) omask.shape = tuple([s for s, c in zip(omask.shape, cut) if not c]) out_arr = np.full(omask.shape, np.nan) imask = wt_kit.enforce_mask_shape(imask, var.shape) out_arr[omask] = var[:][imask] out[i].create_variable(values=out_arr, **var.attrs, dtype=var.dtype) for ch in self.channels: for i, (imask, omask, cut) in enumerate(zip(masks, omasks, cuts)): if omask is None: # Zero length split continue omask = wt_kit.enforce_mask_shape(omask, ch.shape) omask.shape = tuple([s for s, c in zip(omask.shape, cut) if not c]) out_arr = np.full(omask.shape, np.nan, dtype=ch.dtype) imask = wt_kit.enforce_mask_shape(imask, ch.shape) out_arr[omask] = ch[:][imask] out[i].create_channel(values=out_arr, **ch.attrs, dtype=ch.dtype) if verbose: for d in out.values(): try: d.transform(expression) except IndexError: continue print("split data into {0} pieces along <{1}>:".format(len(positions) - 1, expression)) for i, (lo, hi) in enumerate(wt_kit.pairwise(positions)): new_data = out[i] if new_data.shape == (): print(" {0} : None".format(i)) else: new_axis = new_data.axes[0] print( " {0} : {1:0.2f} to {2:0.2f} {3} {4}".format( i, lo, hi, self.axes[0].units, new_axis.shape ) ) for d in out.values(): try: d.transform(*old_expr) keep = [] keep_units = [] for ax, u in zip(d.axes, old_units): if ax.size > 1: keep.append(ax.expression) keep_units.append(u) else: d.create_constant(ax.expression, verbose=False) d.transform(*keep) for ax, u in zip(d.axes, keep_units): ax.convert(u) except IndexError: continue tempax = Axis(d, expression) if all( np.all( np.sum(~np.isnan(tempax.masked), axis=tuple(set(range(tempax.ndim)) - {j})) <= 1 ) for j in range(tempax.ndim) ): d.create_constant(expression, verbose=False) self.transform(*old_expr) for ax, u in zip(self.axes, old_units): ax.convert(u) return out
[docs] def transform(self, *axes, verbose=True): """Transform the data. Parameters ---------- axes : strings Expressions for the new set of axes. verbose : boolean (optional) Toggle talkback. Default is True See Also -------- set_constants Similar method except for constants """ # TODO: ensure that transform does not break data # create new = [] newt = "newt" in self.axis_expressions current = {a.expression: a for a in self._axes} for expression in axes: axis = current.get(expression, Axis(self, expression)) new.append(axis) self._axes = new # units for a in self._axes: if a.units is None: a.convert(a.variables[0].units) # finish self.flush() self._on_axes_updated() nownewt = "newt" in self.axis_expressions if verbose and nownewt and not newt: print("Look she turned me into a newt") elif verbose and newt and not nownewt: print("I got better")
def translate_to_txt( self, filepath, delimiter="\t", channels=None, variables=None, fmt=".5g", verbose=True ): """ Write a serialized, readable list of the channels and variables to file. Each line (row) denotes a seperate data point. Each column represents a unique variable or channel. Axes are neglected. Parameters ---------- filepath: path-like path to output file. delimiter: str separation character. Defaults to "\t" channels: list[str] (optional) List of channel names to include. Default is all channels. variables: list[str] (optional) List of variable names to include. Default is all variables. fmt: [python format spec](https://docs.python.org/3/library/string.html#formatspec) format specifier for variables and channels Returns ------- None Notes ----- * This is a lossy write procedure; some properties, such as axes, are not recorded. * The shape structure of the data is recorded as a series of indices (`{a_i}`) comprising the first few columns. A vertical line separates these indexes from variables and channels * wt5, the native file format for Data objects, is a specific variant of the HDF5 file format. HDF5 has well-developed tools for working with generic datasets. See https://portal.hdfgroup.org/display/knowledge/How+to+convert+an+HDF4+or+HDF5+file+to+ASCII+%28text%29+or+Excel for more information. """ # attrs import tidy_headers tidy_headers.write(filepath, {k: v for k, v in self.attrs.items()}) columns = [f"a_{i}" for i in range(self.ndim)] columns.append("|") is_broadcast = [] variables = list(self.variable_names) if variables is None else variables for var in variables: columns.append(f"{var} ({self[var].units})") is_broadcast.append([i == 1 for i in self[var].shape]) channels = list(self.channel_names) if channels is None else channels for ch in channels: columns.append(f"{ch} ({self[ch].units})") is_broadcast.append([i == 1 for i in self[ch].shape]) with open(filepath, "a") as f: f.write(delimiter.join(columns) + "\n") chunk = "" for i, ndi in enumerate(np.ndindex(self.shape)): line = [str(i) for i in ndi] line += ["|"] for j, name in enumerate(variables + channels): arr = self[name] # broadcast reduced dimensions arrays to full idxs = tuple(xi * (not yi) for xi, yi in zip(ndi, is_broadcast[j])) line.append(f"{arr[idxs]:{fmt}}") if verbose and ((i == 0) or (not (i % 10))): frac = round(i / self.size, 3) sys.stdout.write( f"[{'=' * int(frac * 60): <60}] {frac * 100:0.1f}% ...to_txt\r" ) sys.stdout.flush() chunk += delimiter.join(line) + "\n" if (i % 100) == 99: # write to disk periodically f.write(chunk) chunk = "" if chunk: f.write(chunk) if verbose: sys.stdout.write(f"[{'=' * 60}] {100:0.1f}% ...done! \r") sys.stdout.flush()
[docs] def set_constants(self, *constants, verbose=True): """Set the constants associated with the data. Parameters ---------- constants : str Expressions for the new set of constants. verbose : boolean (optional) Toggle talkback. Default is True See Also -------- transform Similar method except for axes. create_constant Add an individual constant. remove_constant Remove an individual constant. """ # create new = [] current = {c.expression: c for c in self._constants} for expression in constants: constant = current.get(expression, Constant(self, expression)) new.append(constant) self._constants = new # units for c in self._constants: if c.units is None: c.convert(c.variables[0].units) # finish self.flush() self._on_constants_updated()
[docs] def create_constant(self, expression, *, verbose=True): """Append a constant to the stored list. Parameters ---------- expression : str Expression for the new constant. verbose : boolean (optional) Toggle talkback. Default is True See Also -------- set_constants Remove and replace all constants. remove_constant Remove an individual constant. """ if expression in self.constant_expressions: wt_exceptions.ObjectExistsWarning.warn(expression) return self.constants[self.constant_expressions.index(expression)] constant = Constant(self, expression) if constant.units is None: constant.convert(constant.variables[0].units) self._constants.append(constant) self.flush() self._on_constants_updated() if verbose: print("Constant '{}' added".format(constant.expression)) return constant
[docs] def remove_constant(self, constant, *, verbose=True): """Remove a constant from the stored list. Parameters ---------- constant : str or Constant or int Expression for the new constant. verbose : boolean (optional) Toggle talkback. Default is True See Also -------- set_constants Remove and replace all constants. create_constant Add an individual constant. """ if isinstance(constant, (str, int)): constant_index = wt_kit.get_index(self.constant_expressions, constant) elif isinstance(constant, Constant): constant_index = wt_kit.get_index(self.constants, constant) else: raise TypeError(f"unsupported type for 'constant': {type(constant).__name__}") constant = self._constants[constant_index] self._constants.pop(constant_index) self.flush() self._on_constants_updated() if verbose: print("Constant '{}' removed".format(constant.expression))
[docs] def zoom(self, factor, order=1, verbose=True): """Zoom the data array using spline interpolation of the requested order. The number of points along each axis is increased by factor. See `scipy ndimage`__ for more info. __ http://docs.scipy.org/doc/scipy/reference/ generated/scipy.ndimage.zoom.html Parameters ---------- factor : float The number of points along each axis will increase by this factor. order : int (optional) The order of the spline used to interpolate onto new points. verbose : bool (optional) Toggle talkback. Default is True. """ raise NotImplementedError import scipy.ndimage # axes for axis in self._axes: axis[:] = scipy.ndimage.zoom(axis[:], factor, order=order) # channels for channel in self.channels: channel[:] = scipy.ndimage.zoom(channel[:], factor, order=order) # return if verbose: print("data zoomed to new shape:", self.shape)