# Source code for WrightTools.data._channel

"""Channel class and associated."""

# --- import --------------------------------------------------------------------------------------


import numpy as np

import h5py

import warnings

from .. import kit as wt_kit
from .._dataset import Dataset
from .. import exceptions as wt_exceptions

__all__ = ["Channel"]

# --- class ---------------------------------------------------------------------------------------



class Channel(Dataset):
    """Channel.

    A ``Dataset`` subclass that keeps ``null`` and ``signed`` metadata in its
    attrs and offers helpers to measure extent relative to null, normalize
    the stored values, and trim outliers.
    """

    # Class identifier string; the constructor writes the same value to attrs["class"].
    class_name = "Channel"


    def __init__(
        self,
        parent,
        id,
        *,
        units=None,
        null=None,
        signed=None,
        label=None,
        label_seed=None,
        **kwargs
    ):
        """Construct a channel object.

        Parameters
        ----------
        parent : object
            Owner of this channel. Stored as ``_parent``; its ``file.mode``
            decides whether attributes may be written.
        id : object
            Identifier handed unchanged to the ``Dataset`` initializer.
        units : string (optional)
            Channel units. Default is None.
        null : number (optional)
            Channel null. Written to attrs only when not None and the file is
            writable. Default is None.
        signed : boolean (optional)
            Channel signed flag. Written to attrs only when not None and the
            file is writable. Default is None.
        label : string (optional)
            Label. Default is None.
        label_seed : list of strings (optional)
            Label seed. Default is None.
        **kwargs
            Additional keyword arguments are added to the attrs dictionary
            and to the natural namespace of the object (if possible).
        """
        self._parent = parent
        super().__init__(id)
        self.label = label
        self.label_seed = label_seed
        self.units = units
        self.dimensionality = len(self.shape)

        # Attributes are only written when the file was not opened read-only.
        file_mode = self._parent.file.mode
        writable = not (file_mode is None or file_mode == "r")
        if writable:
            try:
                self.attrs.update(kwargs)
                # The channel name is the last component of the HDF5 object path.
                full_path = h5py.h5i.get_name(self.id).decode()
                self.attrs["name"] = full_path.rsplit("/", 1)[-1]
                self.attrs["class"] = "Channel"
                # Optional flags are stored only when explicitly supplied.
                for attr_name, attr_value in (("signed", signed), ("null", null)):
                    if attr_value is not None:
                        self.attrs[attr_name] = attr_value
            except (RuntimeError, KeyError):
                # e.g. readonly file
                pass

        # Mirror attrs into the natural namespace without clobbering
        # anything the object already exposes.
        for attr_key, attr_value in self.attrs.items():
            identifier = wt_kit.string2identifier(attr_key)
            if hasattr(self, identifier):
                continue
            setattr(self, identifier, attr_value)


    @property
    def major_extent(self) -> complex:
        """Maximum deviation from null.

        The larger of the distance from null up to the channel maximum and
        the distance from null down to the channel minimum.
        """
        above = self.max() - self.null
        below = self.null - self.min()
        return max(above, below)

    @property
    def minor_extent(self) -> complex:
        """Minimum deviation from null.

        The smaller of the distance from null up to the channel maximum and
        the distance from null down to the channel minimum.
        """
        above = self.max() - self.null
        below = self.null - self.min()
        return min(above, below)

    @property
    def null(self) -> complex:
        """Channel null, read from attrs.

        When attrs has no ``"null"`` entry yet, 0 is stored there first and
        the stored value is then returned.
        """
        attrs = self.attrs
        if "null" in attrs.keys():
            return attrs["null"]
        # First access: persist the default, then read it back from attrs.
        attrs["null"] = 0
        return attrs["null"]

    @null.setter
    def null(self, value):
        """Store a new null in attrs."""
        self.attrs["null"] = value

    @property
    def signed(self) -> bool:
        """Channel signed flag, read from attrs.

        When attrs has no ``"signed"`` entry yet, False is stored there first
        and the stored value is then returned.
        """
        attrs = self.attrs
        if "signed" in attrs.keys():
            return attrs["signed"]
        # First access: persist the default, then read it back from attrs.
        attrs["signed"] = False
        return attrs["signed"]

    @signed.setter
    def signed(self, value):
        """Store a new signed flag in attrs."""
        self.attrs["signed"] = value


    def mag(self) -> complex:
        """Channel magnitude (maximum deviation from null).

        Method-style accessor for the ``major_extent`` property.
        """
        magnitude = self.major_extent
        return magnitude



    def normalize(self, mag=1.0):
        """Normalize a Channel, set `null` to 0 and the mag to given value.

        The stored values are modified in place, chunk by chunk: the current
        null is subtracted and the result is divided by a scale factor. The
        channel's ``null`` is then reset to 0.

        Parameters
        ----------
        mag : float (optional)
            New value of mag. Default is 1.
        """

        def f(dataset, s, null, mag):
            # Shift so that null sits at zero, then rescale.
            dataset[s] -= null
            dataset[s] /= mag

        # Signed channels scale by the largest deviation from null; unsigned
        # channels scale by the raw maximum.
        # NOTE(review): for an unsigned channel with a non-zero null the
        # resulting maximum is (max - null) * mag / max rather than mag;
        # confirm whether max() - null was intended here.
        if self.signed:
            mag = self.mag() / mag
        else:
            mag = self.max() / mag
        self.chunkwise(f, null=self.null, mag=mag)
        # The data are now null-subtracted, so the stored null must follow.
        # Go through the property so attrs["null"] is actually updated
        # (assigning to a private ``_null`` attribute left it unchanged).
        self.null = 0



    def trim(self, neighborhood, method="ztest", factor=3, replace="nan", verbose=True):
        """Remove outliers from the dataset.

        Identifies outliers by comparing each point to its
        neighbors using a statistical test.

        Parameters
        ----------
        neighborhood : list of integers
            Size of the neighborhood in each dimension. Length of the list must
            be equal to the dimensionality of the channel.
        method : {'ztest'} (optional)
            Statistical test used to detect outliers. Default is ztest.
            The z-test is the only test implemented; this argument is
            currently not consulted.

            ztest
                Compare point deviation from neighborhood mean to neighborhood
                standard deviation.

        factor : number (optional)
            Tolerance factor.  Default is 3.
        replace : {'nan', 'mean', 'exclusive_mean', number} (optional)
            Behavior of outlier replacement. Default is nan.

            nan
                Outliers are replaced by numpy nans.

            mean
                Outliers are replaced by the mean of their neighborhood, including
                themselves.

            exclusive_mean
                Outliers are replaced by the mean of their neighborhood, not including
                themselves.

            number
                Outliers are replaced by the given number.

        verbose : boolean (optional)
            Toggle printing of the number of outliers found. Default is True.

        Returns
        -------
        list of tuples
            Indices of trimmed outliers. Empty list if none were found.

        See Also
        --------
        clip
            Remove pixels outside of a certain range.
        """
        warnings.warn("trim", category=wt_exceptions.EntireDatasetInMemoryWarning)
        # Load the values once. Detection below only reads from this array and
        # replacement happens after the scan, so every test sees original data.
        # This may someday be available in h5py directly, but seems that day is not yet.
        # This is annoying because it is the only reason we hold the whole set in memory.
        # KFS 2019-03-21
        arr = self[:]
        outliers = []
        means = []
        ex_means = []
        # find outliers
        for idx in np.ndindex(self.shape):
            # Neighborhood window, clipped to the array bounds. Multi-axis
            # indexing needs a tuple of slices, not a list.
            window = tuple(
                slice(max(0, i - di), min(size, i + di + 1), 1)
                for i, di, size in zip(idx, neighborhood, self.shape)
            )
            neighbors = arr[window]
            point = arr[idx]
            mean = np.nanmean(neighbors)
            limit = np.nanstd(neighbors) * factor
            if np.abs(point - mean) > limit:
                outliers.append(idx)
                means.append(mean)
                # Note, "- 1" is to exclude the point itself, which is not nan, in order
                # to enter this if block, as `np.abs(nan - mean)` is nan, which would
                # evaluate to False
                n_valid = np.sum(~np.isnan(neighbors))
                ex_means.append((np.nansum(neighbors) - point) / (n_valid - 1))

        # replace outliers
        # Transpose the list of index tuples into one index sequence per axis.
        i = tuple(zip(*outliers))

        if not i:
            if verbose:
                print("No outliers found")
            return []

        # Named strategies map to their replacement values; anything else is
        # used as the replacement value directly.
        replace = {"nan": np.nan, "mean": means, "exclusive_mean": ex_means}.get(replace, replace)

        arr[i] = replace
        self[:] = arr

        # finish
        if verbose:
            print("%i outliers removed" % len(outliers))
        return outliers


    def _to_dict(self):
        """Return the channel as a plain dictionary.

        The dictionary holds the natural name, the full values array, units,
        label and signed flag, followed by every entry of attrs. Entries of
        attrs overwrite the explicit keys when the names collide.
        """
        as_dict = {
            "name": self.natural_name,
            "values": self[:],
            "units": self.units,
            "label": self.label,
            "signed": self.signed,
        }
        as_dict.update(self.attrs)
        return as_dict