Source code for WrightTools.kit._discover_dimensions

"""Discover dimensions of a flattened ND array."""

# --- import --------------------------------------------------------------------------------------


import warnings
import collections

import numpy as np

from .. import units as wt_units


# --- define --------------------------------------------------------------------------------------


__all__ = ["discover_dimensions"]


# --- function ------------------------------------------------------------------------------------


def discover_dimensions(arr, cols) -> collections.OrderedDict:
    """Discover the dimensions of a flattened multidimensional array.

    Parameters
    ----------
    arr : 2D numpy ndarray
        Array in [col, value].
    cols : dictionary
        Dictionary with column names as keys, and idx, tolerance and units as values.

    Returns
    -------
    dictionary
        expression: points
    """
    # import values -------------------------------------------------------------------------------
    di = [cols[key]["idx"] for key in cols.keys()]
    dt = [cols[key]["tolerance"] for key in cols.keys()]
    du = [cols[key]["units"] for key in cols.keys()]
    dk = [key for key in cols.keys()]
    dims = list(zip(di, dt, du, dk))
    # remove nan dimensions and bad dimensions ----------------------------------------------------
    to_pop = []
    for i in range(len(dims)):
        if np.all(np.isnan(arr[dims[i][0]])):
            to_pop.append(i)
    to_pop.reverse()
    for i in to_pop:
        dims.pop(i)
    # which dimensions are equal ------------------------------------------------------------------
    # find
    d_equal = np.zeros((len(dims), len(dims)), dtype=bool)
    d_equal[:, :] = True
    for i in range(len(dims)):  # test
        for j in range(len(dims)):  # against
            for k in range(len(arr[0])):
                upper_bound = arr[dims[i][0], k] + dims[i][1]
                lower_bound = arr[dims[i][0], k] - dims[i][1]
                test_point = arr[dims[j][0], k]
                if upper_bound > test_point > lower_bound:
                    pass
                else:
                    d_equal[i, j] = False
                    break
    # condense
    dims_unaccounted = list(range(len(dims)))
    dims_condensed = []
    while dims_unaccounted:
        dim_current = dims_unaccounted[0]
        index = dims[dim_current][0]
        tolerance = [dims[dim_current][1]]
        units = dims[dim_current][2]
        key = [dims[dim_current][3]]
        dims_unaccounted.pop(0)
        indicies = list(range(len(dims_unaccounted)))
        indicies.reverse()
        for i in indicies:
            dim_check = dims_unaccounted[i]
            if d_equal[dim_check, dim_current]:
                tolerance.append(dims[dim_check][1])
                key.append(dims[dim_check][3])
                dims_unaccounted.pop(i)
        tolerance = max(tolerance)
        dims_condensed.append([index, tolerance, units, key])
    dims = dims_condensed
    # which dimensions are scanned ----------------------------------------------------------------
    # find
    scanned = []
    constant_list = []
    for dim in dims:
        name = dim[3]
        index = dim[0]
        vals = arr[index]
        tolerance = dim[1]
        if vals.max() - vals.min() > tolerance:
            scanned.append([name, index, tolerance, None])
        else:
            constant_list.append([name, index, tolerance, arr[index, 0]])
    # order scanned dimensions (..., zi, yi, xi)
    first_change_indicies = []
    for axis in scanned:
        first_point = arr[axis[1], 0]
        for i in range(len(arr[0])):
            upper_bound = arr[axis[1], i] + axis[2]
            lower_bound = arr[axis[1], i] - axis[2]
            if upper_bound > first_point > lower_bound:
                pass
            else:
                first_change_indicies.append(i)
                break
    scanned_ordered = [scanned[i] for i in np.argsort(first_change_indicies)]
    scanned_ordered.reverse()
    # shape ----------------------------------------------------------------------------------------
    out = collections.OrderedDict()
    for a in scanned_ordered:
        key = a[0][0]
        axis = cols[key]
        # generate lists from data
        lis = sorted(arr[axis["idx"]])
        tol = axis["tolerance"]
        # values are binned according to their averages now, so min and max
        # are better represented
        xstd = []
        xs = []
        # check to see if unique values are sufficiently unique
        # deplete the list of values by finding points that are within
        # tolerance
        while len(lis) > 0:
            # find all the xi's that are like this one and group them
            # after grouping, remove from the list
            set_val = lis[0]
            xi_lis = [xi for xi in lis if np.abs(set_val - xi) < tol]
            # the complement of xi_lis is what remains of lis, then
            lis = [xi for xi in lis if not np.abs(xi_lis[0] - xi) < tol]
            xi_lis_average = sum(xi_lis) / len(xi_lis)
            xs.append(xi_lis_average)
            xstdi = sum(np.abs(xi_lis - xi_lis_average)) / len(xi_lis)
            xstd.append(xstdi)
        tol = sum(xstd) / len(xstd)
        tol = max(tol, 1e-4)
        if axis["units"] == "nm":
            min_wn = 1e7 / max(xs) + tol
            max_wn = 1e7 / min(xs) - tol
            points = np.linspace(min_wn, max_wn, num=len(xs))
            points = wt_units.converter(points, "wn", "nm")
        else:
            points = np.linspace(min(xs) + tol, max(xs) - tol, num=len(xs))
        key = "=".join(a[0])
        out[key] = points
    # warn if data doesn't seem like the right shape ----------------------------------------------
    length = len(arr[0])
    size = 1
    for a in out.values():
        size *= a.size
    if not size == length:
        message = "array length ({0}) inconsistent with data size ({1})".format(length, size)
        warnings.warn(message)
    # return --------------------------------------------------------------------------------------
    return out
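
A minimal usage sketch follows; it is not part of the module source. It builds a small flattened 2D scan by hand and passes it to discover_dimensions. The column names ("w1", "d1"), tolerances, units, and values are illustrative assumptions, not values taken from WrightTools.

import collections

import numpy as np

from WrightTools.kit._discover_dimensions import discover_dimensions

# flattened 2D scan in [col, value] form: one row per recorded column
# "w1" is the slow (outer) axis, "d1" the fast (inner) axis (assumed example data)
arr = np.array(
    [
        [1500.0, 1500.0, 1550.0, 1550.0, 1600.0, 1600.0],  # w1 column (wn)
        [0.0, 1.0, 0.0, 1.0, 0.0, 1.0],  # d1 column (ps)
    ]
)

# column metadata keyed by name: index into arr, tolerance, units (assumed values)
cols = collections.OrderedDict()
cols["w1"] = {"idx": 0, "tolerance": 1.0, "units": "wn"}
cols["d1"] = {"idx": 1, "tolerance": 0.1, "units": "ps"}

out = discover_dimensions(arr, cols)
for expression, points in out.items():
    print(expression, points.size)
# prints "w1 3" then "d1 2": d1 changes first along the flattened array,
# so it is identified as the innermost (last) dimension

Note that columns recorded in "nm" are treated differently from the "wn" and "ps" columns above: their points are laid out evenly in wavenumber (1e7 / nm) and then converted back with wt_units.converter, so the returned axis is not evenly spaced in nm.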