Source code for gridded.variable

from textwrap import dedent
import collections
import hashlib
from functools import wraps
import os
import numpy as np
import netCDF4 as nc4

from gridded.utilities import (get_dataset,
                               _reorganize_spatial_data,
                               _align_results_to_spatial_data,
                               asarraylike,
                               search_dataset_for_variables_by_varname)
from gridded import VALID_LOCATIONS
from gridded.grids import Grid, Grid_U, Grid_S, Grid_R
from gridded.depth import Depth
from gridded.time import Time

import logging

log = logging.getLogger(__name__)


[docs]class Variable(object):
    """
    Variable object: represents a field of values associated with the grid.

    Abstractly, it is usually a scalar physical property such a temperature,
    salinity that varies over a the domain of the model.

    This more or less maps to a variable in a netcdf file, but does not have
    to come form a netcdf file, and this provides and abstraction where the
    user can access the value in world coordinates, interpolated from the grid.

    It holds a reference to its own grid object, and its data.
    """
    default_names = []
    cf_names = []
    _def_count = 0

    _default_component_types = {'time': Time,
                                'grid': Grid,
                                'depth': Depth}

    def __init__(self,
                 name=None,
                 units=None,
                 time=None,
                 data=None,
                 grid=None,
                 depth=None,
                 data_file=None,
                 grid_file=None,
                 varname=None,
                 fill_value=0,
                 location=None,
                 attributes=None,
                 surface_boundary_condition='extrapolate',
                 bottom_boundary_condition='mask',
                 **kwargs):
        '''
        This class represents a value defined on a grid

        :param name: Name
        :type name: string

        :param units: Units
        :type units: string

        :param time: Time axis of the data
        :type time: list of `datetime.datetime`, netCDF4 Variable, or Time object

        :param data: Underlying data source
        :type data: array-like object such as netCDF4.Variable or numpy.ndarray

        :param grid: Grid that the data corresponds with
        :type grid: Grid object (pysgrid or pyugrid or )

        :param data_file: Name of data source file
        :type data_file: string

        :param grid_file: Name of grid source file
        :type grid_file: string

        :param varname: Name of the variable in the data source file
        :type varname: string

        :param fill_value: the fill value used for undefined data

        :param location: location on the grid -- possible values
                         depend on the grid type
        :type location: str

        :param attributes: attributes associated with the Variable
                           (analogous to netcdf variable attributes)
        :type attributes: dict
        '''

#         if any([grid is None, data is None]):
#             raise ValueError("Grid and Data must be defined")
#         if not hasattr(data, 'shape'):
#             if grid.infer_location is None:
#                 raise ValueError('Data must be able to fit to the grid')

        self.grid = grid
        self.depth = depth
        self.name = self._units = self._time = self._data = None

        self.name = name
        self.units = units
        self.location = location
        self.data = data
        self.time = (time if time is not None else
                     self._default_component_types['time'].constant_time())
        self.data_file = data_file
        # the "main" filename for a Varibale should be the grid data.
        self.filename = data_file
        self.grid_file = grid_file
        self.varname = varname
        self._result_memo = collections.OrderedDict()
        self.fill_value = fill_value

        self.attributes = {} if attributes is None else attributes
        # if the data is a netcdf variable, pull the attributes from there
        try:
            for attr in self.data.ncattrs():
                self.attributes[attr] = data.getncattr(attr)
        except AttributeError:  # must not be a netcdf variable
            pass                # so just use what was passed in.
        self.surface_boundary_condition = surface_boundary_condition
        self.bottom_boundary_condition = bottom_boundary_condition

#         for k in kwargs:
#             setattr(self, k, kwargs[k])

[docs]    @classmethod
    def from_netCDF(cls,
                    filename=None,
                    varname=None,
                    grid_topology=None,
                    name=None,
                    units=None,
                    time=None,
                    time_origin=None,
                    grid=None,
                    depth=None,
                    dataset=None,
                    data_file=None,
                    grid_file=None,
                    location=None,
                    load_all=False,  # Do we need this? I think not --- maybe a method to fully load later if wanted.
                    fill_value=0,
                    **kwargs
                    ):
        '''
        Allows one-function creation of a Variable from a file.

        :param filename: Default data source. Has lowest priority.
                         If dataset, grid_file, or data_file are provided,
                         this function uses them first
        :type filename: string

        :param varname: Explicit name of the data in the data source file.
                        Equivalent to the key used to look the item up
                        directly eg 'ds["lon_u"]' for a netCDF4 Dataset.
        :type varname: string

        :param grid_topology: Description of the relationship between grid
                              attributes and variable names.
        :type grid_topology: {string : string, ...}

        :param name: Name of this object
        :type name: string

        :param units: string such as 'm/s'
        :type units: string

        :param time: Time axis of the data. May be a constructed ``gridded.Time``
                     object, or collection of datetime.datetime objects
        :type time: [] of datetime.datetime, netCDF4 Variable, or Time object

        :param data: Underlying data object. May be any array-like,
                     including netCDF4 Variable, etc
        :type data: netCDF4.Variable or numpy.array

        :param grid: Grid that the data corresponds to
        :type grid: pysgrid or pyugrid

        :param location: The feature where the data aligns with the grid.
        :type location: string

        :param depth: Depth axis object from ``gridded.depth``
        :type depth: Depth, S_Depth or L_Depth

        :param dataset: Instance of open netCDF4.Dataset
        :type dataset: netCDF4.Dataset

        :param data_file: Name of data source file, if data and grid files are separate
        :type data_file: string

        :param grid_file: Name of grid source file, if data and grid files are separate
        :type grid_file: string
        '''

        Grid = cls._default_component_types['grid']
        Time = cls._default_component_types['time']
        Depth = cls._default_component_types['depth']
        if filename is not None:
            data_file = filename
            grid_file = filename

        ds = None
        dg = None
        if dataset is None:
            if grid_file == data_file:
                ds = dg = get_dataset(grid_file)
            else:
                ds = get_dataset(data_file)
                dg = get_dataset(grid_file)
        else:
            if grid_file is not None:
                dg = get_dataset(grid_file)
            else:
                dg = dataset
            ds = dataset
        if data_file is None:
            data_file = os.path.split(ds.filepath())[-1]

        if grid is None:
            grid = Grid.from_netCDF(grid_file,
                                    dataset=dg,
                                    grid_topology=grid_topology)
        if varname is None:
            varname = cls._gen_varname(data_file,
                                       dataset=ds)
            if varname is None:
                raise NameError('Default current names are not in the data file, '
                                'must supply variable name')
        data = ds.variables[varname]
        if name is None:
            name = cls.__name__ + str(cls._def_count)
            cls._def_count += 1
        if units is None:
            try:
                units = data.units
            except AttributeError:
                units = None
        if time is None:
            time = Time.from_netCDF(filename=data_file,
                                    dataset=ds,
                                    datavar=data)
            if time_origin is not None:
                time = Time(data=time.data,
                            filename=time.filename,
                            varname=time.varname,
                            origin=time_origin)
        if depth is None:
            if (isinstance(grid, (Grid_S, Grid_R)) and len(data.shape) == 4 or
                    isinstance(grid, Grid_U) and len(data.shape) == 3):
                depth = Depth.from_netCDF(grid_file=dg,
                                          dataset=ds,
                                          **kwargs
                                          )
        if location is None:
            if hasattr(data, 'location'):
                location = data.location
#             if len(data.shape) == 4 or (len(data.shape) == 3 and time is None):
#                 from gnome.environment.environment_objects import S_Depth
#                 depth = S_Depth.from_netCDF(grid=grid,
#                                             depth=1,
#                                             data_file=data_file,
#                                             grid_file=grid_file,
#                                             **kwargs)
        if load_all:
            data = data[:]
        return cls(name=name,
                   units=units,
                   time=time,
                   data=data,
                   grid=grid,
                   depth=depth,
                   grid_file=grid_file,
                   data_file=data_file,
                   fill_value=fill_value,
                   location=location,
                   varname=varname,
                   **kwargs)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return ('{0.__class__.__module__}.{0.__class__.__name__}('
                'name="{0.name}", '
                'time="{0.time}", '
                'units="{0.units}", '
                'data="{0.data}", '
                ')').format(self)

[docs]    @classmethod
    def constant(cls, value):
        #Sets a Variable up to represent a constant scalar field. The result
        #will return a constant value for all times and places.
        Grid = Grid_S
        Time = cls._default_component_types['time']
        _data = np.full((3,3), value)
        _node_lon = np.array(([-360, 0, 360], [-360, 0, 360], [-360, 0, 360]))
        _node_lat = np.array(([-89.95, -89.95, -89.95], [0, 0, 0], [89.95, 89.95, 89.95]))
        _grid = Grid(node_lon=_node_lon, node_lat=_node_lat)
        _time = Time.constant_time()
        return cls(grid=_grid, time=_time, data=_data, fill_value=value)

    @property
    def location(self):
        if self._location is None and self.data is not None and hasattr(self.data, 'location'):
            return self.data.location
        else:
            return self._location

    @location.setter
    def location(self, location):
        # Fixme: perhaps we need Variable subclasses,
        #        to distingish between variable types.
        if location not in VALID_LOCATIONS:
            raise ValueError("Invalid location: {}, must be one of: {}".format(location, VALID_LOCATIONS))
        self._location = location

    @property
    def info(self):
        """
        Information about the variable object
        This could be filled out more
        """
        try:
            std_name = self.attributes['standard_name']
        except KeyError:
            std_name = None
        msg = """
              Variable:
                filename: {0.filename}
                varname: {0.varname}
                standard name: {1}
                units: {0.units}
                grid: {0.grid}
                data shape: {0.data.shape}
              """.format(self, std_name)
        return dedent(msg)

    @property
    def time(self):
        return self._time

    @time.setter
    def time(self, t):
        Time_class = self.__class__._default_component_types['time']
        if t is None:
            self._time = None
            return
        if self.data is not None and len(t) != self.data.shape[0] and len(t) > 1:
            raise ValueError("Data/time interval mismatch")
        if isinstance(t, Time_class):
            self._time = t
        elif isinstance(t, collections.Iterable) or isinstance(t, nc4.Variable):
            self._time = Time_class(t)
        else:
            raise ValueError("Time must be set with an iterable container or netCDF variable")

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, d):
        d = asarraylike(d)
        # Fixme: maybe all this checking should be done when it gets added to the Dataset??
        if self.time is not None and len(d) != len(self.time):
            raise ValueError("Data/time interval mismatch")
        ## fixme: we should check Depth, too.
        # if self.grid is not None and self.grid.infer_location(d) is None:
        #     raise ValueError("Data/grid shape mismatch. Data shape is {0}, Grid shape is {1}".format(d.shape, self.grid.node_lon.shape))
        if self.grid is not None:  # if there is not a grid, we can't check this
            if self.location is None:  # not set, let's try to figure it out
                self.location = self.grid.infer_location(d)
            if self.location is None:
                raise ValueError("Data/grid shape mismatch: Data shape is {0}, "
                                 "Grid shape is {1}".format(d.shape, self.grid.node_lon.shape))
        self._data = d


    @property
    def units(self):
        '''
        Units of underlying data

        :rtype: string
        '''
        return self._units

    @units.setter
    def units(self, unit):
#         if unit is not None:
#             if not unit_conversion.is_supported(unit):
#                 raise ValueError('Units of {0} are not supported'.format(unit))
        self._units = unit

    @property
    def grid_shape(self):
        if hasattr(self.grid, 'shape'):
            return self.grid.shape
        else:
            return self.grid.node_lon.shape

    @property
    def data_shape(self):
        return self.data.shape

    @property
    def is_data_on_nodes(self):
        return self.grid.infer_location(self._data) == 'node'

    def _get_hash(self, points, time):
        """
        Returns a SHA1 hash of the array of points passed in
        """
        return (hashlib.sha1(points.tobytes()).hexdigest(),
                hashlib.sha1(str(time).encode('utf-8')).hexdigest())

    def _memoize_result(self, points, time, result, D, _copy=False, _hash=None):
        if _copy:
            result = result.copy()
        result.setflags(write=False)
        if _hash is None:
            _hash = self._get_hash(points, time)
        if D is not None and len(D) > 4:
            D.popitem(last=False)
        D[_hash] = result
        D[_hash].setflags(write=False)

    def _get_memoed(self, points, time, D, _copy=False, _hash=None):
        if _hash is None:
            _hash = self._get_hash(points, time)
        if (D is not None and _hash in D):
            return D[_hash].copy() if _copy else D[_hash]
        else:
            return None

[docs]    def center_values(self, time, units=None, extrapolate=False):
        """
        interpolate data to the center of the cells

        :param time: the time to interpolate at

        **Warning:** NOT COMPLETE

        NOTE: what if this data is already on the cell centers?
        """
        raise NotImplementedError("center_values is not finished")

        if not extrapolate:
            self.time.valid_time(time)
        if len(self.time) == 1:
            if len(self.data.shape) == 2:
                if isinstance(self.grid, Grid_S):
                    # curv grid
                    value = self.data[0:1:-2, 1:-2]
                else:
                    value = self.data
        else:
            centers = self.grid.get_center_points()
            value = self.at(centers, time, units)
        return value

    @property
    def dimension_ordering(self):
        '''
        Returns a list that describes the dimensions of the property's data.
        If a dimension_ordering is assigned, it will continue to use that.
        If no dimension_ordering is set, then a default ordering will be generated
        based on the object properties and data shape.

        For example, if the data has 4 dimensions and is represented by a
        Grid_S (structured grid), and the Variable has a depth and time assigned,
        then the assumed ordering is ['time','depth','lon','lat']

        If the data has 3 dimensions, self.grid is a Grid_S, and self.time is None,
        then the ordering is ['depth','lon','lat']
        If the data has 3 dimensions, self.grid is a Grid_U, the ordering is
        ['time','depth','ele']
        '''
        if not hasattr(self, '_order'):
            self._order = None
        if self._order is not None:
            return self._order
        else:
            if isinstance(self.grid, (Grid_S, Grid_R)):
                order = ['time', 'depth', 'lon', 'lat']
            else:
                order = ['time', 'depth', 'ele']
            ndim = len(self.data.shape)
            diff = len(order) - ndim
            if diff == 0:
                return order
            elif diff == 1:
                if self.time is not None:
                    del order[1]
                elif self.depth is not None:
                    del order[0]
                else:
                    raise ValueError('Generated ordering too short to fit data. '
                                     'Time or depth must not be None')
            elif diff == 2:
                order = order[2:]
            else:
                raise ValueError('Too many/too few dimensions ndim={0}'.format(ndim))
            return order

    @dimension_ordering.setter
    def dimension_ordering(self, order):
        self._order = order

#     @profile
[docs]    def at(self,
           points=None,
           time=None,
           units=None,
           extrapolate=False,
           lons=None,
           lats=None,
           unmask=False,
           _hash=None,
           _mem=True,
           **kwargs):
        """
        Find the value of the property at positions P at time T

        :param points: Cartesian coordinates to be queried (P). Lon, Lat required, Depth (Z) is optional
            Coordinates must be organized as a 2D array or list, one coordinate per row or list element.
            ``[[Lon1, Lat1, Z1],``
            ``[Lon2, Lat2, Z2],``
            ``[Lon3, Lat3, Z3],``
            ``...]``
            Failure to provide point data in this format may cause unexpected behavior
            If you wish to provide point data using separate longitude and latitude arrays,
            use the `lons=` and `lats=` kwargs. 
            Note that if your Z is positive-up, self.depth.positive_down should be
            set to False
        :type points: Nx3 array of double

        :param time: The time at which to query these points (T)
        :type time: datetime.datetime object

        :param units: units the values will be returned in (or converted to)
        :type units: string such as ('m/s', 'knots', etc)

        :param extrapolate: if True, extrapolation will be supported
        :type extrapolate: boolean (default False)

        :param unmask: if True and return array is a masked array, returns filled array
        :type unmask: boolean (default False)
        
        :param surface_boundary_condition: specifies how to evaluate points above the depth interval
        :type surface_boundary_condition: string ('extrapolate' or 'mask', default 'extrapolate')
        
        :param bottom_boundary_condition: specifies how to evaluate points below the depth interval
        :type bottom_boundary_condition: string ('extrapolate' or 'mask', default 'extrapolate')

        :param lons: 1D iterable of longitude values. This is ignored if points is provided
        :type lons: iterable

        :param lats 1D iterable of latitude values. This is ignored if points is provided
        :type lons: iterable

        :return: returns a Nx1 array of interpolated values
        :rtype: double
        """
        if points is None and (lons is None or lats is None):
            raise ValueError("Must provide either points or separate lons and lats")
        if points is None:
            points = np.column_stack((np.array(lons), np.array(lats)))
        pts = _reorganize_spatial_data(points)

        if _hash is None:
            _hash = self._get_hash(pts, time)

        if _mem:
            res = self._get_memoed(pts, time, self._result_memo, _hash=_hash)
            if res is not None:
                return res

        order = self.dimension_ordering
        if order[0] == 'time':
            value = self._time_interp(pts, time, extrapolate, _mem=_mem, _hash=_hash, **kwargs)
        elif order[0] == 'depth':
            value = self._depth_interp(pts, time, extrapolate, _mem=_mem, _hash=_hash, **kwargs)
        else:
            value = self._xy_interp(pts, time, extrapolate, _mem=_mem, _hash=_hash, **kwargs)

        value = value.reshape(-1,1)

        if isinstance(value, np.ma.MaskedArray):
            np.ma.set_fill_value(value, self.fill_value)
        if unmask:
            value = np.ma.filled(value)

        if _mem:
            self._memoize_result(pts, time, value, self._result_memo, _hash=_hash)
        return value

    interpolate = at #common request

    def _xy_interp(self, points, time, extrapolate, slices=(), **kwargs):
        '''
        Uses the py(s/u)grid interpolation to determine the values at the points, and returns it
        :param points: Coordinates to be queried (3D)
        :type points: Nx3 array of double

        :param time: Time of the query
        :type time: datetime.datetime object

        :param extrapolate: Turns extrapolation on or off
        :type extrapolate: boolean

        :param slices: describes how the data needs to be sliced to reach the appropriate dimension
        :type slices: tuple of integers or slice objects
        '''
        _hash = kwargs['_hash'] if '_hash' in kwargs else None
        units = kwargs['units'] if 'units' in kwargs else None

        value = self.grid.interpolate_var_to_points(points[:, 0:2],
                                                    self.data,
                                                    location=self.location,
                                                    _hash=self._get_hash(points[:, 0:2],
                                                                         time),
                                                    slices=slices,
                                                    _memo=True)
        return value

    def _time_interp(self, points, time, extrapolate, slices=(), **kwargs):
        '''
        Uses the Time object to interpolate the result of the next level of interpolation, as specified
        by the dimension_ordering attribute.
        :param points: Coordinates to be queried (3D)
        :type points: Nx3 array of double

        :param time: Time of the query
        :type time: datetime.datetime object

        :param extrapolate: Turns extrapolation on or off
        :type extrapolate: boolean

        :param slices: describes how the data needs to be sliced to reach the appropriate dimension
        :type slices: tuple of integers or slice objects
        '''
        order = self.dimension_ordering
        idx = order.index('time')
        if order[idx + 1] != 'depth':
            val_func = self._xy_interp
        else:
            val_func = self._depth_interp

        if time == self.time.min_time or (extrapolate and time < self.time.min_time):
            # min or before
            return val_func(points, time, extrapolate, slices=(0,), ** kwargs)
        elif time == self.time.max_time or (extrapolate and time > self.time.max_time):
            return val_func(points, time, extrapolate, slices=(-1,), **kwargs)
        else:
            ind = self.time.index_of(time)
            s1 = slices + (ind,)
            s0 = slices + (ind - 1,)
            v0 = val_func(points, time, extrapolate, slices=s0, **kwargs)
            v1 = val_func(points, time, extrapolate, slices=s1, **kwargs)
            alphas = self.time.interp_alpha(time, extrapolate)

            return v0 + (v1 - v0) * alphas


    def _depth_interp(self, points, time, extrapolate, slices=(), **kwargs):
        '''
        Uses the Depth object to interpolate the result of the next level of interpolation, as specified
        by the dimension_ordering attribute.
        :param points: Coordinates to be queried (3D)
        :type points: Nx3 array of double

        :param time: Time of the query
        :type time: datetime.datetime object

        :param extrapolate: Turns extrapolation on or off
        :type extrapolate: boolean

        :param slices: describes how the data needs to be sliced to reach the appropriate dimension
        :type slices: tuple of integers or slice objects
        '''
        surface_boundary_condition = kwargs.pop('surface_boundary_condition', self.surface_boundary_condition)
        bottom_boundary_condition = kwargs.pop('bottom_boundary_condition', self.bottom_boundary_condition)
        order = self.dimension_ordering
        dim_idx = order.index('depth')
        if order[dim_idx + 1] != 'time':
            val_func = self._xy_interp
        else:
            val_func = self._time_interp
        d_indices, d_alphas = self.depth.interpolation_alphas(points,
                                                              time,
                                                              self.data.shape[1:],
                                                              extrapolate=extrapolate,
                                                              surface_boundary_condition=surface_boundary_condition,
                                                              bottom_boundary_condition=bottom_boundary_condition,
                                                              **kwargs)

        # Check the surface index against the data shape to determine if we are using rho or w coordinates
        surface_index = self.depth.surface_index
        if self.depth.surface_index == self.data.shape[dim_idx]:
            surface_index -= 1

        if isinstance(d_indices, np.ma.MaskedArray) and np.all(d_indices.mask):
            # all particles ended up masked
            rtv = np.empty((points.shape[0],), dtype=np.float64) * np.nan
            rtv =  np.ma.MaskedArray(data=rtv, mask=np.isnan(rtv))
            return rtv
        
        #the two cases may be optimizations that are not worth the trouble
        #if problems continue to arise, get rid of them
        elif np.all(d_indices == -1) and not np.any(d_indices.mask):
            return val_func(points, time, extrapolate, slices=slices + (0,), **kwargs)
        elif np.all(d_indices == self.data.shape[dim_idx] - 1) and not np.any(d_indices.mask):
            return val_func(points, time, extrapolate, slices=slices + (self.data.shape[dim_idx] - 1,), **kwargs)
        else:
            msk = np.isnan(d_indices) if not np.ma.is_masked(d_indices) else d_indices.mask
            values = np.ma.MaskedArray(data=np.empty((points.shape[0], )) * np.nan, mask=msk)
            # Points are mixed within the grid. Some may be above the surface or under the ground
            uniq_idx = np.unique(d_indices)
            if np.ma.masked in uniq_idx: #the [0:-1] is required to skip all masked indices
                uniq_idx = uniq_idx[0:-1]
            for idx in uniq_idx: 
                lay_idxs = np.where(d_indices == idx)[0]
                if idx == self.data.shape[dim_idx] - 1:
                    #special case, index == depth dim length, so only v0 is valid
                    v0 = val_func(points[lay_idxs], time, extrapolate, slices=slices + (idx,), **kwargs)
                    values.put(lay_idxs, v0)
                    continue
                    
                v1 = val_func(points[lay_idxs], time, extrapolate, slices=slices + (idx+1,), **kwargs)
                v0 = val_func(points[lay_idxs], time, extrapolate, slices=slices + (idx,), **kwargs)
                sub_vals = v0 + (v1 - v0) * d_alphas[lay_idxs]
                #partially fill the values array for this layer
                values.put(lay_idxs, sub_vals)
        
        return values

    def _transect(self, times, depths, points):
        '''
        returns a transect of the Variable at given values.
        This function is not close to finished.
        '''
        output_shape = (len(times), len(depths), len(points))
        outarr = np.array(shape=output_shape)
        for t in range(0,len(times)):
            for d in range(0, len(depths)):
                pts = np.array(shape=(len(points),3))
                pts[:,0:2] = points
                pts[:,2] = depths[d]
                layer = d

    @classmethod
    def _gen_varname(cls,
                     filename=None,
                     dataset=None,
                     names_list=None,
                     std_names_list=None):
        """
        Function to find the default variable names if they are not provided. This
        function does nothing without defined default_names or cf_names class
        attributes

        :param filename: Name of file that will be searched for variables
        :type filename: string

        :param dataset: Existing instance of a netCDF4.Dataset
        :type dataset: netCDF.Dataset

        :return: name of first netCDF4.Variable that matches
        """
        df = None
        if dataset is not None:
            df = dataset
        else:
            df = get_dataset(filename)
        if names_list is None:
            names_list = cls.default_names
        if std_names_list is None:
            std_names_list = cls.cf_names
        for n in names_list:
            if n in df.variables.keys():
                return n
        for n in std_names_list:
            for var in df.variables.values():
                if (hasattr(var, 'standard_name') and var.standard_name == n or
                        hasattr(var, 'long_name') and var.long_name == n):
                    return var.name
        raise ValueError("Default names not found.")


[docs]class VectorVariable(object):

    default_names = {}
    cf_names = {}
    comp_order = []

    _def_count = 0

    ''''
    These are the classes which are used when internal components are created
    by default, such as automatically from a file or other python data structure.
    Subclasses of this type may override this to use different classes for it's
    components
    '''
    _default_component_types = {'time': Time,
                                'grid': Grid,
                                'depth': Depth,
                                'variable': Variable}
    def __init__(self,
                 name=None,
                 units=None,
                 time=None,
                 variables=None,
                 grid=None,
                 depth=None,
                 grid_file=None,
                 data_file=None,
                 dataset=None,
                 varnames=None,
                 **kwargs):

        super(VectorVariable, self).__init__()
        self.name = self._units = self._time = self._variables = None

        self.name = name

        if all([isinstance(v, Variable) for v in variables]):
            if time is not None and not isinstance(time, Time):
                time = Time(time)
            units = variables[0].units if units is None else units
            time = variables[0].time if time is None else time
        if units is None:
            units = variables[0].units
        self._units = units
        if variables is None or len(variables) < 2:
            raise ValueError('Variables must be an array-like of 2 or more Variable objects')
        self.variables = variables
        self._time = time
        unused_args = kwargs.keys() if kwargs is not None else None
        if len(unused_args) > 0:
            kwargs = {}
        if isinstance(self.variables[0], Variable):
            self.grid = self.variables[0].grid if grid is None else grid
            self.depth = self.variables[0].depth if depth is None else depth
            self.grid_file = self.variables[0].grid_file if grid_file is None else grid_file
            self.data_file = self.variables[0].data_file if data_file is None else data_file

        self._result_memo = collections.OrderedDict()
        for i, comp in enumerate(self.__class__.comp_order):
            setattr(self, comp, self.variables[i])

[docs]    @classmethod
    def from_netCDF(cls,
                    filename=None,
                    varnames=None,
                    grid_topology=None,
                    name=None,
                    units=None,
                    time=None,
                    time_origin=None,
                    grid=None,
                    depth=None,
                    data_file=None,
                    grid_file=None,
                    dataset=None,
                    load_all=False,
                    variables=None,
                    **kwargs
                    ):
        '''
        Allows one-function creation of a VectorVariable from a file.

        :param filename: Default data source. Parameters below take precedence
        :type filename: string

        :param varnames: Names of the variables in the data source file
        :type varnames: [] of string

        :param grid_topology: Description of the relationship between grid attributes and variable names.
        :type grid_topology: {string : string, ...}

        :param name: Name of property
        :type name: string

        :param units: Units
        :type units: string

        :param time: Time axis of the data
        :type time: [] of datetime.datetime, netCDF4 Variable, or Time object

        :param data: Underlying data source
        :type data: netCDF4.Variable or numpy.array

        :param grid: Grid that the data corresponds with
        :type grid: pysgrid or pyugrid

        :param dataset: Instance of open Dataset
        :type dataset: netCDF4.Dataset

        :param data_file: Name of data source file
        :type data_file: string

        :param grid_file: Name of grid source file
        :type grid_file: string
        '''
        Grid = cls._default_component_types['grid']
        Time = cls._default_component_types['time']
        Variable = cls._default_component_types['variable']
        Depth = cls._default_component_types['depth']
        if filename is not None:
            data_file = filename
            grid_file = filename

        ds = None
        dg = None
        if dataset is None:
            if grid_file == data_file:
                ds = dg = get_dataset(grid_file)
            else:
                ds = get_dataset(data_file)
                dg = get_dataset(grid_file)
        else:
            if grid_file is not None:
                dg = get_dataset(grid_file)
            else:
                dg = dataset
            ds = dataset

        if grid is None:
            grid = Grid.from_netCDF(grid_file,
                                    dataset=dg,
                                    grid_topology=grid_topology)
        if varnames is None:
            varnames = cls._gen_varnames(data_file,
                                         dataset=ds)
            if all([v is None for v in varnames]):
                raise ValueError('No compatible variable names found!')
        if name is None:
            name = cls.__name__ + str(cls._def_count)
            cls._def_count += 1
        data = ds[varnames[0]]
        if time is None:
            time = Time.from_netCDF(filename=data_file,
                                    dataset=ds,
                                    datavar=data)
            if time_origin is not None:
                time = Time(data=time.data, filename=data_file, varname=time.varname, origin=time_origin)
        if depth is None:
            if (isinstance(grid, (Grid_S, Grid_R)) and len(data.shape) == 4 or
                    isinstance(grid, Grid_U) and len(data.shape) == 3):
                depth = Depth.from_netCDF(grid_file,
                                          dataset=dg,
                                          **kwargs
                                          )

#         if depth is None:
#             if (isinstance(grid, Grid_S) and len(data.shape) == 4 or
#                         (len(data.shape) == 3 and time is None) or
#                     (isinstance(grid, Grid_U) and len(data.shape) == 3 or
#                         (len(data.shape) == 2 and time is None))):
#                 from gnome.environment.environment_objects import S_Depth
#                 depth = S_Depth.from_netCDF(grid=grid,
#                                             depth=1,
#                                             data_file=data_file,
#                                             grid_file=grid_file,
#                                             **kwargs)
        if variables is None:
            variables = []
            for vn in varnames:
                if vn is not None:
                    # Fixme: We're calling from_netCDF from itself ?!?!?
                    variables.append(Variable.from_netCDF(filename=filename,
                                                          varname=vn,
                                                          grid_topology=grid_topology,
                                                          units=units,
                                                          time=time,
                                                          grid=grid,
                                                          depth=depth,
                                                          data_file=data_file,
                                                          grid_file=grid_file,
                                                          dataset=ds,
                                                          load_all=load_all,
                                                          location=None,
                                                          **kwargs))
        if units is None:
            units = [v.units for v in variables]
            if all(u == units[0] for u in units):
                units = units[0]
        return cls(name=name,
                   filename=filename,
                   varnames=varnames,
                   grid_topology=grid_topology,
                   units=units,
                   time=time,
                   grid=grid,
                   depth=depth,
                   variables=variables,
                   data_file=data_file,
                   grid_file=grid_file,
                   dataset=ds,
                   load_all=load_all,
                   location=None,
                   **kwargs)

    @classmethod
    def _gen_varnames(cls,
                      filename=None,
                      dataset=None,
                      names_dict=None,
                      std_names_dict=None):
        """
        Function to find the default variable names if they are not provided.

        :param filename: Name of file that will be searched for variables
        :type filename: string

        :param dataset: Existing instance of a netCDF4.Dataset
        :type dataset: netCDF.Dataset

        :return: dict of component to name mapping (eg {'u': 'water_u', 'v': 'water_v', etc})
        """
        df = None
        if dataset is not None:
            df = dataset
        else:
            df = get_dataset(filename)
        if names_dict is None:
            names_dict = cls.default_names
        if std_names_dict is None:
            std_names_dict = cls.cf_names
        rd = {}
        for k in cls.comp_order:
            v = names_dict[k] if k in names_dict else []
            for n in v:
                if n in df.variables.keys():
                    rd[k] = n
                    continue
            if k not in rd.keys():
                rd[k] = None
        for k in cls.comp_order:
            v = std_names_dict[k] if k in std_names_dict else []
            if rd[k] is None:
                for n in v:
                    for var in df.variables.values():
                        if (hasattr(var, 'standard_name') and var.standard_name == n or
                                hasattr(var, 'long_name') and var.long_name == n):
                            rd[k] = var.name
                            break
        return collections.namedtuple('varnames', cls.comp_order)(**rd)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return ('{0.__class__.__module__}.{0.__class__.__name__}('
                'name="{0.name}", '
                'time="{0.time}", '
                'units="{0.units}", '
                'variables="{0.variables}", '
                'grid="{0.grid}", '
                ')').format(self)

    @property
    def location(self):
        return [v.location for v in self.variables]

    locations = location

    @property
    def is_data_on_nodes(self):
        return self.grid.infer_location(self.variables[0].data) == 'node'

    @property
    def time(self):
        return self._time

    @time.setter
    def time(self, t):
        Time_class = self.__class__._default_component_types['time']
        if self.variables is not None:
            for v in self.variables:
                try:
                    v.time = t
                except ValueError as e:
                    raise ValueError('''Time was not compatible with variables.
                    Set variables attribute to None to allow changing other attributes
                    Original error: {0}'''.format(str(e)))
        if isinstance(t, Time_class):
            self._time = t
        elif isinstance(t, collections.Iterable) or isinstance(t, nc4.Variable):
            self._time = Time_class(t)
        else:
            raise ValueError("Time must be set with an iterable container or netCDF variable")

    @property
    def units(self):
        '''
        Units of underlying data

        :rtype: string
        '''
        if hasattr(self._units, '__iter__'):
            if len(set(self._units)) > 1:
                return self._units
            else:
                return self._units[0]
        else:
            return self._units

    @units.setter
    def units(self, unit):
        self._units = unit
        if self.variables is not None:
            for v in self.variables:
                v.units = unit

    @property
    def varnames(self):
        '''
        Names of underlying variables

        :rtype: [] of strings
        '''
        return [v.varname if hasattr(v, 'varname') else v.name for v in self.variables]

    @property
    def data_shape(self):
        if self.variables is not None:
            return self.variables[0].data.shape
        else:
            return None

    def _get_hash(self, points, time):
        """
        Returns a SHA1 hash of the array of points passed in
        """
        return (hashlib.sha1(points.tobytes()).hexdigest(),
                hashlib.sha1(str(time).encode('utf-8')).hexdigest())

    def _memoize_result(self, points, time, result, D, _copy=True, _hash=None):
        if _copy:
            result = result.copy()
        result.setflags(write=False)
        if _hash is None:
            _hash = self._get_hash(points, time)
        if D is not None and len(D) > 8:
            D.popitem(last=False)
        D[_hash] = result

    def _get_memoed(self, points, time, D, _copy=True, _hash=None):
        if _hash is None:
            _hash = self._get_hash(points, time)
        if (D is not None and _hash in D):
            return D[_hash].copy() if _copy else D[_hash]
        else:
            return None

[docs]    def at(self,
           points=None,
           time=None,
           units=None,
           extrapolate=False,
           lons=None,
           lats=None,
           unmask=False,
           _hash=None,
           _mem=True,
           **kwargs):
        """
        Find the value of the property at positions P at time T

        :param points: Cartesian coordinates to be queried (P). Lon, Lat required, Depth (Z) is optional
                       Coordinates must be organized as a 2D array or list, one coordinate per row or list element.
                       ::
                          [[Lon1, Lat1, Z1],
                           [Lon2, Lat2, Z2],
                           [Lon3, Lat3, Z3],
                           ...]

                       Failure to provide point data in this format may cause unexpected behavior
                       If you wish to provide point data using separate longitude and latitude arrays,
                       use the `lons=` and `lats=` kwargs.

                       Note that if your Z is positive-up, self.depth.positive_down should be
                       set to False

        :type points: Nx3 array of double

        :param time: The time at which to query these points (T)
        :type time: `datetime.datetime` object

        :param units: units the values will be returned in (or converted to)
        :type units: string such as ('m/s', 'knots', etc)

        :param extrapolate: if True, extrapolation will be supported
        :type extrapolate: boolean (default False)

        :param unmask: if True and return array is a masked array, returns filled array
        :type unmask: boolean (default False)

        :param zero_ref: Specifies whether the zero datum moves with zeta or not. Only
                         applicable if depth dimension is present with full sigma layers
        :type zero_ref: string 'absolute' or 'relative'

        :param lons: 1D iterable of longitude values. This is ignored if points is provided
        :type lons: iterable

        :param lats 1D iterable of latitude values. This is ignored if points is provided
        :type lons: iterable

        :return: NxM array of interpolated values N = len(points) M = len(self.variables)
        :rtype: np.array or np.ma.MaskedArray
        """
        if points is None and (lons is None or lats is None):
            raise ValueError("Must provide either points or separate lons and lats")
        if points is None:
            points = np.column_stack((np.array(lons), np.array(lats)))
        pts = _reorganize_spatial_data(points)
        if _hash is None:
            _hash = self._get_hash(pts, time)

        if _mem:
            res = self._get_memoed(pts, time, self._result_memo, _hash=_hash)
            if res is not None:
                return res

        value = np.ma.column_stack([var.at(points=pts,
                                        time=time,
                                        units=units,
                                        extrapolate=extrapolate,
                                        unmask=unmask,
                                        _mem=_mem,
                                        _hash=_hash,
                                        **kwargs) for var in self.variables])

        if _mem:
            self._memoize_result(pts, time, value, self._result_memo, _hash=_hash)

        return value

    @classmethod
    def _get_shared_vars(cls, *sh_args):
        default_shared = ['dataset', 'data_file', 'grid_file', 'grid']
        if len(sh_args) != 0:
            shared = sh_args
        else:
            shared = default_shared

        def getvars(func):
            @wraps(func)
            def wrapper(*args, **kws):
                def _mod(n):
                    k = kws
                    s = shared
                    return (n in s) and ((n not in k) or (n in k and k[n] is None))

                if 'filename' in kws and kws['filename'] is not None:
                    kws['data_file'] = kws['grid_file'] = kws['filename']
                ds = dg =  None
                if _mod('dataset'):
                    if 'grid_file' in kws and 'data_file' in kws:
                        if kws['grid_file'] == kws['data_file']:
                            ds = dg = get_dataset(kws['grid_file'])
                        else:
                            ds = get_dataset(kws['data_file'])
                            dg = get_dataset(kws['grid_file'])
                    kws['dataset'] = ds
                else:
                    if 'grid_file' in kws and kws['grid_file'] is not None:
                        dg = get_dataset(kws['grid_file'])
                    else:
                        dg = kws['dataset']
                    ds = kws['dataset']
                if _mod('grid'):
                    gt = kws.get('grid_topology', None)
                    kws['grid'] = Grid.from_netCDF(kws['filename'], dataset=dg, grid_topology=gt)
                if kws.get('varnames', None) is None:
                    varnames = cls._gen_varnames(kws['data_file'],
                                                 dataset=ds)
#                 if _mod('time'):
#                     time = Time.from_netCDF(filename=kws['data_file'],
#                                             dataset=ds,
#                                             varname=data)
#                     kws['time'] = time
                return func(*args, **kws)
            return wrapper
        return getvars

[docs]    def save(self, filepath, format='netcdf4'):
        """
        Save the variable object to a netcdf file.

        :param filepath: path to file you want o save to. or a writable
                         netCDF4 Dataset An existing one
                         If a path, an existing file will be clobbered.
        :type filepath: string

        Follows the convention established by the netcdf UGRID working group:

        http://ugrid-conventions.github.io/ugrid-conventions

        """
        format_options = ('netcdf3', 'netcdf4')
        if format not in format_options:
            raise ValueError("format: {} not supported. Options are: {}".format(format, format_options))