Source code for gridded.variable

from textwrap import dedent
import collections
import collections.abc  # collections.Iterable was removed in Python 3.10
import hashlib
from functools import wraps
import os
import numpy as np
import netCDF4 as nc4

from gridded.utilities import (get_dataset,
                               _reorganize_spatial_data,
                               _align_results_to_spatial_data,
                               asarraylike,
                               search_dataset_for_variables_by_varname)
from gridded import VALID_LOCATIONS
from gridded.grids import Grid, Grid_U, Grid_S, Grid_R
from gridded.depth import Depth
from gridded.time import Time

import logging

log = logging.getLogger(__name__)


class Variable(object):
    """
    Variable object: represents a field of values associated with the grid.

    Abstractly, it is usually a scalar physical property such as temperature
    or salinity that varies over the domain of the model.

    This more or less maps to a variable in a netcdf file, but it does not
    have to come from a netcdf file; this provides an abstraction where the
    user can access the values in world coordinates, interpolated from the
    grid.

    It holds a reference to its own grid object, and its data.
    """
    default_names = []
    cf_names = []
    _def_count = 0

    _default_component_types = {'time': Time,
                                'grid': Grid,
                                'depth': Depth}

    def __init__(self,
                 name=None,
                 units=None,
                 time=None,
                 data=None,
                 grid=None,
                 depth=None,
                 data_file=None,
                 grid_file=None,
                 varname=None,
                 fill_value=0,
                 location=None,
                 attributes=None,
                 surface_boundary_condition='extrapolate',
                 bottom_boundary_condition='mask',
                 **kwargs):
        '''
        This class represents a value defined on a grid

        :param name: Name
        :type name: string

        :param units: Units
        :type units: string

        :param time: Time axis of the data
        :type time: list of `datetime.datetime`, netCDF4 Variable, or Time object

        :param data: Underlying data source
        :type data: array-like object such as netCDF4.Variable or numpy.ndarray

        :param grid: Grid that the data corresponds with
        :type grid: Grid object (pysgrid or pyugrid or ...)

        :param data_file: Name of data source file
        :type data_file: string

        :param grid_file: Name of grid source file
        :type grid_file: string

        :param varname: Name of the variable in the data source file
        :type varname: string

        :param fill_value: the fill value used for undefined data

        :param location: location on the grid -- possible values
                         depend on the grid type
        :type location: str

        :param attributes: attributes associated with the Variable
                           (analogous to netcdf variable attributes)
        :type attributes: dict
        '''
        # if any([grid is None, data is None]):
        #     raise ValueError("Grid and Data must be defined")
        # if not hasattr(data, 'shape'):
        #     if grid.infer_location is None:
        #         raise ValueError('Data must be able to fit to the grid')
        self.grid = grid
        self.depth = depth
        self.name = self._units = self._time = self._data = None

        self.name = name
        self.units = units
        self.location = location
        self.data = data

        self.time = (time if time is not None
                     else self._default_component_types['time'].constant_time())

        self.data_file = data_file
        # the "main" filename for a Variable should be the grid data.
        self.filename = data_file
        self.grid_file = grid_file
        self.varname = varname
        self._result_memo = collections.OrderedDict()
        self.fill_value = fill_value

        self.attributes = {} if attributes is None else attributes
        # if the data is a netcdf variable, pull the attributes from there
        try:
            for attr in self.data.ncattrs():
                self.attributes[attr] = data.getncattr(attr)
        except AttributeError:  # must not be a netcdf variable
            pass                # so just use what was passed in.

        self.surface_boundary_condition = surface_boundary_condition
        self.bottom_boundary_condition = bottom_boundary_condition

        # for k in kwargs:
        #     setattr(self, k, kwargs[k])
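    # Illustrative usage (not part of the original source): a minimal,
    # hedged sketch of constructing a Variable directly from in-memory data.
    # The 3x3 lon/lat arrays and values are assumptions for demonstration,
    # mirroring the pattern used in constant() below.
    #
    # >>> import numpy as np
    # >>> from gridded.grids import Grid_S
    # >>> lon = np.array([[-360, 0, 360]] * 3)
    # >>> lat = np.array([[-89.95] * 3, [0] * 3, [89.95] * 3])
    # >>> grid = Grid_S(node_lon=lon, node_lat=lat)
    # >>> var = Variable(name="example", units="C",
    # ...                data=np.full((3, 3), 15.0), grid=grid)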
    @classmethod
    def from_netCDF(cls,
                    filename=None,
                    varname=None,
                    grid_topology=None,
                    name=None,
                    units=None,
                    time=None,
                    time_origin=None,
                    grid=None,
                    depth=None,
                    dataset=None,
                    data_file=None,
                    grid_file=None,
                    location=None,
                    load_all=False,  # Do we need this? I think not --
                                     # maybe a method to fully load later if wanted.
                    fill_value=0,
                    **kwargs
                    ):
        '''
        Allows one-function creation of a Variable from a file.

        :param filename: Default data source. Has lowest priority.
                         If dataset, grid_file, or data_file are provided,
                         this function uses them first
        :type filename: string

        :param varname: Explicit name of the data in the data source file.
                        Equivalent to the key used to look the item up
                        directly, e.g. ``ds["lon_u"]`` for a netCDF4 Dataset.
        :type varname: string

        :param grid_topology: Description of the relationship between grid
                              attributes and variable names.
        :type grid_topology: {string : string, ...}

        :param name: Name of this object
        :type name: string

        :param units: string such as 'm/s'
        :type units: string

        :param time: Time axis of the data. May be a constructed
                     ``gridded.Time`` object, or a collection of
                     datetime.datetime objects
        :type time: [] of datetime.datetime, netCDF4 Variable, or Time object

        :param data: Underlying data object. May be any array-like,
                     including a netCDF4 Variable, etc.
        :type data: netCDF4.Variable or numpy.array

        :param grid: Grid that the data corresponds to
        :type grid: pysgrid or pyugrid

        :param location: The feature where the data aligns with the grid.
        :type location: string

        :param depth: Depth axis object from ``gridded.depth``
        :type depth: Depth, S_Depth or L_Depth

        :param dataset: Instance of open netCDF4.Dataset
        :type dataset: netCDF4.Dataset

        :param data_file: Name of data source file, if data and grid files
                          are separate
        :type data_file: string

        :param grid_file: Name of grid source file, if data and grid files
                          are separate
        :type grid_file: string
        '''
        Grid = cls._default_component_types['grid']
        Time = cls._default_component_types['time']
        Depth = cls._default_component_types['depth']
        if filename is not None:
            data_file = filename
            grid_file = filename

        ds = None
        dg = None
        if dataset is None:
            if grid_file == data_file:
                ds = dg = get_dataset(grid_file)
            else:
                ds = get_dataset(data_file)
                dg = get_dataset(grid_file)
        else:
            if grid_file is not None:
                dg = get_dataset(grid_file)
            else:
                dg = dataset
            ds = dataset

        if data_file is None:
            data_file = os.path.split(ds.filepath())[-1]

        if grid is None:
            grid = Grid.from_netCDF(grid_file,
                                    dataset=dg,
                                    grid_topology=grid_topology)
        if varname is None:
            varname = cls._gen_varname(data_file, dataset=ds)
            if varname is None:
                raise NameError('Default current names are not in the data '
                                'file, must supply variable name')
        data = ds.variables[varname]
        if name is None:
            name = cls.__name__ + str(cls._def_count)
            cls._def_count += 1
        if units is None:
            try:
                units = data.units
            except AttributeError:
                units = None
        if time is None:
            time = Time.from_netCDF(filename=data_file,
                                    dataset=ds,
                                    datavar=data)
            if time_origin is not None:
                time = Time(data=time.data,
                            filename=time.filename,
                            varname=time.varname,
                            origin=time_origin)
        if depth is None:
            if (isinstance(grid, (Grid_S, Grid_R)) and len(data.shape) == 4 or
                    isinstance(grid, Grid_U) and len(data.shape) == 3):
                depth = Depth.from_netCDF(grid_file=dg,
                                          dataset=ds,
                                          **kwargs
                                          )
        if location is None:
            if hasattr(data, 'location'):
                location = data.location
            # if len(data.shape) == 4 or (len(data.shape) == 3 and time is None):
            #     from gnome.environment.environment_objects import S_Depth
            #     depth = S_Depth.from_netCDF(grid=grid,
            #                                 depth=1,
            #                                 data_file=data_file,
            #                                 grid_file=grid_file,
            #                                 **kwargs)
        if load_all:
            data = data[:]
        return cls(name=name,
                   units=units,
                   time=time,
                   data=data,
                   grid=grid,
                   depth=depth,
                   grid_file=grid_file,
                   data_file=data_file,
                   fill_value=fill_value,
                   location=location,
                   varname=varname,
                   **kwargs)
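    # Illustrative usage (not part of the original source): a hedged sketch
    # of one-call creation from a file. "my_data.nc" and the variable name
    # "water_temp" are assumed placeholders, not names this module defines.
    #
    # >>> temp = Variable.from_netCDF(filename="my_data.nc",
    # ...                             varname="water_temp")
    # >>> temp.units  # pulled from the netCDF variable's attributes if present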
    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return ('{0.__class__.__module__}.{0.__class__.__name__}('
                'name="{0.name}", '
                'time="{0.time}", '
                'units="{0.units}", '
                'data="{0.data}", '
                ')').format(self)
    @classmethod
    def constant(cls, value):
        """
        Sets a Variable up to represent a constant scalar field. The result
        will return a constant value for all times and places.
        """
        Grid = Grid_S
        Time = cls._default_component_types['time']
        _data = np.full((3, 3), value)
        _node_lon = np.array(([-360, 0, 360],
                              [-360, 0, 360],
                              [-360, 0, 360]))
        _node_lat = np.array(([-89.95, -89.95, -89.95],
                              [0, 0, 0],
                              [89.95, 89.95, 89.95]))
        _grid = Grid(node_lon=_node_lon, node_lat=_node_lat)
        _time = Time.constant_time()
        return cls(grid=_grid, time=_time, data=_data, fill_value=value)
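    # Illustrative usage (not part of the original source): the constant
    # field should return the same value everywhere and at all times.
    # The query points and time below are arbitrary.
    #
    # >>> import numpy as np
    # >>> from datetime import datetime
    # >>> c = Variable.constant(10.0)
    # >>> c.at(points=np.array([[0.0, 0.0], [50.0, 30.0]]),
    # ...      time=datetime(2000, 1, 1))  # -> both values are 10.0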
    @property
    def location(self):
        if (self._location is None
                and self.data is not None
                and hasattr(self.data, 'location')):
            return self.data.location
        else:
            return self._location

    @location.setter
    def location(self, location):
        # Fixme: perhaps we need Variable subclasses,
        #        to distinguish between variable types.
        if location not in VALID_LOCATIONS:
            raise ValueError("Invalid location: {}, must be one of: "
                             "{}".format(location, VALID_LOCATIONS))
        self._location = location

    @property
    def info(self):
        """
        Information about the variable object
        This could be filled out more
        """
        try:
            std_name = self.attributes['standard_name']
        except KeyError:
            std_name = None
        msg = """
              Variable:
                filename: {0.filename}
                varname: {0.varname}
                standard name: {1}
                units: {0.units}
                grid: {0.grid}
                data shape: {0.data.shape}
              """.format(self, std_name)
        return dedent(msg)

    @property
    def time(self):
        return self._time

    @time.setter
    def time(self, t):
        Time_class = self.__class__._default_component_types['time']
        if t is None:
            self._time = None
            return
        if (self.data is not None
                and len(t) != self.data.shape[0]
                and len(t) > 1):
            raise ValueError("Data/time interval mismatch")
        if isinstance(t, Time_class):
            self._time = t
        elif isinstance(t, collections.abc.Iterable) or isinstance(t, nc4.Variable):
            self._time = Time_class(t)
        else:
            raise ValueError("Time must be set with an iterable container "
                             "or netCDF variable")

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, d):
        d = asarraylike(d)
        # Fixme: maybe all this checking should be done
        #        when it gets added to the Dataset??
        if self.time is not None and len(d) != len(self.time):
            raise ValueError("Data/time interval mismatch")
        # fixme: we should check Depth, too.
        # if self.grid is not None and self.grid.infer_location(d) is None:
        #     raise ValueError("Data/grid shape mismatch. Data shape is {0}, "
        #                      "Grid shape is {1}".format(d.shape, self.grid.node_lon.shape))
        if self.grid is not None:  # if there is not a grid, we can't check this
            if self.location is None:  # not set, let's try to figure it out
                self.location = self.grid.infer_location(d)
            if self.location is None:
                raise ValueError("Data/grid shape mismatch: Data shape is {0}, "
                                 "Grid shape is {1}".format(d.shape,
                                                            self.grid.node_lon.shape))
        self._data = d

    @property
    def units(self):
        '''
        Units of underlying data

        :rtype: string
        '''
        return self._units

    @units.setter
    def units(self, unit):
        # if unit is not None:
        #     if not unit_conversion.is_supported(unit):
        #         raise ValueError('Units of {0} are not supported'.format(unit))
        self._units = unit

    @property
    def grid_shape(self):
        if hasattr(self.grid, 'shape'):
            return self.grid.shape
        else:
            return self.grid.node_lon.shape

    @property
    def data_shape(self):
        return self.data.shape

    @property
    def is_data_on_nodes(self):
        return self.grid.infer_location(self._data) == 'node'

    def _get_hash(self, points, time):
        """
        Returns a SHA1 hash of the array of points passed in
        """
        return (hashlib.sha1(points.tobytes()).hexdigest(),
                hashlib.sha1(str(time).encode('utf-8')).hexdigest())

    def _memoize_result(self, points, time, result, D,
                        _copy=False, _hash=None):
        if _copy:
            result = result.copy()
        result.setflags(write=False)
        if _hash is None:
            _hash = self._get_hash(points, time)
        if D is not None and len(D) > 4:
            D.popitem(last=False)
        D[_hash] = result
        D[_hash].setflags(write=False)

    def _get_memoed(self, points, time, D, _copy=False, _hash=None):
        if _hash is None:
            _hash = self._get_hash(points, time)
        if (D is not None and _hash in D):
            return D[_hash].copy() if _copy else D[_hash]
        else:
            return None
    def center_values(self, time, units=None, extrapolate=False):
        """
        interpolate data to the center of the cells

        :param time: the time to interpolate at

        **Warning:** NOT COMPLETE

        NOTE: what if this data is already on the cell centers?
        """
        raise NotImplementedError("center_values is not finished")

        if not extrapolate:
            self.time.valid_time(time)
        if len(self.time) == 1:
            if len(self.data.shape) == 2:
                if isinstance(self.grid, Grid_S):
                    # curv grid
                    value = self.data[0:1:-2, 1:-2]
                else:
                    value = self.data
        else:
            centers = self.grid.get_center_points()
            value = self.at(centers, time, units)
        return value
    @property
    def dimension_ordering(self):
        '''
        Returns a list that describes the dimensions of the property's data.
        If a dimension_ordering is assigned, it will continue to use that.
        If no dimension_ordering is set, then a default ordering will be
        generated based on the object properties and data shape.

        For example, if the data has 4 dimensions and is represented by a
        Grid_S (structured grid), and the Variable has a depth and time
        assigned, then the assumed ordering is ['time', 'depth', 'lon', 'lat'].

        If the data has 3 dimensions, self.grid is a Grid_S, and self.time
        is None, then the ordering is ['depth', 'lon', 'lat'].

        If the data has 3 dimensions and self.grid is a Grid_U, the ordering
        is ['time', 'depth', 'ele'].
        '''
        if not hasattr(self, '_order'):
            self._order = None
        if self._order is not None:
            return self._order
        else:
            if isinstance(self.grid, (Grid_S, Grid_R)):
                order = ['time', 'depth', 'lon', 'lat']
            else:
                order = ['time', 'depth', 'ele']
            ndim = len(self.data.shape)
            diff = len(order) - ndim
            if diff == 0:
                return order
            elif diff == 1:
                if self.time is not None:
                    del order[1]
                elif self.depth is not None:
                    del order[0]
                else:
                    raise ValueError('Generated ordering too short to fit data. '
                                     'Time or depth must not be None')
            elif diff == 2:
                order = order[2:]
            else:
                raise ValueError('Too many/too few dimensions '
                                 'ndim={0}'.format(ndim))
            return order

    @dimension_ordering.setter
    def dimension_ordering(self, order):
        self._order = order
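    # Illustrative note (not part of the original source): how the default
    # ordering collapses for a Grid_S variable. With 4D data, the full
    # default applies: ['time', 'depth', 'lon', 'lat']. With 3D data and
    # self.time set (no depth), 'depth' is dropped, giving
    # ['time', 'lon', 'lat']; with 2D data, only ['lon', 'lat'] remains.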
    # @profile
    def at(self,
           points=None,
           time=None,
           units=None,
           extrapolate=False,
           lons=None,
           lats=None,
           unmask=False,
           _hash=None,
           _mem=True,
           **kwargs):
        """
        Find the value of the property at positions P at time T

        :param points: Cartesian coordinates to be queried (P). Lon, Lat
                       required, Depth (Z) is optional. Coordinates must be
                       organized as a 2D array or list, one coordinate per
                       row or list element.

                       ``[[Lon1, Lat1, Z1],``
                       ``[Lon2, Lat2, Z2],``
                       ``[Lon3, Lat3, Z3],``
                       ``...]``

                       Failure to provide point data in this format may
                       cause unexpected behavior. If you wish to provide
                       point data using separate longitude and latitude
                       arrays, use the ``lons=`` and ``lats=`` kwargs.
                       Note that if your Z is positive-up,
                       self.depth.positive_down should be set to False
        :type points: Nx3 array of double

        :param time: The time at which to query these points (T)
        :type time: datetime.datetime object

        :param units: units the values will be returned in (or converted to)
        :type units: string such as 'm/s' or 'knots'

        :param extrapolate: if True, extrapolation will be supported
        :type extrapolate: boolean (default False)

        :param unmask: if True and the return array is a masked array,
                       returns a filled array
        :type unmask: boolean (default False)

        :param surface_boundary_condition: specifies how to evaluate points
                                           above the depth interval
        :type surface_boundary_condition: string ('extrapolate' or 'mask',
                                          default 'extrapolate')

        :param bottom_boundary_condition: specifies how to evaluate points
                                          below the depth interval
        :type bottom_boundary_condition: string ('extrapolate' or 'mask',
                                         default 'mask')

        :param lons: 1D iterable of longitude values. This is ignored if
                     points is provided
        :type lons: iterable

        :param lats: 1D iterable of latitude values. This is ignored if
                     points is provided
        :type lats: iterable

        :return: returns an Nx1 array of interpolated values
        :rtype: double
        """
        if points is None and (lons is None or lats is None):
            raise ValueError("Must provide either points or separate "
                             "lons and lats")
        if points is None:
            points = np.column_stack((np.array(lons), np.array(lats)))
        pts = _reorganize_spatial_data(points)

        if _hash is None:
            _hash = self._get_hash(pts, time)

        if _mem:
            res = self._get_memoed(pts, time, self._result_memo, _hash=_hash)
            if res is not None:
                return res

        order = self.dimension_ordering
        if order[0] == 'time':
            value = self._time_interp(pts, time, extrapolate,
                                      _mem=_mem, _hash=_hash, **kwargs)
        elif order[0] == 'depth':
            value = self._depth_interp(pts, time, extrapolate,
                                       _mem=_mem, _hash=_hash, **kwargs)
        else:
            value = self._xy_interp(pts, time, extrapolate,
                                    _mem=_mem, _hash=_hash, **kwargs)

        value = value.reshape(-1, 1)
        if isinstance(value, np.ma.MaskedArray):
            np.ma.set_fill_value(value, self.fill_value)
        if unmask:
            value = np.ma.filled(value)

        if _mem:
            self._memoize_result(pts, time, value, self._result_memo,
                                 _hash=_hash)
        return value
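    # Illustrative usage (not part of the original source): querying
    # interpolated values at a few points. The file, variable name, and
    # coordinates are assumed placeholders.
    #
    # >>> import numpy as np
    # >>> from datetime import datetime
    # >>> points = np.array([[-71.0, 42.0, 0.0],
    # ...                    [-70.5, 41.5, 5.0]])
    # >>> salt = Variable.from_netCDF(filename="my_data.nc", varname="salt")
    # >>> salt.at(points=points, time=datetime(2020, 6, 1, 12))  # Nx1 result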
    interpolate = at  # common request

    def _xy_interp(self, points, time, extrapolate, slices=(), **kwargs):
        '''
        Uses the py(s/u)grid interpolation to determine the values at the
        points, and returns it.

        :param points: Coordinates to be queried (3D)
        :type points: Nx3 array of double

        :param time: Time of the query
        :type time: datetime.datetime object

        :param extrapolate: Turns extrapolation on or off
        :type extrapolate: boolean

        :param slices: describes how the data needs to be sliced to reach
                       the appropriate dimension
        :type slices: tuple of integers or slice objects
        '''
        _hash = kwargs['_hash'] if '_hash' in kwargs else None
        units = kwargs['units'] if 'units' in kwargs else None
        value = self.grid.interpolate_var_to_points(points[:, 0:2],
                                                    self.data,
                                                    location=self.location,
                                                    _hash=self._get_hash(points[:, 0:2], time),
                                                    slices=slices,
                                                    _memo=True)
        return value

    def _time_interp(self, points, time, extrapolate, slices=(), **kwargs):
        '''
        Uses the Time object to interpolate the result of the next level of
        interpolation, as specified by the dimension_ordering attribute.

        :param points: Coordinates to be queried (3D)
        :type points: Nx3 array of double

        :param time: Time of the query
        :type time: datetime.datetime object

        :param extrapolate: Turns extrapolation on or off
        :type extrapolate: boolean

        :param slices: describes how the data needs to be sliced to reach
                       the appropriate dimension
        :type slices: tuple of integers or slice objects
        '''
        order = self.dimension_ordering
        idx = order.index('time')
        if order[idx + 1] != 'depth':
            val_func = self._xy_interp
        else:
            val_func = self._depth_interp
        if (time == self.time.min_time
                or (extrapolate and time < self.time.min_time)):
            # min or before
            return val_func(points, time, extrapolate, slices=(0,), **kwargs)
        elif (time == self.time.max_time
                or (extrapolate and time > self.time.max_time)):
            return val_func(points, time, extrapolate, slices=(-1,), **kwargs)
        else:
            ind = self.time.index_of(time)
            s1 = slices + (ind,)
            s0 = slices + (ind - 1,)
            v0 = val_func(points, time, extrapolate, slices=s0, **kwargs)
            v1 = val_func(points, time, extrapolate, slices=s1, **kwargs)
            alphas = self.time.interp_alpha(time, extrapolate)
            return v0 + (v1 - v0) * alphas

    def _depth_interp(self, points, time, extrapolate, slices=(), **kwargs):
        '''
        Uses the Depth object to interpolate the result of the next level of
        interpolation, as specified by the dimension_ordering attribute.
        :param points: Coordinates to be queried (3D)
        :type points: Nx3 array of double

        :param time: Time of the query
        :type time: datetime.datetime object

        :param extrapolate: Turns extrapolation on or off
        :type extrapolate: boolean

        :param slices: describes how the data needs to be sliced to reach
                       the appropriate dimension
        :type slices: tuple of integers or slice objects
        '''
        surface_boundary_condition = kwargs.pop('surface_boundary_condition',
                                                self.surface_boundary_condition)
        bottom_boundary_condition = kwargs.pop('bottom_boundary_condition',
                                               self.bottom_boundary_condition)
        order = self.dimension_ordering
        dim_idx = order.index('depth')
        if order[dim_idx + 1] != 'time':
            val_func = self._xy_interp
        else:
            val_func = self._time_interp
        d_indices, d_alphas = self.depth.interpolation_alphas(
            points,
            time,
            self.data.shape[1:],
            extrapolate=extrapolate,
            surface_boundary_condition=surface_boundary_condition,
            bottom_boundary_condition=bottom_boundary_condition,
            **kwargs)

        # Check the surface index against the data shape to determine
        # whether we are using rho or w coordinates
        surface_index = self.depth.surface_index
        if self.depth.surface_index == self.data.shape[dim_idx]:
            surface_index -= 1

        if isinstance(d_indices, np.ma.MaskedArray) and np.all(d_indices.mask):
            # all particles ended up masked
            rtv = np.empty((points.shape[0],), dtype=np.float64) * np.nan
            rtv = np.ma.MaskedArray(data=rtv, mask=np.isnan(rtv))
            return rtv
        # the two cases below may be optimizations that are not worth the
        # trouble; if problems continue to arise, get rid of them
        elif np.all(d_indices == -1) and not np.any(d_indices.mask):
            return val_func(points, time, extrapolate,
                            slices=slices + (0,), **kwargs)
        elif (np.all(d_indices == self.data.shape[dim_idx] - 1)
                and not np.any(d_indices.mask)):
            return val_func(points, time, extrapolate,
                            slices=slices + (self.data.shape[dim_idx] - 1,),
                            **kwargs)
        else:
            msk = (np.isnan(d_indices) if not np.ma.is_masked(d_indices)
                   else d_indices.mask)
            values = np.ma.MaskedArray(data=np.empty((points.shape[0],)) * np.nan,
                                       mask=msk)
            # Points are mixed within the grid. Some may be above the
            # surface or under the ground.
            uniq_idx = np.unique(d_indices)
            if np.ma.masked in uniq_idx:
                # the [0:-1] is required to skip all masked indices
                uniq_idx = uniq_idx[0:-1]
            for idx in uniq_idx:
                lay_idxs = np.where(d_indices == idx)[0]
                if idx == self.data.shape[dim_idx] - 1:
                    # special case: index == depth dim length,
                    # so only v0 is valid
                    v0 = val_func(points[lay_idxs], time, extrapolate,
                                  slices=slices + (idx,), **kwargs)
                    values.put(lay_idxs, v0)
                    continue
                v1 = val_func(points[lay_idxs], time, extrapolate,
                              slices=slices + (idx + 1,), **kwargs)
                v0 = val_func(points[lay_idxs], time, extrapolate,
                              slices=slices + (idx,), **kwargs)
                sub_vals = v0 + (v1 - v0) * d_alphas[lay_idxs]
                # partially fill the values array for this layer
                values.put(lay_idxs, sub_vals)
            return values

    def _transect(self, times, depths, points):
        '''
        Returns a transect of the Variable at the given values.

        This function is not close to finished.
        '''
        output_shape = (len(times), len(depths), len(points))
        # note: this was np.array(shape=...), which raises a TypeError;
        #       np.empty is the intended call here.
        outarr = np.empty(output_shape)
        for t in range(0, len(times)):
            for d in range(0, len(depths)):
                pts = np.empty((len(points), 3))
                pts[:, 0:2] = points
                pts[:, 2] = depths[d]
                layer = d

    @classmethod
    def _gen_varname(cls,
                     filename=None,
                     dataset=None,
                     names_list=None,
                     std_names_list=None):
        """
        Function to find the default variable names if they are not provided.
        This function does nothing without defined default_names or cf_names
        class attributes.

        :param filename: Name of file that will be searched for variables
        :type filename: string

        :param dataset: Existing instance of a netCDF4.Dataset
        :type dataset: netCDF.Dataset

        :return: name of the first netCDF4.Variable that matches
        """
        df = None
        if dataset is not None:
            df = dataset
        else:
            df = get_dataset(filename)
        if names_list is None:
            names_list = cls.default_names
        if std_names_list is None:
            std_names_list = cls.cf_names
        for n in names_list:
            if n in df.variables.keys():
                return n
        for n in std_names_list:
            for var in df.variables.values():
                if (hasattr(var, 'standard_name') and var.standard_name == n
                        or hasattr(var, 'long_name') and var.long_name == n):
                    return var.name
        raise ValueError("Default names not found.")
class VectorVariable(object):

    default_names = {}
    cf_names = {}
    comp_order = []

    _def_count = 0

    '''
    These are the classes which are used when internal components are
    created by default, such as automatically from a file or other python
    data structure. Subclasses of this type may override this to use
    different classes for its components.
    '''
    _default_component_types = {'time': Time,
                                'grid': Grid,
                                'depth': Depth,
                                'variable': Variable}

    def __init__(self,
                 name=None,
                 units=None,
                 time=None,
                 variables=None,
                 grid=None,
                 depth=None,
                 grid_file=None,
                 data_file=None,
                 dataset=None,
                 varnames=None,
                 **kwargs):
        super(VectorVariable, self).__init__()
        self.name = self._units = self._time = self._variables = None

        self.name = name

        if variables is None or len(variables) < 2:
            # note: this check originally came after the isinstance() loop
            #       below, which would raise a TypeError on variables=None;
            #       it is moved up so the intended ValueError is raised.
            raise ValueError('Variables must be an array-like of 2 or more '
                             'Variable objects')

        if all([isinstance(v, Variable) for v in variables]):
            if time is not None and not isinstance(time, Time):
                time = Time(time)
            units = variables[0].units if units is None else units
            time = variables[0].time if time is None else time
        if units is None:
            units = variables[0].units
        self._units = units

        self.variables = variables
        self._time = time
        unused_args = kwargs.keys() if kwargs is not None else None
        if len(unused_args) > 0:
            kwargs = {}
        if isinstance(self.variables[0], Variable):
            self.grid = self.variables[0].grid if grid is None else grid
            self.depth = self.variables[0].depth if depth is None else depth
            self.grid_file = self.variables[0].grid_file if grid_file is None else grid_file
            self.data_file = self.variables[0].data_file if data_file is None else data_file

        self._result_memo = collections.OrderedDict()
        for i, comp in enumerate(self.__class__.comp_order):
            setattr(self, comp, self.variables[i])
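    # Illustrative usage (not part of the original source): constructing
    # directly from two existing Variable objects. Here u and v are assumed
    # to be Variables sharing a grid and time axis.
    #
    # >>> vv = VectorVariable(name="velocity", variables=[u, v])
    # >>> vv.grid is u.grid   # grid is taken from the first component
    # True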
    @classmethod
    def from_netCDF(cls,
                    filename=None,
                    varnames=None,
                    grid_topology=None,
                    name=None,
                    units=None,
                    time=None,
                    time_origin=None,
                    grid=None,
                    depth=None,
                    data_file=None,
                    grid_file=None,
                    dataset=None,
                    load_all=False,
                    variables=None,
                    **kwargs
                    ):
        '''
        Allows one-function creation of a VectorVariable from a file.

        :param filename: Default data source. The parameters below take
                         precedence.
        :type filename: string

        :param varnames: Names of the variables in the data source file
        :type varnames: [] of string

        :param grid_topology: Description of the relationship between grid
                              attributes and variable names.
        :type grid_topology: {string : string, ...}

        :param name: Name of property
        :type name: string

        :param units: Units
        :type units: string

        :param time: Time axis of the data
        :type time: [] of datetime.datetime, netCDF4 Variable, or Time object

        :param data: Underlying data source
        :type data: netCDF4.Variable or numpy.array

        :param grid: Grid that the data corresponds with
        :type grid: pysgrid or pyugrid

        :param dataset: Instance of open Dataset
        :type dataset: netCDF4.Dataset

        :param data_file: Name of data source file
        :type data_file: string

        :param grid_file: Name of grid source file
        :type grid_file: string
        '''
        Grid = cls._default_component_types['grid']
        Time = cls._default_component_types['time']
        Variable = cls._default_component_types['variable']
        Depth = cls._default_component_types['depth']
        if filename is not None:
            data_file = filename
            grid_file = filename

        ds = None
        dg = None
        if dataset is None:
            if grid_file == data_file:
                ds = dg = get_dataset(grid_file)
            else:
                ds = get_dataset(data_file)
                dg = get_dataset(grid_file)
        else:
            if grid_file is not None:
                dg = get_dataset(grid_file)
            else:
                dg = dataset
            ds = dataset

        if grid is None:
            grid = Grid.from_netCDF(grid_file,
                                    dataset=dg,
                                    grid_topology=grid_topology)
        if varnames is None:
            varnames = cls._gen_varnames(data_file, dataset=ds)
            if all([v is None for v in varnames]):
                raise ValueError('No compatible variable names found!')
        if name is None:
            name = cls.__name__ + str(cls._def_count)
            cls._def_count += 1
        data = ds[varnames[0]]
        if time is None:
            time = Time.from_netCDF(filename=data_file,
                                    dataset=ds,
                                    datavar=data)
            if time_origin is not None:
                time = Time(data=time.data,
                            filename=data_file,
                            varname=time.varname,
                            origin=time_origin)
        if depth is None:
            if (isinstance(grid, (Grid_S, Grid_R)) and len(data.shape) == 4 or
                    isinstance(grid, Grid_U) and len(data.shape) == 3):
                depth = Depth.from_netCDF(grid_file,
                                          dataset=dg,
                                          **kwargs
                                          )
        # if depth is None:
        #     if (isinstance(grid, Grid_S) and len(data.shape) == 4 or
        #             (len(data.shape) == 3 and time is None) or
        #             (isinstance(grid, Grid_U) and len(data.shape) == 3 or
        #             (len(data.shape) == 2 and time is None))):
        #         from gnome.environment.environment_objects import S_Depth
        #         depth = S_Depth.from_netCDF(grid=grid,
        #                                     depth=1,
        #                                     data_file=data_file,
        #                                     grid_file=grid_file,
        #                                     **kwargs)
        if variables is None:
            variables = []
            for vn in varnames:
                if vn is not None:
                    # Fixme: We're calling from_netCDF from itself ?!?!?
                    variables.append(Variable.from_netCDF(filename=filename,
                                                          varname=vn,
                                                          grid_topology=grid_topology,
                                                          units=units,
                                                          time=time,
                                                          grid=grid,
                                                          depth=depth,
                                                          data_file=data_file,
                                                          grid_file=grid_file,
                                                          dataset=ds,
                                                          load_all=load_all,
                                                          location=None,
                                                          **kwargs))
        if units is None:
            units = [v.units for v in variables]
            if all(u == units[0] for u in units):
                units = units[0]
        return cls(name=name,
                   filename=filename,
                   varnames=varnames,
                   grid_topology=grid_topology,
                   units=units,
                   time=time,
                   grid=grid,
                   depth=depth,
                   variables=variables,
                   data_file=data_file,
                   grid_file=grid_file,
                   dataset=ds,
                   load_all=load_all,
                   location=None,
                   **kwargs)
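    # Illustrative usage (not part of the original source): a hedged sketch
    # of one-call creation with explicit component names. The file and
    # variable names ("air_u", "air_v") are assumed placeholders; real use
    # typically goes through a subclass with comp_order and default_names set.
    #
    # >>> wind = VectorVariable.from_netCDF(filename="my_data.nc",
    # ...                                   varnames=["air_u", "air_v"])
    # >>> wind.varnames
    # ['air_u', 'air_v']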
    @classmethod
    def _gen_varnames(cls,
                      filename=None,
                      dataset=None,
                      names_dict=None,
                      std_names_dict=None):
        """
        Function to find the default variable names if they are not provided.

        :param filename: Name of file that will be searched for variables
        :type filename: string

        :param dataset: Existing instance of a netCDF4.Dataset
        :type dataset: netCDF.Dataset

        :return: dict of component to name mapping
                 (eg {'u': 'water_u', 'v': 'water_v', etc})
        """
        df = None
        if dataset is not None:
            df = dataset
        else:
            df = get_dataset(filename)
        if names_dict is None:
            names_dict = cls.default_names
        if std_names_dict is None:
            std_names_dict = cls.cf_names
        rd = {}
        for k in cls.comp_order:
            v = names_dict[k] if k in names_dict else []
            for n in v:
                if n in df.variables.keys():
                    rd[k] = n
                    continue
            if k not in rd.keys():
                rd[k] = None
        for k in cls.comp_order:
            v = std_names_dict[k] if k in std_names_dict else []
            if rd[k] is None:
                for n in v:
                    for var in df.variables.values():
                        if (hasattr(var, 'standard_name') and var.standard_name == n
                                or hasattr(var, 'long_name') and var.long_name == n):
                            rd[k] = var.name
                            break
        return collections.namedtuple('varnames', cls.comp_order)(**rd)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return ('{0.__class__.__module__}.{0.__class__.__name__}('
                'name="{0.name}", '
                'time="{0.time}", '
                'units="{0.units}", '
                'variables="{0.variables}", '
                'grid="{0.grid}", '
                ')').format(self)

    @property
    def location(self):
        return [v.location for v in self.variables]

    locations = location

    @property
    def is_data_on_nodes(self):
        return self.grid.infer_location(self.variables[0].data) == 'node'

    @property
    def time(self):
        return self._time

    @time.setter
    def time(self, t):
        Time_class = self.__class__._default_component_types['time']
        if self.variables is not None:
            for v in self.variables:
                try:
                    v.time = t
                except ValueError as e:
                    raise ValueError('Time was not compatible with variables. '
                                     'Set the variables attribute to None to '
                                     'allow changing other attributes. '
                                     'Original error: {0}'.format(str(e)))
        if isinstance(t, Time_class):
            self._time = t
        elif isinstance(t, collections.abc.Iterable) or isinstance(t, nc4.Variable):
            self._time = Time_class(t)
        else:
            raise ValueError("Time must be set with an iterable container "
                             "or netCDF variable")

    @property
    def units(self):
        '''
        Units of underlying data

        :rtype: string
        '''
        if hasattr(self._units, '__iter__'):
            if len(set(self._units)) > 1:
                return self._units
            else:
                return self._units[0]
        else:
            return self._units

    @units.setter
    def units(self, unit):
        self._units = unit
        if self.variables is not None:
            for v in self.variables:
                v.units = unit

    @property
    def varnames(self):
        '''
        Names of underlying variables

        :rtype: [] of strings
        '''
        return [v.varname if hasattr(v, 'varname') else v.name
                for v in self.variables]

    @property
    def data_shape(self):
        if self.variables is not None:
            return self.variables[0].data.shape
        else:
            return None

    def _get_hash(self, points, time):
        """
        Returns a SHA1 hash of the array of points passed in
        """
        return (hashlib.sha1(points.tobytes()).hexdigest(),
                hashlib.sha1(str(time).encode('utf-8')).hexdigest())

    def _memoize_result(self, points, time, result, D,
                        _copy=True, _hash=None):
        if _copy:
            result = result.copy()
        result.setflags(write=False)
        if _hash is None:
            _hash = self._get_hash(points, time)
        if D is not None and len(D) > 8:
            D.popitem(last=False)
        D[_hash] = result

    def _get_memoed(self, points, time, D, _copy=True, _hash=None):
        if _hash is None:
            _hash = self._get_hash(points, time)
        if (D is not None and _hash in D):
            return D[_hash].copy() if _copy else D[_hash]
        else:
            return None
    def at(self,
           points=None,
           time=None,
           units=None,
           extrapolate=False,
           lons=None,
           lats=None,
           unmask=False,
           _hash=None,
           _mem=True,
           **kwargs):
        """
        Find the value of the property at positions P at time T

        :param points: Cartesian coordinates to be queried (P). Lon, Lat
                       required, Depth (Z) is optional. Coordinates must be
                       organized as a 2D array or list, one coordinate per
                       row or list element. ::

                           [[Lon1, Lat1, Z1],
                            [Lon2, Lat2, Z2],
                            [Lon3, Lat3, Z3],
                            ...]

                       Failure to provide point data in this format may
                       cause unexpected behavior. If you wish to provide
                       point data using separate longitude and latitude
                       arrays, use the ``lons=`` and ``lats=`` kwargs.
                       Note that if your Z is positive-up,
                       self.depth.positive_down should be set to False
        :type points: Nx3 array of double

        :param time: The time at which to query these points (T)
        :type time: `datetime.datetime` object

        :param units: units the values will be returned in (or converted to)
        :type units: string such as 'm/s' or 'knots'

        :param extrapolate: if True, extrapolation will be supported
        :type extrapolate: boolean (default False)

        :param unmask: if True and the return array is a masked array,
                       returns a filled array
        :type unmask: boolean (default False)

        :param zero_ref: Specifies whether the zero datum moves with zeta
                         or not. Only applicable if the depth dimension is
                         present with full sigma layers
        :type zero_ref: string 'absolute' or 'relative'

        :param lons: 1D iterable of longitude values. This is ignored if
                     points is provided
        :type lons: iterable

        :param lats: 1D iterable of latitude values. This is ignored if
                     points is provided
        :type lats: iterable

        :return: NxM array of interpolated values, where N = len(points)
                 and M = len(self.variables)
        :rtype: np.array or np.ma.MaskedArray
        """
        if points is None and (lons is None or lats is None):
            raise ValueError("Must provide either points or separate "
                             "lons and lats")
        if points is None:
            points = np.column_stack((np.array(lons), np.array(lats)))
        pts = _reorganize_spatial_data(points)

        if _hash is None:
            _hash = self._get_hash(pts, time)

        if _mem:
            res = self._get_memoed(pts, time, self._result_memo, _hash=_hash)
            if res is not None:
                return res

        value = np.ma.column_stack([var.at(points=pts,
                                           time=time,
                                           units=units,
                                           extrapolate=extrapolate,
                                           unmask=unmask,
                                           _mem=_mem,
                                           _hash=_hash,
                                           **kwargs)
                                    for var in self.variables])
        if _mem:
            self._memoize_result(pts, time, value, self._result_memo,
                                 _hash=_hash)
        return value
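    # Illustrative usage (not part of the original source), continuing the
    # hedged "wind" sketch above: each component Variable is queried and
    # the results are column-stacked, so the result is NxM for N points
    # and M component variables.
    #
    # >>> import numpy as np
    # >>> from datetime import datetime
    # >>> uv = wind.at(points=np.array([[-71.0, 42.0, 0.0]]),
    # ...              time=datetime(2020, 6, 1))  # shape (1, 2): u and v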
    @classmethod
    def _get_shared_vars(cls, *sh_args):
        default_shared = ['dataset', 'data_file', 'grid_file', 'grid']
        if len(sh_args) != 0:
            shared = sh_args
        else:
            shared = default_shared

        def getvars(func):
            @wraps(func)
            def wrapper(*args, **kws):
                def _mod(n):
                    k = kws
                    s = shared
                    return (n in s) and ((n not in k) or (n in k and k[n] is None))

                if 'filename' in kws and kws['filename'] is not None:
                    kws['data_file'] = kws['grid_file'] = kws['filename']
                ds = dg = None
                if _mod('dataset'):
                    if 'grid_file' in kws and 'data_file' in kws:
                        if kws['grid_file'] == kws['data_file']:
                            ds = dg = get_dataset(kws['grid_file'])
                        else:
                            ds = get_dataset(kws['data_file'])
                            dg = get_dataset(kws['grid_file'])
                    kws['dataset'] = ds
                else:
                    if 'grid_file' in kws and kws['grid_file'] is not None:
                        dg = get_dataset(kws['grid_file'])
                    else:
                        dg = kws['dataset']
                    ds = kws['dataset']
                if _mod('grid'):
                    gt = kws.get('grid_topology', None)
                    kws['grid'] = Grid.from_netCDF(kws['filename'],
                                                   dataset=dg,
                                                   grid_topology=gt)
                if kws.get('varnames', None) is None:
                    varnames = cls._gen_varnames(kws['data_file'],
                                                 dataset=ds)
                # if _mod('time'):
                #     time = Time.from_netCDF(filename=kws['data_file'],
                #                             dataset=ds,
                #                             varname=data)
                #     kws['time'] = time
                return func(*args, **kws)
            return wrapper
        return getvars
    def save(self, filepath, format='netcdf4'):
        """
        Save the variable object to a netcdf file.

        :param filepath: path of the file to save to, or a writable
                         netCDF4 Dataset. If a path, an existing file
                         will be clobbered.
        :type filepath: string

        Follows the convention established by the netcdf UGRID
        working group:

        http://ugrid-conventions.github.io/ugrid-conventions
        """
        format_options = ('netcdf3', 'netcdf4')
        if format not in format_options:
            raise ValueError("format: {} not supported. Options are: "
                             "{}".format(format, format_options))