from textwrap import dedent
import collections
import hashlib
from functools import wraps
import os
import numpy as np
import netCDF4 as nc4
from gridded.utilities import (get_dataset,
_reorganize_spatial_data,
_align_results_to_spatial_data,
asarraylike,
search_dataset_for_variables_by_varname,
parse_filename_dataset_args)
from gridded import VALID_LOCATIONS
from gridded.grids import Grid, Grid_U, Grid_S, Grid_R
from gridded.depth import Depth, DepthBase
from gridded.time import Time
import logging
log = logging.getLogger(__name__)
[docs]
class Variable(object):
"""
Variable object: represents a field of values associated with the grid.
Abstractly, it is usually a scalar physical property such a temperature,
salinity that varies over a the domain of the model.
This more or less maps to a variable in a netcdf file, but does not have
to come form a netcdf file, and this provides and abstraction where the
user can access the value in world coordinates, interpolated from the grid.
It holds a reference to its own grid object, and its data.
"""
default_names = []
cf_names = []
_instance_count = 0
_default_component_types = {'time': Time,
'grid': Grid,
'depth': Depth}
def __init__(self,
name=None,
units=None,
time=None,
data=None,
grid=None,
depth=None,
data_file=None,
grid_file=None,
varname=None,
fill_value=0,
location=None,
attributes=None,
surface_boundary_condition='extrapolate',
bottom_boundary_condition='mask',
**kwargs):
'''
This class represents a value defined on a grid
:param name: Name
:type name: string
:param units: Units
:type units: string
:param time: Time axis of the data
:type time: list of `datetime.datetime`, netCDF4 Variable, or Time object
:param data: Underlying data source
:type data: array-like object such as netCDF4.Variable or numpy.ndarray
:param grid: Grid that the data corresponds with
:type grid: Grid object (pysgrid or pyugrid or )
:param data_file: Name of data source file
:type data_file: string
:param grid_file: Name of grid source file
:type grid_file: string
:param varname: Name of the variable in the data source file
:type varname: string
:param fill_value: the fill value used for undefined data
:param location: location on the grid -- possible values
depend on the grid type
:type location: str
:param attributes: attributes associated with the Variable
(analogous to netcdf variable attributes)
:type attributes: dict
'''
# if any([grid is None, data is None]):
# raise ValueError("Grid and Data must be defined")
# if not hasattr(data, 'shape'):
# if grid.infer_location is None:
# raise ValueError('Data must be able to fit to the grid')
self.grid = grid
self.depth = depth
self.name = self._units = self._time = self._data = None
self.name = name
self.units = units
self.location = location
self.data = data
self.time = time
self.data_file = data_file
# the "main" filename for a Varibale should be the grid data.
self.filename = data_file
self.grid_file = grid_file
self.varname = varname
self._result_memo = collections.OrderedDict()
self.fill_value = fill_value
self.attributes = {} if attributes is None else attributes
# if the data is a netcdf variable, pull the attributes from there
try:
for attr in self.data.ncattrs():
self.attributes[attr] = data.getncattr(attr)
except AttributeError: # must not be a netcdf variable
pass # so just use what was passed in.
self.surface_boundary_condition = surface_boundary_condition
self.bottom_boundary_condition = bottom_boundary_condition
super(Variable, self).__init__(**kwargs)
# for k in kwargs:
# setattr(self, k, kwargs[k])
[docs]
@classmethod
def from_netCDF(cls,
filename=None,
varname=None,
grid_topology=None,
name=None,
units=None,
time=None,
time_origin=None,
displacement=None,
tz_offset=None,
grid=None,
depth=None,
dataset=None,
data_file=None,
grid_file=None,
location=None,
load_all=False, # Do we need this? I think not --- maybe a method to fully load later if wanted.
fill_value=0,
**kwargs
):
'''
Allows one-function creation of a Variable from a file.
:param filename: Default data source. Has lowest priority.
If dataset, grid_file, or data_file are provided,
this function uses them first
:type filename: string
:param varname: Explicit name of the data in the data source file.
Equivalent to the key used to look the item up
directly eg 'ds["lon_u"]' for a netCDF4 Dataset.
:type varname: string
:param grid_topology: Description of the relationship between grid
attributes and variable names.
:type grid_topology: {string : string, ...}
:param name: Name of this object
:type name: string
:param units: string such as 'm/s'
:type units: string
:param time: Time axis of the data. May be a constructed ``gridded.Time``
object, or collection of datetime.datetime objects
:type time: [] of datetime.datetime, netCDF4 Variable, or Time object
:param data: Underlying data object. May be any array-like,
including netCDF4 Variable, etc
:type data: netCDF4.Variable or numpy.array
:param grid: Grid that the data corresponds to
:type grid: pysgrid or pyugrid
:param location: The feature where the data aligns with the grid.
:type location: string
:param depth: Depth axis object from ``gridded.depth``
:type depth: Depth, S_Depth or L_Depth
:param dataset: Instance of open netCDF4.Dataset
:type dataset: netCDF4.Dataset
:param data_file: Name of data source file, if data and grid files are separate
:type data_file: string
:param grid_file: Name of grid source file, if data and grid files are separate
:type grid_file: string
:param tz_offset: offset to compensate for time zone shifts
:type tz_offset: `datetime.timedelta` or float or integer hours
:param origin: shifts the time interval to begin at the time specified
:type origin: `datetime.datetime`
:param displacement: displacement to apply to the time data.
Allows shifting entire time interval into future or past
:type displacement: `datetime.timedelta`
'''
Grid = cls._default_component_types['grid']
Time = cls._default_component_types['time']
Depth = cls._default_component_types['depth']
if filename is not None:
data_file = filename
grid_file = filename
ds, dg = parse_filename_dataset_args(filename=filename,
dataset=dataset,
grid_file=grid_file,
data_file=data_file)
if grid is None:
grid = Grid.from_netCDF(grid_file,
dataset=dg,
grid_topology=grid_topology)
if varname is None:
varname = cls._gen_varname(data_file,
dataset=ds)
if varname is None:
raise NameError('Default current names are not in the data file, '
'must supply variable name')
data = ds.variables[varname]
if name is None:
name = cls.__name__ + '_' + str(cls._instance_count)
cls._instance_count += 1
if units is None:
try:
units = data.units
except AttributeError:
units = None
if time is None:
timevarname = Time.locate_time_var_from_var(data)
if timevarname is None:
time = Time()
else:
time = Time.from_netCDF(
filename=data_file,
dataset=ds,
varname=timevarname,
# datavar=None,
tz_offset=tz_offset,
new_tz_offset=None,
origin=time_origin,
displacement=displacement
)
else:
timevarname = 1 if len(time) > 1 else 0
if depth is None:
istimevar = 0 if timevarname is None else 1
if (isinstance(grid, (Grid_S, Grid_R)) and len(data.shape) == 3 + istimevar or
isinstance(grid, Grid_U) and len(data.shape) == 2 + istimevar):
depth = Depth.from_netCDF(grid_file=dg,
dataset=ds,
time=time,
grid=grid,
**kwargs
)
if location is None:
if hasattr(data, 'location'):
location = data.location
# if len(data.shape) == 4 or (len(data.shape) == 3 and time is None):
# from gnome.environment.environment_objects import S_Depth
# depth = S_Depth.from_netCDF(grid=grid,
# depth=1,
# data_file=data_file,
# grid_file=grid_file,
# **kwargs)
if load_all:
data = data[:]
return cls(name=name,
units=units,
time=time,
data=data,
grid=grid,
depth=depth,
grid_file=grid_file,
data_file=data_file,
fill_value=fill_value,
location=location,
varname=varname,
**kwargs)
def __str__(self):
return self.__repr__()
def __repr__(self):
return ('{0.__class__.__module__}.{0.__class__.__name__}('
'name="{0.name}", \n'
'time="{0.time}", \n'
'units="{0.units}", \n'
'location="{0.location}" \n'
'data=Type:{1}, shape:{0.data.shape}", '
')').format(self, type(self.data))
[docs]
@classmethod
def constant(cls, value):
#Sets a Variable up to represent a constant scalar field. The result
#will return a constant value for all times and places.
Grid = Grid_S
_data = np.full((3,3), value)
_node_lon = np.array(([-360, 0, 360], [-360, 0, 360], [-360, 0, 360]))
_node_lat = np.array(([-89.95, -89.95, -89.95], [0, 0, 0], [89.95, 89.95, 89.95]))
_grid = Grid(node_lon=_node_lon, node_lat=_node_lat)
return cls(grid=_grid, data=_data, fill_value=value)
@property
def location(self):
if self._location is None and self.data is not None and hasattr(self.data, 'location'):
return self.data.location
else:
return self._location
@location.setter
def location(self, location):
# Fixme: perhaps we need Variable subclasses,
# to distingish between variable types.
if location not in VALID_LOCATIONS:
raise ValueError("Invalid location: {}, must be one of: {}".format(location, VALID_LOCATIONS))
self._location = location
@property
def info(self):
"""
Information about the variable object
This could be filled out more
"""
try:
std_name = self.attributes['standard_name']
except KeyError:
std_name = None
msg = """
Variable:
filename: {0.filename}
varname: {0.varname}
standard name: {1}
units: {0.units}
grid: {0.grid}
data shape: {0.data.shape}
""".format(self, std_name)
return dedent(msg)
@property
def time(self):
return self._time
@time.setter
def time(self, t):
Time_class = self.__class__._default_component_types['time']
if t is None:
self._time = None
return
if self.data is not None and len(t) != self.data.shape[0] and len(t) > 1:
raise ValueError("Data/time interval mismatch")
if isinstance(t, Time_class):
self._time = t
elif isinstance(t, collections.abc.Iterable) or isinstance(t, nc4.Variable):
self._time = Time_class(t)
else:
raise ValueError("Time must be set with an iterable container or netCDF variable")
@property
def data(self):
return self._data
@data.setter
def data(self, d):
d = asarraylike(d)
# Fixme: maybe all this checking should be done when it gets added to the Dataset??
if self.time is not None and len(d) != len(self.time):
raise ValueError("Data/time interval mismatch")
## fixme: we should check Depth, too.
# if self.grid is not None and self.grid.infer_location(d) is None:
# raise ValueError("Data/grid shape mismatch. Data shape is {0}, Grid shape is {1}".format(d.shape, self.grid.node_lon.shape))
if self.grid is not None: # if there is not a grid, we can't check this
if self.location is None: # not set, let's try to figure it out
self.location = self.grid.infer_location(d)
if self.location is None:
raise ValueError("Data/grid shape mismatch: Data shape is {0}, "
"Grid shape is {1}".format(d.shape, self.grid.node_lon.shape))
self._data = d
@property
def units(self):
'''
Units of underlying data
:rtype: string
'''
return self._units
@units.setter
def units(self, unit):
# if unit is not None:
# if not unit_conversion.is_supported(unit):
# raise ValueError('Units of {0} are not supported'.format(unit))
self._units = unit
@property
def grid_shape(self):
if hasattr(self.grid, 'shape'):
return self.grid.shape
else:
return self.grid.node_lon.shape
@property
def data_shape(self):
return self.data.shape
@property
def is_data_on_nodes(self):
return self.grid.infer_location(self._data) == 'node'
def _get_hash(self, points, time):
"""
Returns a SHA1 hash of the array of points passed in
"""
return (hashlib.sha1(points.tobytes()).hexdigest(),
hashlib.sha1(str(time).encode('utf-8')).hexdigest())
def _memoize_result(self, points, time, result, D, _copy=False, _hash=None):
if _copy:
result = result.copy()
result.setflags(write=False)
if _hash is None:
_hash = self._get_hash(points, time)
if D is not None and len(D) > 4:
D.popitem(last=False)
D[_hash] = result
D[_hash].setflags(write=False)
def _get_memoed(self, points, time, D, _copy=False, _hash=None):
if _hash is None:
_hash = self._get_hash(points, time)
if (D is not None and _hash in D):
return D[_hash].copy() if _copy else D[_hash]
else:
return None
[docs]
def center_values(self, time, units=None, extrapolate=False):
"""
interpolate data to the center of the cells
:param time: the time to interpolate at
**Warning:** NOT COMPLETE
NOTE: what if this data is already on the cell centers?
"""
raise NotImplementedError("center_values is not finished")
if not extrapolate:
self.time.valid_time(time)
if len(self.time) == 1:
if len(self.data.shape) == 2:
if isinstance(self.grid, Grid_S):
# curv grid
value = self.data[0:1:-2, 1:-2]
else:
value = self.data
else:
centers = self.grid.get_center_points()
value = self.at(centers, time, units)
return value
@property
def dimension_ordering(self):
'''
Returns a list that describes the dimensions of the property's data.
If a dimension_ordering is assigned, it will continue to use that.
If no dimension_ordering is set, then a default ordering will be generated
based on the object properties and data shape.
For example, if the data has 4 dimensions and is represented by a
Grid_S (structured grid), and the Variable has a depth and time assigned,
then the assumed ordering is ['time','depth','lon','lat']
If the data has 3 dimensions, self.grid is a Grid_S, and self.time is None,
then the ordering is ['depth','lon','lat']
If the data has 3 dimensions, self.grid is a Grid_U, the ordering is
['time','depth','ele']
'''
if not hasattr(self, '_order'):
self._order = None
if self._order is not None:
return self._order
else:
if isinstance(self.grid, (Grid_S, Grid_R)):
order = ['time', 'depth', 'lon', 'lat']
else:
order = ['time', 'depth', 'ele']
ndim = len(self.data.shape)
diff = len(order) - ndim
if diff == 0:
return order
elif diff == 1:
if self.time is not None:
del order[1]
elif self.depth is not None:
del order[0]
else:
raise ValueError('Generated ordering too short to fit data. '
'Time or depth must not be None')
elif diff == 2:
order = order[2:]
else:
raise ValueError('Too many/too few dimensions ndim={0}'.format(ndim))
return order
@dimension_ordering.setter
def dimension_ordering(self, order):
self._order = order
[docs]
def at(self,
points=None,
time=None,
units=None,
extrapolate=False,
lons=None,
lats=None,
unmask=False,
_hash=None,
_mem=True,
**kwargs):
"""
Find the value of the property at positions P at time T
:param points: Cartesian coordinates to be queried (P).
Lon, Lat required, Depth (Z) is optional
Coordinates must be organized as a 2D array or list,
one coordinate per row.
Failure to provide point data in this format may cause
unexpected behavior. If you wish to provide point data
using separate longitude and latitude arrays,
use the ``lons=`` and ``lats=`` kwargs. ::
[[Lon1, Lat1, Z1],
[Lon2, Lat2, Z2],
[Lon3, Lat3, Z3],
...]
:type points: Nx3 array of double
:param time: The time at which to query these points (T)
:type time: datetime.datetime object
:param units: units the values will be returned in (or converted to)
:type units: string such as ('m/s', 'knots', etc)
:param extrapolate: if True, extrapolation will be supported
:type extrapolate: boolean (default False)
:param unmask: If True and return array is a masked array, returns
filled array.
:type unmask: boolean (default False)
:param surface_boundary_condition: Specifies how to evaluate points
above the depth interval.
:type surface_boundary_condition: string ('extrapolate' or 'mask',
default 'extrapolate')
:param bottom_boundary_condition: Specifies how to evaluate points
below the depth interval.
:type bottom_boundary_condition: string ('extrapolate' or 'mask',
default 'extrapolate')
:param lons: 1D iterable of longitude values. This is ignored
if points is provided.
:type lons: iterable
:param lats: 1D iterable of latitude values. This is ignored
if points is provided
:type lons: iterable
:return: returns a Nx1 array of interpolated values
:rtype: double
If time is out of bounds of the time series, and extrapolate is False,
a gridded.time.OutOfTimeRangeError is raised.
"""
if points is None and (lons is None or lats is None):
raise ValueError("Must provide either points or separate lons and lats")
if points is None:
points = np.column_stack((np.array(lons), np.array(lats)))
pts = _reorganize_spatial_data(points)
if _hash is None:
_hash = self._get_hash(pts, time)
if _mem:
res = self._get_memoed(pts, time, self._result_memo, _hash=_hash)
if res is not None:
return res
order = self.dimension_ordering
if order[0] == 'time':
value = self._time_interp(pts, time, extrapolate, _mem=_mem, _hash=_hash, **kwargs)
elif order[0] == 'depth':
value = self._depth_interp(pts, time, extrapolate, _mem=_mem, _hash=_hash, **kwargs)
else:
value = self._xy_interp(pts, time, extrapolate, _mem=_mem, _hash=_hash, **kwargs)
value = value.reshape(-1,1)
if isinstance(value, np.ma.MaskedArray):
np.ma.set_fill_value(value, self.fill_value)
if unmask:
value = np.ma.filled(value)
if _mem:
self._memoize_result(pts, time, value, self._result_memo, _hash=_hash)
return value
interpolate = at #common request
def _xy_interp(self, points, time, extrapolate, slices=(), **kwargs):
'''
Uses the py(s/u)grid interpolation to determine the values at the points, and returns it
:param points: Coordinates to be queried (3D)
:type points: Nx3 array of double
:param time: Time of the query
:type time: datetime.datetime object
:param extrapolate: Turns extrapolation on or off
:type extrapolate: boolean
:param slices: describes how the data needs to be sliced to reach the appropriate dimension
:type slices: tuple of integers or slice objects
'''
_hash = kwargs['_hash'] if '_hash' in kwargs else None
units = kwargs['units'] if 'units' in kwargs else None
value = self.grid.interpolate_var_to_points(points[:, 0:2],
self.data,
location=self.location,
_hash=self._get_hash(points[:, 0:2],
time),
slices=slices,
_memo=True)
return value
def _time_interp(self, points, time, extrapolate, slices=(), **kwargs):
'''
Uses the Time object to interpolate the result of the next level of interpolation, as specified
by the dimension_ordering attribute.
:param points: Coordinates to be queried (3D)
:type points: Nx3 array of double
:param time: Time of the query
:type time: datetime.datetime object
:param extrapolate: Turns extrapolation on or off
:type extrapolate: boolean
:param slices: describes how the data needs to be sliced to reach the appropriate dimension
:type slices: tuple of integers or slice objects
'''
order = self.dimension_ordering
idx = order.index('time')
if order[idx + 1] != 'depth':
val_func = self._xy_interp
else:
val_func = self._depth_interp
if time == self.time.min_time or (extrapolate and time < self.time.min_time):
# min or before
return val_func(points, time, extrapolate, slices=(0,), ** kwargs)
elif time == self.time.max_time or (extrapolate and time > self.time.max_time):
return val_func(points, time, extrapolate, slices=(-1,), **kwargs)
else:
ind = self.time.index_of(time)
s1 = slices + (ind,)
s0 = slices + (ind - 1,)
v0 = val_func(points, time, extrapolate, slices=s0, **kwargs)
v1 = val_func(points, time, extrapolate, slices=s1, **kwargs)
alphas = self.time.interp_alpha(time, extrapolate)
return v0 + (v1 - v0) * alphas
def _depth_interp(self, points, time, extrapolate, slices=(), **kwargs):
'''
Uses the Depth object to interpolate the result of the next level of interpolation, as specified
by the dimension_ordering attribute.
:param points: Coordinates to be queried (3D)
:type points: Nx3 array of double
:param time: Time of the query
:type time: datetime.datetime object
:param extrapolate: Turns extrapolation on or off
:type extrapolate: boolean
:param slices: describes how the data needs to be sliced to reach the appropriate dimension
:type slices: tuple of integers or slice objects
'''
surface_boundary_condition = kwargs.pop('surface_boundary_condition', self.surface_boundary_condition)
bottom_boundary_condition = kwargs.pop('bottom_boundary_condition', self.bottom_boundary_condition)
order = self.dimension_ordering
dim_idx = order.index('depth')
if order[dim_idx + 1] != 'time':
val_func = self._xy_interp
else:
val_func = self._time_interp
d_indices, d_alphas = self.depth.interpolation_alphas(points,
time,
self.data.shape[1:],
extrapolate=extrapolate,
surface_boundary_condition=surface_boundary_condition,
bottom_boundary_condition=bottom_boundary_condition,
**kwargs)
# Check the surface index against the data shape to determine if we are using rho or w coordinates
surface_index = self.depth.surface_index
if self.depth.surface_index == self.data.shape[dim_idx]:
surface_index -= 1
if isinstance(d_indices, np.ma.MaskedArray) and np.all(d_indices.mask):
# all particles ended up masked
rtv = np.empty((points.shape[0],), dtype=np.float64) * np.nan
rtv = np.ma.MaskedArray(data=rtv, mask=np.isnan(rtv))
return rtv
#the two cases may be optimizations that are not worth the trouble
#if problems continue to arise, get rid of them
#they are *meant* to handle cases where the particles are 'off grid'
#
elif np.all(d_indices == 0) and not np.any(d_indices.mask):
#all particles are
return val_func(points, time, extrapolate, slices=slices + (0,), **kwargs)
elif np.all(d_indices == self.data.shape[dim_idx] - 1) and not np.any(d_indices.mask):
return val_func(points, time, extrapolate, slices=slices + (self.data.shape[dim_idx] - 1,), **kwargs)
else:
msk = np.isnan(d_indices) if not np.ma.is_masked(d_indices) else d_indices.mask
values = np.ma.MaskedArray(data=np.empty((points.shape[0], )) * np.nan, mask=msk)
# Points are mixed within the grid. Some may be above the surface or under the ground
uniq_idx = np.unique(d_indices)
if np.ma.masked in uniq_idx: #the [0:-1] is required to skip all masked indices
uniq_idx = uniq_idx[0:-1]
for idx in uniq_idx:
lay_idxs = np.where(d_indices == idx)[0]
if idx == self.data.shape[dim_idx] - 1:
#special case, index == depth dim length, so only v0 is valid
v0 = val_func(points[lay_idxs], time, extrapolate, slices=slices + (idx,), **kwargs)
values.put(lay_idxs, v0)
continue
v1 = val_func(points[lay_idxs], time, extrapolate, slices=slices + (idx+1,), **kwargs)
v0 = val_func(points[lay_idxs], time, extrapolate, slices=slices + (idx,), **kwargs)
sub_vals = v0 + (v1 - v0) * d_alphas[lay_idxs]
#partially fill the values array for this layer
values.put(lay_idxs, sub_vals)
return values
def _transect(self, times, depths, points):
'''
returns a transect of the Variable at given values.
This function is not close to finished.
'''
output_shape = (len(times), len(depths), len(points))
outarr = np.array(shape=output_shape)
for t in range(0,len(times)):
for d in range(0, len(depths)):
pts = np.array(shape=(len(points),3))
pts[:,0:2] = points
pts[:,2] = depths[d]
layer = d
@classmethod
def _gen_varname(cls,
filename=None,
dataset=None,
names_list=None,
std_names_list=None):
"""
Function to find the default variable names if they are not provided. This
function does nothing without defined default_names or cf_names class
attributes
:param filename: Name of file that will be searched for variables
:type filename: string
:param dataset: Existing instance of a netCDF4.Dataset
:type dataset: netCDF.Dataset
:return: name of first netCDF4.Variable that matches
"""
df = None
if dataset is not None:
df = dataset
else:
df = get_dataset(filename)
if names_list is None:
names_list = cls.default_names
if std_names_list is None:
std_names_list = cls.cf_names
for n in names_list:
if n in df.variables.keys():
return n
for n in std_names_list:
for var in df.variables.values():
if (hasattr(var, 'standard_name') and var.standard_name == n or
hasattr(var, 'long_name') and var.long_name == n):
return var.name
raise ValueError("Default names not found.")
[docs]
class VectorVariable(object):
# Fixme: a lot of code duplication in here
# Keys are component names ('u', 'v', etc) and values are the netCDF4 names.
# eg {'u': ['u', 'U', 'eastward_sea_water_velocity']}
default_names = {}
# Keys are component names ('u', 'v', etc) and values are the CF names.
# eg {'u': ['u', 'U', 'eastward_sea_water_velocity']}
cf_names = {}
# This list of strings specify names for each component of the vector variable.
# The names should be the same as keys in default_names and cf_names
# for example, ['u', 'v'] will allow vv.u and vv.v to be used to access the components
# instead of vv.variables[0] and vv.variables[1]
# An error will raise if comp_order is longer than the number of components (vv.variables)
# upon object initialization
comp_order = []
_instance_count = 0
''''
These are the classes which are used when internal components are created
by default, such as automatically from a file or other python data structure.
Subclasses of this type may override this to use different classes for it's
components
'''
_default_component_types = {'time': Time,
'grid': Grid,
'depth': Depth,
'variable': Variable}
def __init__(self,
name=None,
units=None,
time=None,
variables=None,
grid=None,
depth=None,
grid_file=None,
data_file=None,
dataset=None,
varnames=None,
**kwargs):
super(VectorVariable, self).__init__()
self.name = self._units = self._time = self._variables = None
self.name = name
if all([isinstance(v, Variable) for v in variables]):
if time is not None and not isinstance(time, Time):
time = Time(time)
units = variables[0].units if units is None else units
time = variables[0].time if time is None else time
self._time = time
if units is None:
units = variables[0].units
self._units = units
if variables is None or len(variables) < 2:
raise ValueError('Variables must be an array-like of 2 or more Variable objects')
self.variables = variables
unused_args = kwargs.keys() if kwargs is not None else None
if len(unused_args) > 0:
kwargs = {}
if isinstance(self.variables[0], Variable):
self.grid = self.variables[0].grid if grid is None else grid
self.depth = self.variables[0].depth if depth is None else depth
self.grid_file = self.variables[0].grid_file if grid_file is None else grid_file
self.data_file = self.variables[0].data_file if data_file is None else data_file
self._result_memo = collections.OrderedDict()
for i, comp in enumerate(self.__class__.comp_order):
setattr(self, comp, self.variables[i])
[docs]
@classmethod
def from_netCDF(cls,
filename=None,
varnames=None,
grid_topology=None,
name=None,
units=None,
time=None,
time_origin=None,
displacement=None,
tz_offset=None,
grid=None,
depth=None,
data_file=None,
grid_file=None,
dataset=None,
load_all=False,
variables=None,
**kwargs
):
'''
Allows one-function creation of a VectorVariable from a file.
:param filename: Default data source. Parameters below take precedence
:type filename: string
:param varnames: Names of the variables in the data source file
:type varnames: [] of string
:param grid_topology: Description of the relationship between grid attributes and variable names.
:type grid_topology: {string : string, ...}
:param name: Name of property
:type name: string
:param units: Units
:type units: string
:param time: Time axis of the data
:type time: [] of datetime.datetime, netCDF4 Variable, or Time object
:param tz_offset: offset to compensate for time zone shifts
:type tz_offset: `datetime.timedelta` or float or integer hours
:param origin: shifts the time interval to begin at the time specified
:type origin: `datetime.datetime`
:param displacement: displacement to apply to the time data.
Allows shifting entire time interval into future or past
:type displacement: `datetime.timedelta`
:param data: Underlying data source
:type data: netCDF4.Variable or numpy.array
:param grid: Grid that the data corresponds with
:type grid: pysgrid or pyugrid
:param dataset: Instance of open Dataset
:type dataset: netCDF4.Dataset
:param data_file: Name of data source file
:type data_file: string
:param grid_file: Name of grid source file
:type grid_file: string
'''
Grid = cls._default_component_types['grid']
Time = cls._default_component_types['time']
Variable = cls._default_component_types['variable']
Depth = cls._default_component_types['depth']
if filename is not None:
data_file = filename
grid_file = filename
ds, dg = parse_filename_dataset_args(filename=filename,
dataset=dataset,
grid_file=grid_file,
data_file=data_file)
if grid is None:
grid = Grid.from_netCDF(grid_file,
dataset=dg,
grid_topology=grid_topology)
if varnames is None:
varnames = cls._gen_varnames(data_file,
dataset=ds)
if all([v is None for v in varnames]):
raise ValueError('No compatible variable names found!')
if name is None:
name = cls.__name__ + '_' + str(cls._instance_count)
cls._instance_count += 1
data = ds[varnames[0]]
if time is None:
timevarname = Time.locate_time_var_from_var(data)
if timevarname is None:
time = Time()
else:
time = Time.from_netCDF(
filename=data_file,
dataset=ds,
varname=timevarname,
# datavar=None,
tz_offset=tz_offset,
new_tz_offset=None,
origin=time_origin,
displacement=displacement
)
else:
timevarname = 1 if len(time) > 1 else 0
if depth is None:
istimevar = 0 if timevarname is None else 1
if (isinstance(grid, (Grid_S, Grid_R)) and len(data.shape) == 3 + istimevar or
isinstance(grid, Grid_U) and len(data.shape) == 2 + istimevar):
depth = Depth.from_netCDF(grid_file=dg,
dataset=ds,
time=time,
grid=grid,
**kwargs
)
if variables is None:
variables = []
for vn in varnames:
if vn is not None:
variables.append(Variable.from_netCDF(filename=filename,
varname=vn,
grid_topology=grid_topology,
units=units,
time=time,
grid=grid,
depth=depth,
data_file=data_file,
grid_file=grid_file,
dataset=ds,
load_all=load_all,
location=None,
**kwargs))
if units is None:
units = [v.units for v in variables]
if all(u == units[0] for u in units):
units = units[0]
return cls(name=name,
filename=filename,
varnames=varnames,
grid_topology=grid_topology,
units=units,
time=time,
grid=grid,
depth=depth,
variables=variables,
data_file=data_file,
grid_file=grid_file,
dataset=ds,
load_all=load_all,
location=None,
**kwargs)
@classmethod
def _gen_varnames(cls,
filename=None,
dataset=None,
names_dict=None,
std_names_dict=None):
"""
Function to find the default variable names if they are not provided.
:param filename: Name of file that will be searched for variables
:type filename: string
:param dataset: Existing instance of a netCDF4.Dataset
:type dataset: netCDF.Dataset
:return: dict of component to name mapping (eg {'u': 'water_u', 'v': 'water_v', etc})
"""
df = None
if dataset is not None:
df = dataset
else:
df = get_dataset(filename)
if names_dict is None:
names_dict = cls.default_names
if std_names_dict is None:
std_names_dict = cls.cf_names
rd = {}
for k in cls.comp_order:
v = names_dict[k] if k in names_dict else []
for n in v:
if n in df.variables.keys():
rd[k] = n
continue
if k not in rd.keys():
rd[k] = None
for k in cls.comp_order:
v = std_names_dict[k] if k in std_names_dict else []
if rd[k] is None:
for n in v:
for var in df.variables.values():
if (hasattr(var, 'standard_name') and var.standard_name == n or
hasattr(var, 'long_name') and var.long_name == n):
rd[k] = var.name
break
return collections.namedtuple('varnames', cls.comp_order)(**rd)
def __str__(self):
return self.__repr__()
def __repr__(self):
return ('{0.__class__.__module__}.{0.__class__.__name__}('
'name="{0.name}", '
'time="{0.time}", '
'units="{0.units}", '
'variables="{0.variables}", '
'grid="{0.grid}", '
')').format(self)
@property
def location(self):
return [v.location for v in self.variables]
locations = location
@property
def is_data_on_nodes(self):
return self.grid.infer_location(self.variables[0].data) == 'node'
@property
def time(self):
return self._time
@time.setter
def time(self, t):
Time_class = self.__class__._default_component_types['time']
if self.variables is not None:
for v in self.variables:
try:
v.time = t
except ValueError as e:
raise ValueError('''Time was not compatible with variables.
Set variables attribute to None to allow changing other attributes
Original error: {0}'''.format(str(e)))
if isinstance(t, Time_class):
self._time = t
elif isinstance(t, collections.Iterable) or isinstance(t, nc4.Variable):
self._time = Time_class(t)
else:
raise ValueError("Time must be set with an iterable container or netCDF variable")
@property
def units(self):
'''
Units of underlying data
:rtype: string
'''
if hasattr(self._units, '__iter__'):
if len(set(self._units)) > 1:
return self._units
else:
return self._units[0]
else:
return self._units
@units.setter
def units(self, unit):
self._units = unit
if self.variables is not None:
for v in self.variables:
v.units = unit
@property
def varnames(self):
'''
Names of underlying variables
:rtype: [] of strings
'''
return [v.varname if hasattr(v, 'varname') else v.name for v in self.variables]
@property
def data_shape(self):
if self.variables is not None:
return self.variables[0].data.shape
else:
return None
def _get_hash(self, points, time):
"""
Returns a SHA1 hash of the array of points passed in
"""
return (hashlib.sha1(points.tobytes()).hexdigest(),
hashlib.sha1(str(time).encode('utf-8')).hexdigest())
def _memoize_result(self, points, time, result, D, _copy=True, _hash=None):
if _copy:
result = result.copy()
result.setflags(write=False)
if _hash is None:
_hash = self._get_hash(points, time)
if D is not None and len(D) > 8:
D.popitem(last=False)
D[_hash] = result
def _get_memoed(self, points, time, D, _copy=True, _hash=None):
if _hash is None:
_hash = self._get_hash(points, time)
if (D is not None and _hash in D):
return D[_hash].copy() if _copy else D[_hash]
else:
return None
[docs]
def at(self,
points=None,
time=None,
units=None,
extrapolate=False,
lons=None,
lats=None,
unmask=False,
_hash=None,
_mem=True,
**kwargs):
"""
Find the value of the property at positions P at time T
:param points: Cartesian coordinates to be queried (P). Lon, Lat required, Depth (Z) is optional
Coordinates must be organized as a 2D array or list, one coordinate per row or list element.
::
[[Lon1, Lat1, Z1],
[Lon2, Lat2, Z2],
[Lon3, Lat3, Z3],
...]
Failure to provide point data in this format may cause unexpected behavior
If you wish to provide point data using separate longitude and latitude arrays,
use the `lons=` and `lats=` kwargs.
:type points: Nx3 array of double
:param time: The time at which to query these points (T)
:type time: `datetime.datetime` object
:param units: units the values will be returned in (or converted to)
:type units: string such as ('m/s', 'knots', etc)
:param extrapolate: if True, extrapolation will be supported
:type extrapolate: boolean (default False)
:param unmask: if True and return array is a masked array, returns filled array
:type unmask: boolean (default False)
:param zero_ref: Specifies whether the zero datum moves with zeta or not. Only
applicable if depth dimension is present with full sigma layers
:type zero_ref: string 'absolute' or 'relative'
:param lons: 1D iterable of longitude values. This is ignored if points is provided
:type lons: iterable
:param lats 1D iterable of latitude values. This is ignored if points is provided
:type lons: iterable
:return: NxM array of interpolated values N = len(points) M = len(self.variables)
:rtype: np.array or np.ma.MaskedArray
If time is out of bounds of the time series, and extrapolate is False, a
gridded.time.OutOfTimeRangeError is raised.
"""
if points is None and (lons is None or lats is None):
raise ValueError("Must provide either points or separate lons and lats")
if points is None:
points = np.column_stack((np.array(lons), np.array(lats)))
pts = _reorganize_spatial_data(points)
if _hash is None:
_hash = self._get_hash(pts, time)
if _mem:
res = self._get_memoed(pts, time, self._result_memo, _hash=_hash)
if res is not None:
return res
value = np.ma.column_stack([var.at(points=pts,
time=time,
units=units,
extrapolate=extrapolate,
unmask=unmask,
_mem=_mem,
_hash=_hash,
**kwargs) for var in self.variables])
if _mem:
self._memoize_result(pts, time, value, self._result_memo, _hash=_hash)
return value
@classmethod
def _get_shared_vars(cls, *sh_args):
default_shared = ['dataset', 'data_file', 'grid_file', 'grid']
if len(sh_args) != 0:
shared = sh_args
else:
shared = default_shared
def getvars(func):
@wraps(func)
def wrapper(*args, **kws):
def _mod(n):
k = kws
s = shared
return (n in s) and ((n not in k) or (n in k and k[n] is None))
if 'filename' in kws and kws['filename'] is not None:
kws['data_file'] = kws['grid_file'] = kws['filename']
ds = dg = None
if _mod('dataset'):
if 'grid_file' in kws and 'data_file' in kws:
if kws['grid_file'] == kws['data_file']:
ds = dg = get_dataset(kws['grid_file'])
else:
ds = get_dataset(kws['data_file'])
dg = get_dataset(kws['grid_file'])
kws['dataset'] = ds
else:
if 'grid_file' in kws and kws['grid_file'] is not None:
dg = get_dataset(kws['grid_file'])
else:
dg = kws['dataset']
ds = kws['dataset']
if _mod('grid'):
gt = kws.get('grid_topology', None)
kws['grid'] = Grid.from_netCDF(kws['filename'], dataset=dg, grid_topology=gt)
if kws.get('varnames', None) is None:
varnames = cls._gen_varnames(kws['data_file'],
dataset=ds)
# if _mod('time'):
# time = Time.from_netCDF(filename=kws['data_file'],
# dataset=ds,
# varname=data)
# kws['time'] = time
return func(*args, **kws)
return wrapper
return getvars
[docs]
def save(self, filepath, format='netcdf4'):
"""
Save the variable object to a netcdf file.
:param filepath: path to file you want o save to. or a writable
netCDF4 Dataset An existing one
If a path, an existing file will be clobbered.
:type filepath: string
Follows the convention established by the netcdf UGRID working group:
http://ugrid-conventions.github.io/ugrid-conventions
"""
format_options = ('netcdf3', 'netcdf4')
if format not in format_options:
raise ValueError("format: {} not supported. Options are: {}".format(format, format_options))