Source code for gridded.grids


from gridded.pysgrid.sgrid import SGrid
from gridded.pyugrid.ugrid import UGrid
import numpy as np

from gridded.utilities import get_dataset  # , gen_celltree_mask_from_center_mask


[docs]class GridBase(object): ''' Base object for grids to share common behavior ''' _def_count = 0 def __init__(self, filename=None, *args, **kwargs): """ Init common to all Grid types. This initializer will take all the kwargs of both pyugrid.UGrid and pysgrid.SGrid. See their documentation for details :param filename: Name of the file this grid was constructed from, if available. """ if 'name' in kwargs: self.name = kwargs['name'] else: self.name = self.__class__.__name__ + '_' + str(type(self)._def_count) self.filename = filename type(self)._def_count += 1 super(GridBase, self).__init__(**kwargs)
[docs] @classmethod def from_netCDF(cls, *args, **kwargs): kwargs['grid_type'] = cls return Grid.from_netCDF(*args, **kwargs)
@classmethod def _find_required_grid_attrs(cls, filename, dataset=None, grid_topology=None,): ''' This function is the top level 'search for attributes' function. If there are any common attributes to all potential grid types, they will be sought here. This function returns a dict, which maps an attribute name to a netCDF4 Variable or numpy array object extracted from the dataset. When called from Grid_U or Grid_S, this function should provide all the kwargs needed to create a valid instance. ''' gf_vars = dataset.variables if dataset is not None else get_dataset(filename).variables gf_vars = dict([(k.lower(), v) for k, v in gf_vars.items()]) init_args = {} gt = {} init_args['filename'] = filename node_attrs = ['node_lon', 'node_lat'] node_coord_names = [['node_lon', 'node_lat'], ['lon', 'lat'], ['lon_psi', 'lat_psi'], ['longitude', 'latitude']] composite_node_names = ['nodes', 'node'] if grid_topology is None: for n1, n2 in node_coord_names: if n1 in gf_vars and n2 in gf_vars: init_args[node_attrs[0]] = gf_vars[n1][:] init_args[node_attrs[1]] = gf_vars[n2][:] gt[node_attrs[0]] = n1 gt[node_attrs[1]] = n2 break if node_attrs[0] not in init_args: for n in composite_node_names: if n in gf_vars: v = gf_vars[n][:].reshape(-1, 2) init_args[node_attrs[0]] = v[:, 0] init_args[node_attrs[1]] = v[:, 1] gt['node_coordinates'] = n break if node_attrs[0] not in init_args: raise ValueError('Unable to find node coordinates.') else: for n, v in grid_topology.items(): if n in node_attrs: init_args[n] = gf_vars[v][:] if n in composite_node_names: v = gf_vars[n][:].reshape(-1, 2) init_args[node_attrs[0]] = v[:, 0] init_args[node_attrs[1]] = v[:, 1] return init_args, gt @property def shape(self): return self.node_lon.shape def _write_grid_to_file(self, pth): self.save_as_netcdf(pth)
[docs] def import_variable(self, variable, location='node'): """ Takes a Variable or VectorVariable and interpolates the data onto this grid. You may pass a location ('nodes', 'faces', 'edge1', 'edge2) and the variable will be interpolated there if possible If no location is passed, the variable will be interpolated to the nodes of this grid. If the Variable's grid and this grid are the same, this function will return the Variable unchanged. If this grid covers area that the source grid does not, all values in this area will be masked. If regridding from cell centers to the nodes, The values of any border point not within will be equal to the value at the center of the border cell. """ raise NotImplementedError("GridBase cannot interpolate variables to itself")
[docs]class Grid_U(GridBase, UGrid): @classmethod def _find_required_grid_attrs(cls, filename, dataset=None, grid_topology=None): gf_vars = dataset.variables if dataset is not None else get_dataset(filename).variables gf_vars = dict([(k.lower(), v) for k, v in gf_vars.items()]) # Get superset attributes init_args, gt = super(Grid_U, cls)._find_required_grid_attrs(filename=filename, dataset=dataset, grid_topology=grid_topology) face_attrs = ['faces'] face_var_names = ['faces', 'tris', 'nv', 'ele', 'nele'] if grid_topology is None: for n in face_var_names: if n in gf_vars: init_args[face_attrs[0]] = gf_vars[n][:] gt[face_attrs[0]] = n break if face_attrs[0] not in init_args: raise ValueError('Unable to find face connectivity array.') else: for n, v in grid_topology.items(): if n in face_attrs: init_args[n] = gf_vars[v][:] break # fixme: This is assuming that the array will be in Fortran order and index # from 1, or in C order and index from 0 # Those are actually independent concepts! if init_args['faces'].shape[0] == 3: init_args['faces'] = np.ascontiguousarray(np.array(init_args['faces']).T - 1) print("found grid vars:", init_args, gt) return init_args, gt
# @classmethod # def gen_from_quads(cls, nodes): # # Fixme: this looks incomplete -- used anywhere? # if not len(nodes.shape) == 3: # raise ValueError('Nodes of a quad grid must be 2 dimensional') # lin_nodes = None # if isinstance(nodes, np.ma.MaskedArray): # lin_nodes = nodes.reshape(-1, 2)[nodes]
[docs]class Grid_S(GridBase, SGrid): @classmethod def _find_required_grid_attrs(cls, filename, dataset=None, grid_topology=None): # THESE ARE ACTUALLY ALL OPTIONAL. This should be migrated when optional attributes # are dealt with # Get superset attributes # get_datset is not defined -- this must not be used gf_vars = dataset.variables if dataset is not None else get_dataset(filename).variables gf_vars = dict([(k.lower(), v) for k, v in gf_vars.items()]) init_args, gt = super(Grid_S, cls)._find_required_grid_attrs(filename, dataset=dataset, grid_topology=grid_topology) center_attrs = ['center_lon', 'center_lat'] edge1_attrs = ['edge1_lon', 'edge1_lat'] edge2_attrs = ['edge2_lon', 'edge2_lat'] node_mask = 'node_mask' center_mask = 'center_mask' edge1_mask = 'edge1_mask' edge2_mask = 'edge2_mask' center_coord_names = [['center_lon', 'center_lat'], ['lon_rho', 'lat_rho'], ['lonc', 'latc']] edge1_coord_names = [['edge1_lon', 'edge1_lat'], ['lon_u', 'lat_u']] edge2_coord_names = [['edge2_lon', 'edge2_lat'], ['lon_v', 'lat_v']] node_mask_names = ['mask_psi'] center_mask_names = ['mask_rho'] edge1_mask_names = ['mask_u'] edge2_mask_names = ['mask_v'] if grid_topology is None: for attr, names, maskattr, maskname in zip( (center_attrs, edge1_attrs, edge2_attrs), (center_coord_names, edge1_coord_names, edge2_coord_names), (center_mask, edge1_mask, edge2_mask), (center_mask_names, edge1_mask_names, edge2_mask_names)): for n1, n2 in names: if n1 in gf_vars and n2 in gf_vars: mask = False # for n in maskname: # if n in gf_vars: # mask = gen_mask(gf_vars[n]) a1 = gf_vars[n1][:] a2 = gf_vars[n2][:] init_args[attr[0]] = a1 init_args[attr[1]] = a2 if maskname[0] in gf_vars: init_args[maskattr] = gf_vars[maskname[0]] gt[maskattr] = maskname[0] gt[attr[0]] = n1 gt[attr[1]] = n2 break if 'node_lon' in init_args and 'node_lat' in init_args: mask = False # fixme -- is mask used?? for name in node_mask_names: if name in gf_vars: init_args[node_mask] = gf_vars[name] gt[node_mask] = name else: for n, v in grid_topology.items(): if n in center_attrs + edge1_attrs + edge2_attrs and v in gf_vars: init_args[n] = gf_vars[v][:] return init_args, gt
[docs]class Grid_R(GridBase): """ Rectangular Grid lon and lat of the nodes are vectors """ def __init__(self, node_lon=None, node_lat=None, grid_topology=None, node_dimensions=None, node_coordinates=None, *args, **kwargs): """ :param node_lon=None: vector of the node longitudes :param node_lat=None: vector of the node latitudes :param grid_topology=None: ???? :param node_dimensions=None: (should only be required for netcdf) :param node_coordinates=None: ????? """ self.node_lon = node_lon self.node_lat = node_lat self.grid_topology = grid_topology if self.grid_topology is not None: self.dimensions = [grid_topology['node_lat'], grid_topology['node_lon']] else: self.dimensions = ['lat', 'lon'] self.node_dimensions = node_dimensions self.node_coordinates = node_coordinates super(Grid_R, self).__init__(*args, **kwargs) @classmethod def _find_required_grid_attrs(cls, filename, dataset=None, grid_topology=None): # THESE ARE ACTUALLY ALL OPTIONAL. This should be migrated when optional attributes # are dealt with # Get superset attributes gf_vars = dataset.variables if dataset is not None else get_dataset(filename).variables gf_vars = dict([(k.lower(), v) for k, v in gf_vars.items()]) init_args, gt = super(Grid_R, cls)._find_required_grid_attrs(filename, dataset=dataset, grid_topology=grid_topology) # Grid_R only needs node_lon and node_lat. # However, they must be a specific shape (1D) node_lon = init_args['node_lon'] node_lat = init_args['node_lat'] if len(node_lon.shape) != 1: raise ValueError('Too many dimensions in node_lon. ' 'Must be 1D, was {0}D'.format(len(node_lon.shape))) if len(node_lat.shape) != 1: raise ValueError('Too many dimensions in node_lat. ' 'Must be 1D, was {0}D'.format(len(node_lat.shape))) return init_args, gt @property def nodes(self): return np.stack((np.meshgrid(self.node_lon, self.node_lat)), axis=-1) @property def center_lon(self): return (self.node_lon[0:-1] + self.node_lon[1:]) / 2 @property def center_lat(self): return (self.node_lat[0:-1] + self.node_lat[1:]) / 2 @property def centers(self): return np.stack((np.meshgrid(self.center_lon, self.center_lat)), axis=-1)
[docs] def locate_faces(self, points): """ Returns the node grid indices, one per point. Points that are not on the node grid will have an index of -1 If a single point is passed in, a single index will be returned. If a sequence of points is passed in an array of indexes will be returned. :param points: The points that you want to locate -- (lon, lat). If the shape of point is 1D, function will return a scalar index. If it is 2D, it will return a 1D array of indices. :type points: array-like containing one or more points: shape (2,) for one point, shape (N, 2) for more than one point. """ points = np.asarray(points, dtype=np.float64) just_one = (points.ndim == 1) points = points.reshape(-1, 2) lons = points[:, 0] lats = points[:, 1] lon_idxs = np.digitize(lons, self.node_lon) - 1 for i, n in enumerate(lon_idxs): if n == len(self.node_lon) - 1: lon_idxs[i] = -1 # if n == 0 and not lons[i] < self.node_lon.max() and not lons[i] >= self.node_lon.min(): # lon_idxs[i] = -1 lat_idxs = np.digitize(lats, self.node_lat) - 1 for i, n in enumerate(lat_idxs): if n == len(self.node_lat) - 1: lat_idxs[i] = -1 # if n == 0 and not lats[i] < self.node_lat.max() and not lats[i] >= self.node_lat.min(): # lat_idxs[i] = -1 idxs = np.column_stack((lon_idxs, lat_idxs)) idxs[:, 0] = np.where(idxs[:, 1] == -1, -1, idxs[:, 0]) idxs[:, 1] = np.where(idxs[:, 0] == -1, -1, idxs[:, 1]) if just_one: res = idxs[0] return res else: return idxs
[docs] def lonlat_to_yx(self, variable): ''' The RegualarGridInterpolator needs to have its two dimensions x and y be associated correctly with lon and lat (or vice versa). The order depends on the orientation in the variable if the variable provided does not have a dimensions attribute, it will use the dimensions arg ''' retval = (self.node_lat, self.node_lon) var_shape = variable.shape[-2::] grid_shape = (len(self.node_lat), len(self.node_lon)) if (hasattr(variable, 'dimensions')): if not all([k in self.dimensions for k in variable.dimensions[-2:]]): raise ValueError('Dimension provided by variable is not compatible \ with this Grid_R object. Provided: {0} \ self.dimensions: {1}'.format(variable.dimensions, self.dimensions)) var_dims = variable.dimensions[-2:] # assume the last two are the lon/lat x/y else: var_dims = self.dimensions # self.dimensions is always [y(lat), x(lon)], # so if var.dimensions is [lon, lat] we need # to reverse x/y association if not all([dlen in grid_shape for dlen in var_shape]): raise ValueError('Incompatible dimensions. ' 'Variable: {0}, Grid_R: {1}'.format(variable.shape, grid_shape)) if hasattr(variable, 'dimensions'): if var_dims[0] == self.dimensions[1]: # case 2, dims provided, dims swapped retval = retval[::-1] # else: case 1, no change else: if var_shape[0] == var_shape[1]: # case 4, dims not provided, dim length same raise ValueError('Provided square variable with no dimensions attribute') # case 3, dims not provided, dim length different if var_shape[0] == len(self.node_lon): retval = retval[::-1] return retval
[docs] def interpolate_var_to_points(self, points, variable, method='linear', indices=None, slices=None, mask=None, **kwargs): try: from scipy.interpolate import RegularGridInterpolator except ImportError: raise ImportError("The scipy package is required to use " "Grid_R.interpolate_var_to_points\n" " -- interpolating a regular grid") points = np.asarray(points, dtype=np.float64) just_one = (points.ndim == 1) points = points.reshape(-1, 2) y, x = self.lonlat_to_yx(variable) if slices is not None: variable = variable[slices] if np.ma.isMA(variable): variable = variable.filled(0) # eventually should use Variable fill value interp_func = RegularGridInterpolator((y, x), variable, method=method, bounds_error=False, fill_value=0) if y is self.node_lon: vals = interp_func(points, method=method) else: vals = interp_func(points[:, ::-1], method=method) if just_one: return vals[0] else: return vals
[docs] def infer_location(self, variable): """ fixme: should first look for "location" attribute. But now we are checking variable dimensions to which part of the grid it is on. """ shape = None node_shape = self.nodes.shape[0:-1] # centers_shape = self.centers.shape[0:-1] try: shape = np.array(variable.shape) except: # fixme -- AttributeError?? return None # Variable has no shape attribute! if len(variable.shape) < 2: return None difference = (shape[-2:] - node_shape).tolist() if (difference == [1, 1] or difference == [-1, -1]) and self.center_lon is not None: return 'center' elif difference == [1, 0] and self.edge1_lon is not None: return 'edge1' elif difference == [0, 1] and self.edge2_lon is not None: return 'edge2' elif difference == [0, 0] and self.node_lon is not None: return 'node' else: return None
[docs]class Grid(object): ''' Factory class that generates grid objects. Also handles common loading and parsing operations ''' def __init__(self): ''' Init common to all Grid types. This constructor will take all the kwargs of both pyugrid.UGrid and pysgrid.SGrid. See their documentation for details :param filename: Name of the file this grid was constructed from, if available. ''' raise NotImplementedError("Grid is not meant to be instantiated. " "Please use the from_netCDF function. " "or initialize the type of grid you want directly") @staticmethod def _load_grid(filename, grid_type, dataset=None): ''' Redirect to grid-specific loading routine. ''' if issubclass(grid_type, UGrid): return grid_type.from_ncfile(filename) elif issubclass(grid_type, SGrid): ds = get_dataset(filename, dataset) g = grid_type.load_grid(ds) g.filename = filename return g else: return grid_type.from_ncfile(filename) pass
[docs] @staticmethod def from_netCDF(filename=None, dataset=None, grid_type=None, grid_topology=None, _default_types=(('ugrid', Grid_U), ('sgrid', Grid_S), ('rgrid', Grid_R)), *args, **kwargs): ''' :param filename: File containing a grid :param dataset: Takes precedence over filename, if provided. :param grid_type: Must be provided if Dataset does not have a 'grid_type' attribute, or valid topology variable :param grid_topology: A dictionary mapping of grid attribute to variable name. Takes precedence over discovered attributes :param kwargs: All kwargs to SGrid, UGrid, or RGrid are valid, and take precedence over all. :returns: Instance of Grid_U, Grid_S, or Grid_R ''' gf = dataset if filename is None else get_dataset(filename, dataset) if gf is None: raise ValueError('No filename or dataset provided') cls = grid_type if (grid_type is None or isinstance(grid_type, str) or not issubclass(grid_type, GridBase)): cls = Grid._get_grid_type(gf, grid_type, grid_topology, _default_types) # if grid_topology is passed in, don't look for the variable if not grid_topology: compliant = Grid._find_topology_var(None, gf) else: compliant = None if compliant is not None: c = Grid._load_grid(filename, cls, dataset) c.grid_topology = compliant.__dict__ else: init_args, gt = cls._find_required_grid_attrs(filename, dataset=dataset, grid_topology=grid_topology) c = cls(grid_topology=gt, **init_args) return c
@staticmethod def _get_grid_type(dataset, grid_type=None, grid_topology=None, _default_types=(('ugrid', Grid_U), ('sgrid', Grid_S), ('rgrid', Grid_R))): # fixme: this logic should probably be deferred to # the grid type code -- that is, ask each grid # type if this dataset is its type. # # It also should be refactored to start with the standards # and maybe have a pedantic mode where it won't load non-standard # files if _default_types is None: _default_types = dict() else: _default_types = dict(_default_types) Grid_U = _default_types.get('ugrid', None) Grid_S = _default_types.get('sgrid', None) Grid_R = _default_types.get('rgrid', None) sgrid_names = ['sgrid', 'pygrid_s', 'staggered', 'curvilinear', 'roms'] ugrid_names = ['ugrid', 'pygrid_u', 'triangular', 'unstructured'] rgrid_names = ['rgrid', 'regular', 'rectangular', 'rectilinear'] if grid_type is not None: if grid_type.lower() in sgrid_names: return Grid_S elif grid_type.lower() in ugrid_names: return Grid_U elif grid_type.lower() in rgrid_names: return Grid_R else: raise ValueError('Specified grid_type not recognized/supported') if grid_topology is not None: if ('faces' in grid_topology.keys() or grid_topology.get('grid_type', 'notype').lower() in ugrid_names): return Grid_U elif grid_topology.get('grid_type', 'notype').lower() in rgrid_names: return Grid_R else: return Grid_S else: # no topology, so search dataset for grid_type variable if (hasattr(dataset, 'grid_type') and dataset.grid_type in sgrid_names + ugrid_names): if dataset.grid_type.lower() in ugrid_names: return Grid_U elif dataset.grid_type.lower() in rgrid_names: return Grid_R else: return Grid_S else: # TODO: Determine an effective decision tree for picking if # a topology variable is present # no grid type explicitly specified. is a topology variable present? topology = Grid._find_topology_var(None, dataset=dataset) if topology is not None: if (hasattr(topology, 'node_coordinates') and not hasattr(topology, 'node_dimensions')): return Grid_U else: return Grid_S else: # no topology variable either, so generate and try again. # if no defaults are found, _gen_topology will raise an error try: u_init_args, u_gf_vars = Grid_U._find_required_grid_attrs(None, dataset) return Grid_U except ValueError: try: r_init_args, r_gf_vars = Grid_R._find_required_grid_attrs(None, dataset) return Grid_R except ValueError: try: s_init_args, s_gf_vars = Grid_S._find_required_grid_attrs(None, dataset) except ValueError: raise ValueError("Can not figure out what type of grid this is. " "Try specifying the grid_topology attributes " "or specifying the grid type") return Grid_S @staticmethod def _find_topology_var(filename, dataset=None): gf = get_dataset(filename, dataset) gts = [] for k, v in gf.variables.items(): if hasattr(v, 'cf_role') and 'topology' in v.cf_role: gts.append(v) # gts = gf.get_variables_by_attributes(cf_role=lambda t: t is not None and 'topology' in t) if len(gts) != 0: return gts[0] else: return None