Source code for gridded.gridded

#!/usr/bin/env python

"""
gridded module:

This module defines gridded.Dataset --
the core class that encapsulates the gridded data model.

"""



# py2/3 compatibility

from gridded.grids import Grid
from gridded.variable import Variable

from gridded.utilities import (get_dataset,
                               get_writable_dataset,
                               get_dataset_attrs,
                               )
from . import VALID_LOCATIONS

"""
The main gridded.Dataset code
"""


class Dataset():
    """
    An object that represents an entire complete dataset --
    a collection of Variables and the Grid that they are stored on.
    """

    def __init__(self,
                 ncfile=None,
                 grid=None,
                 variables=None,
                 grid_topology=None,
                 attributes=None):
        """
        Construct a gridded.Dataset object. Can be constructed from a data
        file, or also raw grid and variable objects.

        :param ncfile: A file or files to load the Dataset from.
        :type ncfile: Can be one of:
                      - file path of netcdf file as a string
                      - opendap url
                      - list of file paths (uses a netCDF4 MFDataset)
                      - open netCDF4 Dataset object
                      (could be other file types in the future)

        :param grid: a dataset.Grid object or anything that presents the
                     same API.

        :param variables: a dict of dataset.Variable objects -- or anything
                          that presents the same API.

        :param grid_topology: mapping of grid topology components to netcdf
                              variable names. Used to load non-conforming
                              files.
        :type grid_topology: mapping with keys of topology components and
                             values are variable names.

        :param attributes: The global attributes of the dataset -- usually
                           the global attributes of a netcdf file.
        :type attributes: Mapping of attribute name to attributes themselves
                          (usually strings)

        :raises ValueError: if ``ncfile`` is given together with ``grid``,
                            ``variables`` or ``attributes``.

        Either a filename or grid and variable objects should be provided --
        not both.

        If a filename is passed in, the attributes will be pulled from the
        file, and the input ones ignored.
        """
        if ncfile is not None:
            if (grid is not None or
                    variables is not None or
                    attributes is not None):
                raise ValueError("You can create a Dataset from a file, "
                                 "or from raw data, but not both.")
            self.nc_dataset = get_dataset(ncfile)
            self.filename = self.nc_dataset.filepath()
            # the grid has to exist before the variables are loaded:
            # _load_variables() consults self.grid
            self.grid = Grid.from_netCDF(filename=self.filename,
                                         dataset=self.nc_dataset,
                                         grid_topology=grid_topology)
            self.variables = self._load_variables(self.nc_dataset)
            self.attributes = get_dataset_attrs(self.nc_dataset)
        else:
            # no file passed in -- create from grid and variables
            # nc_dataset is set here too, so the attribute always exists
            self.nc_dataset = None
            self.filename = None
            self.grid = grid
            self.variables = {} if variables is None else variables
            self.attributes = {} if attributes is None else attributes

    def __getitem__(self, key):
        """
        shortcut to getting a variable object

        :param key: name of the variable, as used in ``self.variables``
        """
        return self.variables[key]

    def _load_variables(self, ds):
        """
        load up the variables in the nc file

        Variables that are used to define the grid are skipped, as are
        variables for which no location on the grid can be determined.

        :param ds: the open dataset to read the variables from

        :returns: dict mapping netcdf variable name to Variable object
        """
        # Names of the netcdf variables that define the grid. Computed once
        # rather than for every variable. Each topology value is
        # stringified and split on whitespace, as a value may name
        # several variables.
        grid_var_names = set()
        for topo_value in self.grid.grid_topology.values():
            grid_var_names.update(str(topo_value).split())

        variables = {}
        for k in ds.variables.keys():
            if k in grid_var_names:
                continue
            ncvar = ds[k]
            # find the location of the variable
            try:
                location = ncvar.location
                if location not in VALID_LOCATIONS:
                    raise AttributeError("not a valid location name")
            except AttributeError:
                # that didn't work, need to try to infer it
                location = self.grid.infer_location(ncvar)
            if location is None:
                # no usable location -- skip this variable
                continue
            try:
                ln = ncvar.long_name
            except AttributeError:  # no long_name attribute
                ln = ncvar.name  # use the name attribute
            # fixme: Variable.from_netCDF should really be able to figure
            #        out the location itself
            #        maybe we need multiple Variable subclasses for
            #        different grid types?
            # CHB: yes, we really should do that!
            variables[k] = Variable.from_netCDF(dataset=ds,
                                                name=ln,
                                                varname=k,
                                                grid=self.grid,
                                                location=location,
                                                )
        return variables

    # This should be covered by Grid.from_netCDF
    # def load_from_topology_varnames(self, ncfile, topology):
    #     """
    #     Load a Gridded dataset by specifying the variable names used for the topology

    #     :param ncfile: a file to load the Dataset from.
    #     :type ncfile: filename of netcdf file or opendap url or open netCDF4 Dataset object
    #                   (could be other file types in the future)

    #     :param topology: variables that define the topology
    #     :type topology: dict of topology_role keys, and variable name values

    #     Docs about what is required for each grid type here.

    #     NOTE: the grid type will be inferred by what topology is provided.
    #     """
    #     raise NotImplementedError

    def save(self, filename, format='netcdf4'):
        """
        save the dataset to a file

        :param filename: full path to file to save to.

        :param format: format to save -- 'netcdf3' or 'netcdf4'
                       are the only options at this point.

        :raises ValueError: if ``format`` is not one of the supported options.
        """
        format_options = ('netcdf3', 'netcdf4')
        if format not in format_options:
            raise ValueError("format: {} not supported. Options are: {}"
                             .format(format, format_options))
        # create an ncdataset
        ncds = get_writable_dataset(filename)
        try:
            # Save the grid and variables, in the format that was asked
            # for, rather than a hard-coded one
            self.grid.save(ncds, format=format, variables=self.variables)
        finally:
            # close the file even if writing failed part way through
            ncds.close()

    def get_variables_by_attribute(self, attr, value):
        """
        return the variables that have attributes that fit the defined input

        :param attr: the name of the attribute you want to match

        :param value: the value of the attribute you want to match

        :returns: list of the matching variable objects; empty if none match

        fixme: make this a bit more flexible, more like the netCDF4 version
        """
        variables = []
        for var in self.variables.values():
            try:
                if var.attributes[attr] == value:
                    variables.append(var)
            except KeyError:
                # this variable doesn't have the attribute at all -- no match
                pass
        return variables

    @property
    def info(self):
        """
        Information about the Dataset object

        :returns: a multi-line string with the filename, the grid info,
                  the info of each variable, and the global attributes.
        """
        # each variable's info is prefixed and concatenated, then every
        # line of the result is indented again
        var_text = "".join([" " * 8 + var.info
                            for var in self.variables.values()])
        var_text = "\n".join([" " * 8 + line
                              for line in var_text.split("\n")])
        attr_text = "\n".join([" {}: {}".format(k, v)
                               for k, v in self.attributes.items()])
        grid_text = "\n".join([" " * 8 + line
                               for line in self.grid.info.split("\n")])
        msg = ("gridded.Dataset:\n"
               " filename: {0.filename}\n"
               " grid:\n{3}\n"
               " variables: {1}\n"
               " attributes:\n{2}".format(self,
                                          var_text,
                                          attr_text,
                                          grid_text))
        return msg