# Source code for gridded.gridded

#!/usr/bin/env python

"""
gridded module:

This module defines the gridded.Dataset --
The core class that encapsulates the gridded data model

"""



# py2/3 compatibility

from gridded.grids import Grid
from gridded.variable import Variable

from gridded.utilities import (get_dataset,
                               get_writable_dataset,
                               get_dataset_attrs,
                               )
from . import VALID_LOCATIONS

"""
The main gridded.Dataset code
"""


class Dataset():
    """
    An object that represents an entire complete dataset --
    a collection of Variables and the Grid that they are stored on.
    """

    def __init__(self,
                 ncfile=None,
                 grid=None,
                 variables=None,
                 grid_topology=None,
                 attributes=None):
        """
        Construct a gridded.Dataset object. Can be constructed from a data file,
        or also raw grid and variable objects.

        :param ncfile: A file or files to load the Dataset from.
        :type ncfile: Can be one of:
                      - file path of netcdf file as a string
                      - opendap url
                      - list of file paths (uses a netCDF4 MFDataset)
                      - open netCDF4 Dataset object
                     (could be other file types in the future)

        :param grid: a dataset.Grid object or anything that presents the same API.

        :param variables: a dict of dataset.Variable objects -- or anything
                          that presents the same API.

        :param grid_topology: mapping of grid topology components to netcdf variable names.
                              used to load non-conforming files. Only used when
                              loading from ``ncfile``.
        :type grid_topology: mapping with keys of topology components and values are
                             variable names.

        :param attributes: The global attributes of the dataset -- usually the global
                           attributes of a netcdf file.
        :type attributes: Mapping of attribute name to attributes themselves
                          (usually strings)

        :raises ValueError: if ``ncfile`` is given together with any of
                            ``grid``, ``variables`` or ``attributes``.

        Either a filename or grid and variable objects should be provided -- not both.

        If a filename is passed in, the grid, variables and attributes are all
        pulled from the file.
        """
        if ncfile is not None:
            if (grid is not None or
                    variables is not None or
                    attributes is not None):
                raise ValueError("You can create a Dataset from a file, or from raw data, "
                                 "but not both.")
            self.nc_dataset = get_dataset(ncfile)
            self.filename = self.nc_dataset.filepath()
            self.grid = Grid.from_netCDF(filename=self.filename,
                                         dataset=self.nc_dataset,
                                         grid_topology=grid_topology)
            # the grid must exist first: _load_variables uses it to tell
            # grid-defining variables from data variables
            self.variables = self._load_variables(self.nc_dataset)
            self.attributes = get_dataset_attrs(self.nc_dataset)
        else:  # no file passed in -- create from grid and variables
            # keep the attribute set the same in both construction paths
            self.nc_dataset = None
            self.filename = None
            self.grid = grid
            self.variables = {} if variables is None else variables
            self.attributes = {} if attributes is None else attributes

    def __getitem__(self, key):
        """
        shortcut to getting a variable object

        :param key: name of the variable, as used in ``self.variables``

        :raises KeyError: if there is no variable with that name
        """
        return self.variables[key]

    def _load_variables(self, ds):
        """
        load up the variables in the nc file

        Variables that are named in the grid's ``grid_topology`` are skipped,
        as are variables for which no location can be determined.

        :param ds: the open netcdf dataset to read the variables from

        :returns: dict mapping netcdf variable name to Variable object
        """
        # the names of the netcdf variables that are used to define the grid.
        # A single topology value may hold several whitespace-separated names.
        grid_var_names = set()
        for topo_value in self.grid.grid_topology.values():
            grid_var_names.update(str(topo_value).split())

        variables = {}
        for varname in ds.variables:
            if varname in grid_var_names:
                continue
            ncvar = ds[varname]
            # find the location of the variable
            try:
                location = ncvar.location
                if location not in VALID_LOCATIONS:
                    # treat an unrecognized location the same as a missing one
                    raise AttributeError("not a valid location name")
            except AttributeError:
                # that didn't work, need to try to infer it
                location = self.grid.infer_location(ncvar)
            if location is None:
                # could not place the variable on the grid -- leave it out
                continue
            try:
                display_name = ncvar.long_name
            except AttributeError:  # no long_name attribute
                display_name = ncvar.name  # use the name attribute
            # fixme: Variable.from_netCDF should really be able to figure out the location itself
            #        maybe we need multiple Variable subclasses for different grid types?
            #        CHB: yes, we really should do that!
            variables[varname] = Variable.from_netCDF(dataset=ds,
                                                      name=display_name,
                                                      varname=varname,
                                                      grid=self.grid,
                                                      location=location,
                                                      )
        return variables

    # This should be covered by Grid.from_netCDF
    # def load_from_topology_varnames(self, ncfile, topology):
    #     """
    #     Load a Gridded dataset by specifying the variable names used for the topology

    #     :param ncfile: a file to load the Dataset from.
    #     :type ncfile: filename of netcdf file or opendap url or open netCDF4 Dataset object
    #                  (could be other file types in the future)

    #     :param topology: variables that define the topology
    #     :type topology: dict of topology_role keys, and variable name values

    #     Docs about what is required for each grid type here.

    #     NOTE: the grid type will be inferred by what topology is provided.
    #     """

    #     raise NotImplementedError

    def save(self, filename, format='netcdf4'):
        """
        save the dataset to a file

        :param filename: full path to file to save to.

        :param format: format to save -- 'netcdf3' or 'netcdf4'
                       are the only options at this point.

        :raises ValueError: if ``format`` is not one of the supported options.
        """
        format_options = ('netcdf3', 'netcdf4')
        if format not in format_options:
            raise ValueError("format: {} not supported. Options are: {}".format(format, format_options))

        # create an ncdataset
        # NOTE(review): the requested format is not passed on here, so the
        #               file type is whatever get_writable_dataset defaults
        #               to -- confirm whether it should take the format too.
        ncds = get_writable_dataset(filename)

        try:
            # Save the grid and variables, honoring the requested format
            self.grid.save(ncds, format=format, variables=self.variables)
        finally:
            # make sure the file gets closed, even if the save failed
            ncds.close()

    def get_variables_by_attribute(self, attr, value):
        """
        return the variables that have attributes that fit the defined input

        :param attr: the name of the attribute you want to match

        :param value: the value of the attribute you want to match

        :returns: list of the matching Variable objects -- empty if there
                  are none. Variables without the attribute are skipped.

        fixme: make this a bit more flexible, more like the netCDF4 version
        """
        matches = []
        for var in self.variables.values():
            try:
                if var.attributes[attr] == value:
                    matches.append(var)
            except KeyError:
                # this variable doesn't have that attribute: not a match
                pass
        return matches

    @property
    def info(self):
        """
        Information about the Dataset object

        A multi-line string with the filename, the grid's info, the info
        of each variable and the global attributes.
        """
        indent = " " * 8
        # the variables' info strings are run together, then every line of
        # the result is indented
        var_text = "".join([indent + var.info for var in self.variables.values()])
        var_text = "\n".join([indent + line for line in var_text.split("\n")])
        attr_text = "\n".join(["        {}: {}".format(k, v)
                               for k, v in self.attributes.items()])
        grid_text = "\n".join([indent + line for line in self.grid.info.split("\n")])
        msg = ("gridded.Dataset:\n"
               "    filename: {0.filename}\n"
               "    grid:\n{3}\n"
               "    variables: {1}\n"
               "    attributes:\n{2}".format(self,
                                             var_text,
                                             attr_text,
                                             grid_text
                                             ))
        return msg