# Source code for hypothesis_gufunc.extra.xr

# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This module implements strategies for creating :class:`xarray:xarray.DataArray` and
:class:`xarray:xarray.Dataset` objects.
"""
import string
from collections import OrderedDict, defaultdict

import xarray as xr
from hypothesis.extra.numpy import arrays, order_check
from hypothesis.internal.validation import check_valid_bound
from hypothesis.strategies import fixed_dictionaries, floats, integers, lists, nothing, sampled_from, text, tuples

# Default element data type for generated arrays (default of the `dtype` arguments below).
DEFAULT_DTYPE = int
# Default upper bound on a side / coordinate length (default of the `max_side` arguments below).
DEFAULT_SIDE = 5
# Default upper bound on the number of dimensions (default of the `max_dims` arguments below).
DEFAULT_DIMS = 5
# Default upper bound on the number of variables (default of the `max_vars` arguments below).
DEFAULT_VARS = 5


def _check_valid_size_interval(min_size, max_size, name, floor=0):
    """Validate a size interval as used for integers strategies and array shapes.

    Parameters
    ----------
    min_size : int
        Lower end of the interval.
    max_size : int or None
        Upper end of the interval, `None` meaning no upper limit.
    name : str
        Label for the quantity being checked; forwarded to the validation helpers.
    floor : int
        Smallest value that `min_size` is allowed to take.
    """
    # same checks as done in integers
    for bound in (min_size, max_size):
        check_valid_bound(bound, name)

    # With an open upper end there is nothing to compare against, so min_size stands in for
    # the upper limit and only the floor/min_size ordering is effectively checked.
    upper = min_size if max_size is None else max_size
    order_check(name, floor, min_size, upper)


def _easy_text():
    """Build a strategy for short strings (0 to 5 characters) of lowercase ASCII letters."""
    lowercase = string.ascii_lowercase
    return text(alphabet=lowercase, min_size=0, max_size=5)


def _hashable():
    """Build a strategy whose values are floats, integers, or short lowercase strings."""
    numbers = floats() | integers()
    return numbers | _easy_text()


def _get_all_dims(vars_to_dims):
    all_dims = sorted(set(sum((list(dd) for dd in vars_to_dims.values()), [])))
    return all_dims


# Factory for the strategy that generates dimension names (short lowercase strings).
xr_dims = _easy_text
# Factory for the strategy that generates variable names (floats, integers, or short strings).
xr_vars = _hashable


def subset_lists(L, min_size=0, max_size=None):
    """Strategy to generate a subset of a `list`.

    This should be built in to hypothesis (see hypothesis issue #1115), but was rejected.

    Parameters
    ----------
    L : list
        List of elements we want to get a subset of.
    min_size : int
        Minimum size of the resulting subset list.
    max_size : int or None
        Maximum size of the resulting subset list.

    Returns
    -------
    L : list
        List that is subset of `L` with all unique elements.
    """
    _check_valid_size_interval(min_size, max_size, "subset list size")

    n_unique = len(set(L))
    order_check("input list size", 0, min_size, n_unique)

    # The subset can never hold more elements than there are distinct values in `L`.
    if max_size is None or max_size > n_unique:
        upper = n_unique
    else:
        upper = max_size

    # Avoid deprecation warning HypothesisDeprecationWarning: sampled_from()
    if n_unique == 0:
        picker = nothing()
    else:
        picker = sampled_from(L)

    return lists(elements=picker, min_size=min_size, max_size=upper, unique=True)
def xr_dim_lists(min_dims=0, max_dims=DEFAULT_DIMS):
    """Generate `list` of dimension names for a :class:`xarray:xarray.DataArray`.

    Parameters
    ----------
    min_dims : int
        Minimum size of the resulting dimension list.
    max_dims : int or None
        Maximum size of the resulting dimension list.

    Returns
    -------
    L : list(str)
        List of dimension names, without repeats.
    """
    _check_valid_size_interval(min_dims, max_dims, "dimensions")
    name_st = xr_dims()
    return lists(elements=name_st, min_size=min_dims, max_size=max_dims, unique=True)
def xr_var_lists(min_vars=0, max_vars=DEFAULT_VARS):
    """Generate `list` of variable names for a :class:`xarray:xarray.Dataset`.

    Parameters
    ----------
    min_vars : int
        Minimum size of the resulting variable list.
    max_vars : int or None
        Maximum size of the resulting variable list.

    Returns
    -------
    L : list(typing.Hashable)
        List of variable names, without repeats.
    """
    _check_valid_size_interval(min_vars, max_vars, "variables")
    name_st = xr_vars()
    return lists(elements=name_st, min_size=min_vars, max_size=max_vars, unique=True)
def _vars_and_dims_pairs(min_vars=0, max_vars=DEFAULT_VARS, min_dims=0, max_dims=DEFAULT_DIMS):
    """Generate both variable and dimension names.

    xarray requires that there are no name collisions between the two, so pairs that share
    a name are filtered out.
    """

    def names_are_disjoint(pair):
        var_names, dim_names = pair
        # Dataset does not allow the same names for variable and dimensions
        return set(dim_names).isdisjoint(var_names)

    var_st = xr_var_lists(min_vars, max_vars)
    dim_st = xr_dim_lists(min_dims, max_dims)
    return tuples(var_st, dim_st).filter(names_are_disjoint)
def vars_to_dims_dicts(min_vars=0, max_vars=DEFAULT_VARS, min_dims=0, max_dims=DEFAULT_DIMS):
    """Generate mapping of variable name to `list` of dimensions, which is compatible with
    building a :class:`xarray:xarray.Dataset`.

    Parameters
    ----------
    min_vars : int
        Minimum size of the resulting variable list.
    max_vars : int or None
        Maximum size of the resulting variable list.
    min_dims : int
        Minimum size of the resulting dimension list.
    max_dims : int or None
        Maximum size of the resulting dimension list.

    Returns
    -------
    D : dict(typing.Hashable, list(str))
        Mapping of variable names to `list` of dimensions, which can be fed to constructor
        for a :class:`xarray:xarray.Dataset`.
    """
    _check_valid_size_interval(min_vars, max_vars, "variables")
    _check_valid_size_interval(min_dims, max_dims, "dimensions")

    def dims_per_variable(pair):
        var_names, dim_names = pair
        # Every variable draws its own subset of the shared pool of dimension names.
        dim_subset_st = subset_lists(dim_names, min_size=min_dims, max_size=max_dims)
        return fixed_dictionaries(OrderedDict.fromkeys(var_names, dim_subset_st))

    pair_st = _vars_and_dims_pairs(min_vars, max_vars, min_dims, max_dims)
    return pair_st.flatmap(dims_per_variable)
def xr_coords(elements=None, min_side=0, max_side=DEFAULT_SIDE, unique=True):
    """Generate values for the coordinates in a :class:`xarray:xarray.DataArray`.

    Non-unique coords do not make much sense, but xarray allows it. So we should be able to
    generate it.

    Parameters
    ----------
    elements : SearchStrategy or None
        Strategy to fill the elements of coordinates. Uses
        :func:`hypothesis:hypothesis.strategies.integers` by default.
    min_side : int
        Minimum length of coordinates array.
    max_side : int or None
        Maximum length of coordinates array.
    unique : bool
        If all coordinate values should be unique. `xarray` allows non-unique values, but it
        makes no sense.

    Returns
    -------
    L : list
        The coordinates filled with samples from `elements`.
    """
    _check_valid_size_interval(min_side, max_side, "side")
    value_st = integers() if elements is None else elements
    return lists(elements=value_st, min_size=min_side, max_size=max_side, unique=unique)
def simple_coords(min_side=0, max_side=DEFAULT_SIDE):
    """Generate a simple coordinate for a :class:`xarray:xarray.DataArray`.

    A simple coordinate is one in which the values go: 0, 1, ..., n - 1, where n is the
    length of the coordinate.

    Parameters
    ----------
    min_side : int
        Minimum length of coordinates array.
    max_side : int or None
        Maximum length of coordinates array. If `None`, the length is chosen the same way
        :func:`hypothesis:hypothesis.strategies.lists` chooses the size of an unbounded list.

    Returns
    -------
    L : list(int)
        The coordinates filled with values of: ``list(range(len(L)))``.
    """
    _check_valid_size_interval(min_side, max_side, "side")
    if max_side is None:
        # An integers() strategy without an upper bound can produce astronomically large
        # values, which cannot be materialized as list(range(n)). Let lists() pick a
        # practical length instead; the constant elements are placeholders only.
        placeholder = integers(min_value=0, max_value=0)
        n = lists(elements=placeholder, min_size=min_side).map(len)
    else:
        n = integers(min_value=min_side, max_value=max_side)
    # Always make list to be consistent with xr_coords
    return n.map(range).map(list)
def xr_coords_dicts(dims, elements=None, min_side=0, max_side=DEFAULT_SIDE, unique_coords=True, coords_st=None):
    """Build a dictionary of coordinates for the purpose of building a
    :class:`xarray:xarray.DataArray`.

    `xarray` allows some dims to not have any specified coordinate. This strategy assigns a
    coord to every dimension. If we really want to test those possibilities we need to take a
    subset of the `dict` that is sampled from this strategy.

    Parameters
    ----------
    dims : list(str)
        Dimensions we need to generate coordinates for.
    elements : SearchStrategy or None
        Strategy to fill the elements of coordinates. Uses `integers` by default.
    min_side : int
        Minimum length of coordinates array.
    max_side : int or None
        Maximum length of coordinates array.
    unique_coords : bool
        If all coordinate values should be unique. `xarray` allows non-unique values, but it
        makes no sense.
    coords_st : dict(str, SearchStrategy) or None
        Special strategies for filling specific dimensions. Use the dimension name as the key
        and the strategy for generating the coordinate as the value. `None` (the default) is
        treated as an empty `dict`, i.e., no special strategies.

    Returns
    -------
    coords : dict(str, list)
        Dictionary mapping dimension name to its coordinate values (a list with elements from
        the `elements` strategy).
    """
    _check_valid_size_interval(min_side, max_side, "side")

    # None replaces the former mutable `{}` default; both mean "no per-dimension overrides".
    special_st = {} if coords_st is None else coords_st

    default_st = xr_coords(elements=elements, min_side=min_side, max_side=max_side, unique=unique_coords)
    # Dimensions without a dedicated strategy fall back to the shared default strategy.
    per_dim_st = OrderedDict((dd, special_st.get(dd, default_st)) for dd in dims)
    return fixed_dictionaries(per_dim_st)
def fixed_coords_dataarrays(dims, coords, dtype=DEFAULT_DTYPE, elements=None):
    """Generate a :class:`xarray:xarray.DataArray` with coordinates that are fixed a-priori.

    Parameters
    ----------
    dims : list(str)
        Dimensions we need to generate coordinates for.
    coords : dict(str, list)
        Dictionary mapping dimension name to its coordinate values.
    dtype : type
        Data type for values in the :class:`xarray:xarray.DataArray`. This can be anything
        understood by :func:`hypothesis:hypothesis.extra.numpy.arrays`.
    elements : SearchStrategy or None
        Strategy to fill the elements of the :class:`xarray:xarray.DataArray`. If `None`, a
        default is selected based on `dtype`.

    Returns
    -------
    da : :class:`xarray:xarray.DataArray`
        :class:`xarray:xarray.DataArray` generated with the specified coordinates and elements
        from the specified strategy.
    """
    # The length of each dimension's coordinate fixes the matching side of the data array.
    side_lengths = [len(coords[name]) for name in dims]
    values_st = arrays(dtype, side_lengths, elements=elements)

    # Only coordinates belonging to the requested dimensions are attached.
    relevant_coords = {name: values for name, values in coords.items() if name in dims}

    def build(data):
        return xr.DataArray(data, coords=relevant_coords, dims=dims)

    return values_st.map(build)
def fixed_dataarrays(
    dims, dtype=DEFAULT_DTYPE, elements=None, coords_elements=None, min_side=0, max_side=DEFAULT_SIDE, coords_st=None
):
    """Generate :class:`xarray:xarray.DataArray` with dimensions (but not coordinates) that
    are fixed a-priori.

    Parameters
    ----------
    dims : list(str)
        Dimensions we need to generate coordinates for.
    dtype : type
        Data type for values in the :class:`xarray:xarray.DataArray`. This can be anything
        understood by :func:`hypothesis:hypothesis.extra.numpy.arrays`.
    elements : SearchStrategy or None
        Strategy to fill the elements of the :class:`xarray:xarray.DataArray`. If `None`, a
        default is selected based on `dtype`.
    coords_elements : SearchStrategy or None
        Strategy to fill the elements of coordinates.
    min_side : int
        Minimum side length of the :class:`xarray:xarray.DataArray`.
    max_side : int or None
        Maximum side length of the :class:`xarray:xarray.DataArray`.
    coords_st : dict(str, SearchStrategy) or None
        Special strategies for filling specific dimensions. Use the dimension name as the key
        and the strategy for generating the coordinate as the value. `None` (the default) is
        treated as an empty `dict`, i.e., no special strategies.

    Returns
    -------
    da : :class:`xarray:xarray.DataArray`
        :class:`xarray:xarray.DataArray` generated with the dimensions and elements from the
        specified strategy.
    """
    _check_valid_size_interval(min_side, max_side, "side")

    # None replaces the former mutable `{}` default. A real dict is always forwarded so the
    # callee never has to deal with None.
    special_st = {} if coords_st is None else coords_st

    # Kept under its own name so the `coords_st` argument is not shadowed by a strategy object.
    coords_dict_st = xr_coords_dicts(
        dims, elements=coords_elements, min_side=min_side, max_side=max_side, coords_st=special_st
    )
    # Coordinates are drawn first, then the data is generated with a shape matching them.
    return coords_dict_st.flatmap(lambda C: fixed_coords_dataarrays(dims, C, dtype=dtype, elements=elements))
def simple_dataarrays(dims, dtype=DEFAULT_DTYPE, elements=None, min_side=0, max_side=DEFAULT_SIDE):
    """Generate a :class:`xarray:xarray.DataArray` with dimensions that are fixed a-priori and
    simple coordinates.

    Parameters
    ----------
    dims : list(str)
        Dimensions we need to generate coordinates for.
    dtype : type
        Data type for values in the :class:`xarray:xarray.DataArray`. This can be anything
        understood by :func:`hypothesis:hypothesis.extra.numpy.arrays`.
    elements : SearchStrategy or None
        Strategy to fill the elements of the :class:`xarray:xarray.DataArray`. If `None`, a
        default is selected based on `dtype`.
    min_side : int
        Minimum side length of the :class:`xarray:xarray.DataArray`.
    max_side : int or None
        Maximum side length of the :class:`xarray:xarray.DataArray`.

    Returns
    -------
    da : :class:`xarray:xarray.DataArray`
        :class:`xarray:xarray.DataArray` generated with the dimensions, simple coordinates, and
        elements from the specified strategy.
    """
    _check_valid_size_interval(min_side, max_side, "side")

    # Every dimension gets the same simple-coordinate strategy as its dedicated strategy.
    range_coords_st = simple_coords(min_side=min_side, max_side=max_side)
    per_dim_st = OrderedDict.fromkeys(dims, range_coords_st)

    return fixed_dataarrays(dims, dtype=dtype, elements=elements, coords_st=per_dim_st)
def dataarrays(
    dtype=DEFAULT_DTYPE,
    elements=None,
    coords_elements=None,
    min_side=0,
    max_side=DEFAULT_SIDE,
    min_dims=0,
    max_dims=DEFAULT_DIMS,
):
    """Generate a :class:`xarray:xarray.DataArray` with no dimensions or coordinates fixed
    a-priori.

    Parameters
    ----------
    dtype : type
        Data type for values in the :class:`xarray:xarray.DataArray`. This can be anything
        understood by :func:`hypothesis:hypothesis.extra.numpy.arrays`.
    elements : SearchStrategy or None
        Strategy to fill the elements of the :class:`xarray:xarray.DataArray`. If `None`, a
        default is selected based on `dtype`.
    coords_elements : SearchStrategy or None
        Strategy to fill the elements of coordinates.
    min_side : int
        Minimum side length of the :class:`xarray:xarray.DataArray`.
    max_side : int or None
        Maximum side length of the :class:`xarray:xarray.DataArray`.
    min_dims : int
        Minimum number of dimensions.
    max_dims : int or None
        Maximum number of dimensions.

    Returns
    -------
    da : :class:`xarray:xarray.DataArray`
        :class:`xarray:xarray.DataArray` generated with the dimensions, coordinates, and
        elements from the specified strategies.
    """
    _check_valid_size_interval(min_side, max_side, "side")
    _check_valid_size_interval(min_dims, max_dims, "dimensions")

    def arrays_for_dims(dim_names):
        # Once the dimension names are drawn, the remaining work is the fixed-dims case.
        return fixed_dataarrays(
            dim_names,
            dtype=dtype,
            elements=elements,
            coords_elements=coords_elements,
            min_side=min_side,
            max_side=max_side,
        )

    return xr_dim_lists(min_dims, max_dims).flatmap(arrays_for_dims)
def fixed_coords_datasets(vars_to_dims, coords, dtype=None, elements=None):
    """Generate a :class:`xarray:xarray.Dataset` where the variables, dimensions, and
    coordinates are specified a-priori.

    Parameters
    ----------
    vars_to_dims : dict(typing.Hashable, list(str))
        Mapping of variable names to list of dimensions, which can be fed to constructor for a
        :class:`xarray:xarray.Dataset`.
    coords : dict(str, list)
        Dictionary mapping dimension name to its coordinate values.
    dtype : dict(typing.Hashable, type) or None
        Dictionary mapping variables names to the data type for that variable's elements. If
        `None`, every variable uses `DEFAULT_DTYPE`.
    elements : SearchStrategy or None
        Strategy to fill the elements of the :class:`xarray:xarray.Dataset`. If `None`, a
        default is selected based on `dtype`.

    Returns
    -------
    ds : :class:`xarray:xarray.Dataset`
        :class:`xarray:xarray.Dataset` with the specified variables, dimensions, and
        coordinates.
    """
    if dtype is None:
        dtype = defaultdict(lambda: DEFAULT_DTYPE)

    # One DataArray strategy per variable, all sharing the same fixed coordinates.
    per_var_st = OrderedDict()
    for var_name, var_dims in vars_to_dims.items():
        per_var_st[var_name] = fixed_coords_dataarrays(var_dims, coords, dtype[var_name], elements)

    def build(data):
        return xr.Dataset(data, coords=coords)

    return fixed_dictionaries(per_var_st).map(build)
def fixed_datasets(
    vars_to_dims, dtype=None, elements=None, coords_elements=None, min_side=0, max_side=DEFAULT_SIDE, coords_st=None
):
    """Generate :class:`xarray:xarray.Dataset` where the variables and dimensions (but not
    coordinates) are specified a-priori.

    Parameters
    ----------
    vars_to_dims : dict(typing.Hashable, list(str))
        Mapping of variable names to list of dimensions, which can be fed to constructor for a
        :class:`xarray:xarray.Dataset`.
    dtype : dict(typing.Hashable, type) or None
        Dictionary mapping variables names to the data type for that variable's elements.
    elements : SearchStrategy or None
        Strategy to fill the elements of the :class:`xarray:xarray.Dataset`. If `None`, a
        default is selected based on `dtype`.
    coords_elements : SearchStrategy or None
        Strategy to fill the elements of coordinates.
    min_side : int
        Minimum side length of the :class:`xarray:xarray.Dataset`.
    max_side : int or None
        Maximum side length of the :class:`xarray:xarray.Dataset`.
    coords_st : dict(str, SearchStrategy) or None
        Special strategies for filling specific dimensions. Use the dimension name as the key
        and the strategy for generating the coordinate as the value. `None` (the default) is
        treated as an empty `dict`, i.e., no special strategies.

    Returns
    -------
    ds: :class:`xarray:xarray.Dataset`
        :class:`xarray:xarray.Dataset` with the specified variables and dimensions.
    """
    _check_valid_size_interval(min_side, max_side, "side")

    # None replaces the former mutable `{}` default. A real dict is always forwarded so the
    # callee never has to deal with None.
    special_st = {} if coords_st is None else coords_st

    # Coordinates are generated once for the union of dimensions used by all variables.
    all_dims = _get_all_dims(vars_to_dims)
    # Kept under its own name so the `coords_st` argument is not shadowed by a strategy object.
    coords_dict_st = xr_coords_dicts(
        all_dims, elements=coords_elements, min_side=min_side, max_side=max_side, coords_st=special_st
    )
    return coords_dict_st.flatmap(lambda C: fixed_coords_datasets(vars_to_dims, C, dtype=dtype, elements=elements))
def simple_datasets(vars_to_dims, dtype=None, elements=None, min_side=0, max_side=DEFAULT_SIDE):
    """Generate :class:`xarray:xarray.Dataset` with variables and dimensions that are fixed
    a-priori and simple coordinates.

    Parameters
    ----------
    vars_to_dims : dict(typing.Hashable, list(str))
        Mapping of variable names to list of dimensions, which can be fed to constructor for a
        :class:`xarray:xarray.Dataset`.
    dtype : dict(typing.Hashable, type) or None
        Dictionary mapping variables names to the data type for that variable's elements.
    elements : SearchStrategy or None
        Strategy to fill the elements of the :class:`xarray:xarray.Dataset`. If `None`, a
        default is selected based on `dtype`.
    min_side : int
        Minimum side length of the :class:`xarray:xarray.Dataset`.
    max_side : int or None
        Maximum side length of the :class:`xarray:xarray.Dataset`.

    Returns
    -------
    ds: :class:`xarray:xarray.Dataset`
        A :class:`xarray:xarray.Dataset` with the specified variables and dimensions, and
        simple coordinates.
    """
    _check_valid_size_interval(min_side, max_side, "side")

    dim_names = _get_all_dims(vars_to_dims)
    # Every dimension gets the same simple-coordinate strategy as its dedicated strategy.
    range_coords_st = simple_coords(min_side=min_side, max_side=max_side)
    per_dim_st = OrderedDict.fromkeys(dim_names, range_coords_st)

    return fixed_datasets(vars_to_dims, dtype=dtype, elements=elements, coords_st=per_dim_st)
def datasets(
    dtype=DEFAULT_DTYPE,
    elements=None,
    coords_elements=None,
    min_side=0,
    max_side=DEFAULT_SIDE,
    min_vars=0,
    max_vars=DEFAULT_VARS,
    min_dims=0,
    max_dims=DEFAULT_DIMS,
):
    """Generate a :class:`xarray:xarray.Dataset` with no variables, dimensions, or coordinates
    fixed a-priori.

    We could also allow a strategy with a different data type per variable, but until there is
    a use case for that, we will leave `dtype` as a scalar input.

    Parameters
    ----------
    dtype : type
        Data type used to fill the elements of the :class:`xarray:xarray.Dataset`.
    elements : SearchStrategy or None
        Strategy to fill the elements of the :class:`xarray:xarray.Dataset`. If `None`, a
        default is selected based on `dtype`.
    coords_elements : SearchStrategy or None
        Strategy to fill the elements of coordinates.
    min_side : int
        Minimum side length of the :class:`xarray:xarray.Dataset`.
    max_side : int or None
        Maximum side length of the :class:`xarray:xarray.Dataset`.
    min_vars : int
        Minimum number of variables.
    max_vars : int or None
        Maximum number of variables.
    min_dims : int
        Minimum number of dimensions.
    max_dims : int or None
        Maximum number of dimensions.

    Returns
    -------
    ds : :class:`xarray:xarray.Dataset`
        :class:`xarray:xarray.Dataset` generated with the variables, dimensions, coordinates,
        and elements from the specified strategies.
    """
    _check_valid_size_interval(min_side, max_side, "side")
    _check_valid_size_interval(min_vars, max_vars, "variables")
    _check_valid_size_interval(min_dims, max_dims, "dimensions")

    # The scalar `dtype` is exposed as a per-variable lookup that answers the same for any name.
    uniform_dtype = defaultdict(lambda: dtype)
    layout_st = vars_to_dims_dicts(min_vars, max_vars, min_dims, max_dims)

    def datasets_for_layout(layout):
        # Once the variable-to-dimensions layout is drawn, the rest is the fixed-layout case.
        return fixed_datasets(
            layout,
            dtype=uniform_dtype,
            elements=elements,
            coords_elements=coords_elements,
            min_side=min_side,
            max_side=max_side,
        )

    return layout_st.flatmap(datasets_for_layout)