#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright 1999-2020 Alibaba Group Holding Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np from ... import opcodes as OperandDef from ...serialize import Int64Field, Float64Field from ..array_utils import array_module from .core import TensorRandomOperandMixin, TensorSimpleRandomData class TensorRandint(TensorSimpleRandomData, TensorRandomOperandMixin): __slots__ = '_low', '_high', '_density', '_size' _op_type_ = OperandDef.RAND_RANDINT _low = Int64Field('low') _high = Int64Field('high') _density = Float64Field('density') _func_name = 'randint' def __init__(self, state=None, size=None, dtype=None, low=None, high=None, sparse=False, density=None, gpu=None, **kw): dtype = np.dtype(dtype) if dtype is not None else dtype super().__init__(_state=state, _size=size, _low=low, _high=high, _dtype=dtype, _sparse=sparse, _density=density, _gpu=gpu, **kw) @property def low(self): return self._low @property def high(self): return self._high @property def density(self): return self._density def __call__(self, chunk_size=None): return self.new_tensor(None, None, raw_chunk_size=chunk_size) @classmethod def execute(cls, ctx, op): if op.sparse: cls.execute_sparse(ctx, op) else: super().execute(ctx, op) @classmethod def execute_sparse(cls, ctx, op): from ...lib.sparse import SparseNDArray from ...lib.sparse.core import cps, sps xp = array_module(op.gpu) if op.state: rs = op.state.random_state else: rs = None chunk = op.outputs[0] if chunk.ndim > 2: raise NotImplementedError low = 1 if op.low == 0 else op.low rs = rs or xp.random size = int(np.ceil(np.prod(chunk.shape) * op.density)) xps = cps if op.gpu else sps ij = xp.empty((2, size)) ij[0] = rs.randint(chunk.shape[0], size=size) ij[1] = rs.randint(chunk.shape[1], size=size) data = rs.randint(low, op.high, size=size).astype(op.dtype) m = xps.coo_matrix((data, ij), chunk.shape).tocsr() m.data[m.data >= op.high] = op.high - 1 # scipy.sparse is too slow, we remove the precise version due to the performance # m = sps.random(*chunk.shape, density=op.density, format='csr') # m.data = (rs or xp.random).randint(low, op.high, size=m.data.size)\ # .astype(op.dtype) ctx[chunk.key] = SparseNDArray(m) @classmethod def estimate_size(cls, ctx, op): chunk = op.outputs[0] if not op.sparse or not getattr(op, '_density', None): super().estimate_size(ctx, op) else: # use density to estimate real memory usage nbytes = int(chunk.nbytes * getattr(chunk.op, '_density')) ctx[chunk.key] = (nbytes, nbytes) [docs]def randint(random_state, low, high=None, size=None, dtype='l', density=None, chunk_size=None, gpu=None): """ Return random integers from `low` (inclusive) to `high` (exclusive). Return random integers from the "discrete uniform" distribution of the specified dtype in the "half-open" interval [`low`, `high`). If `high` is None (the default), then results are from [0, `low`). Parameters ---------- low : int Lowest (signed) integer to be drawn from the distribution (unless ``high=None``, in which case this parameter is one above the *highest* such integer). high : int, optional If provided, one above the largest (signed) integer to be drawn from the distribution (see above for behavior if ``high=None``). size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. Default is None, in which case a single value is returned. dtype : dtype, optional Desired dtype of the result. All dtypes are determined by their name, i.e., 'int64', 'int', etc, so byteorder is not available and a specific precision may have different C types depending on the platform. The default value is 'np.int'. density: float, optional if density specified, a sparse tensor will be created chunk_size : int or tuple of int or tuple of ints, optional Desired chunk size on each dimension gpu : bool, optional Allocate the tensor on GPU if True, False as default dtype : data-type, optional Data-type of the returned tensor. Returns ------- out : int or Tensor of ints `size`-shaped tensor of random integers from the appropriate distribution, or a single such random int if `size` not provided. See Also -------- random.random_integers : similar to `randint`, only for the closed interval [`low`, `high`], and 1 is the lowest value if `high` is omitted. In particular, this other one is the one to use to generate uniformly distributed discrete non-integers. Examples -------- >>> import mars.tensor as mt >>> mt.random.randint(2, size=10).execute() array([1, 0, 0, 0, 1, 1, 0, 0, 1, 0]) >>> mt.random.randint(1, size=10).execute() array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) Generate a 2 x 4 tensor of ints between 0 and 4, inclusive: >>> mt.random.randint(5, size=(2, 4)).execute() array([[4, 0, 2, 1], [3, 2, 2, 0]]) """ sparse = bool(density) size = random_state._handle_size(size) op = TensorRandint(state=random_state.to_numpy(), low=low, high=high, size=size, dtype=dtype, gpu=gpu, sparse=sparse, density=density) return op(chunk_size=chunk_size)