# Source code for mars.tensor.random.logseries

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 1999-2020 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ... import opcodes as OperandDef
from ...serialize import AnyField
from .core import TensorRandomOperandMixin, handle_array, TensorDistribution


class TensorLogseries(TensorDistribution, TensorRandomOperandMixin):
    """Operand for the ``logseries`` random distribution.

    The shape parameter ``p`` is stored in the ``_p`` field and is listed
    in ``_input_fields_``.
    """

    __slots__ = ('_p', '_size')
    _input_fields_ = ['_p']
    _op_type_ = OperandDef.RAND_LOGSERIES

    _p = AnyField('p')
    _func_name = 'logseries'

    def __init__(self, state=None, size=None, dtype=None, gpu=None, **kw):
        # Coerce any user-supplied dtype spec to a real ``np.dtype``;
        # ``None`` is forwarded untouched.
        if dtype is not None:
            dtype = np.dtype(dtype)
        super().__init__(_state=state, _size=size, _dtype=dtype, _gpu=gpu, **kw)

    @property
    def p(self):
        """Shape parameter ``p`` held by this operand."""
        return self._p

    def __call__(self, p, chunk_size=None):
        """Create the output tensor for this operand with ``p`` as its only input."""
        inputs = [p]
        return self.new_tensor(inputs, None, raw_chunk_size=chunk_size)


def logseries(random_state, p, size=None, chunk_size=None, gpu=None, dtype=None):
    r"""
    Draw samples from a logarithmic series distribution.

    Samples are drawn from a log series distribution with specified
    shape parameter, 0 < ``p`` < 1.

    Parameters
    ----------
    p : float or array_like of floats
        Shape parameter for the distribution.  Must be in the range (0, 1).
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  If size is ``None`` (default),
        a single value is returned if ``p`` is a scalar.  Otherwise,
        ``np.array(p).size`` samples are drawn.
    chunk_size : int or tuple of ints, optional
        Desired chunk size on each dimension
    gpu : bool, optional
        Allocate the tensor on GPU if True, False as default
    dtype : data-type, optional
        Data-type of the returned tensor.  If ``None`` (default), the
        dtype that NumPy's ``RandomState.logseries`` produces is used.

    Returns
    -------
    out : Tensor or scalar
        Drawn samples from the parameterized logarithmic series distribution.

    See Also
    --------
    scipy.stats.logser : probability density function, distribution or
        cumulative density function, etc.

    Notes
    -----
    The probability density for the Log Series distribution is

    .. math:: P(k) = \frac{-p^k}{k \ln(1-p)},

    where p = probability.

    The log series distribution is frequently used to represent species
    richness and occurrence, first proposed by Fisher, Corbet, and
    Williams in 1943 [2].  It may also be used to model the numbers of
    occupants seen in cars [3].

    References
    ----------
    .. [1] Buzas, Martin A.; Culver, Stephen J.,  Understanding regional
           species diversity through the log series distribution of
           occurrences: BIODIVERSITY RESEARCH Diversity & Distributions,
           Volume 5, Number 5, September 1999 , pp. 187-195(9).
    .. [2] Fisher, R.A., A.S. Corbet, and C.B. Williams. 1943. The
           relation between the number of species and the number of
           individuals in a random sample of an animal population.
           Journal of Animal Ecology, 12:42-58.
    .. [3] D. J. Hand, F. Daly, D. Lunn, E. Ostrowski, A Handbook of Small
           Data Sets, CRC Press, 1994.
    .. [4] Wikipedia, "Logarithmic distribution",
           https://en.wikipedia.org/wiki/Logarithmic_distribution

    Examples
    --------
    Draw samples from the distribution:

    >>> import mars.tensor as mt
    >>> import matplotlib.pyplot as plt

    >>> a = .6
    >>> s = mt.random.logseries(a, 10000)
    >>> count, bins, ignored = plt.hist(s.execute())

    #   plot against distribution

    >>> def logseries(k, p):
    ...     return -p**k/(k*mt.log(1-p))
    >>> plt.plot(bins, (logseries(bins, a)*count.max()/
    ...          logseries(bins, a).max()).execute(), 'r')
    >>> plt.show()
    """
    if dtype is None:
        # Infer the result dtype from NumPy itself: request a zero-length
        # draw (``size=(0,)``) and read the dtype of the returned array.
        # NOTE(review): ``handle_array`` comes from ``.core``; presumably it
        # converts ``p`` into something NumPy accepts -- confirm there.
        dtype = np.random.RandomState().logseries(
            handle_array(p), size=(0,)).dtype
    # ``size`` is passed through the random state's ``_handle_size`` hook
    # before being stored on the operand.
    size = random_state._handle_size(size)
    op = TensorLogseries(state=random_state.to_numpy(), size=size, gpu=gpu, dtype=dtype)
    return op(p, chunk_size=chunk_size)