Source code for mars.tensor.random.noncentral_f

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 1999-2020 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ... import opcodes as OperandDef
from ...serialize import AnyField
from .core import TensorRandomOperandMixin, handle_array, TensorDistribution


class TensorNoncentralF(TensorDistribution, TensorRandomOperandMixin):
    """Operand for sampling from the noncentral F distribution.

    The three distribution parameters (``dfnum``, ``dfden``, ``nonc``) are
    stored as serializable fields and are also listed as the operand's
    input fields.
    """

    __slots__ = ('_dfnum', '_dfden', '_nonc', '_size')
    _op_type_ = OperandDef.RAND_NONCENTRAL_F
    _input_fields_ = ['_dfnum', '_dfden', '_nonc']

    # Distribution parameters; AnyField is used for all three.
    _dfnum = AnyField('dfnum')
    _dfden = AnyField('dfden')
    _nonc = AnyField('nonc')
    # NOTE(review): presumably the name of the sampling routine resolved by
    # the base class at execution time -- confirm in ``.core``.
    _func_name = 'noncentral_f'

    def __init__(self, size=None, state=None, dtype=None, gpu=None, **kw):
        """Create the operand, normalizing ``dtype`` to a ``np.dtype``.

        ``size``, ``state``, ``dtype`` and ``gpu`` are forwarded to the
        parent constructor under their underscore-prefixed field names;
        any extra keyword arguments are passed through unchanged.
        """
        if dtype is not None:
            dtype = np.dtype(dtype)
        super().__init__(_size=size, _state=state, _dtype=dtype, _gpu=gpu, **kw)

    @property
    def dfnum(self):
        """Numerator degrees of freedom held by this operand."""
        return self._dfnum

    @property
    def dfden(self):
        """Denominator degrees of freedom held by this operand."""
        return self._dfden

    @property
    def nonc(self):
        """Non-centrality parameter held by this operand."""
        return self._nonc

    def __call__(self, dfnum, dfden, nonc, chunk_size=None):
        """Build the output tensor from the three distribution parameters.

        The parameters are handed to ``new_tensor`` as the input list, with
        ``None`` as the second positional argument and ``chunk_size``
        forwarded as ``raw_chunk_size``.
        """
        params = [dfnum, dfden, nonc]
        return self.new_tensor(params, None, raw_chunk_size=chunk_size)


def noncentral_f(random_state, dfnum, dfden, nonc, size=None, chunk_size=None, gpu=None, dtype=None):
    """
    Draw samples from the noncentral F distribution.

    Samples are drawn from an F distribution with specified parameters,
    `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
    freedom in denominator), where both parameters > 1.
    `nonc` is the non-centrality parameter.

    Parameters
    ----------
    dfnum : float or array_like of floats
        Numerator degrees of freedom, should be > 0.
    dfden : float or array_like of floats
        Denominator degrees of freedom, should be > 0.
    nonc : float or array_like of floats
        Non-centrality parameter, the sum of the squares of the numerator
        means, should be >= 0.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  If size is ``None`` (default),
        a single value is returned if ``dfnum``, ``dfden``, and ``nonc``
        are all scalars.  Otherwise, ``np.broadcast(dfnum, dfden, nonc).size``
        samples are drawn.
    chunk_size : int or tuple of int or tuple of ints, optional
        Desired chunk size on each dimension
    gpu : bool, optional
        Allocate the tensor on GPU if True, False as default
    dtype : data-type, optional
        Data-type of the returned tensor.  When omitted, the dtype that
        NumPy's ``RandomState.noncentral_f`` produces for the given
        parameters is used.

    Returns
    -------
    out : Tensor or scalar
        Drawn samples from the parameterized noncentral Fisher distribution.

    Notes
    -----
    When calculating the power of an experiment (power = probability of
    rejecting the null hypothesis when a specific alternative is true) the
    non-central F statistic becomes important.  When the null hypothesis is
    true, the F statistic follows a central F distribution. When the null
    hypothesis is not true, then it follows a non-central F statistic.

    References
    ----------
    .. [1] Weisstein, Eric W. "Noncentral F-Distribution."
           From MathWorld--A Wolfram Web Resource.
           http://mathworld.wolfram.com/NoncentralF-Distribution.html
    .. [2] Wikipedia, "Noncentral F-distribution",
           http://en.wikipedia.org/wiki/Noncentral_F-distribution

    Examples
    --------
    In a study, testing for a specific alternative to the null hypothesis
    requires use of the Noncentral F distribution. We need to calculate the
    area in the tail of the distribution that exceeds the value of the F
    distribution for the null hypothesis.  We'll plot the two probability
    distributions for comparison.

    >>> import numpy as np
    >>> import mars.tensor as mt
    >>> import matplotlib.pyplot as plt

    >>> dfnum = 3 # between group deg of freedom
    >>> dfden = 20 # within groups degrees of freedom
    >>> nonc = 3.0
    >>> nc_vals = mt.random.noncentral_f(dfnum, dfden, nonc, 1000000)
    >>> NF = np.histogram(nc_vals.execute(), bins=50, density=True)  # TODO(jisheng): implement mt.histogram
    >>> c_vals = mt.random.f(dfnum, dfden, 1000000)
    >>> F = np.histogram(c_vals.execute(), bins=50, density=True)
    >>> plt.plot(F[1][1:], F[0])
    >>> plt.plot(NF[1][1:], NF[0])
    >>> plt.show()
    """
    # ``random_state`` is the owning random-state object; it must provide
    # ``_handle_size`` and ``to_numpy`` (both used below).
    if dtype is None:
        # Ask NumPy for an empty (size 0) sample purely to learn the dtype it
        # would produce for these parameters; no real samples are drawn here.
        dtype = np.random.RandomState().noncentral_f(
            handle_array(dfnum), handle_array(dfden), handle_array(nonc), size=(0,)).dtype
    size = random_state._handle_size(size)
    op = TensorNoncentralF(size=size, state=random_state.to_numpy(), gpu=gpu, dtype=dtype)
    return op(dfnum, dfden, nonc, chunk_size=chunk_size)