# Source code for mars.tensor.core

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from collections.abc import Iterable
from enum import Enum
from operator import attrgetter
from typing import Any, Dict

import numpy as np

from ..core import (
    Chunk,
    ChunkData,
    HasShapeTileable,
    HasShapeTileableData,
    OutputType,
    _ExecuteAndFetchMixin,
    is_build_mode,
    register_output_types,
)
from ..core.entity.utils import refresh_tileable_shape
from ..serialization.serializables import (
    AnyField,
    DataTypeField,
    FieldTypes,
    ListField,
    ReferenceField,
    Serializable,
    StringField,
    TupleField,
)
from ..utils import on_serialize_shape, on_deserialize_shape
from .utils import get_chunk_slices, fetch_corner_data

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

class TensorOrder(Enum):
    """Memory order of a tensor.

    Each member's value is the one-letter code for that order, so a member
    can be looked up either by name (``TensorOrder.C_ORDER``) or by value
    (``TensorOrder("C")``).
    """

    C_ORDER = "C"  # C order
    F_ORDER = "F"  # Fortran order

class TensorChunkData(ChunkData):
    """Data object behind a single tensor chunk.

    Stores the chunk's shape, dtype and memory order and exposes them, plus a
    few derived values (``ndim``, ``size``, ``nbytes``), as read-only
    properties.
    """

    __slots__ = ()
    _no_copy_attrs_ = ChunkData._no_copy_attrs_ | {"dtype"}
    type_name = "Tensor"

    # required fields
    # NOTE(review): the arguments of this field were lost when the source was
    # extracted. They are restored from the field-naming pattern used below and
    # from the shape (de)serializers imported at the top of the module; the
    # element type ``FieldTypes.int64`` is an assumption - confirm upstream.
    _shape = TupleField(
        "shape",
        FieldTypes.int64,
        on_serialize=on_serialize_shape,
        on_deserialize=on_deserialize_shape,
    )
    _order = ReferenceField("order", TensorOrder)
    # optional fields
    _dtype = DataTypeField("dtype")

    def __init__(self, op=None, index=None, shape=None, dtype=None, order=None, **kw):
        """Create the chunk data.

        Parameters
        ----------
        op
            Operand that produces this chunk.
        index
            Index of the chunk.
        shape
            Shape of the chunk.
        dtype
            Data type of the chunk.
        order : TensorOrder or str, optional
            Memory order. A string is resolved as a ``TensorOrder`` member
            *name* (e.g. ``"C_ORDER"``). When omitted and ``op`` is set, the
            order is inferred from the inputs.
        **kw
            Forwarded unchanged to the base-class constructor.
        """
        if isinstance(order, str):
            order = getattr(TensorOrder, order)
        super().__init__(
            _op=op, _index=index, _shape=shape, _dtype=dtype, _order=order, **kw
        )
        if self.order is None and self.op is not None:
            # Infer the order: Fortran only when there is at least one input
            # and every input is Fortran-ordered; C otherwise. The explicit
            # empty check is required because all() of nothing is True.
            if len(self.inputs) == 0:
                self._order = TensorOrder.C_ORDER
            elif all(
                hasattr(inp, "order") and inp.order == TensorOrder.F_ORDER
                for inp in self.inputs
            ):
                self._order = TensorOrder.F_ORDER
            else:
                self._order = TensorOrder.C_ORDER

    @property
    def params(self) -> Dict[str, Any]:
        """Properties that are useful to rebuild a new chunk."""
        return {
            "shape": self.shape,
            "dtype": self.dtype,
            "order": self.order,
            "index": self.index,
        }

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        """Update shape, dtype and order from ``new_params``.

        Entries that are missing or ``None`` leave the current value
        untouched; an ``"index"`` entry is accepted but ignored.

        Raises
        ------
        TypeError
            If ``new_params`` contains any other key.
        """
        # Work on a copy so the caller's dict is not consumed by the pops.
        params = new_params.copy()
        params.pop("index", None)  # index not needed to update
        new_shape = params.pop("shape", None)
        if new_shape is not None:
            self._shape = new_shape
        dtype = params.pop("dtype", None)
        if dtype is not None:
            self._dtype = dtype
        order = params.pop("order", None)
        if order is not None:
            self._order = order
        if params:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(params)}")

    @classmethod
    def get_params_from_data(cls, data: np.ndarray) -> Dict[str, Any]:
        """Derive ``shape``, ``dtype`` and ``order`` from a concrete array.

        Data for which ``is_cupy`` is true is inspected as is; anything else
        is first passed through ``np.asarray``. The order is C when the data
        reports ``C_CONTIGUOUS`` and Fortran otherwise.
        """
        from .array_utils import is_cupy

        if not is_cupy(data):
            data = np.asarray(data)
        order = (
            TensorOrder.C_ORDER if data.flags["C_CONTIGUOUS"] else TensorOrder.F_ORDER
        )
        return {"shape": data.shape, "dtype": data.dtype, "order": order}

    def __len__(self):
        """Return the size of the first dimension.

        Raises
        ------
        TypeError
            If the shape is empty, unless build mode is active, in which case
            0 is returned instead.
        """
        try:
            return self.shape[0]
        except IndexError:
            if is_build_mode():
                return 0
            raise TypeError("len() of unsized object")

    @property
    def shape(self):
        """Stored shape, or ``None`` if it has not been set."""
        return getattr(self, "_shape", None)

    @property
    def ndim(self):
        """Number of dimensions, i.e. the length of ``shape``."""
        return len(self.shape)

    @property
    def size(self):
        """Total number of elements."""
        # NOTE(review): this body was missing from the extracted source; the
        # product of the shape as a Python scalar is assumed - confirm upstream.
        return np.prod(self.shape).item()

    @property
    def dtype(self):
        """Stored dtype, falling back to the operand's dtype when it is unset."""
        return getattr(self, "_dtype", None) or self.op.dtype

    @property
    def order(self):
        """Stored memory order, or ``None`` if it has not been set."""
        return getattr(self, "_order", None)

    @property
    def nbytes(self):
        """Element count multiplied by the dtype's item size."""
        # NOTE(review): the left operand was lost in extraction; the element
        # count ``np.prod(self.shape)`` is assumed - confirm upstream.
        return np.prod(self.shape) * self.dtype.itemsize

class TensorChunk(Chunk):
    """Chunk entity that declares ``TensorChunkData`` as its allowed data type."""

    __slots__ = ()
    _allow_data_type_ = (TensorChunkData,)
    type_name = "Tensor"

    def __len__(self):
        """Return the length reported by the wrapped chunk data."""
        wrapped = self._data
        return len(wrapped)

class TensorData(HasShapeTileableData, _ExecuteAndFetchMixin):
    """Data object behind a tensor.

    Holds the dtype, memory order and chunk list of a tensor and implements
    the tensor-level helpers (transpose, reshape, sparse/dense conversion,
    fetching results, ...) that ``Tensor`` delegates to.
    """

    __slots__ = ()
    type_name = "Tensor"

    # required fields
    # The order is serialized as the enum's value ("C" / "F") and turned back
    # into a TensorOrder member when deserialized.
    _order = StringField(
        "order", on_serialize=attrgetter("value"), on_deserialize=TensorOrder
    )
    # optional fields
    _dtype = DataTypeField("dtype")
    # NOTE(review): the leading arguments of this field and the element
    # expression of ``on_serialize`` were lost in extraction. The tag, the
    # reference type and ``it.data`` (the inverse of wrapping with
    # ``TensorChunk(it)``) are assumptions - confirm upstream.
    _chunks = ListField(
        "chunks",
        FieldTypes.reference(TensorChunkData),
        on_serialize=lambda x: [it.data for it in x] if x is not None else x,
        on_deserialize=lambda x: [TensorChunk(it) for it in x] if x is not None else x,
    )

    # NOTE(review): the parameter list and the super().__init__ call were
    # missing from the extracted source. They are reconstructed by analogy
    # with TensorChunkData.__init__ and from the ``nsplits`` / ``chunks``
    # attributes this class reads elsewhere - confirm upstream.
    def __init__(
        self,
        op=None,
        shape=None,
        dtype=None,
        order=None,
        nsplits=None,
        chunks=None,
        **kw,
    ):
        """Create the tensor data.

        ``order`` may be a ``TensorOrder`` member or the *name* of one. When
        it is omitted and ``op`` is set, the order is inferred from the
        inputs.
        """
        if isinstance(order, str):
            order = getattr(TensorOrder, order)
        super().__init__(
            _op=op,
            _shape=shape,
            _dtype=dtype,
            _order=order,
            _nsplits=nsplits,
            _chunks=chunks,
            **kw,
        )
        if self.order is None and self.op is not None:
            # Fortran only when there is at least one input and every input is
            # Fortran-ordered; C otherwise. The explicit empty check is needed
            # because all() of nothing is True.
            if len(self.inputs) == 0:
                self._order = TensorOrder.C_ORDER
            elif all(
                hasattr(inp, "order") and inp.order == TensorOrder.F_ORDER
                for inp in self.inputs
            ):
                self._order = TensorOrder.F_ORDER
            else:
                self._order = TensorOrder.C_ORDER

    def _to_str(self, representation=False):
        """Build the ``str`` / ``repr`` text of the tensor.

        In build mode, or when the tensor has not been executed in any
        session, a short symbolic description is returned. Otherwise the
        corner data is fetched from the most recent session and formatted by
        NumPy.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                return f"Tensor <op={type(self._op).__name__}, shape={self._shape}, key={self._key}>"
            return f"Tensor(op={type(self._op).__name__}, shape={self._shape})"

        print_options = np.get_printoptions()
        threshold = print_options["threshold"]

        corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
        # if less than default threshold, just set it as default,
        # if not, set to corner_data.size - 1 make sure ... exists in repr
        threshold = threshold if self.size <= threshold else corner_data.size - 1
        with np.printoptions(threshold=threshold):
            corner_str = repr(corner_data) if representation else str(corner_data)
        return corner_str

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    @property
    def params(self):
        """Properties that are useful to rebuild a new tileable object."""
        return {"shape": self.shape, "dtype": self.dtype, "order": self.order}

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        """Update shape, dtype and order from ``new_params``.

        Entries that are missing or ``None`` leave the current value
        untouched.

        Raises
        ------
        TypeError
            If ``new_params`` contains any other key.
        """
        # Work on a copy so the caller's dict is not consumed by the pops.
        params = new_params.copy()
        shape = params.pop("shape", None)
        if shape is not None:
            self._shape = shape
        dtype = params.pop("dtype", None)
        if dtype is not None:
            self._dtype = dtype
        order = params.pop("order", None)
        if order is not None:
            self._order = order
        if params:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(params)}")

    def refresh_params(self):
        """Refresh shape and dtype from the chunks."""
        # NOTE(review): this call was missing from the extracted source.
        # ``refresh_tileable_shape`` is imported by the module and used nowhere
        # else, so it is assumed to belong here - confirm upstream.
        refresh_tileable_shape(self)
        if self._dtype is None:
            self._dtype = self.chunks[0].dtype

    @property
    def flags(self):
        """Contiguity flags as a dict.

        Tensors with at most one dimension report both flags as true;
        otherwise each flag reflects whether ``order`` matches it.
        """
        c_order = True if self.ndim <= 1 else self.order == TensorOrder.C_ORDER
        f_order = True if self.ndim <= 1 else self.order == TensorOrder.F_ORDER
        return {"C_CONTIGUOUS": c_order, "F_CONTIGUOUS": f_order}

    @property
    def real(self):
        """Result of ``arithmetic.real`` applied to this tensor."""
        from .arithmetic import real

        return real(self)

    @property
    def imag(self):
        """Result of ``arithmetic.imag`` applied to this tensor."""
        from .arithmetic import imag

        return imag(self)

    @property
    def dtype(self):
        """Stored dtype, falling back to the operand's dtype when it is unset."""
        return getattr(self, "_dtype", None) or self.op.dtype

    @property
    def order(self):
        """Stored memory order, or ``None`` if it has not been set."""
        return getattr(self, "_order", None)

    @property
    def nbytes(self):
        """Element count multiplied by the dtype's item size."""
        # NOTE(review): the left operand was lost in extraction; the element
        # count ``np.prod(self.shape)`` is assumed - confirm upstream.
        return np.prod(self.shape) * self.dtype.itemsize

    def get_chunk_slices(self, idx):
        """Return the slices of the chunk at ``idx`` given this tensor's nsplits."""
        return get_chunk_slices(self.nsplits, idx)

    def is_scalar(self):
        """Return True when the tensor has zero dimensions."""
        return self.ndim == 0

    isscalar = is_scalar

    def tosparse(self, missing=None):
        """Return a sparse version of the tensor (``self`` if already sparse)."""
        if self.issparse():
            return self

        from .datasource import fromdense

        return fromdense(self, missing=missing)

    def todense(self, fill_value=None):
        """Return a dense version of the tensor (``self`` if already dense)."""
        if not self.issparse():
            return self

        from .datasource import fromsparse

        return fromsparse(self, fill_value=fill_value)

    def transpose(self, *axes):
        """Transpose the tensor.

        ``axes`` may be given either as separate ints or as one iterable of
        ints; with no argument the axes are passed through as an empty tuple.
        """
        from .base import transpose

        # Accept both transpose(1, 0) and transpose((1, 0)).
        if len(axes) == 1 and isinstance(axes[0], Iterable):
            axes = axes[0]

        return transpose(self, axes)

    @property
    def T(self):
        """Shorthand for ``self.transpose()``."""
        return self.transpose()

    def reshape(self, shape, *shapes, **kw):
        """Reshape the tensor.

        The new shape may be given as one iterable or as separate ints. The
        only accepted keyword is ``order`` (default ``"C"``).

        Raises
        ------
        TypeError
            If any keyword other than ``order`` is passed.
        """
        from .reshape import reshape

        order = kw.pop("order", "C")
        if kw:
            raise TypeError(
                f"'{next(iter(kw))}' is an invalid keyword argument for this function"
            )

        # Normalize reshape(2, 3) and reshape((2, 3)) to a single tuple.
        if isinstance(shape, Iterable):
            shape = tuple(shape)
        else:
            shape = (shape,)
        shape += shapes

        return reshape(self, shape, order=order)

    def totiledb(self, uri, ctx=None, key=None, timestamp=None):
        """Store the tensor through ``datastore.totiledb`` at ``uri``."""
        from .datastore import totiledb

        return totiledb(uri, self, ctx=ctx, key=key, timestamp=timestamp)

    @staticmethod
    def from_dataframe(in_df):
        """Build a tensor from ``in_df`` via ``datasource.from_dataframe``."""
        from .datasource import from_dataframe

        return from_dataframe(in_df)

    def to_dataframe(self, *args, **kwargs):
        """Convert to a DataFrame; arguments go to ``dataframe_from_tensor``."""
        from ..dataframe.datasource.from_tensor import dataframe_from_tensor

        return dataframe_from_tensor(self, *args, **kwargs)

    @property
    def flat(self):
        """A ``flatiter`` over this tensor."""
        return flatiter(self)

    def to_numpy(self, session=None, **kw):
        """Execute the tensor and fetch its result."""
        return self._execute_and_fetch(session=session, **kw)

class Tensor(HasShapeTileable):
    """User-facing tensor; a thin wrapper that delegates to its ``TensorData``."""

    __slots__ = ()
    _allow_data_type_ = (TensorData,)
    type_name = "Tensor"

    def __len__(self):
        return len(self._data)

    @property
    def shape(self):
        """Shape of the underlying tensor data."""
        return self._data.shape

    @shape.setter
    def shape(self, new_shape):
        # Assigning a shape rebinds this wrapper to the reshaped data.
        self._data = self._data.reshape(new_shape).data

    def _update_shape(self, new_shape):
        # NOTE(review): this body was missing from the extracted source;
        # delegation to the data object is assumed - confirm upstream.
        self._data._update_shape(new_shape)

    @property
    def real(self):
        # NOTE(review): this body was missing from the extracted source;
        # delegation to ``TensorData.real`` is assumed - confirm upstream.
        return self._data.real

    @real.setter
    def real(self, new_real):
        from .arithmetic.setreal import set_real

        self._data = set_real(self._data, new_real).data

    @property
    def imag(self):
        # NOTE(review): this body was missing from the extracted source;
        # delegation to ``TensorData.imag`` is assumed - confirm upstream.
        return self._data.imag

    @imag.setter
    def imag(self, new_imag):
        from .arithmetic.setimag import set_imag

        self._data = set_imag(self._data, new_imag).data

    def __array__(self, dtype=None):
        """Fetch the tensor's result and return it as a NumPy array."""
        return np.asarray(self.to_numpy(), dtype=dtype)

    def __array_function__(self, func, types, args, kwargs):
        """Dispatch a NumPy function call to the same-named function of this package.

        The function's module path, minus its first component, is walked
        below the tensor package. ``NotImplemented`` is returned when no
        counterpart exists or when the counterpart is ``func`` itself.
        """
        from .. import tensor as module

        for submodule in func.__module__.split(".")[1:]:
            try:
                module = getattr(module, submodule)
            except AttributeError:
                return NotImplemented
        if not hasattr(module, func.__name__):
            return NotImplemented
        mars_func = getattr(module, func.__name__)
        if mars_func is func:
            # avoid Numpy func
            return NotImplemented
        return mars_func(*args, **kwargs)

    def view(self):
        return self._view()

    @property
    def ndim(self):
        """
        Number of array dimensions.

        Examples
        --------
        >>> import mars.tensor as mt
        >>> x = mt.array([1, 2, 3])
        >>> x.ndim
        1
        >>> y = mt.zeros((2, 3, 4))
        >>> y.ndim
        3
        """
        return super().ndim

    def transpose(self, *axes):
        """
        Returns a view of the tensor with axes transposed.

        For a 1-D tensor, this has no effect. (To change between column and
        row vectors, first cast the 1-D tensor into a matrix object.)
        For a 2-D tensor, this is the usual matrix transpose.
        For an n-D tensor, if axes are given, their order indicates how the
        axes are permuted (see Examples). If axes are not provided and
        ``a.shape = (i[0], i[1], ... i[n-2], i[n-1])``, then
        ``a.transpose().shape = (i[n-1], i[n-2], ... i[1], i[0])``.

        Parameters
        ----------
        axes : None, tuple of ints, or `n` ints

         * None or no argument: reverses the order of the axes.

         * tuple of ints: `i` in the `j`-th place in the tuple means `a`'s
           `i`-th axis becomes `a.transpose()`'s `j`-th axis.

         * `n` ints: same as an n-tuple of the same ints (this form is
           intended simply as a "convenience" alternative to the tuple form)

        Returns
        -------
        out : Tensor
            View of `a`, with axes suitably permuted.

        See Also
        --------
        Tensor.T : Tensor property returning the tensor transposed.

        Examples
        --------
        >>> import mars.tensor as mt

        >>> a = mt.array([[1, 2], [3, 4]])
        >>> a.execute()
        array([[1, 2],
               [3, 4]])
        >>> a.transpose().execute()
        array([[1, 3],
               [2, 4]])
        >>> a.transpose((1, 0)).execute()
        array([[1, 3],
               [2, 4]])
        >>> a.transpose(1, 0).execute()
        array([[1, 3],
               [2, 4]])
        """
        return self._data.transpose(*axes)

    @property
    def T(self):
        """
        Same as self.transpose(), except that self is returned if
        self.ndim < 2.

        Examples
        --------
        >>> import mars.tensor as mt

        >>> x = mt.array([[1.,2.],[3.,4.]])
        >>> x.execute()
        array([[ 1.,  2.],
               [ 3.,  4.]])
        >>> x.T.execute()
        array([[ 1.,  3.],
               [ 2.,  4.]])
        >>> x = mt.array([1.,2.,3.,4.])
        >>> x.execute()
        array([ 1.,  2.,  3.,  4.])
        >>> x.T.execute()
        array([ 1.,  2.,  3.,  4.])
        """
        return self._data.T

    def totiledb(self, uri, ctx=None, key=None, timestamp=None):
        return self._data.totiledb(uri, ctx=ctx, key=key, timestamp=timestamp)

    def copy(self, order="C"):
        return super().copy().astype(self.dtype, order=order, copy=False)

    def sort(self, axis=-1, kind=None, parallel_kind=None, psrs_kinds=None, order=None):
        """
        Sort a tensor, in-place.

        Parameters
        ----------
        axis : int, optional
            Axis along which to sort. Default is -1, which means sort along the
            last axis.
        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
            Sorting algorithm. Default is 'quicksort'.
        parallel_kind: {'PSRS'}, optional
            Parallel sorting algorithm.
        psrs_kinds: list with 3 elements, optional
            Sorting algorithms during PSRS algorithm.
        order : str or list of str, optional
            When `a` is a tensor with fields defined, this argument specifies
            which fields to compare first, second, etc.  A single field can
            be specified as a string, and not all fields need be specified,
            but unspecified fields will still be used, in the order in which
            they come up in the dtype, to break ties.

        See Also
        --------
        numpy.sort : Return a sorted copy of a tensor.
        argsort : Indirect sort.
        lexsort : Indirect stable sort on multiple keys.
        searchsorted : Find elements in sorted tensor.
        partition: Partial sort.

        Notes
        -----
        See ``sort`` for notes on the different sorting algorithms.

        Examples
        --------
        >>> import mars.tensor as mt
        >>> a = mt.array([[1,4], [3,1]])
        >>> a.sort(axis=1)
        >>> a.execute()
        array([[1, 4],
               [1, 3]])
        >>> a.sort(axis=0)
        >>> a.execute()
        array([[1, 3],
               [1, 4]])

        Use the `order` keyword to specify a field to use when sorting a
        structured tensor:

        >>> a = mt.array([('a', 2), ('c', 1)], dtype=[('x', 'S1'), ('y', int)])
        >>> a.sort(order='y')
        >>> a.execute()
        array([('c', 1), ('a', 2)],
              dtype=[('x', '|S1'), ('y', '<i4')])
        """
        from .base import sort

        # "In-place" means this wrapper is rebound to the sorted result's data.
        self._data = sort(
            self,
            axis=axis,
            kind=kind,
            parallel_kind=parallel_kind,
            psrs_kinds=psrs_kinds,
            order=order,
        ).data

    def partition(self, kth, axis=-1, kind="introselect", order=None, **kw):
        """
        Rearranges the elements in the tensor in such a way that the value of the
        element in kth position is in the position it would be in a sorted tensor.
        All elements smaller than the kth element are moved before this element and
        all equal or greater are moved behind it. The ordering of the elements in
        the two partitions is undefined.

        Parameters
        ----------
        kth : int or sequence of ints
            Element index to partition by. The kth element value will be in its
            final sorted position and all smaller elements will be moved before it
            and all equal or greater elements behind it.
            The order of all elements in the partitions is undefined.
            If provided with a sequence of kth it will partition all elements
            indexed by kth of them into their sorted position at once.
        axis : int, optional
            Axis along which to sort. Default is -1, which means sort along the
            last axis.
        kind : {'introselect'}, optional
            Selection algorithm. Default is 'introselect'.
        order : str or list of str, optional
            When `a` is a tensor with fields defined, this argument specifies
            which fields to compare first, second, etc. A single field can
            be specified as a string, and not all fields need to be specified,
            but unspecified fields will still be used, in the order in which
            they come up in the dtype, to break ties.

        See Also
        --------
        mt.partition : Return a partitioned copy of an tensor.
        argpartition : Indirect partition.
        sort : Full sort.

        Notes
        -----
        See ``mt.partition`` for notes on the different algorithms.

        Examples
        --------
        >>> import mars.tensor as mt
        >>> a = mt.array([3, 4, 2, 1])
        >>> a.partition(3)
        >>> a.execute()
        array([2, 1, 3, 4])

        >>> a.partition((1, 3))
        >>> a.execute()
        array([1, 2, 3, 4])
        """
        from .base import partition

        # As with sort(), the wrapper is rebound to the partitioned result's data.
        self._data = partition(self, kth, axis=axis, kind=kind, order=order, **kw).data

    @property
    def flat(self):
        """
        Flat iterator object to iterate over arrays.

        A `flatiter` iterator is returned by ``x.flat`` for any tensor `x`.
        It allows iterating over the tensor as if it were a 1-D array,
        either in a for-loop or by calling its `next` method.

        Iteration is done in row-major, C-style order (the last
        index varying the fastest). The iterator can also be indexed using
        basic slicing or advanced indexing.

        See Also
        --------
        Tensor.flat : Return a flat iterator over a tensor.
        Tensor.flatten : Returns a flattened copy of a tensor.

        Examples
        --------
        >>> import mars.tensor as mt

        >>> x = mt.arange(6).reshape(2, 3)
        >>> fl = x.flat

        >>> fl[2:4].execute()
        array([2, 3])
        """
        return self._data.flat

    def from_dataframe(self, in_df):
        return self._data.from_dataframe(in_df)

    def to_dataframe(self, *args, **kwargs):
        return self._data.to_dataframe(*args, **kwargs)

    def to_numpy(self, session=None, **kw):
        return self._data.to_numpy(session, **kw)


# Sparse tensors share the Tensor class; this name is an alias.
SparseTensor = Tensor


class flatiter:
    """Indexable flat accessor for a tensor.

    Reads go through ``tensor.flatten()`` and writes go through
    ``tensor.ravel()``; both are created once, at construction time.
    """

    def __init__(self, tensor):
        # flatten creates a copy
        self._flatten_tensor = tensor.flatten()
        # ravel creates a view
        self._ravel_tensor = tensor.ravel()

    def __getitem__(self, item):
        # a.flat[item] create a copy
        return self._flatten_tensor[item]

    def __setitem__(self, key, value):
        # a.flat[item] = value will apply changes to original tensor
        self._ravel_tensor[key] = value


class Indexes(Serializable):
    """Serializable holder with a single ``indexes`` field of arbitrary type."""

    indexes = AnyField("indexes")


# Entity classes and chunk classes that represent tensors.
TENSOR_TYPE = (Tensor, TensorData)
TENSOR_CHUNK_TYPE = (TensorChunk, TensorChunkData)

# Both the tensor and the scalar output type are registered with these classes.
register_output_types(OutputType.tensor, TENSOR_TYPE, TENSOR_CHUNK_TYPE)
register_output_types(OutputType.scalar, TENSOR_TYPE, TENSOR_CHUNK_TYPE)