#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Union
import numpy as np
from ... import opcodes as OperandDef
from ...serialization.serializables import BoolField
from ...typing import TileableType
from ..operands import TensorOperand, TensorOperandMixin
from ..datasource import tensor as astensor
from ..array_utils import as_same_device, device
from ..core import TensorOrder
class TensorIsIn(TensorOperand, TensorOperandMixin):
_op_type_ = OperandDef.ISIN
assume_unique = BoolField("assume_unique")
invert = BoolField("invert")
def __call__(self, element, test_elements):
self.dtype = np.dtype(bool)
return self.new_tensor(
[element, test_elements], shape=element.shape, order=TensorOrder.C_ORDER
)
@classmethod
def tile(cls, op):
from ..merge.stack import TensorStack
from ..reduction import TensorAll, TensorAny
ar1, ar2 = op.inputs
invert = op.invert
out = op.outputs[0]
out_chunks = []
for ar1_chunk in ar1.chunks:
to_concat_chunks = []
for ar2_chunk in ar2.chunks:
chunk_op = op.copy().reset_key()
out_chunk = chunk_op.new_chunk(
[ar1_chunk, ar2_chunk],
dtype=out.dtype,
shape=ar1_chunk.shape,
order=out.order,
index=ar1_chunk.index,
)
to_concat_chunks.append(out_chunk)
if len(to_concat_chunks) == 1:
out_chunks.append(to_concat_chunks[0])
else:
# concat chunks
concat_op = TensorStack(axis=0)
shape = (len(to_concat_chunks),) + ar1_chunk.shape
concat_chunk = concat_op.new_chunk(
to_concat_chunks, shape=shape, dtype=out.dtype, order=out.order
)
if not invert:
chunk_op = TensorAny(axis=(0,), dtype=out.dtype)
out_chunk = chunk_op.new_chunk(
[concat_chunk],
shape=ar1_chunk.shape,
dtype=out.dtype,
order=out.order,
index=ar1_chunk.index,
)
else:
chunk_op = TensorAll(axis=(0,), dtype=out.dtype)
out_chunk = chunk_op.new_chunk(
[concat_chunk],
shape=ar1_chunk.shape,
dtype=out.dtype,
order=out.order,
index=ar1_chunk.index,
)
out_chunks.append(out_chunk)
params = out.params.copy()
params["nsplits"] = ar1.nsplits
params["chunks"] = out_chunks
new_op = op.copy()
return new_op.new_tensors(op.inputs, kws=[params])
@classmethod
def execute(cls, ctx, op):
(element, test_elements), device_id, xp = as_same_device(
[ctx[c.key] for c in op.inputs], device=op.device, ret_extra=True
)
with device(device_id):
ctx[op.outputs[0].key] = xp.isin(
element, test_elements, assume_unique=op.assume_unique, invert=op.invert
)
[docs]def isin(
element: Union[TileableType, np.ndarray],
test_elements: Union[TileableType, np.ndarray, list],
assume_unique: bool = False,
invert: bool = False,
):
"""
Calculates `element in test_elements`, broadcasting over `element` only.
Returns a boolean array of the same shape as `element` that is True
where an element of `element` is in `test_elements` and False otherwise.
Parameters
----------
element : array_like
Input tensor.
test_elements : array_like
The values against which to test each value of `element`.
This argument is flattened if it is a tensor or array_like.
See notes for behavior with non-array-like parameters.
assume_unique : bool, optional
If True, the input tensors are both assumed to be unique, which
can speed up the calculation. Default is False.
invert : bool, optional
If True, the values in the returned tensor are inverted, as if
calculating `element not in test_elements`. Default is False.
``mt.isin(a, b, invert=True)`` is equivalent to (but faster
than) ``mt.invert(mt.isin(a, b))``.
Returns
-------
isin : Tensor, bool
Has the same shape as `element`. The values `element[isin]`
are in `test_elements`.
See Also
--------
in1d : Flattened version of this function.
Notes
-----
`isin` is an element-wise function version of the python keyword `in`.
``isin(a, b)`` is roughly equivalent to
``mt.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.
`element` and `test_elements` are converted to tensors if they are not
already. If `test_elements` is a set (or other non-sequence collection)
it will be converted to an object tensor with one element, rather than a
tensor of the values contained in `test_elements`. This is a consequence
of the `tensor` constructor's way of handling non-sequence collections.
Converting the set to a list usually gives the desired behavior.
Examples
--------
>>> import mars.tensor as mt
>>> element = 2*mt.arange(4).reshape((2, 2))
>>> element.execute()
array([[0, 2],
[4, 6]])
>>> test_elements = [1, 2, 4, 8]
>>> mask = mt.isin(element, test_elements)
>>> mask.execute()
array([[ False, True],
[ True, False]])
>>> element[mask].execute()
array([2, 4])
>>> mask = mt.isin(element, test_elements, invert=True)
>>> mask.execute()
array([[ True, False],
[ False, True]])
>>> element[mask]
array([0, 6])
Because of how `array` handles sets, the following does not
work as expected:
>>> test_set = {1, 2, 4, 8}
>>> mt.isin(element, test_set).execute()
array([[ False, False],
[ False, False]])
Casting the set to a list gives the expected result:
>>> mt.isin(element, list(test_set)).execute()
array([[ False, True],
[ True, False]])
"""
element, test_elements = astensor(element), astensor(test_elements).ravel()
op = TensorIsIn(assume_unique=assume_unique, invert=invert)
return op(element, test_elements)