Source code for mars.learn.metrics.pairwise.cosine

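```# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.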

import numpy as np

from .... import opcodes as OperandDef
from ....core import recursive_tile
from ....serialization.serializables import KeyField
from .... import tensor as mt
from ....tensor.core import TensorOrder
from ...preprocessing import normalize
from .core import PairwiseDistances

class CosineDistances(PairwiseDistances):
    """Operand producing the pairwise cosine distances between two inputs.

    The operand holds its two input tensors as ``x`` and ``y``. When tiled,
    the result is built as ``1 - cosine_similarity(x, y)``, clipped to the
    range ``[0, 2]``; if both inputs are the same object the diagonal of the
    result is additionally filled with ``0.0``.
    """

    _op_type_ = OperandDef.PAIRWISE_COSINE_DISTANCES

    # The two input tensors, declared as key fields named "x" and "y".
    _x = KeyField("x")
    _y = KeyField("y")

    def __init__(self, x=None, y=None, **kw):
        """Store ``x`` and ``y`` in the ``_x`` / ``_y`` fields via the base class."""
        super().__init__(_x=x, _y=y, **kw)

    @property
    def x(self):
        """First input tensor."""
        return self._x

    @property
    def y(self):
        """Second input tensor."""
        return self._y

    def _set_inputs(self, inputs):
        """Rebind ``_x`` and ``_y`` to the first two entries of ``self._inputs``."""
        super()._set_inputs(inputs)
        first, second = self._inputs[0], self._inputs[1]
        self._x = first
        self._y = second

    def __call__(self, x, y=None):
        """Validate the inputs and create the output tensor.

        Parameters
        ----------
        x : tensor
            First input; passed through ``check_pairwise_arrays``.
        y : tensor, optional
            Second input; passed through ``check_pairwise_arrays`` together
            with ``x``.

        Returns
        -------
        Tensor
            A C-ordered tensor of shape ``(x.shape[0], y.shape[0])`` whose
            inputs are the validated ``x`` and ``y``.
        """
        x, y = self.check_pairwise_arrays(x, y)
        out_shape = (x.shape[0], y.shape[0])
        return self.new_tensor([x, y], shape=out_shape, order=TensorOrder.C_ORDER)

    @classmethod
    def tile(cls, op):
        """Express the distances through tensor operations and tile those.

        This is a generator: it delegates to ``recursive_tile`` with
        ``yield from`` and returns a one-element list holding its result.
        """
        x, y = op.x, op.y
        same_input = x is y

        # Call with a single argument when both operands are the same object.
        similarity = cosine_similarity(x) if same_input else cosine_similarity(x, y)

        # distance = 1 - similarity, then clipped to [0, 2].
        distances = (similarity * -1) + 1
        distances = mt.clip(distances, 0, 2)

        if same_input:
            # Fill the diagonal with 0.0 when comparing the input with itself.
            mt.fill_diagonal(distances, 0.0)

        tiled = yield from recursive_tile(distances)
        return [tiled]

def cosine_similarity(X, Y=None, dense_output=True):
    """Compute cosine similarity between samples in X and Y.

    Cosine similarity, or the cosine kernel, is the normalized dot product
    of X and Y:

        K(X, Y) = <X, Y> / (||X||*||Y||)

    On L2-normalized data, this function is equivalent to linear_kernel.

    Read more in the :ref:`User Guide <cosine_similarity>`.

    Parameters
    ----------
    X : Tensor or sparse tensor, shape: (n_samples_X, n_features)
        Input data.

    Y : Tensor or sparse tensor, shape: (n_samples_Y, n_features)
        Input data. If ``None``, the output will be the pairwise
        similarities between all samples in ``X``.

    dense_output : boolean (optional), default True
        Whether to return dense output even when the input is sparse. If
        ``False``, the output is sparse if both input tensors are sparse.

    Returns
    -------
    kernel matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).
    """
    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y)

    x_unit = normalize(X, copy=True)
    # When both operands are the same object, reuse the normalized X rather
    # than normalizing a second time.
    y_unit = x_unit if X is Y else normalize(Y, copy=True)

    kernel = x_unit.dot(y_unit.T)
    return kernel.todense() if dense_output else kernel

[docs]def cosine_distances(X, Y=None):
"""Compute cosine distance between samples in X and Y.

Cosine distance is defined as 1.0 minus the cosine similarity.

Read more in the :ref:`User Guide <metrics>`.

Parameters
----------
X : array_like, sparse matrix
with shape (n_samples_X, n_features).

Y : array_like, sparse matrix (optional)
with shape (n_samples_Y, n_features).

Returns
-------
distance matrix : Tensor
A tensor with shape (n_samples_X, n_samples_Y).