# Copyright 1999-2020 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from .... import opcodes as OperandDef
from ....serialize import KeyField
from .... import tensor as mt
from ....utils import recursive_tile
from ....tensor.core import TensorOrder
from ...preprocessing import normalize
from .core import PairwiseDistances


class CosineDistances(PairwiseDistances):
    """Operand producing the pairwise cosine distances between ``x`` and ``y``.

    The operand carries its two inputs as key fields and, when tiled,
    rewrites itself as ``1 - cosine_similarity(x, y)`` built from tensor
    operations.
    """

    _op_type_ = OperandDef.PAIRWISE_COSINE_DISTANCES

    _x = KeyField('x')
    _y = KeyField('y')

    def __init__(self, x=None, y=None, dtype=None, gpu=None, **kw):
        super().__init__(_x=x, _y=y, _dtype=dtype, _gpu=gpu, **kw)

    @property
    def x(self):
        """First input of the operand."""
        return self._x

    @property
    def y(self):
        """Second input of the operand."""
        return self._y

    def _set_inputs(self, inputs):
        # Keep the ``_x`` / ``_y`` fields pointing at the operand's current
        # inputs after the base class has stored them.
        super()._set_inputs(inputs)
        self._x = self._inputs[0]
        self._y = self._inputs[1]

    def __call__(self, x, y=None):
        """Validate the inputs and create the output tensor.

        Parameters
        ----------
        x : Tensor
            First input; its first dimension gives the number of output rows.
        y : Tensor, optional
            Second input; its first dimension gives the number of output
            columns. ``None`` is handed to ``check_pairwise_arrays`` as is.

        Returns
        -------
        Tensor
            A C-ordered tensor of shape ``(x.shape[0], y.shape[0])``.
        """
        x, y = self.check_pairwise_arrays(x, y)
        return self.new_tensor([x, y], shape=(x.shape[0], y.shape[0]),
                               order=TensorOrder.C_ORDER)

    @classmethod
    def tile(cls, op):
        """Tile ``op`` by expressing it through tensor operations.

        Parameters
        ----------
        op : CosineDistances
            The operand to tile.

        Returns
        -------
        list
            A single-element list holding the recursively tiled result.
        """
        x, y = op.x, op.y
        if x is y:
            S = cosine_similarity(x)
        else:
            S = cosine_similarity(x, y)
        # distance = 1 - similarity
        S = (S * -1) + 1
        # Bound the distances to [0, 2]; presumably this guards against
        # floating point round-off pushing values outside that range.
        S = mt.clip(S, 0, 2)
        if x is y:
            # Distances between a sample and itself are forced to 0.0.
            # The return value is ignored, so this relies on
            # ``fill_diagonal`` updating ``S`` in place.
            mt.fill_diagonal(S, 0.0)
        return [recursive_tile(S)]


def cosine_similarity(X, Y=None, dense_output=True):
    """Compute cosine similarity between samples in X and Y.

    Cosine similarity, or the cosine kernel, computes similarity as the
    normalized dot product of X and Y:

        K(X, Y) = <X, Y> / (||X||*||Y||)

    On L2-normalized data, this function is equivalent to linear_kernel.

    Read more in the :ref:`User Guide <cosine_similarity>`.

    Parameters
    ----------
    X : Tensor or sparse tensor, shape: (n_samples_X, n_features)
        Input data.

    Y : Tensor or sparse tensor, shape: (n_samples_Y, n_features)
        Input data. If ``None``, the output will be the pairwise
        similarities between all samples in ``X``.

    dense_output : boolean (optional), default True
        Whether to return dense output even when the input is sparse.
        If ``False``, the output is sparse if both input tensors are sparse.

    Returns
    -------
    kernel matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).
    """
    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y)

    X_normalized = normalize(X, copy=True)
    if X is Y:
        # Same object on both sides: reuse the normalized copy.
        Y_normalized = X_normalized
    else:
        Y_normalized = normalize(Y, copy=True)

    K = X_normalized.dot(Y_normalized.T)
    if dense_output:
        K = K.todense()
    return K


def cosine_distances(X, Y=None):
    """Compute cosine distance between samples in X and Y.

    Cosine distance is defined as 1.0 minus the cosine similarity.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array_like, sparse matrix
        with shape (n_samples_X, n_features).

    Y : array_like, sparse matrix (optional)
        with shape (n_samples_Y, n_features).

    Returns
    -------
    distance matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).

    See also
    --------
    mars.learn.metrics.pairwise.cosine_similarity
    mars.tensor.spatial.distance.cosine : dense matrices only
    """
    # The operand is always created with a float64 dtype.
    op = CosineDistances(x=X, y=Y, dtype=np.dtype(np.float64))
    return op(X, y=Y)