# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import numpy as np
from sklearn.exceptions import UndefinedMetricWarning
from ... import execute
from ... import tensor as mt
from ..utils.validation import (
check_array,
check_consistent_length,
column_or_1d,
_num_samples,
)
def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"):
    """Validate a pair of regression targets and the ``multioutput`` option.

    Both targets are checked for consistent length, passed through
    ``check_array`` and, when one-dimensional, reshaped into a single
    column so that callers always receive 2-D data.

    Parameters
    ----------
    y_true : array-like
        Ground truth (correct) target values.
    y_pred : array-like
        Estimated target values.
    multioutput : array-like or string in ['raw_values', 'uniform_average',
        'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().
    dtype : str or list, default="numeric"
        The dtype argument passed to ``check_array`` for both targets.

    Returns
    -------
    type_true : one of {'continuous', 'continuous-multioutput'}
        'continuous' when there is exactly one output column,
        'continuous-multioutput' otherwise.
    y_true : array-like of shape (n_samples, n_outputs)
        Ground truth (correct) target values.
    y_pred : array-like of shape (n_samples, n_outputs)
        Estimated target values.
    multioutput : array-like of shape (n_outputs) or string in ['raw_values',
        'uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.

    Raises
    ------
    ValueError
        If the two targets have a different number of outputs, if
        ``multioutput`` is an unknown keyword, or if custom weights are
        supplied for a single output or in the wrong quantity.
    """
    check_consistent_length(y_true, y_pred)
    y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
    y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)

    # Promote 1-D targets to a single column.
    column_shape = (-1, 1)
    if y_true.ndim == 1:
        y_true = y_true.reshape(column_shape)
    if y_pred.ndim == 1:
        y_pred = y_pred.reshape(column_shape)

    true_width = y_true.shape[1]
    pred_width = y_pred.shape[1]
    if true_width != pred_width:
        raise ValueError(
            "y_true and y_pred have different number of output "
            "({0}!={1})".format(true_width, pred_width)
        )

    known_keywords = ("raw_values", "uniform_average", "variance_weighted")
    is_keyword = isinstance(multioutput, str)

    if is_keyword and multioutput not in known_keywords:
        raise ValueError(
            "Allowed 'multioutput' string values are {}. "
            "You provided multioutput={!r}".format(known_keywords, multioutput)
        )

    if not is_keyword and multioutput is not None:
        # Anything that is neither a keyword nor None is treated as
        # per-output weights.
        multioutput = check_array(multioutput, ensure_2d=False)
        if true_width == 1:
            raise ValueError("Custom weights are useful only in multi-output cases.")
        if true_width != len(multioutput):
            raise ValueError(
                ("There must be equally many custom weights (%d) as outputs (%d).")
                % (len(multioutput), true_width)
            )

    if true_width == 1:
        y_type = "continuous"
    else:
        y_type = "continuous-multioutput"
    return y_type, y_true, y_pred, multioutput
def r2_score(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
    session=None,
    run_kwargs=None
):
    """:math:`R^2` (coefficient of determination) regression score function.

    Best possible score is 1.0 and it can be negative (because the
    model can be arbitrarily worse). A constant model that always
    predicts the expected value of y, disregarding the input features,
    would get a :math:`R^2` score of 0.0.

    Read more in the :ref:`User Guide <r2_score>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, \
            array-like of shape (n_outputs,) or None, default='uniform_average'

        Defines aggregating of multiple output scores.
        Array-like value defines weights used to average scores.
        Default is "uniform_average".

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

    session : optional, default=None
        Session forwarded to every execution/fetch call made by this
        function.

    run_kwargs : dict, optional, default=None
        Extra keyword arguments forwarded to every execution call made by
        this function. ``None`` is treated as an empty dict.

    Returns
    -------
    z : float or tensor of floats
        The :math:`R^2` score or tensor of scores if 'multioutput' is
        'raw_values'. In the 'raw_values' case the tensor is returned
        as-is; this function does not trigger its execution.

    Notes
    -----
    This is not a symmetric function.

    Unlike most other scores, :math:`R^2` score may be negative (it need not
    actually be the square of a quantity R).

    This metric is not well-defined for single samples and will return a NaN
    value if n_samples is less than two.

    References
    ----------
    .. [1] `Wikipedia entry on the Coefficient of determination
            <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_

    Examples
    --------
    >>> from mars.learn.metrics import r2_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> r2_score(y_true, y_pred)
    0.948...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> r2_score(y_true, y_pred,
    ...          multioutput='variance_weighted')
    0.938...
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [2, 2, 2]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [3, 2, 1]
    >>> r2_score(y_true, y_pred)
    -3.0
    """
    _, y_true, y_pred, multioutput = _check_reg_targets(y_true, y_pred, multioutput)
    check_consistent_length(y_true, y_pred, sample_weight)

    if _num_samples(y_pred) < 2:
        msg = "R^2 score is not well-defined with less than two samples."
        warnings.warn(msg, UndefinedMetricWarning)
        return float("nan")

    # Resolve the optional execution kwargs once instead of at every call site.
    exec_kwargs = run_kwargs or {}

    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
        # Column vector so the weights broadcast across the output columns.
        weight = sample_weight[:, np.newaxis]
    else:
        weight = 1.0

    # Per-output weighted residual sum of squares and total sum of squares.
    numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
    denominator = (
        weight * (y_true - mt.average(y_true, axis=0, weights=sample_weight)) ** 2
    ).sum(axis=0, dtype=np.float64)
    nonzero_denominator = denominator != 0
    nonzero_numerator = numerator != 0
    valid_score = nonzero_denominator & nonzero_numerator

    # Start from a perfect score; outputs with a zero numerator keep 1.0.
    output_scores = mt.ones((y_true.shape[1],))
    output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score])
    # arbitrary set to zero to avoid -inf scores, having a constant
    # y_true is not interesting for scoring a regression anyway
    output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            # return scores individually
            return output_scores
        elif multioutput == "uniform_average":
            # passing None as weights results in uniform mean
            avg_weights = None
        elif multioutput == "variance_weighted":
            avg_weights = denominator
            # avoid fail on constant y or one-element arrays
            any_nonzero_denominator = mt.any(nonzero_denominator)
            execute(
                any_nonzero_denominator,
                nonzero_denominator,
                session=session,
                **exec_kwargs
            )
            # Fetch from the same session the tensors were executed in.
            if not any_nonzero_denominator.fetch(session=session):
                # Every weight is zero, so a weighted average is undefined:
                # report 1.0 when all residuals are zero too, else 0.0.
                if not mt.any(nonzero_numerator).to_numpy(
                    session=session, **exec_kwargs
                ):
                    return 1.0
                else:
                    return 0.0
    else:
        # Custom per-output weights, or None for a uniform mean.
        avg_weights = multioutput

    return mt.average(output_scores, weights=avg_weights).execute(
        session=session, **exec_kwargs
    )