Spaces:
Sleeping
Sleeping
import evaluate | |
from datasets import Features, Value | |
from scipy.stats import kendalltau, pearsonr, spearmanr | |
from sklearn.metrics import ( | |
max_error, | |
mean_absolute_error, | |
mean_absolute_percentage_error, | |
mean_squared_error, | |
r2_score, | |
) | |
# BibTeX entries for scikit-learn and SciPy.
# Raw string: the entries contain LaTeX accent macros (\'e, \.I, \^o). In a
# regular string literal `\'` would silently lose its backslash and `\.` / `\^`
# are invalid escape sequences, so the citation would be emitted corrupted.
_CITATION = r"""
@article{scikit-learn,
title={Scikit-learn: Machine Learning in {P}ython},
author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
journal={Journal of Machine Learning Research},
volume={12},
pages={2825--2830},
year={2011}
}
@article{2020SciPy-NMeth,
author = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and
Haberland, Matt and Reddy, Tyler and Cournapeau, David and
Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and
Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and
Kern, Robert and Larson, Eric and Carey, C J and
Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and
{VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and
Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
Harris, Charles R. and Archibald, Anne M. and
Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
{van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
title = {{{SciPy} 1.0: Fundamental Algorithms for Scientific
Computing in Python}},
journal = {Nature Methods},
year = {2020},
volume = {17},
pages = {261--272},
adsurl = {https://rdcu.be/b08Wh},
doi = {10.1038/s41592-019-0686-2},
}
"""
# One-paragraph summary of what the evaluator reports.
_DESCRIPTION = """
This evaluator computes multiple regression metrics to assess the performance of a model. Metrics calculated include: mean absolute error (MAE),
mean absolute percentage error (MAPE), mean squared error (MSE), R-squared (R2), max error (ME), Pearson, Spearman and Kendall Tau correlation measures.
"""

# Usage documentation for the inputs and the returned dict. The result keys
# listed here are the names of the scikit-learn / SciPy functions that produce
# each value, because the results are keyed by function name.
_KWARGS_DESCRIPTION = """
Args:
    predictions (`list` of `float`): Predicted values.
    references (`list` of `float`): Ground truth values.
Returns:
    A dict containing:
        mean_absolute_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html
        mean_absolute_percentage_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html
        mean_squared_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html
        max_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.max_error.html
        r2_score (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html
        pearsonr (Tuple[float, float]): Pearson correlation, the first value being the score and the second one the p-value
            (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html)
        spearmanr (Tuple[float, float]): Spearman correlation, the first value being the score and the second one the p-value
            (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html)
        kendalltau (Tuple[float, float]): Kendall Tau correlation, the first value being the score and the second one the p-value
            (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html)
"""
class RegressionEvaluator(evaluate.Metric):
    """Bundle of common regression metrics exposed as a single `evaluate` metric.

    One computation reports scikit-learn error measures (MAE, MAPE, MSE,
    max error, R2) together with SciPy correlation measures (Pearson,
    Spearman, Kendall tau).
    """

    def _info(self):
        """Describe the metric and declare its input schema.

        Returns:
            An `evaluate.MetricInfo` carrying the module-level description,
            citation and usage text, with one float `predictions` value and
            one float `references` value per example.
        """
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=Features(
                {"predictions": Value("float"), "references": Value("float")}
            ),
        )

    def _compute(self, predictions, references):
        """Compute every error and correlation metric for the given values.

        Args:
            predictions: Predicted values.
            references: Ground truth values.

        Returns:
            A dict keyed by the name of the function that produced each
            value. The error metrics (`mean_absolute_error`,
            `mean_absolute_percentage_error`, `mean_squared_error`,
            `max_error`, `r2_score`) map to a float. The correlation
            measures (`pearsonr`, `spearmanr`, `kendalltau`) map to a
            `(score, p_value)` tuple of floats.
        """
        error_fns = (
            mean_absolute_error,
            mean_absolute_percentage_error,
            mean_squared_error,
            max_error,
            r2_score,
        )
        correlation_fns = (pearsonr, spearmanr, kendalltau)

        # Every function is called as fn(references, predictions), i.e. with
        # the ground truth first.
        results = {
            fn.__name__: float(fn(references, predictions)) for fn in error_fns
        }

        # Correlation measures come with a p-value.
        for fn in correlation_fns:
            # Unpack positionally instead of reading `.statistic`: SciPy's
            # result objects unpack as (score, p_value), and this also works
            # on older SciPy releases whose results do not expose a
            # `statistic` attribute (e.g. `pearsonr` returning a plain tuple).
            score, p_value = fn(references, predictions)
            results[fn.__name__] = (float(score), float(p_value))
        return results