import evaluate
from datasets import Features, Value
from scipy.stats import kendalltau, pearsonr, spearmanr
from sklearn.metrics import (
    max_error,
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score,
)

# Raw string so the BibTeX escape sequences (e.g. {\'e}) survive intact.
_CITATION = r"""
@article{scikit-learn,
  title   = {Scikit-learn: Machine Learning in {P}ython},
  author  = {Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. and
             Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. and
             Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
             Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2825--2830},
  year    = {2011}
}
@article{2020SciPy-NMeth,
  author  = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and
             Haberland, Matt and Reddy, Tyler and Cournapeau, David and
             Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and
             Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
             Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
             Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and
             Kern, Robert and Larson, Eric and Carey, C J and
             Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and
             {VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and
             Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
             Harris, Charles R. and Archibald, Anne M. and
             Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
             {van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
  title   = {{{SciPy} 1.0: Fundamental Algorithms for Scientific Computing in Python}},
  journal = {Nature Methods},
  year    = {2020},
  volume  = {17},
  pages   = {261--272},
  adsurl  = {https://rdcu.be/b08Wh},
  doi     = {10.1038/s41592-019-0686-2},
}
"""

_DESCRIPTION = """
This evaluator computes multiple regression metrics to assess the performance of a model.
Metrics calculated include: mean absolute error (MAE), mean absolute percentage error (MAPE),
mean squared error (MSE), R-squared (R2), max error (ME), and the Pearson, Spearman, and
Kendall Tau correlation measures.
"""

_KWARGS_DESCRIPTION = """
Args:
    predictions (`list` of `float`): Predicted values.
    references (`list` of `float`): Ground truth values.
Returns:
    A dict containing:
    mean_absolute_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html
    mean_absolute_percentage_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html
    mean_squared_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html
    r2_score (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html
    max_error (float): https://scikit-learn.org/stable/modules/generated/sklearn.metrics.max_error.html
    pearson_correlation (Tuple[float, float]): the first value is the correlation statistic, the second the p-value
        (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html)
    spearman_correlation (Tuple[float, float]): the first value is the correlation statistic, the second the p-value
        (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html)
    kendall_tau_correlation (Tuple[float, float]): the first value is the correlation statistic, the second the p-value
        (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html)
"""


class RegressionEvaluator(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=Features(
                {"predictions": Value("float"), "references": Value("float")}
            ),
        )

    def _compute(self, predictions, references):
        error_fns = [
            mean_absolute_error,
            mean_absolute_percentage_error,
            mean_squared_error,
            max_error,
            r2_score,
        ]
        # Map the documented output keys to the scipy correlation functions;
        # explicit keys keep the results consistent with the docstring
        # (fn.__name__ would yield "pearsonr", "spearmanr", "kendalltau").
        correlation_fns = {
            "pearson_correlation": pearsonr,
            "spearman_correlation": spearmanr,
            "kendall_tau_correlation": kendalltau,
        }

        results = {}

        # Compute the scalar error metrics; sklearn expects (y_true, y_pred).
        for fn in error_fns:
            results[fn.__name__] = float(fn(references, predictions))

        # Compute the correlation measures together with their p-values.
        # The `.statistic`/`.pvalue` result attributes require SciPy >= 1.9.
        for name, fn in correlation_fns.items():
            output = fn(references, predictions)
            results[name] = (float(output.statistic), float(output.pvalue))

        return results
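

# A minimal usage sketch (not part of the original module; the sample values
# are illustrative only). `evaluate.Metric` subclasses can generally be
# instantiated directly, and `compute` routes the inputs through `_compute`
# above; loading this file via `evaluate.load` on its script path would also work.
if __name__ == "__main__":
    metric = RegressionEvaluator()
    results = metric.compute(
        predictions=[2.5, 0.0, 2.0, 8.0],
        references=[3.0, -0.5, 2.0, 7.0],
    )
    print(results["mean_absolute_error"])  # 0.5 for this sample
    print(results["pearson_correlation"])  # (statistic, p-value) tuple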