Source code for SeqMetrics._rgr


# https://github.com/aziele/statistical-distance/blob/main/distance.py
# https://github.com/pyRiemann/pyRiemann/blob/master/pyriemann/utils/distance.py

import warnings
from math import sqrt
from typing import Union

import numpy as np

try:
    from scipy.stats import kendalltau
    from scipy.signal import find_peaks
except (ImportError, ModuleNotFoundError):
    kendalltau, find_peaks = None, None

from .utils import maybe_treat_arrays
from .utils import _geometric_mean, _mean_tweedie_deviance, _foo, list_subclass_methods
from ._main import Metrics, EPS, ERR_STATE


[docs]class RegressionMetrics(Metrics): """ Calculates more than 100 regression performance metrics related to sequence data. Example ------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> errors = RegressionMetrics(t,p) >>> all_errors = errors.calculate_all() """
[docs] def __init__(self, *args, **kwargs): """ Initializes ``Metrics``. args and kwargs go to parent class :py:class:`SeqMetrics.Metrics`. """ super().__init__(*args, **kwargs) self.all_methods: list = list_subclass_methods(RegressionMetrics, True, additional_ignores=['calculate_hydro_metrics', # 'calculate_scale_dependent_metrics', # 'calculate_scale_independent_metrics' ]) if kendalltau is None and 'kendall_tau' in self.all_methods: self.all_methods.remove('kendall_tau') if find_peaks is None and 'mape_for_peaks' in self.all_methods: self.all_methods.remove('mape_for_peaks') # if arrays contain negative values, following three errors can not be computed for array in [self.true, self.predicted]: assert len(array) > 0, "Input arrays should not be empty" if len(array[array < 0.0]) > 0: self.all_methods = [m for m in self.all_methods if m not in ('mean_gamma_deviance', 'mean_poisson_deviance', 'mean_square_log_error')] if (array <= 0).any(): # mean tweedie error is not computable self.all_methods = [m for m in self.all_methods if m not in ('mean_gamma_deviance', 'mean_poisson_deviance')]
def _hydro_metrics(self) -> list: """Names of metrics related to hydrology""" return self._minimal() + [ 'fdc_flv', 'fdc_fhv', 'kge', 'kge_np', 'kge_mod', 'kge_bound', 'kgeprime_bound', 'kgenp_bound', 'nse', 'nse_alpha', 'nse_beta', 'nse_mod', 'nse_bound'] @staticmethod def _scale_independent_metrics() -> list: """Names of scale independent metrics.""" return ['mape', 'r2', 'nse'] @staticmethod def _scale_dependent_metrics() -> list: """Names of scale dependent metrics.""" return ['mse', 'rmse', 'mae'] @staticmethod def _minimal() -> list: """some minimal and basic metrics""" return ['r2', 'mape', 'nrmse', 'corr_coeff', 'rmse', 'mae', 'mse', 'mpe', 'mase', 'r2_score'] # def abs_pbias(self) -> float: # """Absolute Percent bias # # Examples # --------- # >>> import numpy as np # >>> from SeqMetrics import RegressionMetrics # >>> t = np.random.random(10) # >>> p = np.random.random(10) # >>> metrics= RegressionMetrics(t, p) # >>> metrics.abs_pbias() # """ # return abs_pbias(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def acc(self) -> float: """Anomaly correction coefficient. See `Langland et al., 2012 <https://doi.org/10.3402/tellusa.v64i0.17531>`_; `Miyakoda_ et al., 1972 <https://doi.org/10.1080/02723646.1972.10642213>`_ and Murphy_ et al., 1989. .. math:: ACC = \\frac{\\sum_{i=1}^{N} \\left( (\\text{predicted}_i - \\overline{\\text{predicted}})(\\text{true}_i - \\overline{\\text{true}}) \\right)}{(N-1) \\cdot \\sigma_{\\text{true}} \\cdot \\sigma_{\\text{predicted}}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.acc() """ return acc(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def adjusted_r2(self) -> float: """ Adjusted R squared also known as Ezekiel estimate <https://www.glmj.org/archives/MLRV_2007_33_1.pdf>`_. .. math:: \\text{Adjusted } R^2 = 1 - \\left( \\frac{(1 - R^2) \\cdot (n - 1)}{n - k - 1} \\right) where n = number of observations and k = 1. Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.adjusted_r2() """ return adjusted_r2(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def agreement_index(self) -> float: """ Agreement Index (d) developed by `Willmott, 1981 <https://doi.org/10.1080/02723646.1981.10642213>`_. It detects additive and pro-portional differences in the observed and simulated means and variances (Moriasi_ et al., 2015 <https://web.ics.purdue.edu/~mgitau/pdf/Moriasi%20et%20al%202015.pdf>`_). It is overly sensitive to extreme values due to the squared differences. It can also be used as a substitute for R2 to identify the degree to which model predictions are error-free. .. math:: d = 1 - \\frac{\\sum_{i=1}^{N}(e_{i} - s_{i})^2}{\\sum_{i=1}^{N}(\\left | s_{i} - \\bar{e} \\right | + \\left | e_{i} - \\bar{e} \\right |)^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.agreement_index() """ return agreement_index(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def aic(self, p=1) -> float: """ It estimates relative quality of a model for a given input. By comparing AIC for differnt models, we can identify the model which best explains the data. Theoretically, it penlizes those models with more parameters thereby reducing overfitting/model complexity. When comparing multiple models, the one with the lowest value is generally preferred. When sample size is small, then AIC can be biased. `Akaike_ Information Criterion <https://doi.org/10.1007/978-1-4612-1694-0_15>`_. Modifying from this `source <https://github.com/UBC-MDS/RegscorePy/blob/master/RegscorePy/aic.py>`_ .. math:: AIC = n \\cdot \\ln\\left(\\frac{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}{n}\\right) + 2p Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.aic( ) """ return aic(true=self.true, predicted=self.predicted, treat_arrays=False, p=p)
[docs] def aitchison(self, center='mean') -> float: """ Aitchison distance as used in `Zhang et al., 2020 <https://doi.org/10.5194/hess-24-2505-2020>`_. .. math:: d_{\\text{Aitchison}} = \\sqrt{\\sum_{i=1}^{n} \\left( \\log(\\text{true}_i) - \\text{center}(\\log(\\text{true})) - \\left(\\log(\\text{predicted}_i) - \\text{center}(\\log(\\text{predicted}))\\right) \\right)^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.aitchison( ) """ return aitchison(true=self.true, predicted=self.predicted, treat_arrays=False, center=center)
[docs] def amemiya_adj_r2(self) -> float: """Amemiya's Adjusted R-squared .. math:: R^2_{\\text{adj, Amemiya}} = 1 - \\left( \\frac{(1 - R^2) \\cdot (n + k)}{n - k - 1} \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.amemiya_adj_r2( ) """ return amemiya_adj_r2(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def amemiya_pred_criterion(self) -> float: """Amemiya's Prediction Criterion .. math:: \\text{APC} = \\left( \\frac{n + k}{n - k} \\right) \\left( \\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2 \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.amemiya_pred_criterion() """ return amemiya_pred_criterion(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def bias(self) -> float: """ Bias as and given by `Gupta1998 et al., 1998 <https://doi.org/10.1029/97WR03495>`_ It is also called mean error. .. math:: Bias=\\frac{1}{N}\\sum_{i=1}^{N}(e_{i}-s_{i}) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.bias() """ return bias(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def bic(self, p=1) -> float: """ Bayesian Information Criterion Minimising the BIC_ is intended to give the best model. The model chosen by the BIC is either the same as that chosen by the AIC, or one with fewer terms. This is because the BIC penalises the number of parameters more heavily than the AIC. Modified after RegscorePy_. .. math:: BIC = n \\cdot \\ln\\left(\\frac{\\text{SSE}}{n}\\right) + p \\cdot \\ln(n) .. _BIC: https://otexts.com/fpp2/selecting-predictors.html#schwarzs-bayesian-information-criterion .. _RegscorePy: https://github.com/UBC-MDS/RegscorePy/blob/master/RegscorePy/bic.py Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.bic() """ return bic(true=self.true, predicted=self.predicted, treat_arrays=False, p=p)
[docs] def brier_score(self) -> float: """ Adopted from `SkillMetrics <https://github.com/PeterRochford/SkillMetrics/blob/master/skill_metrics/brier_score.py>`_ This function calculates the `Brier score (BS) <https://viterbi-web.usc.edu/~shaddin/teaching/cs699fa17/docs/Brier50.pdf>`_, which is a measure of the mean-square error of probability forecasts for a dichotomous (two-category) event, such as the occurrence/non-occurrence of precipitation. The score is calculated using the formula: .. math:: BS = sum_(n=1)^N (f_n - o_n)^2/N where f is the forecast probabilities, o is the observed probabilities (0 or 1), and N is the total number of values in f & o. Note that f & o must have the same number of values, and those values must be in the range_ [0,1]. Returns -------- float BS : Brier score References --------- `Glenn W. Brier, 1950: Verification of forecasts expressed in terms of probabilities. Mon. We. Rev., 78, 1-23. D. S. Wilks, 1995: Statistical Methods in the Atmospheric Sciences. Cambridge Press. 547 pp. <https://viterbi-web.usc.edu/~shaddin/teaching/cs699fa17/docs/Brier50.pdf>`_ .. _SkillMetrics: https://github.com/PeterRochford/SkillMetrics/blob/master/skill_metrics/brier_score.py .. _range: https://data.library.virginia.edu/a-brief-on-brier-scores/ Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.brier_score() """ return brier_score(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def corr_coeff(self) -> float: """ Pearson correlation coefficient as proposed by `Pearson, 1895 <https://royalsocietypublishing.org/doi/abs/10.1098/rsta.1895.0010>`_. It measures linear correlatin between true and predicted arrays. It is sensitive to outliers. The following equation is taken after `Jiang et al., 2022 <https://doi.org/10.5194/essd-15-621-2023>`_ . .. math:: r = \\frac{\\sum ^n _{i=1}(predicted_i - \\bar{predicted})(s_i - \\bar{observed})}{\\sqrt{\\sum ^n _{i=1}(predicted_i - \\bar{predicted})^2} \\sqrt{\\sum ^n _{i=1}(true_i - \\bar{true})^2}} Where n is length of true/predicted arrays. Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.corr_coeff() """ return corr_coeff(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def covariance(self) -> float: """ `Covariance as defined in Eq. 3 at mathworld <https://mathworld.wolfram.com/Covariance.html>`_ A positive covariance means that the means of true and predicted values increase or decrease together. .. math:: Covariance = \\frac{1}{N} \\sum_{i=1}^{N}((true_{i} - \\bar{true}) * (predicted_{i} - \\bar{predicted})) The bar represents the mean of the array. Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.covariance() """ return covariance(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def cronbach_alpha(self) -> float: """ It is a measure of internal consitency of data following Cheung and Yip, 2005 `<https://doi.org/10.1016/B0-12-369398-5/00396-0>`_. See ucla_ and stackoverflow_ pages for more info. .. math:: alpha = \\frac{N}{N - 1} \\left(1 - \\frac{\\sum_{i=1}^{N} \\sigma^2_{i}}{\\sigma^2_{\\text{total}}}\\right) .. _ucla: https://stats.idre.ucla.edu/spss/faq/what-does-cronbachs-alpha-mean/ .. _stackoverflow: https://stackoverflow.com/a/20799687/5982232 Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.cronbach_alpha() """ return cronbach_alpha(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def centered_rms_dev(self) -> float: """ Modified after SkillMetrics_. Calculates the centered root-mean-square (RMS) difference between true and predicted using the formula: (E')^2 = sum_(n=1)^N [(p_n - mean(p))(r_n - mean(r))]^2/N where p is the predicted values, r is the true values, and N is the total number of values in p & r. .. math:: CRMSD = \\sqrt{\\frac{1}{N} \\sum_{i=1}^{N} \\left( (p_i - \\text{mean}(p)) - (r_i - \\text{mean}(r)) \\right)^2} Output: CRMSDIFF : centered root-mean-square (RMS) difference (E')^2 Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.centered_rms_dev() """ return centered_rms_dev(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def cosine_similarity(self) -> float: """ It is a judgment of orientation and not magnitude: two vectors with the same orientation have a cosine similarity of 1, two vectors oriented at 90° relative to each other have a similarity of 0, and two vectors diametrically opposed have a similarity of -1, independent of their magnitude. `See <https://en.wikipedia.org/wiki/Cosine_similarity>`_ .. math:: \\text{Cosine Similarity} = \\frac{\\sum_{i=1}^{n} \\text{true}_i \\cdot \\text{predicted}_i}{\\sqrt{\\sum_{i=1}^{n} (\\text{true}_i)^2} \\cdot \\sqrt{\\sum_{i=1}^{n} (\\text{predicted}_i)^2}} References ---------- https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.cosine_similarity.html Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.cosine_similarity() """ return cosine_similarity(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def decomposed_mse(self) -> float: """ Decomposed MSE developed by Kobayashi and Salam (2000) .. math :: dMSE = (\\frac{1}{N}\\sum_{i=1}^{N}(e_{i}-s_{i}))^2 + SDSD + LCS .. math:: SDSD = (\\sigma(e) - \\sigma(s))^2 .. math:: LCS = 2 \\sigma(e) \\sigma(s) * (1 - \\frac{\\sum ^n _{i=1}(e_i - \\bar{e})(s_i - \\bar{s})} {\\sqrt{\\sum ^n _{i=1}(e_i - \\bar{e})^2} \\sqrt{\\sum ^n _{i=1}(s_i - \\bar{s})^2}}) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.decomposed_mse() """ return decomposed_mse(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def euclid_distance(self) -> float: """ `Euclidian distance <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.euclidean_distances.html>`_ taken from `this book <https://doi.org/10.1016/B978-0-12-088735-4.50006-7`_. .. math:: D = \\sqrt{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2} Referneces: Kennard et al., 2010 Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.euclid_distance() """ return euclid_distance(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def exp_var_score(self, weights=None) -> Union[float, None]: """ Explained variance `score <https://stackoverflow.com/q/24378176/5982232>`_ . Best value is 1, lower values are less accurate. .. math:: \\text{EVS} = 1 - \\frac{\\sum_{i=1}^{n} w_i \\left( (true_i - predicted_i) - \\frac{\\sum_{j=1}^{n} w_j (true_j - predicted_j)}{\\sum_{j=1}^{n} w_j} \\right)^2}{\\sum_{i=1}^{n} w_i (true_i - \\frac{\\sum_{j=1}^{n} w_j true_j}{\\sum_{j=1}^{n} w_j})^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.exp_var_score() """ return exp_var_score(true=self.true, predicted=self.predicted, treat_arrays=False, weights=weights)
[docs] def expanded_uncertainty(self, cov_fact=1.96) -> float: """By default, it calculates uncertainty with 95% confidence interval. 1.96 is the coverage factor corresponding 95% confidence level .This indicator is used in order to show more information about the model deviation. Using formula from by Behar_ et al., 2015 and Gueymard_ et al., 2014. .. math:: U = \\text{cov_fact} \\times \\sqrt{\\frac{1}{n-1} \\sum_{i=1}^{n} \\left( \\left(\\text{true}_i - \\text{predicted}_i\\right) - \\overline{\\left(\\text{true} - \\text{predicted}\\right)} \\right)^2 + \\frac{1}{n} \\sum_{i=1}^{n} \\left(\\text{true}_i - \\text{predicted}_i\\right)^2} .. _Behar: https://doi.org/10.1016/j.enconman.2015.03.067 .. _Gueymard: https://doi.org/10.1016/j.rser.2014.07.117 Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.expanded_uncertainty() """ return expanded_uncertainty(true=self.true, predicted=self.predicted, treat_arrays=False, cov_fact=cov_fact)
[docs] def fdc_fhv(self, h: float = 0.02) -> float: """ Peak flow bias of the flow duration curve `(Yilmaz 2008) <doi:10.1029/2007WR006716>`_ as used in `kratzert et al., 2019 <https://hess.copernicus.org/articles/23/5089/2019/hess-23-5089-2019.html>`_. Code modified `Kratzert2018 <https://github.com/kratzert/ealstm_regional_modeling/blob/64a446e9012ecd601e0a9680246d3bbf3f002f6d/papercode/metrics.py#L190>`_ code. .. math:: FHV = \\frac{\\sum_{i=1}^{k} (predicted_i - true_i)}{\\sum_{i=1}^{k} true_i} \\times 100 Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.fdc_fhv() """ return fdc_fhv(true=self.true, predicted=self.predicted, treat_arrays=False, h=h)
[docs] def fdc_flv(self, low_flow: float = 0.3) -> float: """ bias of the bottom 30 % low flows as used in `kratzert et al., 2019 <https://hess.copernicus.org/articles/23/5089/2019/hess-23-5089-2019.html>`_. .. math:: \\text{FLV} = -1 \\times \\frac{\\sum (\\log(\\text{predicted}) - \\min(\\log(\\text{predicted}))) - \\sum (\\log(\\text{true}) - \\min(\\log(\\text{true})))}{\\sum (\\log(\\text{true}) - \\min(\\log(\\text{true}))) + 1 \\times 10^{-6}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.fdc_flv() """ return fdc_flv(true=self.true, predicted=self.predicted, treat_arrays=False, low_flow=low_flow)
[docs] def gmae(self) -> float: """ `Geometric Mean Absolute Error <https://doi.org/10.1016/j.isprsjprs.2024.04.015>`_ .. math:: GMAE = \\left( \\prod_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right| \\right)^{\\frac{1}{n}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.gmae() """ return gmae(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def gmean_diff(self) -> float: """ `Geometric mean difference <https://www.sciencedirect.com/science/article/abs/pii/S0022316624002281>`_. First geometric mean is calculated for each of two samples and their difference is calculated. .. math:: \\text{gmean_diff} = \\left( \\prod_{i=1}^{n} \\text{true}_i \\right)^{\\frac{1}{n}} - \\left( \\prod_{i=1}^{n} \\text{predicted}_i \\right)^{\\frac{1}{n}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.gmean_diff() """ return gmean_diff(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def gmrae(self, benchmark: np.ndarray = None) -> float: """ `Geometric Mean Relative Absolute Error <https://doi.org/10.1016/0169-2070(92)90008-W>`_ .. math:: GMRAE = \\left( \\prod_{i=1}^{n} \\frac{|true_i - predicted_i|}{|true_i - benchmark_i|} \\right)^{\\frac{1}{n}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.gmrae() """ return gmrae(true=self.true, predicted=self.predicted, treat_arrays=False, benchmark=benchmark)
[docs] def calculate_hydro_metrics(self): """ Calculates the following performance metrics related to hydrology. - fdc_flv - fdc_fhv - kge - kge_np - kge_mod - kge_bound - kgeprime_bound - kgenp_bound - nse - nse_alpha - nse_beta - nse_mod - nse_bound - r2 - mape - nrmse - corr_coeff - rmse - mae - mse - mpe - mase - r2_score Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.calculate_hydro_metrics() """ return calculate_hydro_metrics(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def inrse(self) -> float: """ `Integral Normalized Root Squared Error <https://doi.org/10.1016/j.engappai.2023.107559>`_ .. math:: IN\\text{-}RSE = \\sqrt{\\frac{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} (\\text{true}_i - \\overline{\\text{true}})^2}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.inrse() """ return inrse(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def irmse(self) -> float: """ `Inertial RMSE <https://link.springer.com/article/10.1007/s11069-008-9299-2>`_. RMSE divided by standard deviation of the gradient of true. .. math:: \\text{IRMSE} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left( \\text{true}_i - \\text{predicted}_i \\right)^2}}{\\sqrt{\\frac{1}{n-2} \\sum_{i=1}^{n-1} \\left( (\\text{true}_{i+1} - \\text{true}_i) - \\overline{(\\text{true}_{i+1} - \\text{true}_i)} \\right)^2}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.irmse() """ return irmse(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def JS(self) -> float: """Jensen-shannon divergence .. math:: JS(P \parallel Q) = \\frac{1}{2} \\sum_{i} \\left( P(i) \\log_2 \\left( \\frac{2P(i)}{P(i) + Q(i)} \\right) + Q(i) \\log_2 \\left( \\frac{2Q(i)}{P(i) + Q(i)} \\right) \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.JS() """ return JS(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def kendall_tau(self, return_p=False) -> Union[float, tuple]: """Kendall's tau_ .used in Probst_ et al., 2019. .. math:: tau = \\frac{(C - D)}{\\sqrt{(C + D + T_{\\text{true}})(C + D + T_{\\text{predicted}})}} .. _tau: https://machinelearningmastery.com/how-to-calculate-nonparametric-rank-correlation-in-python/ .. _Probst: https://www.jmlr.org/papers/volume20/18-444/18-444.pdf Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.kendall_tau() """ if kendalltau is None: raise NotImplementedError("scipy is not installed. Please install scipy to use this method") return kendall_tau(true=self.true, predicted=self.predicted, return_p=return_p, treat_arrays=False)
[docs] def kge( self, return_all:bool = False ) -> Union[float, np.ndarray]: """ Kling-Gupta Efficiency following `Gupta et al. 2009 <https://doi.org/10.1016/j.jhydrol.2009.08.003>`_. This error considers correlation (r), variability (:math:`\\alpha`) and mean difference/error which is also called bias (:math:`\\beta`). KGE values varies from -infinity to 1 with higher the better. KGE values above -0.41 means the simulted/predicted (by the model) is better than the mean of the observed data (`Knoben et al, 2019 <https://doi.org/10.5194/hess-23-4323-2019>`_). .. math:: \\text{KGE} = 1 - \\sqrt{(r - 1)^2 + (\\alpha - 1)^2 + (\\beta - 1)^2} .. math:: \\alpha = \\frac{\\sigma_{\\text{predicted}}}{\\sigma_{\\text{true}}} .. math:: \\beta = \\frac{\\mu_{\\text{predicted}}}{\\mu_{\\text{true}}} Please note that bias (:math:`\\beta`) is not same as :py:func:`SeqMetrics.bias` method. The term :math:`\\sqrt{(r - 1)^2 + (\\alpha - 1)^2 + (\\beta - 1)^2}` is also called euclidean distance which means KGE can also be defined as below .. math:: \\text{KGE} = 1 - ED Another form of KGE equation is below: .. math:: \\text{KGE} = \\frac{\\sum_{i=1}^{N} ( \\text{true}_i - \\bar{\\text{true}} ) ( \\text{predicted}_i - \\bar{\\text{predicted}} )}{\\sqrt{\\sum_{i=1}^{N} ( \\text{true}_i - \\bar{\\text{true}} )^2} \\sqrt{\\sum_{i=1}^{N} ( \\text{predicted}_i - \\bar{\\text{predicted}} )^2}} output ------- If ``return_all`` is True, it returns a numpy array of shape (4, ) containing kge, correlation (r), variability (:math:`\\alpha`) and bias (:math:`\\beta`). Otherwise, it returns kge score. Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.kge() >>> kge, corr, var, bias = metrics.kge(return_all=True) """ return kge(true=self.true, predicted=self.predicted, treat_arrays=False, return_all=return_all)
[docs] def kge_bound(self) -> float: """ `Mathevet et al. 2006 <https://iahs.info/uploads/dms/13614.21--211-219-41-MATHEVET.pdf>`_ proposed a bounded version of NSE since the original NSE lacks a lower bound and thus have skewed distribution when calculated for large number of basins. To avoid its skewed distributions and make it vary between -1 and +1, they proposed a bounder version of the statistic i.e. NSE. The same concept is applied here to KGE. As per the authors, this bounded version of the statistic makes it less optimistic for positive values. .. math:: \\text{KGE}_{\\text{bound}} = \\frac{\\text{KGE}}{2 - \\text{KGE}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.kge_bound() """ return kge_bound(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def kge_mod(self, return_all:bool = False): """ Modified Kling-Gupta Efficiency after `Kling et al. 2012 <https://doi.org/10.1016/j.jhydrol.2012.01.011>`_. Similar to original KGE, its values varies fro -infinity to 1 with higher the better. This version of KGE was introduced to avoid cross-correlation between bias and variability which happens when the precipitation data is biased. This is done by calculating the variability (:math:`\\alpha`) by :math:`{CV}_s/{CV}_o` instaed of :math:`{\sigma}_s/{\sigma}_o` where CV is the coefficient of variation which is defined as the ratio of the standard deviation to the mean (:math:`{\sigma}/{\mu}`). .. math:: \\text{KGE`} = 1 - \\sqrt{(r - 1)^2 + (\\alpha - 1)^2 + (\\beta - 1)^2} output ------- If return_all is True, it returns a numpy array of shape (4, ) containing kge, :math:`\gamma`, :math:`\\alpha` and :math:`\\beta`. Otherwise, it returns kge. Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.kge_mod() """ return kge_mod(true=self.true, predicted=self.predicted, treat_arrays=False, return_all=return_all)
[docs] def kge_np(self, return_all:bool = False)-> Union[float, np.ndarray]: """ Non-parametric Kling-Gupta Efficiency after `Pool et al. 2018 <https://doi.org/10.1080/02626667.2018.1552002>`_. This differs from original KGE by using non-parameteric components of KGE i.e. :math:`\\alpha` and :math:`\gamma` / cc. The variability (:math:`\\alpha`) is non-parametrized by using the FDCs of the true and predicted values. The FDCs are normalized to remove the volume information. It also differs from normal kge by using the Spearman's rank correlation instead of Pearson's correlation coefficient. .. math:: cc = \\rho(\\text{true}, \\text{predicted}) .. math:: \\alpha = 1 - 0.5 \\sum_{i=1}^{n} \\left| \\frac{\\text{sorted(predicted}_i\\text{)}}{\\text{mean(predicted)} \\cdot n} - \\frac{\\text{sorted(true}_i\\text{)}}{\\text{mean(true)} \\cdot n} \\right| .. math:: \\beta = \\frac{\\text{mean(predicted)}}{\\text{mean(true)}} .. math:: \\text{KGE}_{\\text{np}} = 1 - \\sqrt{(cc - 1)^2 + (\\alpha - 1)^2 + (\\beta - 1)^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.kge_np() """ return kge_np(true=self.true, predicted=self.predicted, return_all=return_all, treat_arrays=False)
[docs] def kgeprime_bound(self) -> float: """ `Bounded Version of the Modified Kling-Gupta Efficiency <https://iahs.info/uploads/dms/13614.21--211-219-41-MATHEVET.pdf>`_ .. math:: KGE'_{\\text{bounded}} = \\frac{1 - \\sqrt{(r - 1)^2 + (\\gamma - 1)^2 + (\\beta - 1)^2}}{2 - (1 - \\sqrt{(r - 1)^2 + (\\gamma - 1)^2 + (\\beta - 1)^2})} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.kgeprime_bound() """ return kgeprime_bound(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def kgenp_bound(self): """ `Bounded Version of the Non-Parametric Kling-Gupta Efficiency <https://thibhlln.github.io/hydroeval/functions/hydroeval.kgenp_c2m.html#hydroeval.kgenp_c2m>`_ .. math:: KGE_{np_{bound}} = \\frac{1 - \\sqrt{\\left(\\rho(t, p) - 1\\right)^2 + \\left(1 - 0.5 \\sum_{i=1}^{n} \\left| \\frac{\\text{sorted}(p_i)}{\\text{mean}(p) \\cdot n} - \\frac{\\text{sorted}(t_i)}{\\text{mean}(t) \\cdot n} \\right| - 1\\right)^2 + \\left(\\frac{\\text{mean}(p)}{\\text{mean}(t)} - 1\\right)^2}}{2 - \\left(1 - \\sqrt{\\left(\\rho(t, p) - 1\\right)^2 + \\left(1 - 0.5 \\sum_{i=1}^{n} \\left| \\frac{\\text{sorted}(p_i)}{\\text{mean}(p) \\cdot n} - \\frac{\\text{sorted}(t_i)}{\\text{mean}(t) \\cdot n} \\right| - 1\\right)^2 + \\left(\\frac{\\text{mean}(p)}{\\text{mean}(t)} - 1\\right)^2}\\right)} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.kgenp_bound() """ return kgenp_bound(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def kl_sym(self) -> Union[float, None]: """ `Symmetric kullback-leibler divergence <https://doi.org/10.1016/j.procs.2018.10.144>`_ .. math:: \\text{KL}_{\\text{sym}}(P || Q) = \\frac{1}{2} \\sum_{i=1}^{n} \\left( P_i - Q_i \\right) \\left( \\log_2 \\frac{P_i}{Q_i} \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.kl_sym() """ return kl_sym(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def lm_index(self, obs_bar_p=None) -> float: """ `Legate-McCabe Efficiency Index <https://doi.org/10.1016/j.cmpb.2023.107737>`_. Less sensitive to outliers in the data. The larger, the better .. math:: a_i = |predicted_i - true_i| .. math:: b_i = |true_i - \\text{obs\\_bar\\_p}| \\text{if } \\text{obs\\_bar\\_p} \\text{ is provided} \\|true_i - \\bar{true}| \\text{otherwise} .. math:: \\text{LM Index} = 1 - \\frac{\\sum_{i=1}^{n} a_i}{\\sum_{i=1}^{n} b_i} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.lm_index() """ return lm_index(true=self.true, predicted=self.predicted, treat_arrays=False, obs_bar_p=obs_bar_p)
[docs] def maape(self) -> float: """ `Mean Arctangent Absolute Percentage Error <https://doi.org/10.1016/j.ijforecast.2015.12.003>`_ Note: result is NOT multiplied by 100 .. math:: MAAPE = \\frac{1}{n} \\sum_{i=1}^{n} \\arctan \\left( \\frac{| \\text{true}_i - \\text{predicted}_i |}{| \\text{true}_i | + \\epsilon} \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.maape() """ return maape(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mae(self) -> float: """ `Mean Absolute Error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html>`_. It is less sensitive to outliers as compared to mse/rmse. .. math:: \\text{MAE} = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right| Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mae() """ return mae(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mape(self) -> float: """ Mean Absolute Percentage Error. The MAPE is often used when the quantity to predict is known to remain way above zero_. It is useful when the size or size of a prediction variable is significant in evaluating the accuracy of a prediction_. It has advantages of scale-independency and interpretability. However, it has the significant disadvantage that it produces infinite or undefined values for zero or close-to-zero actual values_. .. math:: MAPE = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\frac{true_i - predicted_i}{true_i} \\right| \\times 100 .. _zero: https://doi.org/10.1016/j.neucom.2015.12.114 .. _prediction: https://doi.org/10.1088/1742-6596/930/1/012002 .. _values: https://doi.org/10.1016/j.ijforecast.2015.12.003 References --------- https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mape() """ return mape(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mbrae(self, benchmark: np.ndarray = None) -> float: """ `Mean Bounded Relative Absolute Error <https://doi.org/10.1371/journal.pone.0174202>`_ .. math:: MBRAE = \\frac{1}{n} \\sum_{i=1}^{n} \\frac{| \\text{true}_i - \\text{predicted}_i |}{| \\text{true}_i - \\text{benchmark}_i |} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mbrae() """ return mbrae(true=self.true, predicted=self.predicted, benchmark=benchmark, treat_arrays=False)
[docs] def mapd(self) -> float: #ToDo equation not multiplied by 100 """ `Mean absolute percentage deviation <https://doi.org/10.1016/j.rinma.2022.100347>`_ .. math:: MAPD = \\frac{\\sum_{i=1}^{n} \\left| predicted_i - true_i \\right|}{\\sum_{i=1}^{n} \\left| true_i \\right|} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mapd(t, p) """ return mapd(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mase(self, seasonality: int = 1): """ Mean Absolute Scaled Error following `Hyndman et al., 2006 <http://datascienceassn.org/sites/default/files/Another%20Look%20at%20Measures%20of%20Forecast%20Accuracy.pdf>`_. Baseline (benchmark) is computed with naive forecasting (shifted by seasonality) modified after `this <https://gist.github.com/bshishov/5dc237f59f019b26145648e2124ca1c9>`_. It is the ratio of MAE of used model and MAE of naive forecast. .. math:: \\text{MASE} = \\frac{\\frac{1}{n} \\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\frac{1}{n-s} \\sum_{i=s+1}^{n} \\left| \\text{true}_i - \\text{true}_{i-s} \\right|} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mase() """ return mase(true=self.true, treat_arrays=False, predicted=self.predicted, seasonality= seasonality)
[docs] def mare(self) -> float: """ Mean Absolute Relative Error. When expressed in %age, it is also known as mape_. .. math:: \\text{MARE} = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i} \\right| .. _mape: https://doi.org/10.1016/j.rser.2015.08.035 Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mare() """ return mare(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def max_error(self) -> float: """ `maximum absolute error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.max_error.html>`_ In Sklearn, there is "absolute" in equation but not in name of metric. .. math:: \\text{Max Error} = \\max_{i=1}^n \\left| \\text{true}_i - \\text{predicted}_i \\right| Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.max_error() """ return max_error(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mb_r(self) -> float: """ `Mielke-Berry R value <https://link.springer.com/book/10.1007/978-1-4757-3449-2>`_. Berry and Mielke, 1988. .. math:: R = 1 - \\frac{n^2 \\cdot \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\text{predicted}_i - \\text{true}_i \\right|}{\\sum_{i=1}^{n} \\sum_{j=1}^{n} \\left| \\text{predicted}_j - \\text{true}_i \\right|} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mb_r() """ return mb_r(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mda(self) -> float: """ Mean Directional Accuracy modified `after <https://gist.github.com/bshishov/5dc237f59f019b26145648e2124ca1c9>`_ .. math:: \\text{MDA} = \\frac{1}{n-1} \\sum_{i=1}^{n-1} \\left( \\text{sign}( \\text{true}_{i+1} - \\text{true}_i) == \\text{sign}( \\text{predicted}_{i+1} - \\text{predicted}_i) \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mda() """ return mda(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mde(self) -> float: """ `Median Error <https://doi.org/10.1016/j.cma.2024.116842>`_ .. math:: MDE = \\text{median}(\\text{predicted}_i - \\text{true}_i) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mde() """ return mde(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mdape(self) -> float: """ `Median Absolute Percentage Error <https://doi.org/10.1016/j.petrol.2021.109265>`_. The value is multiplied by 100. .. math:: \\text{MdAPE} = 100 \\times \\text{Median} \\left( \\left\\{ \\frac{|\\text{true}_i - \\text{predicted}_i|}{|\\text{true}_i|} \\right\\}_{i=1}^n \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mdape() """ return mdape(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mdrae(self, benchmark: np.ndarray = None) -> float: """ `Median Relative Absolute Error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.median_absolute_error.html>`_ In Sklearn, there is "absolute" in equation but not in name of metric. .. math:: MdRAE = \\text{median} \\left( \\left| \\frac{true_i - predicted_i}{true_i - benchmark_i} \\right| \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mdrae() """ return mdrae(true=self.true, predicted=self.predicted, treat_arrays=False, benchmark=benchmark)
[docs] def me(self): """ `Mean error <https://doi.org/10.1016/j.scitotenv.2024.174533>`_ or bias .. math:: ME = \\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.me() """ return me(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mean_bias_error(self) -> float: """ Mean Bias Error It represents overall bias error or systematic error. It shows average interpolation bias; i.e. average over- or underestimation. [1][2].This indicator expresses a tendency of model to underestimate (negative value) or overestimate (positive value) global radiation, while the mean bias error values closest to zero are desirable. The drawback of this test is that it does not show the correct performance when the model presents overestimated and underestimated values at the same time, since overestimation and underestimation values cancel each other. .. math:: \\text{MBE} = \\frac{1}{N} \\sum_{i=1}^{N} (true_i - predicted_i) References ---------- - `Willmott, C. J., & Matsuura, K. (2006). On the use of dimensioned measures of error to evaluate the performance of spatial interpolators. International Journal of Geographical Information Science, 20(1), 89-102. <https://doi.org/10.1080/1365881050028697>`_ - `Valipour, M. (2015). Retracted: Comparative Evaluation of Radiation-Based Methods for Estimation of Potential Evapotranspiration. Journal of Hydrologic Engineering, 20(5), 04014068. <https://dx.doi.org/10.1061/(ASCE)HE.1943-5584.0001066>`_ - - `Despotovic, M., Nedic, V., Despotovic, D., & Cvetanovic, S. (2015). Review and statistical analysis of different global solar radiation sunshine models. Renewable and Sustainable Energy Reviews, 52, 1869-1880. <https://doi.org/10.1016/j.rser.2015.08.035>`_ Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mean_bias_error() """ return mean_bias_error(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mean_var(self) -> float: """Mean variance, adopted from `HydroErr <https://github.com/BYU-Hydroinformatics/HydroErr/blob/master/HydroErr/HydroErr.py#L6072>`_ .. math:: \\text{mean_var} = \\text{Var} \\left( \\log(1 + \\text{true}) - \\log(1 + \\text{predicted}) \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mean_var() """ return mean_var(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mean_poisson_deviance(self, weights=None) -> float: """ `mean poisson deviance <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_poisson_deviance.html>`_ .. math:: \\text{MPD} = \\frac{1}{n} \\sum_{i=1}^{n} 2 \\left( \\text{true}_i \\log \\left( \\frac{\\text{true}_i}{\\text{predicted}_i} \\right) - (\\text{true}_i - \\text{predicted}_i) \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mean_poisson_deviance() """ return mean_poisson_deviance(true=self.true, predicted=self.predicted, weights=weights, treat_arrays=False)
[docs] def mean_gamma_deviance(self, weights=None) -> float: """ `mean gamma deviance <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_gamma_deviance.html>`_ .. math:: \\text{Mean Gamma Deviance (Weighted)} = \\frac{1}{\\sum_{i=1}^{n} w_i} \\sum_{i=1}^{n} w_i \\frac{2}{\\text{true}_i} \\left( \\text{predicted}_i - \\text{true}_i - \\text{true}_i \\ln \\left( \\frac{\\text{predicted}_i}{\\text{true}_i} \\right) \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mean_gamma_deviance() """ return mean_gamma_deviance(true=self.true, predicted=self.predicted, weights=weights, treat_arrays=False)
[docs] def median_abs_error(self) -> float: """ median absolute error .. math:: \\text{MedAE} = \\text{median} \\left( \\left| \\text{true}_i - \\text{predicted}_i \\right| \\right) References ---------- https://scikit-learn.org/stable/modules/generated/sklearn.metrics.median_absolute_error.html Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.median_abs_error() """ return median_abs_error(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def med_seq_error(self) -> float: """`Median Squared Error <https://www.sciencedirect.com/science/article/pii/S2468227620301757>`_ Same as mse, but it takes median which reduces the impact of outliers. .. math:: \\text{MedSE} = \\text{median} \\left( (\\text{predicted}_i - \\text{true}_i)^2 \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics = RegressionMetrics(t, p) >>> metrics.med_seq_error() """ return med_seq_error(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mle(self) -> float: """ `Mean log error <https://doi.org/10.1038/s41598-023-29871-8>`_ .. math:: \\text{MLE} = \\frac{1}{n} \\sum_{i=1}^{n} \\left( \\log(1 + \\text{predicted}_i) - \\log(1 + \\text{true}_i) \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics = RegressionMetrics(t, p) >>> metrics.mle() """ return mle(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mod_agreement_index(self, j:int=1) -> float: """ `Modified agreement of index <https://search.r-project.org/CRAN/refmans/hydroGOF/html/md.html>`_. It varies between 0 and 1 where 1 indicates perfect match between the observed and predicted values. .. math:: MAI = 1 - \\frac{\\sum_{i=1}^{n} \\left| \\text{predicted}_i - \\text{true}_i \\right|^j}{\\sum_{i=1}^{n} \\left( \\left| \\text{predicted}_i - \\overline{\\text{true}} \\right| + \\left| \\text{true}_i - \\overline{\\text{true}} \\right| \\right)^j} Parameters ---------- j : int, default 1 when j is 2, this is same as agreement_index. Higher j means more impact of outliers. Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics = RegressionMetrics(t, p) >>> metrics.mod_agreement_index() """ return mod_agreement_index(true=self.true, predicted=self.predicted, treat_arrays=False, j=j)
[docs] def mpe(self) -> float: """ `Mean Percentage Error <https://doi.org/10.1016/j.molliq.2023.123378>`_ The value is multiplied by 100 to reflect percentage. .. math:: MPE = \\frac{1}{n} \\sum_{i=1}^{n} \\left( \\frac{true_i - predicted_i}{true_i} \\right) \\times 100 Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mpe() """ return mpe(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def mrae(self, benchmark: np.ndarray = None): """ `Mean Relative Absolute Error <https://doi.org/10.1016/j.comnet.2024.110237>`_ .. math:: MRAE = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{benchmark}_i} \\right| Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mrae() """ return mrae(true=self.true, predicted=self.predicted, treat_arrays=False, benchmark=benchmark)
[docs] def msle(self, weights=None) -> float: """ `Mean square logrithmic error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_log_error.html>`_ .. math:: \\text{MSLE} = \\frac{\\sum_{i=1}^{n} w_i \\cdot \\text{sq_log_error}_i}{\\sum_{i=1}^{n} w_i} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.msle() """ return msle(true=self.true, predicted=self.predicted, treat_arrays=False, weights=weights)
[docs] def mre(self, benchmark:np.ndarray=None): """ `mean relative error <https://doi.org/10.1016/j.trd.2022.103505>`_ .. math:: \\text{MRE} = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i} \\right| Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mre() """ return mre(self.true, self.predicted, treat_arrays=False, benchmark=benchmark)
[docs] def norm_euclid_distance(self) -> float: """ `Normalized Euclidian distance <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.euclidean_distances.html>`_ .. math:: D_{norm} = \\sqrt{\\sum_{i=1}^{n} \\left( \\frac{\\text{true}_i}{\\bar{\\text{true}}} - \\frac{\\text{predicted}_i}{\\bar{\\text{predicted}}} \\right)^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.norm_euclid_distance() """ return norm_euclid_distance(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def nrmse_range(self) -> float: """ Range Normalized Root Mean Squared Error after `Pontius et al., 2008 <https://link.springer.com/article/10.1007/s10651-007-0043-y>`_ RMSE normalized by true values. This allows comparison between data sets with different scales. It is more sensitive to outliers. Reference: .. math:: \\text{NRMSE} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} (\\text{predicted}_i - \\text{true}_i)^2}}{\\max(\\text{true}) - \\min(\\text{true})} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nrmse_range() """ return nrmse_range(true=self.true, treat_arrays=False, predicted=self.predicted)
[docs] def nrmse_ipercentile(self, q1=25, q2=75) -> float: """ RMSE normalized by inter percentile range of true. This is the least sensitive to outliers. q1: any interger between 1 and 99 q2: any integer between 2 and 100. Should be greater than q1. Reference: `Pontius et al., 2008 <https://link.springer.com/article/10.1007/s10651-007-0043-y>`_ .. math:: \\text{NRMSE}_{\\text{IP}} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}}{Q_{q2} - Q_{q1}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nrmse_ipercentile() """ return nrmse_ipercentile(true=self.true, treat_arrays=False, q1=q1, q2=q2, predicted=self.predicted)
[docs] def nrmse_mean(self) -> float: """Mean Normalized RMSE RMSE normalized by mean of true values.This allows comparison between datasets with different scales. Reference: `Pontius et al., 2008 <https://link.springer.com/article/10.1007/s10651-007-0043-y>`_ .. math:: NRMSE_{mean} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}}{\\bar{\\text{true}}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nrmse_mean() """ return nrmse_mean(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def norm_ae(self) -> float: """ `Normalized Absolute Error <https://doi.org/10.1016/j.apor.2024.104042>`_ .. math:: norm\\_ae = \\sqrt{\\frac{\\sum_{i=1}^{n} (error_i - MAE)^2}{n - 1}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.norm_ae() """ return norm_ae(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def norm_ape(self) -> float: """ Normalized Absolute Percentage Error .. math:: \\text{norm_APE} = \\sqrt{ \\frac{1}{n-1} \\sum_{i=1}^{n} \\left( \\left| \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i} \\right| - \\frac{1}{n} \\sum_{j=1}^{n} \\left| \\frac{\\text{true}_j - \\text{predicted}_j}{\\text{true}_j} \\right| \\right)^2 } Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.norm_ape() """ return norm_ape(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def nrmse(self) -> float: """ `Normalized Root Mean Squared Error <https://www.sciencedirect.com/science/article/pii/S0957417411003289>`_ .. math:: NRMSE = \\frac{\\sqrt{\\frac{1}{N} \\sum_{i=1}^{N} (\\text{true}_i - \\text{predicted}_i)^2}}{\\max(\\text{true}) - \\min(\text{true})} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nrmse() """ return nrmse(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def nse(self) -> float: """Nash-Sutcliff Efficiency. The Nash-Sutcliffe efficiency (NSE) is a normalized statistic that determines the relative magnitude of the residual variance compared to the measured data variance It determines how well the model simulates trends for the output response of concern. But cannot help identify model bias and cannot be used to identify differences in timing and magnitude of peak flows and shape of recession curves; in other words, it cannot be used for single-event simulations. It is sensitive to extreme values due to the squared differ-ences `(Modirasi et al., 2015) <https://elibrary.asabe.org/abstract.asp?aid=46548>`_. To make it less sensitive to outliers, `(Krause et al., 2005) <https://dx.doi.org/10.5194/adgeo-5-89-2005>`_ proposed log and relative nse. .. math:: \\text{NSE} = 1 - \\frac{\\sum_{i=1}^{N} (predicted_i - true_i)^2}{\\sum_{i=1}^{N} (true_i - \\bar{true})^2} where the bar above predicted and true indicates the mean of the array. References ---------- - Moriasi, D. N., Gitau, M. W., Pai, N., & Daggupati, P. (2015). Hydrologic and water quality models: Performance measures and evaluation criteria. Transactions of the ASABE, 58(6), 1763-1785. - Krause, P., Boyle, D., & Bäse, F. (2005). Comparison of different efficiency criteria for hydrological model assessment. Adv. Geosci., 5, 89-97. https://dx.doi.org/10.5194/adgeo-5-89-2005. Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nse() """ return nse(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def nse_alpha(self) -> float: """ Alpha decomposition of the NSE, see `Gupta et al., 2009 <https://doi.org/10.1029/97WR03495>`_ used in `Kratzert et al., 2019 <https://hess.copernicus.org/articles/23/5089/2019/hess-23-5089-2019.html>`_. .. math:: \\text{NSE}_{\\text{alpha}} = \\frac{\\sigma_{\\text{predicted}}}{\\sigma_{\\text{true}}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nse_alpha() """ return nse_alpha(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def nse_beta(self) -> float: """ Beta decomposition of NSE. `Gupta et al. 2009 <https://doi.org/10.1029/97WR03495>`_ used in `kratzert et al., 2019 <https://hess.copernicus.org/articles/23/5089/2019/hess-23-5089-2019.html>`_. .. math:: \\text{NSE}_{\\text{beta}} = \\frac{\\mu_{\\text{predicted}} - \\mu_{\\text{true}}}{\\sigma_{\\text{true}}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nse_beta() """ return nse_beta(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def nse_mod(self, j=1) -> float: """ Gives less weightage to outliers if j=1 and if j>1 then it gives more weightage to outliers. Reference: `Krause_ et al., 2005 <https://adgeo.copernicus.org/articles/5/89/2005/adgeo-5-89-2005.html>`_. .. math:: \\text{NSE}_{\\text{mod}} = 1 - \\frac{\\sum_{i=1}^{N} \\left| \\text{predicted}_i - \\text{true}_i \\right|^j}{\\sum_{i=1}^{N} \\left| \\text{true}_i - \\bar{\text{true}} \\right|^j} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nse_mod() """ return nse_mod(true=self.true, predicted=self.predicted, treat_arrays=False, j=j)
[docs] def nse_rel(self) -> float: """ `Relative Nash-Sutcliff Efficiency <https://doi.org/10.5194/adgeo-5-89-2005>`_. .. math:: \\text{NSE}_{\\text{rel}} = 1 - \\frac{\\sum_{i=1}^{N} \\left( \\frac{|\\text{predicted}_i - \\text{true}_i|}{\\text{true}_i} \\right)^2}{\\sum_{i=1}^{N} \\left( \\frac{|\\text{true}_i - \\overline{\\text{true}}|}{\\overline{\\text{true}}} \\right)^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nse_rel() """ return nse_rel(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def nse_bound(self) -> float: """ Bounded Version of the Nash-Sutcliffe Efficiency (nse_) .. math:: \\text{NSE}_{\\text{bound}} = \\frac{\\text{NSE}}{2 - \\text{NSE}} .. _nse: https://iahs.info/uploads/dms/13614.21--211-219-41-MATHEVET.pdf Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.nse_bound() """ return nse_bound(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def log_nse( self, epsilon:float=0.0, log_base:str = 'e' ) -> float: """ `log transformed Nash-Sutcliffe Efficiency <https://doi.org/10.1002/2016WR019605>`_. It is especially useful for capturing prediction performance for the lowest flows due to the logarithmic transform. .. math:: NSE = 1-\\frac{\\sum_{i=1}^{N}(log(e_{i})-log(s_{i}))^2}{\\sum_{i=1}^{N}(log(e_{i})-log(\\bar{e})^2}-1)*-1 Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.log_nse() """ return log_nse(true=self.true, predicted=self.predicted, epsilon=epsilon, log_base=log_base, treat_arrays=False)
[docs] def log_prob(self) -> float: """ Logarithmic probability distribution .. math:: \\text{log_prob} = \\frac{1}{N} \\sum_{i=1}^{N} \\left( -\\frac{\\left( \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{scale}} \\right)^2}{2} - \\log(\\sqrt{2\\pi}) \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.log_prob() """ return log_prob(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def pbias(self) -> float: """ `Percent bias <https://elibrary.asabe.org/abstract.asp?aid=46548>`_ determines how well the model simulates the average magnitudes for the output response of interest. It can also determine over and under-prediction. It cannot be used (1) for single-event simulations to identify differences in timing and magnitude of peak flows and the shape of recession curves nor (2) to determine how well the model simulates residual variations and/or trends for the output response of interest. It can give a deceiving rating of model performance if the model overpredicts as much as it underpredicts, in which case percent bias will be close to zero even though the model simulation is poor. .. math:: PBIAS = 100 \\times \\frac{\\sum_{i=1}^{N} (\\text{true}_i - \\text{predicted}_i)}{\\sum_{i=1}^{N} \\text{true}_i} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.pbias() """ return pbias(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def rmsle(self) -> float: """`Root mean square log error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.root_mean_squared_log_error.html>`_. This error is less sensitive to `outliers <https://stats.stackexchange.com/q/56658/314919>`_ . Compared to RMSE, RMSLE only considers the relative error between predicted and actual values, and the scale of the error is nullified by the log-transformation. Furthermore, RMSLE penalizes underestimation more than overestimation. This is especially useful in those studies where the underestimation of the target variable is not acceptable but overestimation can be `tolerated <https://doi.org/10.1016/j.scitotenv.2020.137894>`_ . .. math:: RMSLE = \\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left( \\log(1 + \\text{predicted}_i) - \\log(1 + \\text{true}_i) \\right)^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rmsle() """ return rmsle(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def rmdspe(self) -> float: """ `Root Median Squared Percentage Error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.root_mean_squared_error.html>`_. The value is multiplied by 100 to reflect percentage. .. math:: \\text{RMDSPE} = \\sqrt{\\text{median}\\left(\\left(\\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i} \\times 100\\right)^2\\right)} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rmdspe() """ return rmdspe(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def rse(self) -> float: """Relative Squared Error .. math:: \\text{RSE} = \\frac{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} (\\text{true}_i - \\bar{\\text{true}})^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rse() """ return rse(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def rrse(self) -> float: """ `Root Relative Squared Error <https://www.sciencedirect.com/science/article/pii/S0360319923031798>`_ .. math:: RRSE = \\sqrt{\\frac{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} (\\text{true}_i - \\bar{\\text{true}})^2}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rrse() """ return rrse(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def rae(self) -> float: """ `Relative Absolute Error <https://doi.org/10.1016/j.compbiomed.2017.02.010>`_ (aka Approximation Error) .. math:: \\text{RAE} = \\frac{\\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\sum_{i=1}^{n} \\left| \\text{true}_i - \\overline{\\text{true}} \\right|} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rae() """ return rae(true=self.true, treat_arrays=False, predicted=self.predicted)
[docs] def ref_agreement_index(self) -> float: """ Refined Index of Agreement after after `Willmott et al., 2012 <https://doi.org/10.1002/joc.2419>`_. It varies from -1 to 1. Larger the better. .. math:: a = \\sum_{i=1}^{n} \\left| \\text{predicted}_i - \\text{true}_i \\right| .. math:: b = 2 \\sum_{i=1}^{n} \\left| \\text{true}_i - \\overline{\\text{true}} \\right| .. math:: d_{\\text{ref}} = \\begin{cases} 1 - \\frac{a}{b} & \\text{if } a \\leq b \\ \\frac{b}{a} - 1 & \\text{if } a > b \\end{cases} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.ref_agreement_index() """ return ref_agreement_index(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def rel_agreement_index(self) -> float: """ `Relative index of agreement <https://doi.org/10.1007/s10661-022-10844-9>`_. from 0 to 1. larger the better. .. math:: \\text{rel_agreement_index} = 1 - \\frac{\\sum_{i=1}^{n} \\left( \\frac{\\text{predicted}_i - \\text{true}_i}{\\text{true}_i} \\right)^2}{\\sum_{i=1}^{n} \\left( \\frac{|\\text{predicted}_i - \\bar{\\text{true}}| + |\\text{true}_i - \\bar{\\text{true}}|}{\\bar{\\text{true}}} \\right)^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rel_agreement_index() """ return rel_agreement_index(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def rmse(self, weights=None) -> float: """ `Root mean squared error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.root_mean_squared_error.html>`_ .. math:: \\text{RMSE} = \\sqrt{\\frac{\\sum_{i=1}^{n} w_i (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} w_i}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rmse() """ return rmse(true=self.true, predicted=self.predicted, treat_arrays=False, weights=weights)
[docs] def r2(self) -> float: """ R2 is a statistical measure of how well the regression line approximates the actual data. Quantifies the percent of variation in the response that the 'model' explains_. The 'model' here is anything from which we obtained predicted array. It is also called coefficient of determination or square of pearson correlation coefficient. More heavily affected by outliers than pearson correlatin r. .. math :: R^2 = \\left( \\frac{\\sum_{i=1}^{N} \\left( \\frac{true_i - \\bar{true}}{\\sigma_{true}} \\cdot \\frac{predicted_i - \\bar{predicted}}{\\sigma_{predicted}} \\right)}{N - 1} \\right)^2 where the bar above predicted and true indicates the mean of the array. .. _explains: https://data.library.virginia.edu/is-r-squared-useless/ Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> r_square= metrics.r2() >>> r_square """ return r2(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def r2_score(self, weights=None): """ This is not a symmetric function. Unlike most other scores, `R^2 score <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html>`_ may be negative (it need not actually be the square of a quantity R). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. .. math:: \\text{R2}_{\\text{score}} = 1 - \\frac{\\sum_{i=1}^{n} w_i (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} w_i (\\text{true}_i - \\bar{\\text{true}})^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.r2_score() """ return r2_score(true=self.true, predicted=self.predicted, treat_arrays=False, weights=weights)
[docs] def relative_rmse(self) -> float: """ Relative Root Mean Squared Error .. math:: RRMSE=\\frac{\\sqrt{\\frac{1}{N}\\sum_{i=1}^{N}(e_{i}-s_{i})^2}}{\\bar{e}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.relative_rmse() """ return relative_rmse(true=self.true, treat_arrays=False, predicted=self.predicted)
[docs] def rmspe(self) -> float: """ `Root Mean Square Percentage Error <https://stackoverflow.com/a/53166790/5982232>`_ . .. math:: RMSPE = \\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left(PE_i\\right)^2} = \\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left(\\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i}\\right)^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rmspe() """ return rmspe(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def rsr(self) -> float: """ It is MSE normalized by standard deviation of true values. Following `Moriasi et al., 2007. <https://swat.tamu.edu/media/1312/moriasimodeleval.pdf>`_. It incorporates the benefits of error index statistics and includes a scaling/normalization factor, so that the resulting statistic and reported values can apply to various constituents. It ranges from 0 to infinity, with 0-0.5 indicating very good model performance, 0.5-0.8 indicating good model performance. Standard deviation is calculated using np.ntd(true, ddof=1) to match the results of `this implementation <https://rdrr.io/cran/hydroGOF/man/rsr.html>`_. .. math:: \\text{RSR} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}}{\\sqrt{\\frac{1}{n-1} \\sum_{i=1}^{n} (\\text{true}_i - \\bar{\\text{true}})^2}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rsr() """ return rsr(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def rmsse(self) -> float: """ Root Mean Squared Scaled Error after `Muhaimin et al., 2021 <https://doi.org/10.1109/Confluence51648.2021.9376880>`_ and `Zhou T, 2023 <https://doi.org/10.1109/ICAIBD57115.2023.10206380>`_. It is also considered similar to MASE. .. math:: \\text{RMSSE} = \\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left( \\frac{\\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\frac{1}{n-s} \\sum_{j=s+1}^{n} \\left| \\text{true}_j - \\text{true}_{j-s} \\right|} \\right)^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.rmsse() """ return rmsse(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def sa(self) -> float: """ Spectral angle `Keshava N, 2004 <https://doi.org/10.1109/TGRS.2004.830549>`_. It is arccosine of the dot product of true and predicted arrays. It varies from -pi/2 to pi/2. Closer to 0 is better. It measures angle between two vectors in hyperspace indicating how well the shape of two arrays match instead of their magnitude. .. math:: SA = \\arccos \\left( \\frac{\\sum_{i=1}^{n} (\\text{true}_i \\cdot \\text{predicted}_i)}{\\sqrt{\\sum_{i=1}^{n} (\\text{true}_i)^2} \\cdot \\sqrt{\\sum_{i=1}^{n} (\\text{predicted}_i)^2}} \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.sa() """ return sa(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def sc(self) -> float: """ Spectral correlation ater `Robila and Gershman, 2005 <https://ieeexplore.ieee.org/abstract/document/1509878>`_.. It varies from -pi/2 to pi/2. Closer to 0 is better. It measures the angle between the two vectors in hyperspace and highlights how well the shape of the two series match. .. math:: sc = \\arccos \\left( \\frac{ \\sum_{i=1}^{n} (t_i - \\bar{t}) \\cdot (p_i - \\bar{p}) }{ \\sqrt{\\sum_{i=1}^{n} (t_i - \\bar{t})^2} \\cdot \\sqrt{\\sum_{i=1}^{n} (p_i - \\bar{p})^2} } \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.sc() """ return sc(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def sga(self) -> float: """Spectral gradient angle. It varies from -pi/2 to pi/2. Closer to 0 is better. .. math:: \\text{SGA} = \\arccos \\left( \\frac{\\sum_{i=1}^{n-1} \\left( (true_{i+1} - true_i) \\cdot (predicted_{i+1} - predicted_i) \\right)}{\\sqrt{\\sum_{i=1}^{n-1} (true_{i+1} - true_i)^2} \\times \\sqrt{\\sum_{i=1}^{n-1} (predicted_{i+1} - predicted_i)^2}} \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.sga() """ return sga(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def smape(self) -> float: """ `Symmetric Mean Absolute Percentage Error <https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error>`_. Adoption from `this <https://stackoverflow.com/a/51440114/5982232>`_. .. math:: SMAPE = \\frac{100}{n} \\sum_{i=1}^{n} \\frac{2 \\left| \\text{predicted}_i - \\text{true}_i \\right|}{\\left| \\text{true}_i \\right| + \\left| \\text{predicted}_i \\right|} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.smape() """ return smape(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def smdape(self) -> float: """ Symmetric Median Absolute Percentage Error Note: result is NOT multiplied by 100 .. math:: \\text{smdape} = \\text{median} \\left( \\frac{2 \\cdot | \\text{predicted} - \\text{true} |}{| \\text{true} | + | \\text{predicted} | + \\epsilon} \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.smdape() """ return smdape(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def sid(self) -> float: """Spectral Information Divergence. From -pi/2 to pi/2. Closer to 0 is better. .. math:: \\text{SID} = \\left( \\frac{\\text{t}}{\\text{mean(t)}} - \\frac{\\text{p}}{\\text{mean(p)}} \\right) \\cdot \\left( \\log_{10}(\\text{t}) - \\log_{10}(\\text{mean(t)}) - \\log_{10}(\\text{p}) + \\log_{10}(\\text{mean(p)}) \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.sid() """ return sid(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def skill_score_murphy(self) -> float: """ Skill score after `Murphy, 1988 <https://doi.org/10.1175/1520-0493(1988)116%3C2417:SSBOTM%3E2.0.CO;2>`_. Adopted from `SkillMetrics <https://github.com/PeterRochford/SkillMetrics/blob/278b2f58c7d73566f25f10c9c16a15dc204f5869/skill_metrics/skill_score_murphy.py>`_ . Calculate non-dimensional skill score (SS) between two variables using definition of Murphy (1988) using the formula: .. math:: SS = 1 - RMSE^2/SDEV^2 where SDEV is the standard deviation of the true values .. math:: SDEV^2 = sum_(n=1)^N [r_n - mean(r)]^2/(N-1) where p is the predicted values, r is the reference values, and N is the total number of values in p & r. Note that p & r must have the same number of values. A positive skill score can be interpreted as the percentage of improvement of the new model forecast in comparison to the reference. On the other hand, a negative skill score denotes that the forecast of interest is worse than the referencing forecast. Consequently, a value of zero denotes that both forecasts perform equally [MLAir, 2020]. Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.skill_score_murphy() """ return skill_score_murphy(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def spearmann_corr(self) -> float: """ `Separmann correlation coefficient <https://hess.copernicus.org/articles/24/2505/2020/hess-24-2505-2020.pdf>`_. This is a nonparametric metric and assesses how well the relationship between the true and predicted data can be described using a monotonic function. .. math:: r = \\frac{\\sum_{i=1}^{n} \\left( R_{t,i} - \\overline{R_t} \\right) \\left( R_{p,i} - \\overline{R_p} \\right)}{\\sqrt{ \\sum_{i=1}^{n} \\left( R_{t,i} - \\overline{R_t} \\right)^2 \\sum_{i=1}^{n} \\left( R_{p,i} - \\overline{R_p} \\right)^2 }} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.spearmann_corr() """ # todo : is there a parameteric version of this as well? return spearmann_corr(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def sse(self) -> float: """ Sum of squared errors_ (model vs actual). It is measure of how far off our model's predictions are from the observed values. A value of 0 indicates that all predications are spot on. A non-zero value indicates errors. This is also called residual sum of squares (RSS) or sum of squared residuals as per tutorialspoint_ . .. math:: \\text{SSE} = \\sum_{i=1}^{n} (true_i - predicted_i)^2 .. _errors: https://dziganto.github.io/data%20science/linear%20regression/machine%20learning/python/Linear-Regression-101-Metrics/ .. _tutorialspoint: https://www.tutorialspoint.com/statistics/residual_sum_of_squares.html Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.sse() """ return sse(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def std_ratio(self, **kwargs) -> float: """ `Ratio of standard deviations of predictions and trues <https://doi.org/10.1016/j.engfracmech.2024.110057>`_. Also known as standard ratio, it varies from 0.0 to infinity while 1.0 being the perfect value. .. math:: \\text{std_ratio} = \\frac{\\sigma_{\\text{predicted}}}{\\sigma_{\\text{true}}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.std_ratio() """ return std_ratio(true=self.true, predicted=self.predicted, treat_arrays=False, **kwargs)
[docs] def umbrae(self, benchmark: np.ndarray = None): """ `Unscaled Mean Bounded Relative Absolute Error <https://doi.org/10.1016/j.jclepro.2022.135414>`_ .. math:: UMBRAE = \\frac{\\frac{1}{n} \\sum_{i=1}^{n} \\frac{|t_i - p_i|}{|t_i - b_i|}}{1 - \\frac{1}{n} \\sum_{i=1}^{n} \\frac{|t_i - p_i|}{|t_i - b_i|}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.umbrae() """ return umbrae(true=self.true, predicted=self.predicted, treat_arrays=False, benchmark=benchmark)
[docs] def ve(self) -> float: """ `Volumetric efficiency <https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2007WR006415>`_. from 0 to 1. Smaller the better. .. math:: VE = 1 - \\frac{\\sum_{i=1}^{n} \\left| \\text{predicted}_i - \\text{true}_i \\right|}{\\sum_{i=1}^{n} \\text{true}_i} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.ve() """ return ve(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def volume_error(self) -> float: """ Returns the Volume Error (Ve) after `Reynolds, 2017 <https://doi.org/10.1016/j.jhydrol.2017.05.012>`_. It is an indicator of the agreement between the averages of the simulated and observed runoff (i.e. long-term water balance). .. math:: \\text{volume_error}= Sum(predicted- true)/sum(predicted) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.volume_error() """ return volume_error(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def wape(self) -> float: """ weighted absolute percentage error (wape_) It is a variation of mape but more suitable for intermittent and low-volume data_. .. math:: \\text{WAPE} = \\frac{\\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\sum_{i=1}^{n} \\text{true}_i} .. _wape: https://mattdyor.wordpress.com/2018/05/23/calculating-wape/ .. _data: https://arxiv.org/pdf/2103.12057v1.pdf Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.wape() """ return wape(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def watt_m(self) -> float: """`Watterson's M. <https://rmets.onlinelibrary.wiley.com/doi/abs/10.1002/(SICI)1097-0088(199604)16:4%3C379::AID-JOC18%3E3.0.CO;2-U>`_ .. math:: M = \\frac{2}{\\pi} \\cdot \\arcsin \\left( 1 - \\frac{\\frac{1}{n} \\sum_{i=1}^{n} ( \\text{true}_i - \\text{predicted}_i )^2}{\\sigma_{\\text{true}}^2 + \\sigma_{\\text{predicted}}^2 + (\\mu_{\\text{predicted}} - \\mu_{\\text{true}})^2} \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.watt_m() """ return watt_m(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def wmape(self) -> float: """ `Weighted Mean Absolute Percent Error <https://stackoverflow.com/a/54833202/5982232>`_ .. math:: \\text{WMAPE} = \\frac{\\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\sum_{i=1}^{n} \\text{true}_i} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.wmape() """ return wmape(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def variability_ratio(self) -> float: """ Variability Ratio It is the ratio of the variance of the predicted values to the variance of the true values. It is used to measure the variability of the predicted values relative to the true values. .. math:: VR = 1 - \\left| \\frac{\\frac{\\sigma_{\\text{predicted}}}{\\mu_{\\text{predicted}}}}{\\frac{\\sigma_{\\text{true}}}{\\mu_{\\text{true}}}} - 1 \\right| Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.variability_ratio() """ return variability_ratio(true=self.true, predicted=self.predicted, treat_arrays=False)
[docs] def concordance_corr_coef(self) -> float: """ `Concordance Correlation Coefficient (CCC) <https://en.wikipedia.org/wiki/Concordance_correlation_coefficient>`_ taken from this `paper <https://doi.org/10.2307/2532051>`_. .. math:: CCC = \\frac{2 \\rho \\sigma_{true} \\sigma_{predicted}}{\\sigma_{true}^2 + \\sigma_{predicted}^2 + (\\bar{true} - \\bar{predicted})^2} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.concordance_corr_coef() """ return concordance_corr_coef(true= self.true, predicted= self.predicted, treat_arrays=False)
[docs] def critical_success_index(self, threshold= 0.5) -> float: """ `Critical Success Index (CSI) <https://doi.org/10.1016/j.heliyon.2024.e26371>`_ .. math:: CSI = \\frac{TP}{TP + FN + FP} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.array([0, 1, 1, 0, 0, 1]) >>> p = np.array([0, 1, 0, 1, 1, 1]) >>> metrics= RegressionMetrics(t, p) >>> metrics.critical_success_index() """ return critical_success_index(true= self.true, predicted= self.predicted, treat_arrays=False, threshold= threshold)
[docs] def kl_divergence(self) -> float: """ `Kullback-Leibler Divergence <https://doi.org/10.1016/j.imu.2024.101510>`_ .. math:: D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q{x}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.array([0.1, 0.2, 0.3, 0.2, 0.2]) >>> p = np.array([0.2, 0.2, 0.2, 0.2, 0.2]) >>> metrics= RegressionMetrics(t, p) >>> divergence = metrics.kl_divergence() """ return kl_divergence(true= self.true, predicted= self.predicted, treat_arrays=False)
[docs] def log_cosh_error(self) -> float: """ `Log-Cosh Error <https://doi.org/10.1016/j.compchemeng.2022.107933>`_ .. math:: \\text{Log-Cosh Error} = \\frac{1}{n} \\sum_{i=1}^{n} \\log \\left( \\cosh(\\text{predicted}_i - \\text{true}_i) \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> metrics= RegressionMetrics(t, p) >>> error = metrics.log_cosh_error() """ return log_cosh_error(true= self.true, predicted= self.predicted, treat_arrays=False)
[docs] def minkowski_distance(self, order= 1) -> float: """ `Minkowski Distance <https://doi.org/10.1016/j.imu.2024.101492>`_ .. math:: D_{Minkowski} = \\left( \\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|^p \\right)^{\\frac{1}{p}} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> metrics= RegressionMetrics(t, p) >>> distance = metrics.minkowski_distance() """ return minkowski_distance(true= self.true, predicted= self.predicted, treat_arrays=False, order= order)
[docs] def tweedie_deviance_score(self, power=0) -> float: """ `Tweedie Deviance Score <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_tweedie_deviance.html>`_ .. math:: D(\\text{true}, \\text{predicted}) = \\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2 .. math:: D(\\text{true}, \\text{predicted}) = 2 \\sum_{i=1}^{n} \\left( \\text{true}_i \\log\\left(\\frac{\\text{true}_i + (\\text{true}_i = 0)}{\\text{predicted}_i}\\right) - \\text{true}_i + \\text{predicted}_i \\right) .. math:: D(\\text{true}, \\text{predicted}) = 2 \\sum_{i=1}^{n} \\left( \\frac{\\text{true}_i}{\\text{predicted}_i} - \\log\\left(\\frac{\\text{true}_i}{\\text{predicted}_i}\\right) - 1 \\right) .. math:: D(\\text{true}, \\text{predicted}) = 2 \\sum_{i=1}^{n} \\left( \\frac{(\\text{true}_i - \\text{predicted}_i)^2}{\\text{true}_i^2 \\text{predicted}_i} \\right) Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> metrics= RegressionMetrics(t, p) >>> score = metrics.tweedie_deviance_score() """ return tweedie_deviance_score(true= self.true, predicted= self.predicted, treat_arrays=False, power= power)
# def peak_flow_ratio(self, power=0) -> float: # """ # Peak flow ratio is defined the ratio of the highest simulated to the highest # observed flow rates (Broekhuizen et al., 2020). # # https://doi.org/10.5194/hess-24-869-2020 # # Examples # --------- # >>> import numpy as np # >>> from SeqMetrics import RegressionMetrics # >>> t = np.array([1, 2, 3, 4, 5]) # >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) # >>> metrics= RegressionMetrics(t, p) # >>> score = metrics.peak_flow_ratio() # """ # return peak_flow_ratio(true= self.true, predicted= self.predicted, treat_arrays=False, power= power)
[docs] def legates_coeff_eff(self, power=0) -> float: """ Legates Coefficient of Efficiency. Its value varies between 0 and 1. It is not as sensitive to extreme values as agreement_index and coefficcient of determination because of the utilization of the absolute value of the difference instead of the squared difference. See Equaltion 23 in `Dodo et al., 2022 <https://doi.org/10.1016/j.nexus.2022.100157>`_ .. math:: LCE = 1 - \\frac{\\sum_{i=1}^{n} |true_i - predicted_i|}{\\sum_{i=1}^{n} |true_i - \\bar{true}|} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> metrics= RegressionMetrics(t, p) >>> score = metrics.legates_coeff_eff() """ return legates_coeff_eff(true= self.true, predicted= self.predicted, treat_arrays=False, power= power)
# def relative_error(self, power=0) -> float: # """ # Relative Error. It indicates the mismatch that # occurs between the observed and modeled values, expressed # in terms of percentages. # It quantifies the relative deviations between observed/true # and predicted values. This significantly reduces the influence of absolute # differences at peaks. The absolute lower differences during low flow # periods are enhanced because they are significant if looked at in a # relative sense. As a result, there might be a systematic over- or underprediction during low flow periods. # It used along with other statistics to quantify low # flow simulations Moriasi et al., 2007. # # Examples # --------- # >>> import numpy as np # >>> from SeqMetrics import RegressionMetrics # >>> t = np.array([1, 2, 3, 4, 5]) # >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) # >>> metrics= RegressionMetrics(t, p) # >>> score = metrics.relative_error() # """ # return relative_error(true= self.true, predicted= self.predicted, treat_arrays=False, power= power)
[docs] def manhattan_distance(self) -> float: """ Manhattan distance, also known as cityblock distance or taxicab norm. .. math:: D_{\\text{manhattan}} = \\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right| See `Blanco-Mallo et al., 2023 <https://doi.org/10.1016/j.patcog.2023.109646>`_ and `Cha et al., 2007 <https://pdodds.w3.uvm.edu/research/papers/others/everything/cha2007a.pdf>`_ and `Alexei Botchkarev 2019 <https://www.ijikm.org/Volume14/IJIKMv14p045-076Botchkarev5064.pdf>`_ on the use of distances in performance measures. Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> metrics= RegressionMetrics(t, p) >>> score = metrics.manhattan_distance() """ return manhattan_distance(true= self.true, predicted= self.predicted, treat_arrays=False)
[docs] def mse(self) -> float: """ `Mean Square Error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html>`_ .. math:: MSE = \\frac{\\sum_{i=1}^{N} w_i (true_i - predicted_i)^2}{\\sum_{i=1}^{N} w_i} Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mse() """ return mse(true=self.true, predicted=self.predicted, treat_arrays=False, weights= None)
[docs] def mape_for_peaks(self) -> float: """ Mean Absolute Percentage Error for peaks which are found using `scipy.singnal.find_peaks <https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html>`_ .. math:: \\text{MAPE}_\\text{peak} = \\frac{1}{P}\\sum_{p=1}^{P} \\left |\\frac{Q_{s,p} - Q_{o,p}}{Q_{o,p}} \\right | \\times 100, Examples --------- >>> import numpy as np >>> from SeqMetrics import RegressionMetrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> metrics= RegressionMetrics(t, p) >>> metrics.mape_for_peaks() """ return mape_for_peaks(true=self.true, predicted=self.predicted, treat_arrays=False, weights= None)
#************************************* # FUNCTIONAL API # #************************************* def post_process_kge(cc, alpha, beta, return_all=False): kge_ = float(1 - np.sqrt((cc - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)) if return_all: return np.array([kge_, cc, alpha, beta]) else: return kge_
[docs]def r2( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ R2 is a statistical measure of how well the regression line approximates the actual data. Quantifies the percent of variation in the response that the 'model' explains_. The 'model' here is anything from which we obtained predicted array. It is also called coefficient of determination or square of pearson correlation coefficient. More heavily affected by outliers than pearson correlatin r. .. _explains: https://data.library.virginia.edu/is-r-squared-useless/ .. math :: R^2 = \\left( \\frac{\\sum_{i=1}^{N} \\left( \\frac{true_i - \\bar{true}}{\\sigma_{true}} \\cdot \\frac{predicted_i - \\bar{predicted}}{\\sigma_{predicted}} \\right)}{N - 1} \\right)^2 where the bar above predicted and true indicates the mean of the array. Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import r2 >>> t = np.random.random(10) >>> p = np.random.random(10) >>> r2(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) zx = (true - np.mean(true)) / np.std(true, ddof=1) zy = (predicted - np.mean(predicted)) / np.std(predicted, ddof=1) r = np.sum(zx * zy) / (len(true) - 1) return float(r ** 2)
[docs]def nse(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """Nash-Sutcliff Efficiency. The Nash-Sutcliffe efficiency (NSE) is a normalized statistic that determines the relative magnitude of the residual variance compared to the measured data variance It determines how well the model simulates trends for the output response of concern. But cannot help identify model bias and cannot be used to identify differences in timing and magnitude of peak flows and shape of recession curves; in other words, it cannot be used for single-event simulations. It is sensitive to extreme values due to the squared differences `Moriasi et a., 2015 <https://elibrary.asabe.org/abstract.asp?aid=46548>`_. To make it less sensitive to outliers, `Krause et al., 2005 <https://dx.doi.org/10.5194/adgeo-5-89-2005>`_ proposed log and relative nse. .. math:: \\text{NSE} = 1 - \\frac{\\sum_{i=1}^{N} (predicted_i - true_i)^2}{\\sum_{i=1}^{N} (true_i - \\bar{true})^2} where the bar above predicted and true indicates the mean of the array. Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import nse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) _nse = 1 - sum((predicted - true) ** 2) / sum((true - np.mean(true)) ** 2) return float(_nse.item())
[docs]def nse_alpha(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Alpha decomposition of the NSE, see `Gupta et al. 2009 <https://doi.org/10.1029/97WR03495>`_ used in `kratzert et al., 2019 <https://hess.copernicus.org/articles/23/5089/2019/hess-23-5089-2019.html>`_. .. math:: \\text{NSE}_{\\text{alpha}} = \\frac{\\sigma_{\\text{predicted}}}{\\sigma_{\\text{true}}} Returns ------- float Alpha decomposition of the NSE Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import nse_alpha >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nse_alpha(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.std(predicted) / np.std(true).item())
[docs]def nse_beta(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Beta decomposition of NSE. See `Gupta et al. 2009 <https://doi.org/10.1029/97WR03495>`_ used in `kratzert et al., 2019 <https://hess.copernicus.org/articles/23/5089/2019/hess-23-5089-2019.html>`_. .. math:: \\text{NSE}_{\\text{beta}} = \\frac{\\mu_{\\text{predicted}} - \\mu_{\\text{true}}}{\\sigma_{\\text{true}}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Returns ------- float Beta decomposition of the NSE Examples --------- >>> import numpy as np >>> from SeqMetrics import nse_beta >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nse_beta(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float((np.mean(predicted) - np.mean(true)) / np.std(true).item())
[docs]def nse_mod( true, predicted, treat_arrays: bool = True, j=1, **treat_arrays_kws ) -> float: """ Gives less weightage to outliers if j=1 and if j>1 then it gives more weightage to outliers following `Krause_ et al., 2005 <https://adgeo.copernicus.org/articles/5/89/2005/adgeo-5-89-2005.html>`_. .. math:: \\text{NSE}_{\\text{mod}} = 1 - \\frac{\\sum_{i=1}^{N} \\left| \\text{predicted}_i - \\text{true}_i \\right|^j}{\\sum_{i=1}^{N} \\left| \\text{true}_i - \\bar{\text{true}} \\right|^j} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function j: Examples --------- >>> import numpy as np >>> from SeqMetrics import nse_mod >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nse_mod(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = (np.abs(predicted - true)) ** j b = (np.abs(true - np.mean(true))) ** j return float(1 - (np.sum(a) / np.sum(b)))
[docs]def nse_rel( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Relative Nash-Sutcliff Efficiency <https://doi.org/10.5194/adgeo-5-89-2005>`_. .. math:: \\text{NSE}_{\\text{rel}} = 1 - \\frac{\\sum_{i=1}^{N} \\left( \\frac{|\\text{predicted}_i - \\text{true}_i|}{\\text{true}_i} \\right)^2}{\\sum_{i=1}^{N} \\left( \\frac{|\\text{true}_i - \\overline{\\text{true}}|}{\\overline{\\text{true}}} \\right)^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import nse_rel >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nse_rel(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = (np.abs((predicted - true) / true)) ** 2 b = (np.abs((true - np.mean(true)) / np.mean(true))) ** 2 return float(1 - (np.sum(a) / np.sum(b)))
[docs]def nse_bound(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Bounded Version of the Nash-Sutcliffe Efficiency (nse_) .. math:: \\text{NSE}_{\\text{bound}} = \\frac{\\text{NSE}}{2 - \\text{NSE}} .. _nse: https://iahs.info/uploads/dms/13614.21--211-219-41-MATHEVET.pdf Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import nse_bound >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nse_bound(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) nse_ = nse(true, predicted, treat_arrays=False) nse_c2m_ = nse_ / (2 - nse_) return nse_c2m_
[docs]def r2_score(true, predicted, treat_arrays: bool = True, weights=None, **treat_arrays_kws): """ This is not a symmetric function. Unlike most other scores, `R^2 score <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html>`_ score may be negative (it need not actually be the square of a quantity R). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. .. math:: \\text{R2}_{\\text{score}} = 1 - \\frac{\\sum_{i=1}^{n} w_i (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} w_i (\\text{true}_i - \\bar{\\text{true}})^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function weights: Examples --------- >>> import numpy as np >>> from SeqMetrics import r2_score >>> t = np.random.random(10) >>> p = np.random.random(10) >>> r2_score(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) if len(predicted) < 2: msg = "R^2 score is not well-defined with less than two samples." warnings.warn(msg) return None if weights is None: weight = 1. else: weight = weights[:, np.newaxis] numerator = (weight * (true - predicted) ** 2).sum(axis=0, dtype=np.float64) denominator = (weight * (true - np.average(true, axis=0, weights=weights)) ** 2).sum(axis=0, dtype=np.float64) if numerator == 0.0: return None output_scores = _foo(denominator, numerator) return float(np.average(output_scores, weights=weights))
[docs]def adjusted_r2(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Adjusted R squared also known as `Ezekiel estimate <https://www.glmj.org/archives/MLRV_2007_33_1.pdf>`_. .. math:: \\text{Adjusted } R^2 = 1 - \\left( \\frac{(1 - R^2) \\cdot (n - 1)}{n - k - 1} \\right) where n = number of observations and k = 1. Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import adjusted_r2 >>> t = np.random.random(10) >>> p = np.random.random(10) >>> adjusted_r2(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) k = 1 n = len(predicted) adj_r = 1 - ((1 - r2(true, predicted, treat_arrays=False)) * (n - 1)) / (n - k - 1) return float(adj_r)
[docs]def kge( true, predicted, treat_arrays: bool = True, return_all:bool = False, **treat_arrays_kws ) -> Union[float, np.ndarray]: """ Kling-Gupta Efficiency following `Gupta et al. 2009 <https://doi.org/10.1016/j.jhydrol.2009.08.003>`_. This error considers correlation (r), variability (:math:`\\alpha`) and mean difference/error which is also called bias (:math:`\\beta`). KGE values varies from -infinity to 1 with higher the better. KGE values above -0.41 means the simulted/predicted (by the model) is better than the mean of the observed data (`Knoben et al, 2019 <https://doi.org/10.5194/hess-23-4323-2019>`_). .. math:: \\text{KGE} = 1 - \\sqrt{(r - 1)^2 + (\\alpha - 1)^2 + (\\beta - 1)^2} .. math:: \\alpha = \\frac{\\sigma_{\\text{predicted}}}{\\sigma_{\\text{true}}} .. math:: \\beta = \\frac{\\mu_{\\text{predicted}}}{\\mu_{\\text{true}}} Please note that bias (:math:`\\beta`) is not same as :py:func:`SeqMetrics.bias` method. The term :math:`\\sqrt{(r - 1)^2 + (\\alpha - 1)^2 + (\\beta - 1)^2}` is also called euclidean distance which means KGE can also be defined as below .. math:: \\text{KGE} = 1 - ED Another form of KGE equation is below: .. math:: \\text{KGE} = \\frac{\\sum_{i=1}^{N} ( \\text{true}_i - \\bar{\\text{true}} ) ( \\text{predicted}_i - \\bar{\\text{predicted}} )}{\\sqrt{\\sum_{i=1}^{N} ( \\text{true}_i - \\bar{\\text{true}} )^2} \\sqrt{\\sum_{i=1}^{N} ( \\text{predicted}_i - \\bar{\\text{predicted}} )^2}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function return_all: If True, it returns a numpy array of shape (4, ) containing kge, :math:`\\gamma`, :math:`\\alpha`, :math:`\\beta`. Otherwise, it returns kge. treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- If return_all is True, it returns a numpy array of shape (4, ) containing kge, correlation (r), variability (:math:`\\alpha`) and bias (:math:`\\beta`). Otherwise, it returns kge score. Examples --------- >>> import numpy as np >>> from SeqMetrics import kge >>> t = np.random.random(10) >>> p = np.random.random(10) >>> kge(t, p) >>> kge, corr, var, bias = kge(t, p, return_all=True) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) cc = np.corrcoef(true, predicted)[0, 1] # correlation alpha = np.std(predicted) / np.std(true) # variability beta = np.sum(predicted) / np.sum(true) # mean return post_process_kge(cc, alpha, beta, return_all)
[docs]def kge_bound( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Mathevet et al. 2006 <https://iahs.info/uploads/dms/13614.21--211-219-41-MATHEVET.pdf>`_ proposed a bounded version of NSE since the original NSE lacks a lower bound and thus have skewed distribution when calculated for large number of basins. To avoid its skewed distributions and make it vary between -1 and +1, they proposed a bounder version of the statistic i.e. NSE. The same concept is applied here to KGE. As per the authors, this bounded version of the statistic makes it less optimistic for positive values. .. math:: \\text{KGE}_{\\text{bound}} = \\frac{\\text{KGE}}{2 - \\text{KGE}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import kge_bound >>> t = np.random.random(10) >>> p = np.random.random(10) >>> kge_bound(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) kge_ = kge(true, predicted, return_all=True, treat_arrays=False)[0] kge_c2m_ = kge_ / (2 - kge_) return float(kge_c2m_.item())
[docs]def kge_mod( true, predicted, treat_arrays: bool = True, return_all=False, **treat_arrays_kws )-> Union[float, np.ndarray]: """ Modified Kling-Gupta Efficiency after `Kling et al. 2012 <https://doi.org/10.1016/j.jhydrol.2012.01.011>`_. Similar to original KGE, its values varies fro -infinity to 1 with higher the better. This version of KGE was introduced to avoid cross-correlation between bias and variability which happens when the precipitation data is biased. This is done by calculating the variability (:math:`\\alpha`) by :math:`{CV}_s/{CV}_o` instaed of :math:`{\sigma}_s/{\sigma}_o` where CV is the coefficient of variation which is defined as the ratio of the standard deviation to the mean (:math:`{\sigma}/{\mu}`). .. math:: \\text{KGE`} = 1 - \\sqrt{(r - 1)^2 + (\\alpha - 1)^2 + (\\beta - 1)^2} Parameters ---------- true : true/observed/actual/target values. It must be a :obj:`numpy.array`, or :obj:`pandas.DataFrame` or :obj:`pandas.Series` or a python :obj:`list` or any object which has :obj:`__len__` method. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function return_all: treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- If return_all is True, it returns a numpy array of shape (4, ) containing kge, :math:`\gamma`, :math:`\\alpha` and :math:`\\beta`. Otherwise, it returns kge. Examples --------- >>> import numpy as np >>> from SeqMetrics import kge_mod >>> t = np.random.random(10) >>> p = np.random.random(10) >>> kge_mod(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # calculate error in timing and dynamics r (Pearson's correlation coefficient) sim_mean = np.mean(predicted, axis=0, dtype=np.float64) obs_mean = np.mean(true, dtype=np.float64) r = np.sum((predicted - sim_mean) * (true - obs_mean), axis=0, dtype=np.float64) / \ np.sqrt(np.sum((predicted - sim_mean) ** 2, axis=0, dtype=np.float64) * np.sum((true - obs_mean) ** 2, dtype=np.float64)) # calculate error in spread of flow gamma (avoiding cross correlation with bias by dividing by the mean) gamma = (np.std(predicted, axis=0, dtype=np.float64) / sim_mean) / \ (np.std(true, dtype=np.float64) / obs_mean) # calculate error in volume beta (bias of mean discharge) beta = np.mean(predicted, axis=0, dtype=np.float64) / np.mean(true, axis=0, dtype=np.float64) # calculate the modified Kling-Gupta Efficiency KGE' return post_process_kge(r, gamma, beta, return_all)
[docs]def kge_np( true, predicted, treat_arrays: bool = True, return_all=False, **treat_arrays_kws )-> Union[float, np.ndarray]: """ Non-parametric Kling-Gupta Efficiency after `Pool et al. 2018 <https://doi.org/10.1080/02626667.2018.1552002>`_. This differs from original KGE by using non-parameteric components of KGE i.e. :math:`\\alpha` and :math:`\gamma` / cc. The variability (:math:`\\alpha`) is non-parametrized by using the FDCs of the true and predicted values. The FDCs are normalized to remove the volume information. It also differs from normal kge by using the Spearman's rank correlation instead of Pearson's correlation coefficient. .. math:: \\text{KGE}_{\\text{np}} = 1 - \\sqrt{(cc - 1)^2 + (\\alpha - 1)^2 + (\\beta - 1)^2} .. math:: cc = \\rho(\\text{true}, \\text{predicted}) .. math:: \\alpha = 1 - 0.5 \\sum_{i=1}^{n} \\left| \\frac{\\text{sorted(predicted}_i\\text{)}}{\\text{mean(predicted)} \\cdot n} - \\frac{\\text{sorted(true}_i\\text{)}}{\\text{mean(true)} \\cdot n} \\right| .. math:: \\beta = \\frac{\\text{mean(predicted)}}{\\text{mean(true)}} Parameters ---------- true : true/observed/actual/measured/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function return_all : treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- If return_all is True, it returns a numpy array of shape (4, ) containing kge, :math:`cc`, :math:`\\alpha` and :math:`\\beta`. Otherwise, it returns kge. Examples --------- >>> import numpy as np >>> from SeqMetrics import kge_np >>> t = np.random.random(10) >>> p = np.random.random(10) >>> kge_np(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # # self-made formula cc = spearmann_corr(true, predicted, treat_arrays=False) fdc_sim = np.sort(predicted / (np.nanmean(predicted) * len(predicted))) fdc_obs = np.sort(true / (np.nanmean(true) * len(true))) alpha = 1 - 0.5 * np.sum(np.abs(fdc_sim - fdc_obs)) beta = np.mean(predicted) / np.mean(true) return post_process_kge(cc, alpha, beta, return_all)
[docs]def spearmann_corr( true, predicted, treat_arrays:bool = True, **treat_arrays_kws )->float: """ `Separmann correlation coefficient <https://hess.copernicus.org/articles/24/2505/2020/hess-24-2505-2020.pdf>`_. .. math:: r = \\frac{\\sum_{i=1}^{n} \\left( R_{t,i} - \\overline{R_t} \\right) \\left( R_{p,i} - \\overline{R_p} \\right)}{\\sqrt{ \\sum_{i=1}^{n} \\left( R_{t,i} - \\overline{R_t} \\right)^2 \\sum_{i=1}^{n} \\left( R_{p,i} - \\overline{R_p} \\right)^2 }} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import spearmann_corr >>> t = np.random.random(10) >>> p = np.random.random(10) >>> spearmann_corr(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) sim_rank = np.argsort(np.argsort(predicted, axis=0), axis=0) obs_rank = np.argsort(np.argsort(true, axis=0), axis=0) r_num = np.sum((obs_rank - np.mean(obs_rank, axis=0, dtype=np.float64)) * (sim_rank - np.mean(sim_rank, axis=0, dtype=np.float64)), axis=0) r_den = np.sqrt( np.sum((obs_rank - np.mean(obs_rank, axis=0, dtype=np.float64)) ** 2, axis=0) * np.sum((sim_rank - np.mean(sim_rank, axis=0, dtype=np.float64)) ** 2, axis=0) ) r = r_num / r_den return float(r)
# def spearmann_rank_corr( # true, # predicted, # treat_arrays: bool = True, # **treat_arrays_kws # ) -> float: # """Separmann rank correlation coefficient_. # # This is a nonparametric metric and assesses how well the relationship # between the true and predicted data can be described using a monotonic # function. # https://rdrr.io/cran/hydroGOF/man/rSpearman.html # .. _coefficient: # https://hess.copernicus.org/articles/24/2505/2020/hess-24-2505-2020.pdf # # Parameters # ---------- # true : # true/observed/actual/target values. It must be a numpy array, # or pandas series/DataFrame or a list. # predicted : # simulated values # treat_arrays : # process the true and predicted arrays using maybe_treat_arrays function # # Examples # --------- # >>> import numpy as np # >>> from SeqMetrics import spearmann_rank_corr # >>> t = np.random.random(10) # >>> p = np.random.random(10) # >>> spearmann_rank_corr(t, p) # """ # true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # col = [list(a) for a in zip(true, predicted)] # xy = sorted(col, key=lambda _x: _x[0], reverse=False) # # rang of x-value # for i, row in enumerate(xy): # row.append(i + 1) # # a = sorted(xy, key=lambda _x: _x[1], reverse=False) # # rang of y-value # for i, row in enumerate(a): # row.append(i + 1) # # mw_rank_x = np.nanmean(np.array(a)[:, 2]) # mw_rank_y = np.nanmean(np.array(a)[:, 3]) # # numerator = np.nansum([float((a[j][2] - mw_rank_x) * (a[j][3] - mw_rank_y)) for j in range(len(a))]) # denominator1 = np.sqrt(np.nansum([(a[j][2] - mw_rank_x) ** 2. for j in range(len(a))])) # denominator2 = np.sqrt(np.nansum([(a[j][3] - mw_rank_x) ** 2. for j in range(len(a))])) # return float(numerator / (denominator1 * denominator2))
[docs]def log_nse( true, predicted, treat_arrays: bool = True, epsilon:float=0.0, log_base:str='e', **treat_arrays_kws ) -> float: """ `log transformed Nash-Sutcliffe Efficiency <https://doi.org/10.1002/2016WR019605>`_. It is especially useful for capturing prediction performance for the lowest flows due to the logarithmic transform. .. math:: NSE = 1-\\frac{\\sum_{i=1}^{N}(log(e_{i})-log(s_{i}))^2}{\\sum_{i=1}^{N}(log(e_{i})-log(\\bar{e})^2}-1)*-1 Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function epsilon : A small value to be added to true and predicted values to avoid log(0) treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. References ---------- Pushpalatha, R.; Perrin, C.; le Moine, N. and Andréassian V. (2012). "A review of efficiency criteria suitable for evaluating low-flow simulations". Journal of Hydrology. 420-421, 171-182. doi:10.1016/j.jhydrol.2011.11.055 https://doi.org/10.1029/2012WR012005 Examples --------- >>> import numpy as np >>> from SeqMetrics import log_nse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> log_nse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) if log_base == 'e': func = np.log elif log_base == '10': func = np.log10 elif log_base == '2': func = np.log2 else: raise ValueError('log_base must be e, 10 or 2') true, predicted = func(true + epsilon), func(predicted + epsilon) return nse(true, predicted, treat_arrays=False)
[docs]def corr_coeff( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Pearson correlation coefficient as proposed by `Pearson, 1895 <https://royalsocietypublishing.org/doi/abs/10.1098/rsta.1895.0010>`_. It measures linear correlatin between true and predicted arrays. It is sensitive to outliers. The following equation is taken after `Jiang et al., 2022 <https://doi.org/10.5194/essd-15-621-2023>`_ . .. math:: r = \\frac{\\sum ^n _{i=1}(predicted_i - \\bar{predicted})(s_i - \\bar{observed})}{\\sqrt{\\sum ^n _{i=1}(predicted_i - \\bar{predicted})^2} \\sqrt{\\sum ^n _{i=1}(true_i - \\bar{true})^2}} Where n is length of true/predicted arrays. Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import corr_coeff >>> t = np.random.random(10) >>> p = np.random.random(10) >>> corr_coeff(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) correlation_coefficient = np.corrcoef(true, predicted)[0, 1] return float(correlation_coefficient)
[docs]def rmse( true, predicted, treat_arrays: bool = True, weights=None, **treat_arrays_kws ) -> float: """ `Root mean squared error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.root_mean_squared_error.html>`_ .. math:: \\text{RMSE} = \\sqrt{\\frac{\\sum_{i=1}^{n} w_i (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} w_i}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function weights: treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import rmse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rmse(t, p) """ if treat_arrays: true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return sqrt(np.average((true - predicted) ** 2, axis=0, weights=weights))
[docs]def rmsle( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """Root mean square log error. This error is less sensitive to `outliers <https://stats.stackexchange.com/q/56658/314919>`_ . Compared to RMSE, RMSLE only considers the relative error between predicted and actual values, and the scale of the error is nullified by the log-transformation. Furthermore, RMSLE penalizes underestimation more than overestimation. This is especially useful in those studies where the underestimation of the target variable is not acceptable but overestimation can be `tolerated <https://doi.org/10.1016/j.scitotenv.2020.137894>`_ . .. math:: RMSLE = \\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left( \\log(1 + \\text{predicted}_i) - \\log(1 + \\text{true}_i) \\right)^2} References ---------- https://scikit-learn.org/stable/modules/generated/sklearn.metrics.root_mean_squared_log_error.html Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import rmsle >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rmsle(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.sqrt(np.mean(np.power(log1p(predicted) - log1p(true), 2))))
[docs]def mape( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Mean Absolute Percentage Error. The MAPE is often used when the quantity to predict is known to remain way above zero_. It is useful when the size or size of a prediction variable is significant in evaluating the accuracy of a prediction_. It has advantages of scale-independency and interpretability. However, it has the significant disadvantage that it produces infinite or undefined values for zero or close-to-zero actual values_. .. _zero: https://doi.org/10.1016/j.neucom.2015.12.114 .. _prediction: https://doi.org/10.1088/1742-6596/930/1/012002 .. _values: https://doi.org/10.1016/j.ijforecast.2015.12.003 .. math:: MAPE = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\frac{true_i - predicted_i}{true_i} \\right| \\times 100 References --------- https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import mape >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mape(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.mean(np.abs((true - predicted) / true)) * 100)
[docs]def nrmse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Normalized Root Mean Squared Error <https://www.sciencedirect.com/science/article/pii/S0957417411003289>`_ .. math:: NRMSE = \\frac{\\sqrt{\\frac{1}{N} \\sum_{i=1}^{N} (\\text{true}_i - \\text{predicted}_i)^2}}{\\max(\\text{true}) - \\min(\text{true})} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import nrmse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nrmse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(rmse(true, predicted, treat_arrays=False) / (np.max(true) - np.min(true)))
[docs]def pbias( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Percent bias <https://elibrary.asabe.org/abstract.asp?aid=46548>`_ determines how well the model simulates the average magnitudes for the output response of interest. It can also determine over and under-prediction. It cannot be used (1) for single-event simulations to identify differences in timing and magnitude of peak flows and the shape of recession curves nor (2) to determine how well the model simulates residual variations and/or trends for the output response of interest. It can give a deceiving rating of model performance if the model overpredicts as much as it underpredicts, in which case percent bias will be close to zero even though the model simulation is poor. .. math:: PBIAS = 100 \\times \\frac{\\sum_{i=1}^{N} (\\text{true}_i - \\text{predicted}_i)}{\\sum_{i=1}^{N} \\text{true}_i} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import pbias >>> t = np.random.random(10) >>> p = np.random.random(10) >>> pbias(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(100.0 * sum(true - predicted) / sum(true))
[docs]def bias( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Bias as and given by Gupta1998_ et al., 1998 in Table 1 It is also called mean error. .. math:: Bias=\\frac{1}{N}\\sum_{i=1}^{N}(True_{i}-Predicted_{i}) .. _Gupta1998: https://doi.org/10.1029/97WR03495 Parameters ---------- true : true/observed/actual/measured/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import bias >>> t = np.random.random(10) >>> p = np.random.random(10) >>> bias(t, p) ... >>> bias([1.1, 2.2, 3.3], [11.1, 12.2, 13.3]) -10.0 """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) bias_ = np.nansum(true - predicted) / len(true) return float(bias_)
[docs]def mae( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Mean Absolute Error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html>`_. It is less sensitive to outliers as compared to mse/rmse. .. math:: \\text{MAE} = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right| Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import mae >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mae(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) if true is None: true = true if predicted is None: predicted = predicted return float(np.mean(np.abs(true - predicted)))
# def abs_pbias(true, predicted, treat_arrays: bool = True, # **treat_arrays_kws) -> float: # """Absolute Percent bias # # Parameters # ---------- # true : # true/observed/actual/target values. It must be a numpy array, # or pandas series/DataFrame or a list. # predicted : # simulated values # treat_arrays : # process the true and predicted arrays using maybe_treat_arrays function # # Examples # --------- # >>> import numpy as np # >>> from SeqMetrics import abs_pbias # >>> t = np.random.random(10) # >>> p = np.random.random(10) # >>> abs_pbias(t, p) # """ # true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # _apb = 100.0 * sum(abs(predicted - true)) / sum(true) # return float(_apb)
[docs]def gmae( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Geometric Mean Absolute Error <https://doi.org/10.1016/j.isprsjprs.2024.04.015>`_ .. math:: GMAE = \\left( \\prod_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right| \\right)^{\\frac{1}{n}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import gmae >>> t = np.random.random(10) >>> p = np.random.random(10) >>> gmae(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) error = true - predicted return _geometric_mean(np.abs(error))
[docs]def inrse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Integral Normalized Root Squared Error <https://doi.org/10.1016/j.engappai.2023.107559>`_ .. math:: IN\\text{-}RSE = \\sqrt{\\frac{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} (\\text{true}_i - \\overline{\\text{true}})^2}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import inrse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> inrse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) error = true - predicted return float(np.sqrt(np.sum(np.square(error)) / np.sum(np.square(true - np.mean(true)))))
[docs]def irmse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Inertial RMSE <https://link.springer.com/article/10.1007/s11069-008-9299-2>`_. RMSE divided by standard deviation of the gradient of true. .. math:: \\text{IRMSE} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left( \\text{true}_i - \\text{predicted}_i \\right)^2}}{\\sqrt{\\frac{1}{n-2} \\sum_{i=1}^{n-1} \\left( (\\text{true}_{i+1} - \\text{true}_i) - \\overline{(\\text{true}_{i+1} - \\text{true}_i)} \\right)^2}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import irmse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> irmse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # Getting the gradient of the observed data obs_len = true.size obs_grad = true[1:obs_len] - true[0:obs_len - 1] # Standard deviation of the gradient obs_grad_std = np.std(obs_grad, ddof=1) # Divide RMSE by the standard deviation of the gradient of the observed data return float(rmse(true, predicted, treat_arrays=False) / obs_grad_std)
[docs]def mase( true, predicted, treat_arrays: bool = True, seasonality: int = 1, **treat_arrays_kws): """ Mean Absolute Scaled Error following `Hyndman et al., 2006 <http://datascienceassn.org/sites/default/files/Another%20Look%20at%20Measures%20of%20Forecast%20Accuracy.pdf>`_. Baseline (benchmark) is computed with naive forecasting (shifted by seasonality) modified after `this <https://gist.github.com/bshishov/5dc237f59f019b26145648e2124ca1c9>`_. It is the ratio of MAE of used model and MAE of naive forecast. .. math:: \\text{MASE} = \\frac{\\frac{1}{n} \\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\frac{1}{n-s} \\sum_{i=s+1}^{n} \\left| \\text{true}_i - \\text{true}_{i-s} \\right|} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function process the true and predicted arrays using maybe_treat_arrays function seasonality: treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import mase >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mase(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return mae(true, predicted, False) / mae(true[seasonality:], _naive_prognose(true, seasonality), treat_arrays=False)
[docs]def mare(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Mean Absolute Relative Error. When expressed in %age, it is also known as mape_. .. math:: \\text{MARE} = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i} \\right| .. _mape: https://doi.org/10.1016/j.rser.2015.08.035 Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import mare >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mare(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) error = true - predicted return float(np.mean(np.abs(error / true)))
[docs]def msle(true, predicted, treat_arrays=True, weights=None, **treat_arrays_kws) -> float: """ `Mean square logrithmic error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_log_error.html>`_ .. math:: \\text{MSLE} = \\frac{\\sum_{i=1}^{n} w_i \\cdot \\text{sq_log_error}_i}{\\sum_{i=1}^{n} w_i} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function weights: treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import msle >>> t = np.random.random(10) >>> p = np.random.random(10) >>> msle(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.average((log1p(true) - log1p(predicted)) ** 2, axis=0, weights=weights))
def log1p(array): with np.errstate(**ERR_STATE): return np.log1p(array)
[docs]def covariance( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Covariance as defined in Eq. 3 at mathworld <https://mathworld.wolfram.com/Covariance.html>`_ A positive covariance means that the means of true and predicted values increase or decrease together. .. math:: Covariance = \\frac{1}{N} \\sum_{i=1}^{N}((true_{i} - \\bar{true}) * (predicted_{i} - \\bar{predicted})) The bar represents the mean of the array. Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import covariance >>> t = np.random.random(10) >>> p = np.random.random(10) >>> covariance(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) obs_mean = np.mean(true) sim_mean = np.mean(predicted) covariance_ = np.mean((true - obs_mean) * (predicted - sim_mean)) return float(covariance_)
[docs]def brier_score( true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Adopted from `SkillMetrics <https://github.com/PeterRochford/SkillMetrics/blob/master/skill_metrics/brier_score.py>`_ This function calculates the `Brier score (BS) <https://viterbi-web.usc.edu/~shaddin/teaching/cs699fa17/docs/Brier50.pdf>`_, which is a measure of the mean-square error of probability forecasts for a dichotomous (two-category) event, such as the occurrence/non-occurrence of precipitation. The score is calculated using the formula: .. math:: BS = sum_(n=1)^N (f_n - o_n)^2/N where f is the forecast probabilities, o is the observed probabilities (0 or 1), and N is the total number of values in f & o. Note that f & o must have the same number of values, and those values must be in the `range [0,1] <https://data.library.virginia.edu/a-brief-on-brier-scores/>`_. Returns -------- float BS : Brier score References --------- `D. S. Wilks, 1995: Statistical Methods in the Atmospheric Sciences. Cambridge Press. 547 pp <https://doi.org/10.1016/C2017-0-03921-6>`_ Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import brier_score >>> t = np.random.random(10) >>> p = np.random.random(10) >>> brier_score(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # Check for valid values index = np.where(np.logical_and(true != 0, true != 1)) if np.sum(index) > 0: msg = 'Observed has values not equal to 0 or 1.' raise ValueError(msg) index = np.where(np.logical_or(predicted < 0, predicted > 1)) if np.sum(index) > 0: msg = 'Forecast has values outside interval [0,1].' raise ValueError(msg) # Calculate score bs = np.sum(np.square(predicted - true)) / len(predicted) return bs
[docs]def bic( true, predicted, treat_arrays: bool = True, p=1, **treat_arrays_kws) -> float: """ Bayesian Information Criterion Minimising the BIC_ is intended to give the best model. The model chosen by the BIC is either the same as that chosen by the AIC, or one with fewer terms. This is because the BIC penalises the number of parameters more heavily than the AIC. Modified after RegscorePy_. .. math:: BIC = n \\cdot \\ln\\left(\\frac{\\text{SSE}}{n}\\right) + p \\cdot \\ln(n) .. _BIC: https://otexts.com/fpp2/selecting-predictors.html#schwarzs-bayesian-information-criterion .. _RegscorePy: https://github.com/UBC-MDS/RegscorePy/blob/master/RegscorePy/bic.py Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function p: treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import bic >>> t = np.random.random(10) >>> p = np.random.random(10) >>> bic(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) assert p >= 0 n = len(true) return float(n * np.log(sse(true, predicted, treat_arrays=False) / n) + p * np.log(n))
[docs]def sse(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Sum of squared errors_ (model vs actual). It is measure of how far off our model's predictions are from the observed values. A value of 0 indicates that all predications are spot on. A non-zero value indicates errors_. This is also called residual sum of squares (RSS) or sum of squared residuals as per tutorialspoint_ . .. math:: \\text{SSE} = \\sum_{i=1}^{n} (true_i - predicted_i)^2 .. _errors: https://dziganto.github.io/data%20science/linear%20regression/machine%20learning/python/Linear-Regression-101-Metrics/ .. _tutorialspoint: https://www.tutorialspoint.com/statistics/residual_sum_of_squares.html Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import sse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> sse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) squared_errors = (true - predicted) ** 2 return float(np.sum(squared_errors))
[docs]def amemiya_pred_criterion(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """`Amemiya's Prediction Criterion <https://www.sfu.ca/sasdoc/sashtml/ets/chap30/sect19.htm#:~:text=Amemiya>`_ .. math:: \\text{APC} = \\left( \\frac{n + k}{n - k} \\right) \\left( \\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2 \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import amemiya_pred_criterion >>> t = np.random.random(10) >>> p = np.random.random(10) >>> amemiya_pred_criterion(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) k = 1 n = len(predicted) return float(((n + k) / (n - k)) * (1 / n) * sse(true, predicted, treat_arrays=False))
[docs]def amemiya_adj_r2(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """`Amemiya's Adjusted R-squared <https://www.sfu.ca/sasdoc/sashtml/ets/chap30/sect19.htm#:~:text=Amemiya>`_ .. math:: R^2_{\\text{adj, Amemiya}} = 1 - \\left( \\frac{(1 - R^2) \\cdot (n + k)}{n - k - 1} \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import amemiya_adj_r2 >>> t = np.random.random(10) >>> p = np.random.random(10) >>> amemiya_adj_r2(t, p) """ if treat_arrays: true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) k = 1 n = len(predicted) adj_r = 1 - ((1 - r2(true, predicted, treat_arrays=False)) * (n + k)) / (n - k - 1) return float(adj_r)
[docs]def aitchison(true, predicted, treat_arrays: bool = True, center='mean', **treat_arrays_kws) -> float: """ Aitchison distance as used in `Zhang et al., 2020 <https://doi.org/10.5194/hess-24-2505-2020>`_. .. math:: d_{\\text{Aitchison}} = \\sqrt{\\sum_{i=1}^{n} \\left( \\log(\\text{true}_i) - \\text{center}(\\log(\\text{true})) - \\left(\\log(\\text{predicted}_i) - \\text{center}(\\log(\\text{predicted}))\\right) \\right)^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function center: treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import aitchison >>> t = np.random.random(10) >>> p = np.random.random(10) >>> aitchison(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) lx = log_t(true) ly = log_p(predicted) if center.upper() == 'MEAN': m = np.mean elif center.upper() == 'MEDIAN': m = np.median else: raise ValueError clr_x = lx - m(lx) clr_y = ly - m(ly) d = (sum((clr_x - clr_y) ** 2)) ** 0.5 return float(d)
def log_t(true): with np.errstate(**ERR_STATE): return np.log(true) def log_p(predicted): with np.errstate(**ERR_STATE): return np.log(predicted) def _assert_greater_than_one(true, predicted): # assert that both true and predicted arrays are greater than one. if len(true) <= 1 or len(predicted) <= 1: raise ValueError(f""" Expect length of true and predicted arrays to be larger than 1 but they are {len(true)} and {len(predicted)}""") return
[docs]def acc( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """Anomaly correction coefficient. See Langland_ et al., 2012; Miyakoda_ et al., 1972 and Murphy_ et al., 1989. .. math:: ACC = \\frac{\\sum_{i=1}^{N} \\left( (\\text{predicted}_i - \\overline{\\text{predicted}})(\\text{true}_i - \\overline{\\text{true}}) \\right)}{(N-1) \\cdot \\sigma_{\\text{true}} \\cdot \\sigma_{\\text{predicted}}} .. _Langland: https://doi.org/10.3402/tellusa.v64i0.17531 .. _Miyakoda: .. _Murphy: https://doi.org/10.1080/02723646.1972.10642213 Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import acc >>> t = np.random.random(10) >>> p = np.random.random(10) >>> acc(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = predicted - np.mean(predicted) b = true - np.mean(true) c = np.std(true, ddof=1) * np.std(predicted, ddof=1) * predicted.size return float(np.dot(a, b / c))
[docs]def agreement_index( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Agreement Index (d) developed by `Willmott, 1981 <https://doi.org/10.1080/02723646.1981.10642213>`_. It detects additive and pro-portional differences in the observed and simulated means and variances (`Moriasi et al., 2015 <https://doi.org/10.13031/trans.58.10715>`_). It is overly sensitive to extreme values due to the squared differences. It can also be used as a substitute for R2 to identify the degree to which model predictions are error-free. .. math:: d = 1 - \\frac{\\sum_{i=1}^{N}(e_{i} - s_{i})^2}{\\sum_{i=1}^{N}(\\left | s_{i} - \\bar{e} \\right | + \\left | e_{i} - \\bar{e} \\right |)^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import agreement_index >>> t = np.random.random(10) >>> p = np.random.random(10) >>> agreement_index(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) b = np.abs(predicted - np.mean(true)) c = np.abs(true - np.mean(true)) e = (b + c) ** 2 agreement_index_ = 1 - (np.sum((true - predicted) ** 2)) / (np.sum(e)) return float(agreement_index_)
[docs]def legates_coeff_eff(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Legates Coefficient of Efficiency. Its value varies between 0 and 1. It is not as sensitive to extreme values as agreement_index and coefficcient of determination because of the utilization of the absolute value of the difference instead of the squared difference. See Equaltion 23 in `Dodo et al., 2022 <https://doi.org/10.1016/j.nexus.2022.100157>`_ .. math:: LCE = 1 - \\frac{\\sum_{i=1}^{n} |true_i - predicted_i|}{\\sum_{i=1}^{n} |true_i - \\bar{true}|} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import legates_coeff_eff >>> t = np.random.random(10) >>> p = np.random.random(10) >>> agreement_index(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) agreement_index_ = 1 - (np.sum((true - predicted) ** 2)) / (np.sum(np.abs(predicted - np.mean(true)))) return float(agreement_index_)
[docs]def aic( true, predicted, treat_arrays: bool = True, p:int=1, **treat_arrays_kws ) -> float: """ It estimates relative quality of a model for a given input. By comparing AIC for differnt models, we can identify the model which best explains the data. Theoretically, it penlizes those models with more parameters thereby reducing overfitting/model complexity. When comparing multiple models, the one with the lowest value is generally preferred. When sample size is small, then AIC can be biased. Akaike_ Information Criterion. Modifying from this sourcee_ .. math:: AIC = n \\cdot \\ln\\left(\\frac{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}{n}\\right) + 2p .. _Akaike: https://doi.org/10.1007/978-1-4612-1694-0_15 .. _sourcee: https://github.com/UBC-MDS/RegscorePy/blob/master/RegscorePy/aic.py Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function p : int number of parameters in the model treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import aic >>> t = np.random.random(10) >>> p = np.random.random(10) >>> aic(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) assert p > 0 _assert_greater_than_one(true, predicted) # noac n = len(true) resid = np.subtract(predicted, true) rss = np.sum(np.power(resid, 2)) return float(n * np.log(rss / n) + 2 * p)
[docs]def cronbach_alpha( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ It is a measure of internal consitency of data following `Cheung and Yip, 2005 <https://doi.org/10.1016/B0-12-369398-5/00396-0>`_. See ucla_ and stackoverflow_ pages for more info. .. math:: alpha = \\frac{N}{N - 1} \\left(1 - \\frac{\\sum_{i=1}^{N} \\sigma^2_{i}}{\\sigma^2_{\\text{total}}}\\right) .. _ucla: https://stats.idre.ucla.edu/spss/faq/what-does-cronbachs-alpha-mean/ .. _stackoverflow: https://stackoverflow.com/a/20799687/5982232 Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import cronbach_alpha >>> t = np.random.random(10) >>> p = np.random.random(10) >>> cronbach_alpha(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) itemscores = np.stack([true, predicted]) itemvars = itemscores.var(axis=1, ddof=1) tscores = itemscores.sum(axis=0) nitems = len(itemscores) return float(nitems / (nitems - 1.) * (1 - itemvars.sum() / tscores.var(ddof=1)))
[docs]def centered_rms_dev( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Modified after `SkillMetrics <https://doi.org/10.1016/B0-12-227410-5/00612-8>`_. Calculates the centered root-mean-square (RMS) difference between true and predicted using the formula: (E')^2 = sum_(n=1)^N [(p_n - mean(p))(r_n - mean(r))]^2/N where p is the predicted values, r is the true values, and N is the total number of values in p & r. .. math:: CRMSD = \\sqrt{\\frac{1}{N} \\sum_{i=1}^{N} \\left( (p_i - \\text{mean}(p)) - (r_i - \\text{mean}(r)) \\right)^2} Output: CRMSDIFF : centered root-mean-square (RMS) difference (E')^2 Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Examples --------- >>> import numpy as np >>> from SeqMetrics import centered_rms_dev >>> t = np.random.random(10) >>> p = np.random.random(10) >>> centered_rms_dev(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # Calculate means pmean = np.mean(predicted) rmean = np.mean(true) # Calculate (E')^2 crmsd = np.square((predicted - pmean) - (true - rmean)) crmsd = np.sum(crmsd) / predicted.size crmsd = np.sqrt(crmsd) return float(crmsd)
[docs]def cosine_similarity(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ It is a judgment of orientation and not magnitude: two vectors with the same orientation have a cosine similarity of 1, two vectors oriented at 90° relative to each other have a similarity of 0, and two vectors diametrically opposed have a similarity of -1, independent of their magnitude. `See <https://doi.org/10.1016/B978-0-12-804452-0.00002-6>`_ .. math:: \\text{Cosine Similarity} = \\frac{\\sum_{i=1}^{n} \\text{true}_i \\cdot \\text{predicted}_i}{\\sqrt{\\sum_{i=1}^{n} (\\text{true}_i)^2} \\cdot \\sqrt{\\sum_{i=1}^{n} (\\text{predicted}_i)^2}} References ---------- https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.cosine_similarity.html Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import cosine_similarity >>> t = np.random.random(10) >>> p = np.random.random(10) >>> cosine_similarity(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.dot(true, predicted) / (np.linalg.norm(true) * np.linalg.norm(predicted)))
[docs]def decomposed_mse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Decomposed MSE developed by `Kobayashi and Salam (2000) <https://doi.org/10.2134/agronj2000.922345x>`_ Equation 24 .. math :: dMSE = (\\frac{1}{N}\\sum_{i=1}^{N}(e_{i}-s_{i}))^2 + SDSD + LCS .. math:: SDSD = (\\sigma(e) - \\sigma(s))^2 .. math:: LCS = 2 \\sigma(e) \\sigma(s) * (1 - \\frac{\\sum ^n _{i=1}(e_i - \\bar{e})(s_i - \\bar{s})} {\\sqrt{\\sum ^n _{i=1}(e_i - \\bar{e})^2} \\sqrt{\\sum ^n _{i=1}(s_i - \\bar{s})^2}}) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import decomposed_mse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> decomposed_mse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) t_std = np.std(true) s_std = np.std(predicted) bias_squared = bias(true, predicted, treat_arrays=False) ** 2 # sdsd is difference in the magnitude of fluctuation between predicted and true # bigger sdsd means the model fails to simulate the magnitude of the fluctuations in the true data sdsd = (s_std - t_std) ** 2 # lack of positive correlation weighted by the std # bigger lcs means the models fails to simulate the patters of the fluctuations in the true data lcs = 2 * t_std * s_std * (1 - corr_coeff(true, predicted, treat_arrays=False)) decomposed_mse_ = bias_squared + sdsd + lcs return float(decomposed_mse_)
[docs]def euclid_distance( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Euclidian distance <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.euclidean_distances.html>`_ taken from `Elementary DIfferential Geometry by Barret O'Neil <https://doi.org/10.1016/B978-0-12-088735-4.50006-7>`_. .. math:: D = \\sqrt{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2} Parameters ---------- true : true/observed/actual/measured/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import euclid_distance >>> t = np.random.random(10) >>> p = np.random.random(10) >>> euclid_distance(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.linalg.norm(true - predicted))
[docs]def exp_var_score(true, predicted, treat_arrays: bool = True, weights=None, **treat_arrays_kws) -> Union[float, None]: """ Explained variance `score <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.explained_variance_score.html>`_ . Best value is 1, lower values are less accurate. .. math:: \\text{EVS} = 1 - \\frac{\\sum_{i=1}^{n} w_i \\left( (true_i - predicted_i) - \\frac{\\sum_{j=1}^{n} w_j (true_j - predicted_j)}{\\sum_{j=1}^{n} w_j} \\right)^2}{\\sum_{i=1}^{n} w_i (true_i - \\frac{\\sum_{j=1}^{n} w_j true_j}{\\sum_{j=1}^{n} w_j})^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function weights: Examples --------- >>> import numpy as np >>> from SeqMetrics import exp_var_score >>> t = np.random.random(10) >>> p = np.random.random(10) >>> exp_var_score(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) y_diff_avg = np.average(true - predicted, weights=weights, axis=0) numerator = np.average((true - predicted - y_diff_avg) ** 2, weights=weights, axis=0) y_true_avg = np.average(true, weights=weights, axis=0) denominator = np.average((true - y_true_avg) ** 2, weights=weights, axis=0) if numerator == 0.0: return None output_scores = _foo(denominator, numerator) return float(np.average(output_scores, weights=weights))
[docs]def expanded_uncertainty(true, predicted, treat_arrays: bool = True, cov_fact=1.96, **treat_arrays_kws) -> float: """By default, it calculates uncertainty with 95% confidence interval. 1.96 is the coverage factor corresponding 95% confidence level .This indicator is used in order to show more information about the model deviation. Using formula from by Behar_ et al., 2015 and Gueymard_ et al., 2014. .. math:: U = \\text{cov_fact} \\times \\sqrt{\\frac{1}{n-1} \\sum_{i=1}^{n} \\left( \\left(\\text{true}_i - \\text{predicted}_i\\right) - \\overline{\\left(\\text{true} - \\text{predicted}\\right)} \\right)^2 + \\frac{1}{n} \\sum_{i=1}^{n} \\left(\\text{true}_i - \\text{predicted}_i\\right)^2} .. _Behar: https://doi.org/10.1016/j.enconman.2015.03.067 .. _Gueymard: https://doi.org/10.1016/j.rser.2014.07.117 Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function cov_fact: Examples --------- >>> import numpy as np >>> from SeqMetrics import expanded_uncertainty >>> t = np.random.random(10) >>> p = np.random.random(10) >>> expanded_uncertainty(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) error = true - predicted sd = np.std(error) return float(cov_fact * np.sqrt(sd ** 2 + rmse(true, predicted, treat_arrays=False) ** 2))
[docs]def fdc_fhv( true, predicted, treat_arrays: bool = True, h: float = 0.02, **treat_arrays_kws) -> float: """ Peak flow bias of the flow duration curve `(Yilmaz 2008) <doi:10.1029/2007WR006716>`_ as used in `kratzert et al., 2019 <https://hess.copernicus.org/articles/23/5089/2019/hess-23-5089-2019.html>`_. Code modified `Kratzert2018 <https://github.com/kratzert/ealstm_regional_modeling/blob/64a446e9012ecd601e0a9680246d3bbf3f002f6d/papercode/metrics.py#L190>`_ code. .. math:: FHV = \\frac{\\sum_{i=1}^{k} (predicted_i - true_i)}{\\sum_{i=1}^{k} true_i} \\times 100 Parameters ---------- h : float Must be between 0 and 1. true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : bool process the true and predicted arrays using maybe_treat_arrays function Returns ------- Bias of the peak flows Examples --------- >>> import numpy as np >>> from SeqMetrics import fdc_fhv >>> t = np.random.random(10) >>> p = np.random.random(10) >>> fdc_fhv(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) if (h <= 0) or (h >= 1): raise RuntimeError("h has to be in the range (0,1)") # sort both in descending order true = -np.sort(-true) predicted = -np.sort(-predicted) # subset data to only top h flow values true = true[:np.round(h * len(true)).astype(int)] predicted = predicted[:np.round(h * len(predicted)).astype(int)] fhv = np.sum(predicted - true) / (np.sum(true)) return float(fhv * 100)
[docs]def fdc_flv(true, predicted, treat_arrays: bool = True, low_flow: float = 0.3, **treat_arrays_kws) -> float: """ bias of the bottom 30 % low flows as used in `kratzert et al., 2019 <https://hess.copernicus.org/articles/23/5089/2019/hess-23-5089-2019.html>`_. .. math:: \\text{FLV} = -1 \\times \\frac{\\sum (\\log(\\text{predicted}) - \\min(\\log(\\text{predicted}))) - \\sum (\\log(\\text{true}) - \\min(\\log(\\text{true})))}{\\sum (\\log(\\text{true}) - \\min(\\log(\\text{true}))) + 1 \\times 10^{-6}} Parameters ---------- low_flow : float, optional Upper limit of the flow duration curve. E.g. 0.3 means the bottom 30% of the flows are considered as low flows, by default 0.3 true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Returns ------- float Examples --------- >>> import numpy as np >>> from SeqMetrics import fdc_flv >>> t = np.random.random(10) >>> p = np.random.random(10) >>> fdc_flv(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) low_flow = 1.0 - low_flow # make sure that metric is calculated over the same dimension obs = true.flatten() sim = predicted.flatten() if (low_flow <= 0) or (low_flow >= 1): raise RuntimeError("l has to be in the range (0,1)") # for numerical reasons change 0s to 1e-6 sim[sim == 0] = 1e-6 obs[obs == 0] = 1e-6 # sort both in descending order obs = -np.sort(-obs) sim = -np.sort(-sim) # subset data to only top h flow values obs = obs[np.round(low_flow * len(obs)).astype(int):] sim = sim[np.round(low_flow * len(sim)).astype(int):] # transform values to log scale obs = np.log(obs) sim = np.log(sim) # calculate flv part by part qsl = np.sum(sim - sim.min()) qol = np.sum(obs - obs.min()) flv = -1 * (qsl - qol) / (qol + 1e-6) return float(flv * 100)
[docs]def gmean_diff(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Geometric mean difference <https://www.sciencedirect.com/science/article/abs/pii/S0022316624002281>`_. First geometric mean is calculated for true and predicted arrays and their difference is calculated. .. math:: \\text{gmean_diff} = \\left( \\prod_{i=1}^{n} \\text{true}_i \\right)^{\\frac{1}{n}} - \\left( \\prod_{i=1}^{n} \\text{predicted}_i \\right)^{\\frac{1}{n}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import gmean_diff >>> t = np.random.random(10) >>> p = np.random.random(10) >>> gmean_diff(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(_geometric_mean(true) - _geometric_mean(predicted))
[docs]def gmrae(true, predicted, treat_arrays: bool = True, benchmark: np.ndarray = None, **treat_arrays_kws) -> float: """ `Geometric Mean Relative Absolute Error <https://doi.org/10.1016/0169-2070(92)90008-W>`_ .. math:: GMRAE = \\left( \\prod_{i=1}^{n} \\frac{|true_i - predicted_i|}{|true_i - benchmark_i|} \\right)^{\\frac{1}{n}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function benchmark: Examples --------- >>> import numpy as np >>> from SeqMetrics import gmrae >>> t = np.random.random(10) >>> p = np.random.random(10) >>> gmrae(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return _geometric_mean(np.abs(_relative_error(true, predicted, benchmark)))
def _relative_error(true, predicted, benchmark: np.ndarray = None): """ Relative Error Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values """ error = true - predicted if benchmark is None or isinstance(benchmark, int): # If no benchmark prediction provided - use naive forecasting if not isinstance(benchmark, int): seasonality = 1 else: seasonality = benchmark return (true[seasonality:] - predicted[seasonality:]) / \ (true[seasonality:] - _naive_prognose(true, seasonality) + EPS) return error / (true - benchmark + EPS) def _naive_prognose(true, seasonality: int = 1): """ Naive forecasting method which just repeats previous samples Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. """ return true[:-seasonality] def _minimal() -> list: """some minimal and basic metrics""" return ['r2', 'mape', 'nrmse', 'corr_coeff', 'rmse', 'mae', 'mse', 'mpe', 'mase', 'r2_score'] def _hydro_metrics() -> list: """Names of metrics related to hydrology""" return _minimal() + [ 'fdc_flv', 'fdc_fhv', 'kge', 'kge_np', 'kge_mod', 'kge_bound', 'kgeprime_bound', 'kgenp_bound', 'nse', 'nse_alpha', 'nse_beta', 'nse_mod', 'nse_bound']
[docs]def calculate_hydro_metrics( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> dict: """ Calculates the following performance metrics related to hydrology. - fdc_flv - fdc_fhv - kge - kge_np - kge_mod - kge_bound - kgeprime_bound - kgenp_bound - nse - nse_alpha - nse_beta - nse_mod - nse_bound - r2 - mape - nrmse - corr_coeff - rmse - mae - mse - mpe - mase - r2_score Returns ------- dict Dictionary with all metrics Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import calculate_hydro_metrics >>> t = np.random.random(10) >>> p = np.random.random(10) >>> calculate_hydro_metrics(t, p) """ metrics = {} true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) for metric in [fdc_flv, fdc_fhv, kge, kge_np, kge_mod, kge_bound, kgeprime_bound, kgenp_bound, nse, nse_alpha, nse_beta, nse_mod, nse_bound, r2, mape, nrmse, corr_coeff, rmse, mae, mse, mpe, mase, r2_score]: metrics[metric.__name__] = metric(true, predicted, treat_arrays=False) return metrics
[docs]def JS(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """`Jensen-shannon divergence <https://datascientest.com/en/jensen-shannon-divergence-everything-you-need-to-know-about-this-ml-model#:~:text=Jensen%2DShannon%20divergence%20and%20Data,and%20expected%20or%20reference%20distributions>`_ .. math:: JS(P \parallel Q) = \\frac{1}{2} \\sum_{i} \\left( P(i) \\log_2 \\left( \\frac{2P(i)}{P(i) + Q(i)} \\right) + Q(i) \\log_2 \\left( \\frac{2Q(i)}{P(i) + Q(i)} \\right) \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import JS >>> t = np.random.random(10) >>> p = np.random.random(10) >>> JS(t, p) """ if treat_arrays: true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) warnings.filterwarnings("ignore", category=RuntimeWarning) d1 = true * np.log2(2 * true / (true + predicted)) d2 = predicted * np.log2(2 * predicted / (true + predicted)) d1[np.isnan(d1)] = 0 d2[np.isnan(d2)] = 0 d = 0.5 * sum(d1 + d2) return float(d)
def kendall_tau(true, predicted, treat_arrays: bool = True, return_p=False, **treat_arrays_kws) -> Union[float, tuple]: """ Kendall's tau_ .used in Probst_ et al., 2019. It is a non-parameteric estimate of correlation between true and predicted arrays. It does not assume linearity of the relationship between true and predicted values. It compares the ranks of the values in the two arrays to estimate strength and direction of association between them. It ranges between -1 to 1 with 1 indicating strong association and -1 indicated strong disassociation. .. math:: tau = \\frac{(C - D)}{\\sqrt{(C + D + T_{\\text{true}})(C + D + T_{\\text{predicted}})}} 10.1088/1742-5468/aace08 .. _tau: https://machinelearningmastery.com/how-to-calculate-nonparametric-rank-correlation-in-python/ .. _Probst: https://www.jmlr.org/papers/volume20/18-444/18-444.pdf Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function return_p: Examples --------- >>> import numpy as np >>> from SeqMetrics import kendall_tau >>> t = np.random.random(10) >>> p = np.random.random(10) >>> kendall_tau(t, p) """ if kendalltau is None: raise NotImplementedError("kendalltau function is not available. Please install scipy") true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) coef, p = kendalltau(true, predicted) if return_p: return coef, p return float(coef) def kgeprime_bound(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Bounded Version of the Modified Kling-Gupta Efficiency <https://iahs.info/uploads/dms/13614.21--211-219-41-MATHEVET.pdf>`_ .. math:: KGE'_{\\text{bounded}} = \\frac{1 - \\sqrt{(r - 1)^2 + (\\gamma - 1)^2 + (\\beta - 1)^2}}{2 - (1 - \\sqrt{(r - 1)^2 + (\\gamma - 1)^2 + (\\beta - 1)^2})} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import kgeprime_bound >>> t = np.random.random(10) >>> p = np.random.random(10) >>> kgeprime_bound(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) kgeprime_ = kge_mod(true, predicted, return_all=True, treat_arrays=False)[0] kgeprime_c2m_ = kgeprime_ / (2 - kgeprime_) return float(kgeprime_c2m_.item())
[docs]def kgenp_bound(true, predicted, treat_arrays: bool = True, **treat_arrays_kws): """ `Bounded Version of the Non-Parametric Kling-Gupta Efficiency <https://thibhlln.github.io/hydroeval/functions/hydroeval.kgenp_c2m.html#hydroeval.kgenp_c2m>`_ .. math:: KGE_{np_{bound}} = \\frac{1 - \\sqrt{\\left(\\rho(t, p) - 1\\right)^2 + \\left(1 - 0.5 \\sum_{i=1}^{n} \\left| \\frac{\\text{sorted}(p_i)}{\\text{mean}(p) \\cdot n} - \\frac{\\text{sorted}(t_i)}{\\text{mean}(t) \\cdot n} \\right| - 1\\right)^2 + \\left(\\frac{\\text{mean}(p)}{\\text{mean}(t)} - 1\\right)^2}}{2 - \\left(1 - \\sqrt{\\left(\\rho(t, p) - 1\\right)^2 + \\left(1 - 0.5 \\sum_{i=1}^{n} \\left| \\frac{\\text{sorted}(p_i)}{\\text{mean}(p) \\cdot n} - \\frac{\\text{sorted}(t_i)}{\\text{mean}(t) \\cdot n} \\right| - 1\\right)^2 + \\left(\\frac{\\text{mean}(p)}{\\text{mean}(t)} - 1\\right)^2}\\right)} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import kgenp_bound >>> t = np.random.random(10) >>> p = np.random.random(10) >>> kgenp_bound(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) kgenp_ = kge_np(return_all=True, true=true, predicted=predicted, treat_arrays=False)[0] kgenp_c2m_ = kgenp_ / (2 - kgenp_) return float(kgenp_c2m_.item())
[docs]def kl_sym(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> Union[float, None]: """ `Symmetric kullback-leibler divergence <https://doi.org/10.1016/j.procs.2018.10.144>`_ .. math:: \\text{KL}_{\\text{sym}}(P || Q) = \\frac{1}{2} \\sum_{i=1}^{n} \\left( P_i - Q_i \\right) \\left( \\log_2 \\frac{P_i}{Q_i} \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import kl_sym >>> t = np.random.random(10) >>> p = np.random.random(10) >>> kl_sym(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) if not all((true == 0) == (predicted == 0)): return None # ('KL divergence not defined when only one distribution is 0.') x, y = true, predicted # set values where both distributions are 0 to the same (positive) value. # This will not contribute to the final distance. x[x == 0] = 1 y[y == 0] = 1 d = 0.5 * np.sum((x - y) * (np.log2(x) - np.log2(y))) return float(d)
[docs]def lm_index(true, predicted, treat_arrays: bool = True, obs_bar_p=None, **treat_arrays_kws) -> float: """ `Legate-McCabe Efficiency Index <https://doi.org/10.1016/j.cmpb.2023.107737>`_. Less sensitive to outliers in the data. The larger, the better .. math:: a_i = |predicted_i - true_i| .. math:: b_i = |true_i - \\text{obs\\_bar\\_p}| \\text{if } \\text{obs\\_bar\\_p} \\text{ is provided} \\|true_i - \\bar{true}| \\text{otherwise} .. math:: \\text{LM Index} = 1 - \\frac{\\sum_{i=1}^{n} a_i}{\\sum_{i=1}^{n} b_i} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function obs_bar_p : float, Seasonal or other selected average. If None, the mean of the observed array will be used. Examples --------- >>> import numpy as np >>> from SeqMetrics import lm_index >>> t = np.random.random(10) >>> p = np.random.random(10) >>> lm_index(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) mean_obs = np.mean(true) a = np.abs(predicted - true) if obs_bar_p is not None: b = np.abs(true - obs_bar_p) else: b = np.abs(true - mean_obs) return float(1 - (np.sum(a) / np.sum(b)))
[docs]def maape(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Mean Arctangent Absolute Percentage Error <https://doi.org/10.1016/j.ijforecast.2015.12.003>`_ Note: result is NOT multiplied by 100 .. math:: MAAPE = \\frac{1}{n} \\sum_{i=1}^{n} \\arctan \\left( \\frac{| \\text{true}_i - \\text{predicted}_i |}{| \\text{true}_i | + \\epsilon} \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import maape >>> t = np.random.random(10) >>> p = np.random.random(10) >>> maape(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.mean(np.arctan(np.abs((true - predicted) / (true + EPS)))))
def _percentage_error(true, predicted): """ Percentage error. The value is multiplied by 100 to reflect percentage. Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values """ error = true - predicted return error / (true + EPS) * 100 def _bounded_relative_error( true, predicted, benchmark: np.ndarray = None): """ Bounded Relative Error Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values """ error = true - predicted if benchmark is None or isinstance(benchmark, int): # If no benchmark prediction provided - use naive forecasting if not isinstance(benchmark, int): seasonality = 1 else: seasonality = benchmark abs_err = np.abs(true[seasonality:] - predicted[seasonality:]) abs_err_bench = np.abs(true[seasonality:] - _naive_prognose(true, seasonality)) else: abs_err = np.abs(error) abs_err_bench = np.abs(error) return abs_err / (abs_err + abs_err_bench + EPS)
[docs]def mbrae(true, predicted, treat_arrays: bool = True, benchmark: np.ndarray = None, **treat_arrays_kws) -> float: """ `Mean Bounded Relative Absolute Error <https://doi.org/10.1371/journal.pone.0174202>`_ .. math:: MBRAE = \\frac{1}{n} \\sum_{i=1}^{n} \\frac{| \\text{true}_i - \\text{predicted}_i |}{| \\text{true}_i - \\text{benchmark}_i |} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function benchmark: Examples --------- >>> import numpy as np >>> from SeqMetrics import mbrae >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mbrae(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.mean(_bounded_relative_error(true, predicted, benchmark=benchmark)))
[docs]def mapd(true, predicted, treat_arrays: bool = True, #ToDo equation not multiplied by 100 **treat_arrays_kws) -> float: """ `Mean absolute percentage deviation <https://doi.org/10.1016/j.rinma.2022.100347>`_ .. math:: MAPD = \\frac{\\sum_{i=1}^{n} \\left| predicted_i - true_i \\right|}{\\sum_{i=1}^{n} \\left| true_i \\right|} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import mapd >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mapd(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = np.sum(np.abs(predicted - true)) b = np.sum(np.abs(true)) return float(a / b)
def _ae(true, predicted): """Absolute error Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values """ return np.abs(true - predicted)
[docs]def max_error(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `maximum absolute error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.max_error.html>`_ In Sklearn, there is "absolute" in equation but not in name of metric. .. math:: \\text{Max Error} = \\max_{i=1}^n \\left| \\text{true}_i - \\text{predicted}_i \\right| Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import max_error >>> t = np.random.random(10) >>> p = np.random.random(10) >>> max_error(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.max(_ae(true, predicted)))
[docs]def mb_r(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Mielke-Berry R value <https://psycnet.apa.org/buy/1988-15790-001>`_. .. math:: R = 1 - \\frac{n^2 \\cdot \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\text{predicted}_i - \\text{true}_i \\right|}{\\sum_{i=1}^{n} \\sum_{j=1}^{n} \\left| \\text{predicted}_j - \\text{true}_i \\right|} References ---------- `Mielke, P. W., & Berry, K. J. (2007). Permutation methods: a distance function approach. Springer Science & Business Media <https://link.springer.com/book/10.1007/978-1-4757-3449-2>`_ Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import mb_r >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mb_r(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # Calculate metric n = predicted.size tot = 0.0 for i in range(n): tot = tot + np.sum(np.abs(predicted - true[i])) mae_val = np.sum(np.abs(predicted - true)) / n mb = 1 - ((n ** 2) * mae_val / tot) return float(mb)
[docs]def mda( true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """Mean Directional Accuracy .. math:: \\text{MDA} = \\frac{1}{n-1} \\sum_{i=1}^{n-1} \\left( \\text{sign}( \\text{true}_{i+1} - \\text{true}_i) == \\text{sign}( \\text{predicted}_{i+1} - \\text{predicted}_i) \\right) modified `after <https://gist.github.com/bshishov/5dc237f59f019b26145648e2124ca1c9>`_. Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import mda >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mda(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) dict_acc = np.sign(true[1:] - true[:-1]) == np.sign(predicted[1:] - true[:-1]) return float(np.mean(dict_acc))
[docs]def mde(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Median Error <https://doi.org/10.1016/j.cma.2024.116842>`_ .. math:: MDE = \\text{median}(\\text{predicted}_i - \\text{true}_i) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import mde >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mde(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.median(predicted - true))
[docs]def mdape(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Median Absolute Percentage Error <https://doi.org/10.1016/j.petrol.2021.109265>`_. The value is multiplied by 100. .. math:: \\text{MdAPE} = 100 \\times \\text{Median} \\left( \\left\\{ \\frac{|\\text{true}_i - \\text{predicted}_i|}{|\\text{true}_i|} \\right\\}_{i=1}^n \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import mdape >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mdape(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.median(np.abs(_percentage_error(true, predicted))))
[docs]def mdrae(true, predicted, treat_arrays: bool = True, benchmark: np.ndarray = None, **treat_arrays_kws) -> float: """ `Median Relative Absolute Error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.median_absolute_error.html>`_ In Sklearn, there is "absolute" in equation but not in name of metric. .. math:: MdRAE = \\text{median} \\left( \\left| \\frac{true_i - predicted_i}{true_i - benchmark_i} \\right| \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function benchmark: Examples --------- >>> import numpy as np >>> from SeqMetrics import mdrae >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mdrae(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.median(np.abs(_relative_error(true, predicted, benchmark))))
[docs]def me(true, predicted, treat_arrays: bool = True, **treat_arrays_kws): """ `Mean error <https://doi.org/10.1016/j.scitotenv.2024.174533>`_ or bias. .. math:: ME = \\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import me >>> t = np.random.random(10) >>> p = np.random.random(10) >>> me(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) error = true - predicted return float(np.mean(error))
[docs]def mean_bias_error( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Mean Bias Error It represents overall bias error or systematic error. It shows average interpolation bias; i.e. average over- or underestimation. [1][2].This indicator expresses a tendency of model to underestimate (negative value) or overestimate (positive value) global radiation, while the mean bias error values closest to zero are desirable. The drawback of this test is that it does not show the correct performance when the model presents overestimated and underestimated values at the same time, since overestimation and underestimation values cancel each `other <https://doi.org/10.1016/j.rser.2015.08.035>`_. .. math:: \\text{MBE} = \\frac{1}{N} \\sum_{i=1}^{N} (true_i - predicted_i) References ---------- - `Willmott, C. J., & Matsuura, K. (2006). On the use of dimensioned measures of error to evaluate the performance of spatial interpolators. International Journal of Geographical Information Science, 20(1), 89-102. <https://doi.org/10.1080/1365881050028697>`_ - `Valipour, M. (2015). Retracted: Comparative Evaluation of Radiation-Based Methods for Estimation of Potential Evapotranspiration. Journal of Hydrologic Engineering, 20(5), 04014068. <https://dx.doi.org/10.1061/(ASCE)HE.1943-5584.0001066>`_ - `Despotovic, M., Nedic, V., Despotovic, D., & Cvetanovic, S. (2015). Review and statistical analysis of different global solar radiation sunshine models. Renewable and Sustainable Energy Reviews, 52, 1869-1880. <https://doi.org/10.1016/j.rser.2015.08.035>`_ Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import mean_bias_error >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mean_bias_error(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.sum(true - predicted) / len(true))
[docs]def mean_var( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Mean variance, adopted from `HydroErr <https://github.com/BYU-Hydroinformatics/HydroErr/blob/master/HydroErr/HydroErr.py#L6072>`_ .. math:: \\text{mean_var} = \\text{Var} \\left( \\log(1 + \\text{true}) - \\log(1 + \\text{predicted}) \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import mean_var >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mean_var(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.var(log1p(true) - log1p(predicted)))
[docs]def mean_poisson_deviance( true, predicted, treat_arrays: bool = True, weights=None, **treat_arrays_kws ) -> float: """ mean poisson deviance .. math:: \\text{MPD} = \\frac{1}{n} \\sum_{i=1}^{n} 2 \\left( \\text{true}_i \\log \\left( \\frac{\\text{true}_i}{\\text{predicted}_i} \\right) - (\\text{true}_i - \\text{predicted}_i) \\right) References --------- `<https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_poisson_deviance.html>`_ Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function weights: Examples --------- >>> import numpy as np >>> from SeqMetrics import mean_poisson_deviance >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mean_poisson_deviance(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return _mean_tweedie_deviance(true, predicted, weights=weights, power=1)
[docs]def mean_gamma_deviance(true, predicted, treat_arrays: bool = True, weights=None, **treat_arrays_kws) -> float: """ `mean gamma deviance <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_gamma_deviance.html>`_ .. math:: \\text{Mean Gamma Deviance (Weighted)} = \\frac{1}{\\sum_{i=1}^{n} w_i} \\sum_{i=1}^{n} w_i \\frac{2}{\\text{true}_i} \\left( \\text{predicted}_i - \\text{true}_i - \\text{true}_i \\ln \\left( \\frac{\\text{predicted}_i}{\\text{true}_i} \\right) \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function weights: Examples --------- >>> import numpy as np >>> from SeqMetrics import mean_gamma_deviance >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mean_gamma_deviance(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return _mean_tweedie_deviance(true, predicted, weights=weights, power=2)
[docs]def median_abs_error( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ median absolute error .. math:: \\text{MedAE} = \\text{median} \\left( \\left| \\text{true}_i - \\text{predicted}_i \\right| \\right) References ---------- `<https://scikit-learn.org/stable/modules/generated/sklearn.metrics.median_absolute_error.html>`_ Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import median_abs_error >>> t = np.random.random(10) >>> p = np.random.random(10) >>> median_abs_error(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.median(np.abs(predicted - true), axis=0))
[docs]def med_seq_error( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Median Squared Error <https://www.sciencedirect.com/science/article/pii/S2468227620301757>`_ It is same as mse, but it takes median which reduces the impact of outliers. .. math:: \\text{MedSE} = \\text{median} \\left( (\\text{predicted}_i - \\text{true}_i)^2 \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import med_seq_error >>> t = np.random.random(10) >>> p = np.random.random(10) >>> med_seq_error(t, p) """ if treat_arrays: true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.median((predicted - true) ** 2))
[docs]def mle( true, predicted, treat_arrays=True, **treat_arrays_kws ) -> float: """ `Mean log error <https://doi.org/10.1038/s41598-023-29871-8>`_ .. math:: \\text{MLE} = \\frac{1}{n} \\sum_{i=1}^{n} \\left( \\log(1 + \\text{predicted}_i) - \\log(1 + \\text{true}_i) \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import mle >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mle(t, p) """ if treat_arrays: true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.mean(log1p(predicted) - log1p(true)))
[docs]def mod_agreement_index( true, predicted, treat_arrays: bool = True, j:int=1, **treat_arrays_kws) -> float: """ `Modified agreement of index <https://search.r-project.org/CRAN/refmans/hydroGOF/html/md.html>`_. It varies between 0 and 1 where 1 indicates perfect match between the observed and predicted values. .. math:: MAI = 1 - \\frac{\\sum_{i=1}^{n} \\left| \\text{predicted}_i - \\text{true}_i \\right|^j}{\\sum_{i=1}^{n} \\left( \\left| \\text{predicted}_i - \\overline{\\text{true}} \\right| + \\left| \\text{true}_i - \\overline{\\text{true}} \\right| \\right)^j} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function j : int, default 1 when j is 2, this is same as agreement_index. Higher j means more impact of outliers. Examples --------- >>> import numpy as np >>> from SeqMetrics import mod_agreement_index >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mod_agreement_index(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = (np.abs(predicted - true)) ** j b = np.abs(predicted - np.mean(true)) c = np.abs(true - np.mean(true)) e = (b + c) ** j return float(1 - (np.sum(a) / np.sum(e)))
[docs]def mpe( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Mean Percentage Error <https://doi.org/10.1016/j.molliq.2023.123378>`_. The value is multiplied by 100 to reflect percentage. .. math:: MPE = \\frac{1}{n} \\sum_{i=1}^{n} \\left( \\frac{true_i - predicted_i}{true_i} \\right) \\times 100 Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import mpe >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mpe(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.mean(_percentage_error(true, predicted)))
[docs]def mrae(true, predicted, treat_arrays: bool = True, benchmark: np.ndarray = None, **treat_arrays_kws): """ `Mean Relative Absolute Error <https://doi.org/10.1016/j.comnet.2024.110237>`_ .. math:: MRAE = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{benchmark}_i} \\right| Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function benchmark: Examples --------- >>> import numpy as np >>> from SeqMetrics import mrae >>> t = np.random.random(10) >>> p = np.random.random(10) >>> mrae(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.mean(np.abs(_relative_error(true, predicted, benchmark))))
[docs]def norm_euclid_distance(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Normalized Euclidian distance <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.euclidean_distances.html>`_ .. math:: D_{norm} = \\sqrt{\\sum_{i=1}^{n} \\left( \\frac{\\text{true}_i}{\\bar{\\text{true}}} - \\frac{\\text{predicted}_i}{\\bar{\\text{predicted}}} \\right)^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import norm_euclid_distance >>> t = np.random.random(10) >>> p = np.random.random(10) >>> norm_euclid_distance(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = true / np.mean(true) b = predicted / np.mean(predicted) return float(np.linalg.norm(a - b))
[docs]def norm_nse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Normalized Nash-Sutcliffe Efficiency <https://doi.org/10.1029/2021WR030138>`_. It ranges from 0 to 1. A value of 1 indicates perfect fit. Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) nse_ = nse(true, predicted, treat_arrays=False) return 1 / (2 - nse_)
[docs]def nrmse_range( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Range Normalized Root Mean Squared Error after `Pontius et al., 2008 <https://link.springer.com/article/10.1007/s10651-007-0043-y>`_ RMSE normalized by true values. This allows comparison between data sets with different scales. It is more sensitive to outliers. .. math:: \\text{NRMSE} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} (\\text{predicted}_i - \\text{true}_i)^2}}{\\max(\\text{true}) - \\min(\\text{true})} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import nrmse_range >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nrmse_range(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(rmse(true, predicted, treat_arrays=False) / (np.max(true) - np.min(true)))
[docs]def nrmse_ipercentile( true, predicted, treat_arrays: bool = True, q1=25, q2=75, **treat_arrays_kws ) -> float: """ RMSE normalized by inter percentile range of true. This is the least sensitive to outliers. q1: any interger between 1 and 99 q2: any integer between 2 and 100. Should be greater than q1. Reference: `Pontius et al., 2008. <https://link.springer.com/article/10.1007/s10651-007-0043-y>`_ .. math:: \\text{NRMSE}_{\\text{IP}} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}}{Q_{q2} - Q_{q1}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function q1: q2: Examples --------- >>> import numpy as np >>> from SeqMetrics import nrmse_ipercentile >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nrmse_ipercentile(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) q1 = np.percentile(true, q1) q3 = np.percentile(true, q2) iqr = q3 - q1 return float(rmse(true, predicted, treat_arrays=False) / iqr)
[docs]def nrmse_mean(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """Mean Normalized RMSE RMSE normalized by mean of true values.This allows comparison between datasets with different scales. .. math:: NRMSE_{mean} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}}{\\bar{\\text{true}}} Reference: `Pontius et al., 2008 <https://link.springer.com/article/10.1007/s10651-007-0043-y>`_ Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import nrmse_mean >>> t = np.random.random(10) >>> p = np.random.random(10) >>> nrmse_mean(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(rmse(true, predicted, treat_arrays=False) / np.mean(true))
[docs]def norm_ae(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Normalized Absolute Error <https://doi.org/10.1016/j.apor.2024.104042>`_ .. math:: norm\\_ae = \\sqrt{\\frac{\\sum_{i=1}^{n} (error_i - MAE)^2}{n - 1}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import norm_ae >>> t = np.random.random(10) >>> p = np.random.random(10) >>> norm_ae(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) error = true - predicted return float(np.sqrt(np.sum(np.square(error - mae(true, predicted, False))) / (len(true) - 1)))
[docs]def log_prob(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Logarithmic probability distribution .. math:: \\text{log_prob} = \\frac{1}{N} \\sum_{i=1}^{N} \\left( -\\frac{\\left( \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{scale}} \\right)^2}{2} - \\log(\\sqrt{2\\pi}) \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import log_prob >>> t = np.random.random(10) >>> p = np.random.random(10) >>> log_prob(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) scale = np.mean(true) / 10 if scale < .01: scale = .01 y = (true - predicted) / scale normpdf = -y ** 2 / 2 - np.log(np.sqrt(2 * np.pi)) return float(np.mean(normpdf))
[docs]def rmdspe(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Root Median Squared Percentage Error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.root_mean_squared_error.html>`_. The value is multiplied by 100 to reflect percentage. .. math:: \\text{RMDSPE} = \\sqrt{\\text{median}\\left(\\left(\\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i} \\times 100\\right)^2\\right)} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import rmdspe >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rmdspe(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.sqrt(np.median(np.square(_percentage_error(true, predicted)))))
[docs]def rse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """Relative Squared Error .. math:: \\text{RSE} = \\frac{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} (\\text{true}_i - \\bar{\\text{true}})^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import rse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.sum(np.square(true - predicted)) / np.sum(np.square(true - np.mean(true))))
[docs]def rrse(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Root Relative Squared Error <https://www.sciencedirect.com/science/article/pii/S0360319923031798>`_ .. math:: RRSE = \\sqrt{\\frac{\\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}{\\sum_{i=1}^{n} (\\text{true}_i - \\bar{\\text{true}})^2}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import rrse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rrse(t, p)""" true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.sqrt(rse(true, predicted, treat_arrays=False)))
[docs]def rae(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Relative Absolute Error <https://doi.org/10.1016/j.compbiomed.2017.02.010>`_ (aka Approximation Error) .. math:: \\text{RAE} = \\frac{\\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\sum_{i=1}^{n} \\left| \\text{true}_i - \\overline{\\text{true}} \\right|} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import rae >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rae(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.sum(_ae(true, predicted)) / (np.sum(np.abs(true - np.mean(true))) + EPS))
[docs]def ref_agreement_index( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Refined Index of Agreement after after `Willmott et al., 2012 <https://doi.org/10.1002/joc.2419>`_. It varies from -1 to 1. Larger the better. .. math:: a = \\sum_{i=1}^{n} \\left| \\text{predicted}_i - \\text{true}_i \\right| .. math:: b = 2 \\sum_{i=1}^{n} \\left| \\text{true}_i - \\overline{\\text{true}} \\right| .. math:: d_{\\text{ref}} = \\begin{cases} 1 - \\frac{a}{b} & \\text{if } a \\leq b \\ \\frac{b}{a} - 1 & \\text{if } a > b \\end{cases} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import ref_agreement_index >>> t = np.random.random(10) >>> p = np.random.random(10) >>> ref_agreement_index(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = np.sum(np.abs(predicted - true)) b = 2 * np.sum(np.abs(true - true.mean())) if a <= b: return float(1 - (a / b)) else: return float((b / a) - 1)
[docs]def rel_agreement_index(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Relative index of agreement <https://doi.org/10.1007/s10661-022-10844-9>`_. from 0 to 1. larger the better. .. math:: \\text{rel_agreement_index} = 1 - \\frac{\\sum_{i=1}^{n} \\left( \\frac{\\text{predicted}_i - \\text{true}_i}{\\text{true}_i} \\right)^2}{\\sum_{i=1}^{n} \\left( \\frac{|\\text{predicted}_i - \\bar{\\text{true}}| + |\\text{true}_i - \\bar{\\text{true}}|}{\\bar{\\text{true}}} \\right)^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import rel_agreement_index >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rel_agreement_index(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = ((predicted - true) / true) ** 2 b = np.abs(predicted - np.mean(true)) c = np.abs(true - np.mean(true)) e = ((b + c) / np.mean(true)) ** 2 return float(1 - (np.sum(a) / np.sum(e)))
[docs]def relative_rmse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Relative Root Mean Squared Error <https://search.r-project.org/CRAN/refmans/metrica/html/RRMSE.html>`_. It normalizes teh rmse by mean of true values. .. math:: RRMSE=\\frac{\\sqrt{\\frac{1}{N}\\sum_{i=1}^{N}(e_{i}-s_{i})^2}}{\\bar{e}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import relative_rmse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> relative_rmse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) rrmse = rmse(true, predicted, treat_arrays=False) / np.mean(true) return float(rrmse)
[docs]def rmspe( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Root Mean Square Percentage Error <https://stackoverflow.com/a/53166790/5982232>`_. .. math:: RMSPE = \\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left(PE_i\\right)^2} = \\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left(\\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i}\\right)^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import rmspe >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rmspe(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.sqrt(np.mean(np.square(_percentage_error(true, predicted)), axis=0)))
[docs]def rsr( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ It is MSE normalized by standard deviation of true values. Following `Moriasi et al., 2007. <https://swat.tamu.edu/media/1312/moriasimodeleval.pdf>`_. It incorporates the benefits of error index statistics and includes a scaling/normalization factor, so that the resulting statistic and reported values can apply to various constituents. It ranges from 0 to infinity, with 0-0.5 indicating very good model performance, 0.5-0.8 indicating good model performance. Standard deviation is calculated using np.ntd(true, ddof=1) to match the results of `this implementation <https://rdrr.io/cran/hydroGOF/man/rsr.html>`_. .. math:: \\text{RSR} = \\frac{\\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2}}{\\sqrt{\\frac{1}{n-1} \\sum_{i=1}^{n} (\\text{true}_i - \\bar{\\text{true}})^2}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import rsr >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rsr(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(rmse(predicted=predicted, true=true, treat_arrays=False) / np.std(true, ddof=1))
[docs]def rmsse( true, predicted, treat_arrays: bool = True, seasonality: int = 1, **treat_arrays_kws ) -> float: """ Root Mean Squared Scaled Error after `Muhaimin et al., 2021 <https://doi.org/10.1109/Confluence51648.2021.9376880>`_ and `Zhou T, 2023 <https://doi.org/10.1109/ICAIBD57115.2023.10206380>`_. It is also considered similar to MASE. .. math:: \\text{RMSSE} = \\sqrt{\\frac{1}{n} \\sum_{i=1}^{n} \\left( \\frac{\\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\frac{1}{n-s} \\sum_{j=s+1}^{n} \\left| \\text{true}_j - \\text{true}_{j-s} \\right|} \\right)^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function seasonality: Examples --------- >>> import numpy as np >>> from SeqMetrics import rmsse >>> t = np.random.random(10) >>> p = np.random.random(10) >>> rmsse(t, p) """ # todo : check equation from the reference, do they really match? true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) error = true - predicted q = np.abs(error) / mae(true[seasonality:], _naive_prognose(true, seasonality), treat_arrays=False) return float(np.sqrt(np.mean(np.square(q))))
[docs]def sa( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Spectral angle `Keshava N, 2004 <https://doi.org/10.1109/TGRS.2004.830549>`_. It is arccosine of the dot product of true and predicted arrays. It varies from -pi/2 to pi/2. Closer to 0 is better. It measures angle between two vectors in hyperspace indicating how well the shape of two arrays match instead of their magnitude. .. math:: SA = \\arccos \\left( \\frac{\\sum_{i=1}^{n} (\\text{true}_i \\cdot \\text{predicted}_i)}{\\sqrt{\\sum_{i=1}^{n} (\\text{true}_i)^2} \\cdot \\sqrt{\\sum_{i=1}^{n} (\\text{predicted}_i)^2}} \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import sa >>> t = np.random.random(10) >>> p = np.random.random(10) >>> sa(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = np.dot(predicted, true) b = np.linalg.norm(predicted) * np.linalg.norm(true) return float(np.arccos(a / b))
[docs]def sc( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Spectral correlation ater `Robila and Gershman, 2005 <https://ieeexplore.ieee.org/abstract/document/1509878>`_.. It varies from -pi/2 to pi/2. Closer to 0 is better. It measures the angle between the two vectors in hyperspace and highlights how well the shape of the two series match. .. math:: sc = \\arccos \\left( \\frac{ \\sum_{i=1}^{n} (t_i - \\bar{t}) \\cdot (p_i - \\bar{p}) }{ \\sqrt{\\sum_{i=1}^{n} (t_i - \\bar{t})^2} \\cdot \\sqrt{\\sum_{i=1}^{n} (p_i - \\bar{p})^2} } \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import sc >>> t = np.random.random(10) >>> p = np.random.random(10) >>> sc(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = np.dot(true - np.mean(true), predicted - np.mean(predicted)) b = np.linalg.norm(true - np.mean(true)) c = np.linalg.norm(predicted - np.mean(predicted)) e = b * c return float(np.arccos(a / e))
[docs]def sga(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """Spectral gradient angle. It varies from -pi/2 to pi/2. Closer to 0 is better. .. math:: \\text{SGA} = \\arccos \\left( \\frac{\\sum_{i=1}^{n-1} \\left( (true_{i+1} - true_i) \\cdot (predicted_{i+1} - predicted_i) \\right)}{\\sqrt{\\sum_{i=1}^{n-1} (true_{i+1} - true_i)^2} \\times \\sqrt{\\sum_{i=1}^{n-1} (predicted_{i+1} - predicted_i)^2}} \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import sga >>> t = np.random.random(10) >>> p = np.random.random(10) >>> sga(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) sgx = true[1:] - true[:true.size - 1] sgy = predicted[1:] - predicted[:predicted.size - 1] a = np.dot(sgx, sgy) b = np.linalg.norm(sgx) * np.linalg.norm(sgy) return float(np.arccos(a / b))
[docs]def smape(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Symmetric Mean Absolute Percentage Error <https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error>`_. Adoption from `this <https://stackoverflow.com/a/51440114/5982232>`_. .. math:: SMAPE = \\frac{100}{n} \\sum_{i=1}^{n} \\frac{2 \\left| \\text{predicted}_i - \\text{true}_i \\right|}{\\left| \\text{true}_i \\right| + \\left| \\text{predicted}_i \\right|} Goodwin and Lawton, 1999 : https://doi.org/10.1016/S0169-2070(99)00007-2 Flores et al., 1986 : https://doi.org/10.1016/0305-0483(86)90013-7 Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import smape >>> t = np.random.random(10) >>> p = np.random.random(10) >>> smape(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) _temp = np.sum(2 * np.abs(predicted - true) / (np.abs(true) + np.abs(predicted))) return float(100 / len(true) * _temp)
[docs]def smdape( true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Symmetric Median Absolute Percentage Error Note: result is NOT multiplied by 100 .. math:: \\text{smdape} = \\text{median} \\left( \\frac{2 \\cdot | \\text{predicted} - \\text{true} |}{| \\text{true} | + | \\text{predicted} | + \\epsilon} \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import smdape >>> t = np.random.random(10) >>> p = np.random.random(10) >>> smdape(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.median(2.0 * _ae(predicted=predicted, true=true) / ((np.abs(true) + np.abs(predicted)) + EPS)))
[docs]def sid( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Spectral Information Divergence. From -pi/2 to pi/2. Closer to 0 is better. .. math:: \\text{SID} = \\left( \\frac{\\text{t}}{\\text{mean(t)}} - \\frac{\\text{p}}{\\text{mean(p)}} \\right) \\cdot \\left( \\log_{10}(\\text{t}) - \\log_{10}(\\text{mean(t)}) - \\log_{10}(\\text{p}) + \\log_{10}(\\text{mean(p)}) \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import sid >>> t = np.random.random(10) >>> p = np.random.random(10) >>> sid(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) first = (true / np.mean(true)) - ( predicted / np.mean(predicted)) second1 = np.log10(true) - np.log10(np.mean(true)) second2 = np.log10(predicted) - np.log10(np.mean(predicted)) return float(np.dot(first, second1 - second2))
[docs]def skill_score_murphy( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Skill score after `Murphy, 1988 <https://doi.org/10.1175/1520-0493(1988)116%3C2417:SSBOTM%3E2.0.CO;2>`_. Adopted from `SkillMetrics <https://github.com/PeterRochford/SkillMetrics/blob/278b2f58c7d73566f25f10c9c16a15dc204f5869/skill_metrics/skill_score_murphy.py>`_ . Calculate non-dimensional skill score (SS) between two variables using definition of Murphy (1988) using the formula: .. math:: SS = 1 - RMSE^2/SDEV^2 where SDEV is the standard deviation of the true values .. math:: SDEV^2 = sum_(n=1)^N [r_n - mean(r)]^2/(N-1) where p is the predicted values, r is the reference values, and N is the total number of values in p & r. Note that p & r must have the same number of values. A positive skill score can be interpreted as the percentage of improvement of the new model forecast in comparison to the reference. On the other hand, a negative skill score denotes that the forecast of interest is worse than the referencing forecast. Consequently, a value of zero denotes that both forecasts perform equally [MLAir, 2020]. Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Returns: flaot Examples --------- >>> import numpy as np >>> from SeqMetrics import skill_score_murphy >>> t = np.random.random(10) >>> p = np.random.random(10) >>> skill_score_murphy(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # Calculate RMSE rmse2 = rmse(true, predicted, treat_arrays=False) ** 2 # Calculate standard deviation sdev2 = np.std(true, ddof=1) ** 2 # Calculate skill score ss = 1 - rmse2 / sdev2 return float(ss)
[docs]def std_ratio(true, predicted, treat_arrays: bool = True, std_kwargs: dict = None, **treat_arrays_kws ) -> float: """ `Ratio of standard deviations of predictions and trues <https://doi.org/10.1016/j.engfracmech.2024.110057>`_. Also known as standard ratio, it varies from 0.0 to infinity while 1.0 being the perfect value. .. math:: \\text{std_ratio} = \\frac{\\sigma_{\\text{predicted}}}{\\sigma_{\\text{true}}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import std_ratio >>> t = np.random.random(10) >>> p = np.random.random(10) >>> std_ratio(t, p) """ std_kwargs = std_kwargs or {} true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.std(predicted, **std_kwargs) / np.std(true, **std_kwargs))
[docs]def umbrae( true, predicted, treat_arrays: bool = True, benchmark: np.ndarray = None, **treat_arrays_kws): """ `Unscaled Mean Bounded Relative Absolute Error <https://doi.org/10.1016/j.jclepro.2022.135414>`_ .. math:: UMBRAE = \\frac{\\frac{1}{n} \\sum_{i=1}^{n} \\frac{|t_i - p_i|}{|t_i - b_i|}}{1 - \\frac{1}{n} \\sum_{i=1}^{n} \\frac{|t_i - p_i|}{|t_i - b_i|}} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function benchmark : Examples --------- >>> import numpy as np >>> from SeqMetrics import umbrae >>> t = np.random.random(10) >>> p = np.random.random(10) >>> umbrae(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return mbrae(true, predicted, False, benchmark) / (1 - mbrae(true, predicted, False, benchmark))
[docs]def ve(true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Volumetric efficiency <https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2007WR006415>`_. Ranges from 0 to 1. Smaller the better. .. math:: VE = 1 - \\frac{\\sum_{i=1}^{n} \\left| \\text{predicted}_i - \\text{true}_i \\right|}{\\sum_{i=1}^{n} \\text{true}_i} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import ve >>> t = np.random.random(10) >>> p = np.random.random(10) >>> ve(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = np.sum(np.abs(predicted - true)) b = np.sum(true) return float(1 - (a / b))
[docs]def volume_error( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Returns the Volume Error (Ve) after `Reynolds, 2017 <https://doi.org/10.1016/j.jhydrol.2017.05.012>`_. It is an indicator of the agreement between the averages of the simulated and observed runoff (i.e. long-term water balance). .. math:: \\text{volume_error}= Sum(predicted- true)/sum(predicted) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import volume_error >>> t = np.random.random(10) >>> p = np.random.random(10) >>> volume_error(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # TODO written formula and executed formula are different. ve_ = np.sum(predicted - true) / np.sum(true) return float(ve_)
[docs]def wape( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `weighted absolute percentage error <https://mattdyor.wordpress.com/2018/05/23/calculating-wape/>`_. The lower the better. It is a variation of mape but more suitable for intermittent and low-volume `data <https://arxiv.org/pdf/2103.12057v1.pdf>`_. .. math:: \\text{WAPE} = \\frac{\\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\sum_{i=1}^{n} \\text{true}_i} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import wape >>> t = np.random.random(10) >>> p = np.random.random(10) >>> wape(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.sum(_ae(true, predicted) / np.sum(true)))
[docs]def watt_m( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Watterson's M. <https://rmets.onlinelibrary.wiley.com/doi/abs/10.1002/(SICI)1097-0088(199604)16:4%3C379::AID-JOC18%3E3.0.CO;2-U>`_ .. math:: M = \\frac{2}{\\pi} \\cdot \\arcsin \\left( 1 - \\frac{\\frac{1}{n} \\sum_{i=1}^{n} ( \\text{true}_i - \\text{predicted}_i )^2}{\\sigma_{\\text{true}}^2 + \\sigma_{\\text{predicted}}^2 + (\\mu_{\\text{predicted}} - \\mu_{\\text{true}})^2} \\right) Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import watt_m >>> t = np.random.random(10) >>> p = np.random.random(10) >>> watt_m(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) a = 2 / np.pi c = np.std(true, ddof=1) ** 2 + np.std(predicted, ddof=1) ** 2 e = (np.mean(predicted) - np.mean(true)) ** 2 f = c + e return float(a * np.arcsin(1 - (mse(true, predicted, treat_arrays=False) / f)))
[docs]def wmape( true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ `Weighted Mean Absolute Percent Error <https://stackoverflow.com/a/54833202/5982232>`_. .. math:: \\text{WMAPE} = \\frac{\\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|}{\\sum_{i=1}^{n} \\text{true}_i} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import wmape >>> t = np.random.random(10) >>> p = np.random.random(10) >>> wmape(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # Take a series (actual) and a dataframe (forecast) and calculate wmape # for each forecast. Output shape is (1, num_forecasts) # Make an array of mape (same shape as forecast) se_mape = abs(true - predicted) / true # Calculate sum of actual values ft_actual_sum = true.sum(axis=0) # Multiply the actual values by the mape se_actual_prod_mape = true * se_mape # Take the sum of the product of actual values and mape # Make sure to sum down the rows (1 for each column) ft_actual_prod_mape_sum = se_actual_prod_mape.sum(axis=0) # Calculate wmape for each forecast and return as a dictionary ft_wmape_forecast = ft_actual_prod_mape_sum / ft_actual_sum return float(ft_wmape_forecast)
[docs]def norm_ape( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Normalized Absolute Percentage Error .. math:: \\text{norm_APE} = \\sqrt{ \\frac{1}{n-1} \\sum_{i=1}^{n} \\left( \\left| \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i} \\right| - \\frac{1}{n} \\sum_{j=1}^{n} \\left| \\frac{\\text{true}_j - \\text{predicted}_j}{\\text{true}_j} \\right| \\right)^2 } Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function Examples --------- >>> import numpy as np >>> from SeqMetrics import norm_ape >>> t = np.random.random(10) >>> p = np.random.random(10) >>> norm_ape(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float( np.sqrt(np.sum(np.square(_percentage_error(true, predicted) - mape(true, predicted, treat_arrays=False))) / ( len(true) - 1)))
[docs]def mse( true, predicted, treat_arrays: bool = True, weights=None, **treat_arrays_kws ) -> float: """ `Mean Square Error <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html>`_ .. math:: MSE = \\frac{\\sum_{i=1}^{N} w_i (true_i - predicted_i)^2}{\\sum_{i=1}^{N} w_i} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : process the true and predicted arrays using maybe_treat_arrays function weights : Examples --------- >>> import numpy as np >>> from SeqMetrics import mse >>> t = np.random.random(10) >>> p = np.random.random(10)treat_arrays >>> mse(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.average((true - predicted) ** 2, axis=0, weights=weights))
[docs]def variability_ratio(true, predicted, treat_arrays: bool = True, **treat_arrays_kws) -> float: """ Variability Ratio It is the ratio of the variance of the predicted values to the variance of the true values. It is used to measure the variability of the predicted values relative to the true values. .. math:: VR = 1 - \\left| \\frac{\\frac{\\sigma_{\\text{predicted}}}{\\mu_{\\text{predicted}}}}{\\frac{\\sigma_{\\text{true}}}{\\mu_{\\text{true}}}} - 1 \\right| Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated/predicted values treat_arrays : treat_arrays the true and predicted array Examples --------- >>> import numpy as np >>> from SeqMetrics import variability_ratio >>> t = np.random.random(10) >>> p = np.random.random(10) >>> variability_ratio(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(1 - abs((np.std(predicted) / np.mean(predicted)) / (np.std(true) / np.mean(true)) - 1))
[docs]def concordance_corr_coef( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ `Concordance Correlation Coefficient (CCC) <https://en.wikipedia.org/wiki/Concordance_correlation_coefficient>`_ taken from this `paper <https://doi.org/10.2307/2532051>`_. .. math:: CCC = \\frac{2 \\rho \\sigma_{true} \\sigma_{predicted}}{\\sigma_{true}^2 + \\sigma_{predicted}^2 + (\\bar{true} - \\bar{predicted})^2} Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : simulated values treat_arrays : treat_arrays the true and predicted array Examples --------- >>> import numpy as np >>> from SeqMetrics import concordance_corr_coef >>> t = np.random.random(10) >>> p = np.random.random(10) >>> concordance_corr_coef(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # taken from https://nirpyresearch.com/concordance-correlation-coefficient/ mean_true = np.mean(true) mean_predicted = np.mean(predicted) var_true = np.var(true) var_predicted = np.var(predicted) true_mean = np.mean(true) pred_mean = np.mean(predicted) numerator = np.sum((true - true_mean) * (predicted - pred_mean)) denominator = np.sqrt(np.sum((true - true_mean) ** 2)) * np.sqrt(np.sum((predicted - pred_mean) ** 2)) pearson = numerator / denominator ccc = (2 * pearson * np.sqrt(var_true) * np.sqrt(var_predicted)) / (var_true + var_predicted + (mean_true - mean_predicted) ** 2) return float(ccc)
[docs]def critical_success_index( true, predicted,treat_arrays: bool = True, threshold=0.5, **treat_arrays_kws )->float: """ `Critical Success Index (CSI) <https://doi.org/10.1016/j.heliyon.2024.e26371>`_ .. math:: CSI = \\frac{TP}{TP + FN + FP} Parameters ---------- true : True/observed/actual/target values. It should be a binary array (0s and 1s), or a continuous array where values are binarized using a threshold. predicted : Predicted values, same format as 'true'. treat_arrays : treat_arrays the true and predicted array threshold : Threshold for binarizing continuous values (if applicable). Examples --------- >>> import numpy as np >>> from SeqMetrics import critical_success_index >>> t = np.array([0.4, 0.1, 0.1, 0.3, 0.7, 0.1]) >>> p = np.array([0.8, 0.11, 0.5, 0.1, 0.1, 0.1]) >>> critical_success_index(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # Binarize if the arrays are not binary true = np.array(true) >= threshold predicted = np.array(predicted) >= threshold TP = np.sum((true == 1) & (predicted == 1)) FN = np.sum((true == 1) & (predicted == 0)) FP = np.sum((true == 0) & (predicted == 1)) csi = TP / float(TP + FN + FP) if (TP + FN + FP) > 0 else 0 return float(csi)
[docs]def kl_divergence( true, predicted, treat_arrays: bool = True, **treat_arrays_kws )->float: """ `Kullback-Leibler Divergence <https://doi.org/10.1016/j.imu.2024.101510>`_ .. math:: D_{KL}(P \\parallel Q) = \\sum_{i} P(i) \\log \\left( \\frac{P(i)}{Q(i)} \\right) Parameters ---------- true : True/observed/actual/target probability distribution. It must be a numpy array, pandas series/DataFrame, or a list. predicted : Predicted probability distribution, same format as 'true'. treat_arrays : treat_arrays the true and predicted array Examples --------- >>> import numpy as np >>> from SeqMetrics import kl_divergence >>> t = np.array([0.1, 0.2, 0.3, 0.2, 0.2]) >>> p = np.array([0.2, 0.2, 0.2, 0.2, 0.2]) >>> divergence = kl_divergence(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # taken from https://lightning.ai/docs/torchmetrics/stable/regression/kl_divergence.html predicted = predicted / predicted.sum(axis=-1, keepdims=True) true = true / true.sum(axis=-1, keepdims=True) res = predicted * np.log(predicted / true) res[predicted == 0] = 0.0 kl = res.sum(axis=-1) return float(kl)
[docs]def log_cosh_error( true, predicted, treat_arrays: bool = True, **treat_arrays_kws )->float: """ `Log-Cosh Error <https://doi.org/10.1016/j.compchemeng.2022.107933>`_ .. math:: \\text{Log-Cosh Error} = \\frac{1}{n} \\sum_{i=1}^{n} \\log \\left( \\cosh(\\text{predicted}_i - \\text{true}_i) \\right) Parameters ---------- true : True/observed/actual/target values. It must be a numpy array, pandas series/DataFrame, or a list. predicted : Predicted values, same format as 'true'. treat_arrays : treat_arrays the true and predicted array Examples --------- >>> import numpy as np >>> from SeqMetrics import log_cosh_error >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> error = log_cosh_error(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) true = np.asarray(true) predicted = np.asarray(predicted) # Calculation of Log-Cosh Error error = np.log(np.cosh(predicted - true)) return float(np.mean(error).item())
[docs]def minkowski_distance( true, predicted, order =1, treat_arrays: bool = True, **treat_arrays_kws )->float: """ `Minkowski Distance <https://doi.org/10.1016/j.imu.2024.101492>`_ .. math:: D_{Minkowski} = \\left( \\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right|^p \\right)^{\\frac{1}{p}} Parameters ---------- true : True/observed/actual/target values. It must be a numpy array, pandas series/DataFrame, or a list. predicted : Predicted values, same format as 'true'. order : The order of the norm of the difference. `order=2` is equivalent to the Euclidean distance, `order=1` is the Manhattan distance. treat_arrays : treat_arrays the true and predicted array Examples --------- >>> import numpy as np >>> from SeqMetrics import minkowski_distance >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> order = 2 # Euclidean distance >>> distance = minkowski_distance(t, p, order) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) true = np.asarray(true) predicted = np.asarray(predicted) # Calculation of Minkowski Distance return float(np.sum(np.abs(true - predicted) ** order) ** (1 / order))
[docs]def tweedie_deviance_score( true, predicted, power=0, treat_arrays: bool = True, **treat_arrays_kws )->float: """ `Tweedie Deviance Score <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_tweedie_deviance.html>`_ .. math:: D(\\text{true}, \\text{predicted}) = \\frac{1}{n} \\sum_{i=1}^{n} (\\text{true}_i - \\text{predicted}_i)^2 .. math:: D(\\text{true}, \\text{predicted}) = 2 \\sum_{i=1}^{n} \\left( \\text{true}_i \\log\\left(\\frac{\\text{true}_i + (\\text{true}_i = 0)}{\\text{predicted}_i}\\right) - \\text{true}_i + \\text{predicted}_i \\right) .. math:: D(\\text{true}, \\text{predicted}) = 2 \\sum_{i=1}^{n} \\left( \\frac{\\text{true}_i}{\\text{predicted}_i} - \\log\\left(\\frac{\\text{true}_i}{\\text{predicted}_i}\\right) - 1 \\right) .. math:: D(\\text{true}, \\text{predicted}) = 2 \\sum_{i=1}^{n} \\left( \\frac{(\\text{true}_i - \\text{predicted}_i)^2}{\\text{true}_i^2 \\text{predicted}_i} \\right) Parameters ---------- true : True/observed/actual/target values. It must be a numpy array, pandas series/DataFrame, or a list. predicted : Predicted values, same format as 'true'. power : The power determines the underlying target distribution. `power=0` for Normal, `power=1` for Poisson, `power=2` for Gamma, and `power=3` for Inverse Gaussian. treat_arrays : treat_arrays the true and predicted array Examples --------- >>> import numpy as np >>> from SeqMetrics import tweedie_deviance_score >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> power = 2 # Gamma distribution >>> score = tweedie_deviance_score(t, p, power) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) true = np.asarray(true) predicted = np.asarray(predicted) # Tweedie Deviance Score calculation if power == 0: # Normal distribution return np.mean((true - predicted) ** 2).item() elif power == 1: # Poisson distribution return 2 * np.sum(true * np.log((true + (true == 0)) / predicted) - true + predicted) elif power == 2: # Gamma distribution return 2 * np.sum((true / predicted) - np.log(true / predicted) - 1) elif power == 3: # Inverse Gaussian distribution return 2 * np.sum((true - predicted) ** 2 / (true ** 2 * predicted)) else: raise ValueError("Invalid power value. Power must be 0 (Normal), 1 (Poisson), 2 (Gamma), or 3 (Inverse Gaussian).")
[docs]def mre( true, predicted, benchmark:np.ndarray = None, treat_arrays: bool = True, **treat_arrays_kws )->float: """ `mean relative error <https://doi.org/10.1016/j.trd.2022.103505>`_ .. math:: \\text{MRE} = \\frac{1}{n} \\sum_{i=1}^{n} \\left| \\frac{\\text{true}_i - \\text{predicted}_i}{\\text{true}_i} \\right| Parameters ---------- true : True/observed/actual/target values. It must be a numpy array, pandas series/DataFrame, or a list. predicted : Predicted values, same format as 'true'. benchmark : treat_arrays : treat_arrays the true and predicted array Examples --------- >>> import numpy as np >>> from SeqMetrics import mre >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> score = mre(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) re = _relative_error(true, predicted, benchmark) return float(np.mean(re).item())
# def peak_flow_ratio( # true, # predicted, # treat_arrays: bool = True, # **treat_arrays_kws # )->float: # """ # Peak flow ratio is defined the ratio of the highest simulated to the highest # observed flow rates (Broekhuizen et al., 2020). # # https://doi.org/10.5194/hess-24-869-2020 # # Parameters # ---------- # true : # True/observed/actual/target values. It must be a numpy array, # pandas series/DataFrame, or a list. # predicted : # Predicted values, same format as 'true'. # treat_arrays : # treat_arrays the true and predicted array # # Examples # --------- # >>> import numpy as np # >>> from SeqMetrics import peak_flow_ratio # >>> t = np.array([1, 2, 3, 4, 5]) # >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) # >>> score = mre(t, p) # """ # # true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # o_max = np.max(true) # s_max = np.max(predicted) # return float(s_max/o_max)
[docs]def mape_for_peaks( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Mean Absolute Percentage Error for peaks which are found using `scipy.singnal.find_peaks <https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html>`_ .. math:: \\text{MAPE}_\\text{peak} = \\frac{1}{P}\\sum_{p=1}^{P} \\left |\\frac{Q_{s,p} - Q_{o,p}}{Q_{o,p}} \\right | \\times 100, Parameters ---------- true : True/observed/actual/target values. It must be a numpy array, pandas series/DataFrame, or a list. predicted : Predicted values, same format as 'true'. treat_arrays : treat_arrays the true and predicted array https://github.com/neuralhydrology/neuralhydrology/blob/master/neuralhydrology/evaluation/metrics.py#L707 Examples --------- >>> import numpy as np >>> from SeqMetrics import mape_for_peaks >>> t = np.array([1, 2, 3, 4, 5]) >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) >>> score = mre(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) peaks, _ = find_peaks(true, prominence=np.std(true)) if peaks.size == 0: return np.nan true = true[peaks] predicted = predicted[peaks] return mape(true, predicted, treat_arrays=False)
# todo, relative_error following equation from Moriasi does not return scaler value # it taken mean, then what will be the difference between this and mre? # def relative_error( # true, # predicted, # treat_arrays: bool = True, # **treat_arrays_kws # ): # """ # Relative Error. It indicates the mismatch that # occurs between the observed and modeled values, expressed # in terms of percentages. # It quantifies the relative deviations between observed/true # and predicted values. This significantly reduces the influence of absolute # differences at peaks. The absolute lower differences during low flow # periods are enhanced because they are significant if looked at in a # relative sense. As a result, there might be a systematic over- or underprediction during low flow periods. # It used along with other statistics to quantify low # flow simulations Moriasi et al., 2007. # # Parameters # ---------- # true : # true/observed/actual/target values. It must be a numpy array, # or pandas series/DataFrame or a list. # predicted : # simulated/predicted values # treat_arrays : # treat_arrays the true and predicted array # # Examples # --------- # >>> import numpy as np # >>> from SeqMetrics import relative_error # >>> t = np.array([1, 2, 3, 4, 5]) # >>> p = np.array([1.1, 1.9, 3.1, 4.2, 4.8]) # >>> score = mre(t, p) # """ # true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) # # error = true - predicted # # return np.abs(error / (true + EPS)) * 100
[docs]def manhattan_distance( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Manhattan distance, also known as cityblock distance or taxicab norm. See `Blanco-Mallo et al., 2023 <https://doi.org/10.1016/j.patcog.2023.109646>`_ and `Alexei Botchkarev 2019 <https://www.ijikm.org/Volume14/IJIKMv14p045-076Botchkarev5064.pdf>`_ on the use of distances in performance measures. .. math:: D_{\\text{manhattan}} = \\sum_{i=1}^{n} \\left| \\text{true}_i - \\text{predicted}_i \\right| Parameters ---------- true : True/observed/actual/target values. It must be a numpy array, pandas series/DataFrame, or a list. predicted : Predicted values, same format as 'true'. treat_arrays : treat_arrays the true and predicted array Examples --------- >>> import numpy as np >>> from SeqMetrics import manhattan_distance >>> t = np.random.random(100) >>> p = np.random.random(100) >>> manhattan_distance(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) return float(np.sum(np.abs(true - predicted)).item())
[docs]def coeff_of_persistence( true, predicted, lag: int = 1, treat_arrays: bool = True, **treat_arrays_kws ) -> float: """ Coefficient of Persistence as introducted by `Kitanidis and Bras <https://doi.org/10.1029/WR016i006p01025>`_ . Varies between -inf to 1. The higher the better. Parameters ---------- true : True/observed/actual/target values. It must be a numpy array, pandas series/DataFrame, or a list. predicted : Predicted values, same format as 'true'. lag : The lag for the baseline treat_arrays : treat_arrays the true and predicted array https://rdrr.io/cran/hydroGOF/man/cp.html Examples --------- >>> import numpy as np >>> from SeqMetrics import manhattan_distance >>> t = np.random.random(100) >>> p = np.random.random(100) >>> coeff_of_persistence(t, p) """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) baseline = np.roll(true, lag) true, predicted, baseline = true[lag:], predicted[lag:], baseline[lag:] numerator = np.sum(np.abs(np.subtract(true, predicted)) ** 2.0) / len(true) denominator = np.sum(np.abs(np.subtract(true, baseline)) ** 2.0) / len(true) return 1.0 - numerator/denominator
def reciprocal_nse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws )->float: """ The reciprocal NSE focuses the error metric on low flows `Pushpalatha et al., 2012 <https://doi.org/10.1016/j.jhydrol.2011.11.055>`_ by comparing the reciprocals of the observed and modelled flows. Formula taken from `Clark et al., 2024 <https://doi.org/10.5194/hess-28-1191-2024>`_ """ true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) recip_pred = 1.0 / (predicted+1.0) recip_true = 1.0 / (true+1.0) recip_true_mean = 1.0 / (np.mean(true)+1.0) numerator = sum((recip_pred - recip_true) ** 2) denominator = sum((true - recip_true_mean) ** 2) _nse = 1 - numerator / denominator return float(_nse.item()) def coeff_of_extrapolation( true, predicted, log_transform: bool = False, treat_arrays: bool = True, **treat_arrays_kws )->float: """ Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : array of simulated/modeled/predicted/estimated values. It must also be array like and have same length as true. log_scale : bool If True, the values will be log transformed before calculating the coefficient of extrapolation. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. """ # https://github.com/NOAA-OWP/hydrotools/blob/main/python/metrics/src/hydrotools/metrics/metrics.py#L303 true, predicted = maybe_treat_arrays(treat_arrays, true, predicted, 'regression', **treat_arrays_kws) if log_transform: true = np.log(true) predicted = np.log(predicted) slope = np.diff(true)[:-1] base = true[2:] + slope raise NotImplementedError def roce( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ Runoff coefficient percent error after equation 3 in `Kollat et al., 2012 <https://doi.org/10.1029/2011WR011534>`_ Parameters ---------- true : true/observed/actual/target values. It must be a numpy array, or pandas series/DataFrame or a list. predicted : array of simulated/modeled/predicted/estimated values. It must also be array like and have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. """ raise NotImplementedError def trmse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ The Box-Cox transformed root mean squared error (TRMSE) is a metric that emphasizes low flows using box-cox transformation (`Kollat et al., 2012 <https://doi.org/10.1029/2011WR011534>`_, `Mirirli et al., 2003 <https://doi.org/10.1029/WS006p0113>`_, `Tang et al., 2006 https://doi.org/10.5194/hess-10-289-2006>`_). # https://github.com/CUG-hydro/VICResOpt/blob/76558f2f1aab9ab199d9cd461e7afdcb20048b15/src/OptCalib/indices.py#L20 Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def fdc_slope( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ The slope of the flow duration curve (FDC) after `Kollat et al., 2012 <https://doi.org/10.1029/2011WR011534>`_ . Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def drv( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): # https://rstudio-pubs-static.s3.amazonaws.com/433152_56d00c1e29724829bad5fc4fd8c8ebff.html """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def gini( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): # https://github.com/benhamner/Metrics/blob/master/MATLAB/metrics/gini.m """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def log_likelihood( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): # https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/elementwise.py#L202 """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def cross_entropy( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): # https://datascience.stackexchange.com/q/20296 """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def vaf( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): # https://www.dcsc.tudelft.nl/~jwvanwingerden/lti/doc/html/vaf.html """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def resid_std_error( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): # https://www.statology.org/residual-standard-error-r/ """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def eff_coeff( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): # https://doi.org/10.1016/j.csite.2022.101797 """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def overall_index( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): # https://doi.org/10.1016/j.csite.2022.101797 """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def resid_mass_coeff( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): # https://doi.org/10.1016/j.csite.2022.101797 """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def log_euclid_dist( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def relative_bias( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ The following equation is taken after `Jiang et al., 2022 <https://doi.org/10.5194/essd-15-621-2023>`_ . # https://github.com/Ouranosinc/xclim/blob/4198e8bcc9d21dd6a89b5c93cf58972a69b87758/xclim/sdba/measures.py#L183 Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def ppmc( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ pearson product moment correlation https://github.com/flowmatters/veneer-py/blob/86bb9beb2d57f07e95a89d1f7bc410166a791118/veneer/stats.py#L135 Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def weighted_nse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ weighted Nash-Sutcliffe efficiency between ``true`` and ``predicted`` arrays as proposed by `Hundecha and Bardossy (2004) <https://doi.org/10.1016/j.jhydrol.2004.01.002>`_ to provide more focus on peaks. Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def weighted_seasonal_nse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ It was proposed by `Zambrano-Bigiarini and Bellin (2012) <https://meetingorganizer.copernicus.org/EGU2012/EGU2012-11549-1.pdf>`_ . Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def unbiased_rmse( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ unbiased RMSE Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def kge_lf( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ Kling-Gupta Efficiency for low flows developed by `Pizarro and Jorquera, 2024 <https://doi.org/10.1016/j.jhydrol.2024.131071>`_ or `Gracia et al., 2017 <https://doi.org/10.1080/02626667.2017.1308511>`_ ? Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float """ raise NotImplementedError def peak_bias( true, predicted, treat_arrays: bool = True, **treat_arrays_kws ): """ It is also known as high flow bias. Its values range from 0 to inf with 0 being the best/ideal value. Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float Examples --------- >>> import numpy as np >>> from SeqMetrics import peak_bias """ raise NotImplementedError def annual_peak_flow_bias( true, predicted, treat_arrays: bool = True, **treat_arrays_kws )->float: """ Annual peak flow bias as proposed by `Mizukami et al. (2019) <https://doi.org/10.5194/hess-23-2601-2019>`_ to provide more focus on peaks. Subsequently a model which reduces annual peak flow bias will produce best peak estimates. Its values vary from 0 to infinity with 0 being the best. Higher values indicate larger difference in peaks. .. math:: APFB = \\sqrt{ [mu_s / mu_o] - 1]^2} where mu is mean of annual peak flow series. Parameters ---------- true : a pandas Series of true/observed/actual/target values with DateTimeIndex. predicted : a pandas Series of simulated/modeled/predicted/estimated with DateTimeIndex. It must also have same length as true. treat_arrays : process the true and predicted arrays using :py:func:`SeqMetrics.utils.treat_arrays` function treat_arrays_kws: Additional keyword arguments to be passed to :py:func:`SeqMetrics.utils.treat_arrays` function. Returns ------- float Notes ----- This metric requires pandas library. The index/timesteps of true and predicted arrays must exactly align with each other. Examples --------- >>> import numpy as np >>> from SeqMetrics import annual_peak_flow_bias >>> t = pd.Series(random_state.random(100), index=pd.date_range('1/1/2000', periods=100)) >>> p = pd.Series(random_state.random(100), index=pd.date_range('1/1/2000', periods=100)) >>> score = annual_peak_flow_bias(t, p) """ try: import pandas as pd except (ModuleNotFoundError, ImportError): raise NotImplementedError("This metric requires pandas library.") assert isinstance(true, (pd.Series, pd.DataFrame)), "true must be a pandas Series or DataFrame." assert isinstance(predicted, (pd.Series, pd.DataFrame)), "predicted must be a pandas Series or DataFrame." if isinstance(true, pd.DataFrame): assert true.shape[1] == 1, "true must be a single column DataFrame." true = true.squeeze() if isinstance(predicted, pd.DataFrame): assert predicted.shape[1] == 1, "predicted must be a single column DataFrame." predicted = predicted.squeeze() if not predicted.index.equals(true.index): raise ValueError("Simulated and observed data must have the same dates!") # should return np.nan when the data is less than 1 year # Grouping data by year and calculating the maximum (peak flow) sim_max = predicted.groupby(predicted.index.year).max() obs_max = true.groupby(true.index.year).max() return float(np.sqrt(((sim_max / obs_max - 1) ** 2).mean()))