Module imodelsx.metrics

Expand source code
from functools import partial

import numpy as np
from sklearn.metrics import (accuracy_score, auc, balanced_accuracy_score,
                             brier_score_loss, mean_squared_error,
                             precision_recall_curve, precision_score, r2_score,
                             recall_score, roc_auc_score)


def gini_score(y_true, y_pred):
    """Score the label purity of the samples selected by ``y_pred``.

    ``y_pred`` is cast to a boolean mask; the mean of ``y_true`` over the
    selected samples is passed to ``gini_binary``. When the mask selects no
    samples or every sample, the mean over all of ``y_true`` is used instead.

    Purer (more accurate) is better.

    NOTE(review): both arguments are used as array objects supporting
    ``.astype`` / ``.mean`` and boolean-mask indexing (e.g. numpy arrays);
    ``y_true`` is presumably binary 0/1 labels -- confirm against callers.
    """
    mask = y_pred.astype(bool)
    n_selected = mask.sum()
    # A mask that picks nothing or everything does not split the data,
    # so fall back to the full label vector.
    is_degenerate = n_selected == 0 or n_selected == mask.size
    labels = y_true if is_degenerate else y_true[mask]
    return gini_binary(labels.mean())


def gini_binary(y_mean: float) -> float:
    """Return ``y_mean**2 + (1 - y_mean)**2`` for a binary class proportion.

    The value is 1 when ``y_mean`` is 0 or 1 (pure) and 0.5 when ``y_mean``
    is 0.5 (evenly mixed).
    """
    complement = 1 - y_mean
    return y_mean ** 2 + complement ** 2


def entropy_binary(y_mean: float) -> float:
    """Return the binary entropy, in bits, of the proportion ``y_mean``.

    Computes ``-p * log2(p) - (1 - p) * log2(1 - p)`` with ``p = y_mean``,
    using the convention ``0 * log2(0) = 0``. This makes the result 0 at
    ``p == 0`` and ``p == 1`` instead of ``nan`` (with RuntimeWarnings),
    which the naive formula produces there.

    ``y_mean`` may be a scalar or an array-like; array input is evaluated
    element-wise. Values outside ``[0, 1]`` are not handled specially and
    still yield ``nan`` via ``log2`` of a negative number.
    """
    p = np.asarray(y_mean)
    q = 1 - p
    # Replace exact zeros by 1 inside the log only: log2(1) == 0, so the
    # product p * log2(p) is 0 there without ever evaluating log2(0).
    log_p = np.log2(np.where(p == 0, 1.0, p))
    log_q = np.log2(np.where(q == 0, 1.0, q))
    # "0.0 - x" rather than "-x" so the boundary result is +0.0, not -0.0.
    return 0.0 - (p * log_p + q * log_q)


def auprc_score(y_true, y_pred):
    """Area under the precision-recall curve.

    Passes ``y_true`` and ``y_pred`` to ``precision_recall_curve`` and feeds
    the resulting recall (x) and precision (y) arrays to ``auc``.
    """
    precision_vals, recall_vals, _thresholds = precision_recall_curve(
        y_true, y_pred
    )
    return auc(x=recall_vals, y=precision_vals)


# Registries mapping a metric name to a callable. Every entry is either an
# sklearn metric, a partial of one, or a two-argument function defined here.
# NOTE(review): entries are presumably invoked as metric(y_true, y_pred) --
# confirm against callers.

# Classification metrics; the "discrete" name suggests y_pred holds hard class
# labels rather than scores -- TODO confirm.
metrics_classification_discrete = {
    "accuracy": accuracy_score,
    # zero_division=0 is forwarded to sklearn: the score is reported as 0
    # when its denominator is zero.
    "precision": partial(precision_score, zero_division=0),
    "recall": partial(recall_score, zero_division=0),
    "balanced_accuracy": balanced_accuracy_score,
}
# Classification metrics; the "proba" name suggests y_pred holds probabilities
# or scores -- TODO confirm.
metrics_classification_proba = {
    "roc_auc": roc_auc_score,
    "brier_score_loss": brier_score_loss,
    "auprc": auprc_score,
}
# Regression metrics.
metrics_regression = {
    "r2": r2_score,
    "mse": mean_squared_error,
    # Entry [0, 1] of np.corrcoef's output: the correlation between y_true
    # and y_pred (assumes both are 1-D -- TODO confirm).
    "corr": lambda y_true, y_pred: np.corrcoef(y_true, y_pred)[0, 1],
}

Functions

def auprc_score(y_true, y_pred)

Area under the precision-recall curve.

Expand source code
def auprc_score(y_true, y_pred):
    """Area under the precision-recall curve.

    Passes ``y_true`` and ``y_pred`` to ``precision_recall_curve`` and feeds
    the resulting recall (x) and precision (y) arrays to ``auc``.
    """
    precision_vals, recall_vals, _thresholds = precision_recall_curve(
        y_true, y_pred
    )
    return auc(x=recall_vals, y=precision_vals)
def entropy_binary(y_mean: float) ‑> float
Expand source code
def entropy_binary(y_mean: float) -> float:
    """Return the binary entropy, in bits, of the proportion ``y_mean``.

    Computes ``-p * log2(p) - (1 - p) * log2(1 - p)`` with ``p = y_mean``,
    using the convention ``0 * log2(0) = 0``. This makes the result 0 at
    ``p == 0`` and ``p == 1`` instead of ``nan`` (with RuntimeWarnings),
    which the naive formula produces there.

    ``y_mean`` may be a scalar or an array-like; array input is evaluated
    element-wise. Values outside ``[0, 1]`` are not handled specially and
    still yield ``nan`` via ``log2`` of a negative number.
    """
    p = np.asarray(y_mean)
    q = 1 - p
    # Replace exact zeros by 1 inside the log only: log2(1) == 0, so the
    # product p * log2(p) is 0 there without ever evaluating log2(0).
    log_p = np.log2(np.where(p == 0, 1.0, p))
    log_q = np.log2(np.where(q == 0, 1.0, q))
    # "0.0 - x" rather than "-x" so the boundary result is +0.0, not -0.0.
    return 0.0 - (p * log_p + q * log_q)
def gini_binary(y_mean: float) ‑> float

Maps a proportion y_mean of 0 or 1 to 1, and y_mean = 0.5 to 0.5.

Expand source code
def gini_binary(y_mean: float) -> float:
    """Return ``y_mean**2 + (1 - y_mean)**2`` for a binary class proportion.

    The value is 1 when ``y_mean`` is 0 or 1 (pure) and 0.5 when ``y_mean``
    is 0.5 (evenly mixed).
    """
    complement = 1 - y_mean
    return y_mean ** 2 + complement ** 2
def gini_score(y_true, y_pred)

Purer (more accurate) is better

Expand source code
def gini_score(y_true, y_pred):
    """Score the label purity of the samples selected by ``y_pred``.

    ``y_pred`` is cast to a boolean mask; the mean of ``y_true`` over the
    selected samples is passed to ``gini_binary``. When the mask selects no
    samples or every sample, the mean over all of ``y_true`` is used instead.

    Purer (more accurate) is better.

    NOTE(review): both arguments are used as array objects supporting
    ``.astype`` / ``.mean`` and boolean-mask indexing (e.g. numpy arrays);
    ``y_true`` is presumably binary 0/1 labels -- confirm against callers.
    """
    mask = y_pred.astype(bool)
    n_selected = mask.sum()
    # A mask that picks nothing or everything does not split the data,
    # so fall back to the full label vector.
    is_degenerate = n_selected == 0 or n_selected == mask.size
    labels = y_true if is_degenerate else y_true[mask]
    return gini_binary(labels.mean())