Expand source code
import numpy as np
from scipy.stats import rankdata
def tauAP_b(x, y, decreasing=True):
"""
Weighted kendall tau correlation metric, which handles ties.
Proposed in "The Treatment of Ties in AP Correlation" by
Urbano and Marrero (2017). This is the python implementation
of ircor::tauAP_b from R.
Parameters
----------
x: array-like of shape (n,)
Numeric vector.
y: array-like of shape (n,)
Numeric vector of same length as x.
decreasing: bool
Should the sort order be increasing or decreasing (default)?
Returns
-------
Scalar value between -1 and 1, quantifying how much the
rankings of x and y agree with each other. A higher
values indicates greater similarity.
"""
if decreasing:
return tauAP_b(-x, -y, decreasing=False)
else:
return (_tauAP_b_ties(x, y) + _tauAP_b_ties(y, x)) / 2
def _tauAP_b_ties(x, y):
n = len(x)
rx = rankdata(x)
ry = rankdata(y, method="ordinal")
p = rankdata(y, method="min")
c_all = 0
not_top = np.argwhere(p != 1)
for i in not_top:
c_above = 0
for j in np.argwhere(p < p[i]):
sx = np.sign(rx[i] - rx[j])
sy = np.sign(ry[i] - ry[j])
if sx == sy:
c_above = c_above + 1
c_all = c_all + c_above/(p[i] - 1)
return 2 / len(not_top) * c_all - 1
def rbo(s, t, p, k=None, side="top", uneven_lengths=True):
"""
Rank-based overlap (RBO) metric.
Proposed in "A Similarity Measure for Indefinite Rankings" by
Webber et al. (2010). This is the python implementation
of gespeR::rbo from R.
Parameters
----------
s: array-like of shape (n,)
Numeric vector.
t: array-like of shape (n,)
Numeric vector of same length as s.
p: float between 0 and 1
Weighting parameter in [0, 1]. High p implies strong emphasis
on the top-ranked elements (i.e, the larger elements).
k: None or int
Evaluation depth for extrapolation
side: string in {"top", "bottom"}
Evaluate similarity between the top or the bottom of the
ranked lists.
uneven_lengths: bool
Indicator if lists have uneven lengths.
Returns
-------
Scalar value between 0 and 1, quantifying how much the
rankings of x and y agree with each other. A higher
values indicates greater similarity.
"""
assert side in ["top", "bottom"]
if k is None:
k = int(np.floor(max(len(s), len(t)) / 2))
if side == "top":
ids = {"s": _select_ids(s, "top"),
"t": _select_ids(t, "top")}
elif side == "bottom":
ids = {"s": _select_ids(s, "bottom"),
"t": _select_ids(t, "bottom")}
return min(1, _rbo_ext(ids["s"], ids["t"], p, k, uneven_lengths=uneven_lengths))
def _select_ids(x, side="top"):
assert side in ["top", "bottom"]
if side == "top":
return np.argsort(-x)
elif side == "bottom":
return np.argsort(x)
def _rbo_ext(x, y, p, k, uneven_lengths=True):
if len(x) <= len(y):
S = x
L = y
else:
S = y
L = x
l = min(k, len(L))
s = min(k, len(S))
if uneven_lengths:
Xd = [len(np.intersect1d(S[:(i+1)], L[:(i+1)])) for i in range(l)]
if l > s:
sl_range = np.arange(s+1, l+1)
else:
sl_range = np.arange(l, s+2)
result = ((1 - p) / p) * \
((sum(Xd[:l] / np.arange(1, l+1) * p**np.arange(1, l+1))) +
(sum(Xd[s-1] * (sl_range - s) / (s * sl_range) * p**sl_range))) + \
((Xd[l-1] - Xd[s-1]) / l + (Xd[s-1] / s)) * p**l
else:
k = min(s, k)
Xd = [len(np.intersect1d(x[:(i+1)], y[:(i+1)])) for i in range(k)]
Xk = Xd[k-1]
result = (Xk / k) * p**k + (((1 - p) / p) * sum((Xd / np.arange(1, k+1)) * p**np.arange(1, k+1)))
return result
Functions
def rbo(s, t, p, k=None, side='top', uneven_lengths=True)
-
Rank-based overlap (RBO) metric. Proposed in "A Similarity Measure for Indefinite Rankings" by Webber et al. (2010). This is the python implementation of gespeR::rbo from R.
Parameters
s
:array-like
ofshape (n,)
- Numeric vector.
t
:array-like
ofshape (n,)
- Numeric vector of same length as s.
p
:float between 0 and 1
- Weighting parameter in [0, 1]. High p implies strong emphasis on the top-ranked elements (i.e, the larger elements).
k
:None
orint
- Evaluation depth for extrapolation
side
:string in {"top", "bottom"}
- Evaluate similarity between the top or the bottom of the ranked lists.
uneven_lengths
:bool
- Indicator if lists have uneven lengths.
Returns
Scalar value between 0 and 1, quantifying how much the
rankings
ofx and y agree with each other. A higher
values indicates greater similarity.
Expand source code
def rbo(s, t, p, k=None, side="top", uneven_lengths=True): """ Rank-based overlap (RBO) metric. Proposed in "A Similarity Measure for Indefinite Rankings" by Webber et al. (2010). This is the python implementation of gespeR::rbo from R. Parameters ---------- s: array-like of shape (n,) Numeric vector. t: array-like of shape (n,) Numeric vector of same length as s. p: float between 0 and 1 Weighting parameter in [0, 1]. High p implies strong emphasis on the top-ranked elements (i.e, the larger elements). k: None or int Evaluation depth for extrapolation side: string in {"top", "bottom"} Evaluate similarity between the top or the bottom of the ranked lists. uneven_lengths: bool Indicator if lists have uneven lengths. Returns ------- Scalar value between 0 and 1, quantifying how much the rankings of x and y agree with each other. A higher values indicates greater similarity. """ assert side in ["top", "bottom"] if k is None: k = int(np.floor(max(len(s), len(t)) / 2)) if side == "top": ids = {"s": _select_ids(s, "top"), "t": _select_ids(t, "top")} elif side == "bottom": ids = {"s": _select_ids(s, "bottom"), "t": _select_ids(t, "bottom")} return min(1, _rbo_ext(ids["s"], ids["t"], p, k, uneven_lengths=uneven_lengths))
def tauAP_b(x, y, decreasing=True)
-
Weighted kendall tau correlation metric, which handles ties. Proposed in "The Treatment of Ties in AP Correlation" by Urbano and Marrero (2017). This is the python implementation of ircor::tauAP_b from R.
Parameters
x
:array-like
ofshape (n,)
- Numeric vector.
y
:array-like
ofshape (n,)
- Numeric vector of same length as x.
decreasing
:bool
- Should the sort order be increasing or decreasing (default)?
Returns
Scalar value between -1 and 1, quantifying how much the
rankings
ofx and y agree with each other. A higher
values indicates greater similarity.
Expand source code
def tauAP_b(x, y, decreasing=True): """ Weighted kendall tau correlation metric, which handles ties. Proposed in "The Treatment of Ties in AP Correlation" by Urbano and Marrero (2017). This is the python implementation of ircor::tauAP_b from R. Parameters ---------- x: array-like of shape (n,) Numeric vector. y: array-like of shape (n,) Numeric vector of same length as x. decreasing: bool Should the sort order be increasing or decreasing (default)? Returns ------- Scalar value between -1 and 1, quantifying how much the rankings of x and y agree with each other. A higher values indicates greater similarity. """ if decreasing: return tauAP_b(-x, -y, decreasing=False) else: return (_tauAP_b_ties(x, y) + _tauAP_b_ties(y, x)) / 2