Expand source code
import numpy as np
import pandas as pd
import random
from collections import Counter, defaultdict
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.multiclass import check_classification_targets, unique_labels
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from imodels.rule_list.bayesian_rule_list.brl_util import (
default_permsdic, preds_d_t, run_bdl_multichain_serial, merge_chains, get_point_estimate, get_rule_rhs
from imodels.rule_list.rule_list import RuleList
from imodels.util.convert import itemsets_to_rules
from imodels.util.extract import extract_fpgrowth
from imodels.util.rule import get_feature_dict, replace_feature_name, Rule
class BayesianRuleListClassifier(BaseEstimator, RuleList, ClassifierMixin):
    """Scikit-learn compatible wrapper for the Bayesian Rule List classifier.

    This is a scikit-learn compatible wrapper for the Bayesian Rule List
    classifier developed by Benjamin Letham. It produces a highly
    interpretable model (a list of decision rules) by sampling many different
    rule lists, trying to optimize for compactness and predictive performance.

    Parameters
    ----------
    listlengthprior : int, optional (default=3)
        Prior hyperparameter for expected list length (excluding null rule)
    listwidthprior : int, optional (default=1)
        Prior hyperparameter for expected list width (excluding null rule)
    maxcardinality : int, optional (default=2)
        Maximum cardinality of an itemset
    minsupport : float, optional (default=0.1)
        Minimum support (fraction between 0 and 1) of an itemset
    alpha : array_like, shape = [n_classes]
        prior hyperparameter for multinomial pseudocounts
    n_chains : int, optional (default=3)
        Number of MCMC chains for inference
    max_iter : int, optional (default=50000)
        Maximum number of iterations
    class1label : str, optional (default="class 1")
        Label or description of what the positive class (with y=1) means
    verbose : bool, optional (default=False)
        Verbose output
    random_state : int, optional (default=42)
        Random seed. With ``None`` the global generators are left untouched.
    """

    def __init__(self,
                 listlengthprior=3,
                 listwidthprior=1,
                 maxcardinality=2,
                 minsupport=0.1,
                 alpha=np.array([1., 1.]),
                 n_chains=3,
                 max_iter=50000,
                 class1label="class 1",
                 verbose=False,
                 random_state=42):
        self.listlengthprior = listlengthprior
        self.listwidthprior = listwidthprior
        self.maxcardinality = maxcardinality
        self.minsupport = minsupport
        self.alpha = alpha
        self.n_chains = n_chains
        self.max_iter = max_iter
        self.class1label = class1label
        self.verbose = verbose
        self._zmin = 1
        self.thinning = 1  # The thinning rate
        # Number of samples to drop as burn-in in-simulation.
        # NOTE(review): derived once here, so a later change of `max_iter`
        # (e.g. through set_params) does not update it.
        self.burnin = self.max_iter // 2
        self.d_star = None  # stays None until fit() finds a point estimate
        self.random_state = random_state
        self.seed()

    def seed(self):
        """Seed the global `random` and `numpy.random` generators from `random_state`."""
        if self.random_state is not None:
            random.seed(self.random_state)
            np.random.seed(self.random_state)

    def _setlabels(self, X, feature_names=None):
        """Store feature names, deriving defaults from X when none are given.

        String column labels of a DataFrame are used when available;
        otherwise features are enumerated as "ft1", "ft2", ...
        """
        if feature_names is None or len(feature_names) == 0:
            if isinstance(X, pd.DataFrame) and (
                    'object' in str(X.columns.dtype) or 'str' in str(X.columns.dtype)):
                feature_names = X.columns
            else:
                # Count columns through the shape: X[0] on a DataFrame would
                # select the column labelled 0 rather than the first row.
                feature_names = ["ft" + str(i + 1) for i in range(np.shape(X)[1])]
        self.feature_names = feature_names

    @staticmethod
    def _active_items_per_row(X_df_onehot):
        """Return, for every row, the set of column labels whose cell equals 1."""
        labels = list(X_df_onehot.columns)
        active = np.asarray(X_df_onehot.values == 1)
        return [{label for label, on in zip(labels, row) if on} for row in active]

    @staticmethod
    def _rows_containing(lhs, row_items):
        """Return the indices of the rows whose active items include every item of `lhs`."""
        wanted = set(lhs)  # built once per itemset rather than once per row
        return {i for i, items in enumerate(row_items) if wanted <= items}

    def fit(self, X, y, feature_names: list = None, verbose=False):
        """Fit rule lists to data.

        Note: The BRL algorithm requires numeric features to be discretized into bins
        prior to fitting. See imodels.discretization or sklearn.preprocessing for
        helpful utilities.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data; every entry must be 0 or 1.
        y : array_like, shape = [n_samples]
            Labels; exactly two distinct values are required.
            NOTE(review): the label matrix is built as ``(1 - y, y)``, which
            looks like it expects a 0/1 encoding -- confirm.
        feature_names : array_like, shape = [n_features], optional (default: None)
            String labels for each feature, forwarded to ``get_feature_dict``.
        verbose : bool
            Forwarded to ``extract_fpgrowth``; the sampler itself uses
            ``self.verbose``.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If y does not hold exactly two distinct values, or if X has an
            entry other than 0 or 1.
        """
        self.seed()

        if len(set(y)) != 2:
            raise ValueError("Only binary classification is supported at this time!")

        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.n_features_in_ = X.shape[1]
        self.classes_ = unique_labels(y)

        # Check that all features are either categorical or discretized
        if not np.all((X == 1) | (X == 0)):
            raise ValueError("All numeric features must be discretized prior to fitting!")

        self.feature_dict_ = get_feature_dict(X.shape[1], feature_names)
        self.feature_placeholders = np.array(list(self.feature_dict_.keys()))
        self.feature_names = np.array(list(self.feature_dict_.values()))

        X_df = pd.DataFrame(X, columns=self.feature_placeholders)
        itemsets = extract_fpgrowth(X_df, minsupport=self.minsupport,
                                    maxcardinality=self.maxcardinality,
                                    verbose=verbose)

        # Now form the data-vs.-lhs set: entry j is the set of data points
        # that contain itemset j (that is, satisfy rule j).
        row_items = self._active_items_per_row(X_df)
        itemset_support_inds = [set(range(X_df.shape[0]))]  # the default rule satisfies all data
        itemset_support_inds.extend(
            self._rows_containing(lhs, row_items) for lhs in itemsets)

        # now form lhs_len (the leading 0 belongs to the null rule)
        lhs_len = [0] + [len(lhs) for lhs in itemsets]
        nruleslen = Counter(lhs_len)
        lhs_len = np.array(lhs_len)
        self.itemsets = ['null', *itemsets]

        Xtrain = itemset_support_inds
        Ytrain = np.vstack((1 - np.array(y), y)).T.astype(int)
        permsdic = defaultdict(default_permsdic)  # We will store here the MCMC results

        # Do MCMC
        res, _rhat = run_bdl_multichain_serial(
            self.max_iter, self.thinning, self.alpha, self.listlengthprior,
            self.listwidthprior, Xtrain, Ytrain, nruleslen, lhs_len,
            self.maxcardinality, permsdic, self.burnin, self.n_chains,
            [None] * self.n_chains, verbose=self.verbose, seed=self.random_state)

        # Merge the chains
        permsdic = merge_chains(res)

        # The point estimate, BRL-point
        self.d_star = get_point_estimate(permsdic, lhs_len, Xtrain, Ytrain, self.alpha, nruleslen,
                                         self.maxcardinality, self.listlengthprior,
                                         self.listwidthprior, verbose=self.verbose)

        if self.d_star:
            # Compute the rule consequent
            self.theta, self.ci_theta = get_rule_rhs(Xtrain, Ytrain, self.d_star, self.alpha, True)
            self.final_itemsets = np.array(self.itemsets, dtype=object)[self.d_star]
            rule_strs = itemsets_to_rules(self.final_itemsets)
            self.rules_without_feature_names_ = [Rule(r) for r in rule_strs]
            self.rules_ = [
                replace_feature_name(rule, self.feature_dict_)
                for rule in self.rules_without_feature_names_
            ]

        # NOTE(review): reads `final_itemsets`, which is only assigned when a
        # point estimate was found -- confirm the intended behavior otherwise.
        self.complexity_ = self._get_complexity()

        return self

    def _get_complexity(self):
        """Return the number of rule terms in the final list, plus one."""
        # String entries (the 'null' default rule) contribute no terms.
        n_rule_terms = sum(
            len(iset) for iset in self.final_itemsets if not isinstance(iset, str))
        return n_rule_terms + 1

    def __repr__(self, decimals=1):
        """Render the fitted rule list as IF / ELSE IF / ELSE lines with probabilities."""
        if not self.d_star:
            return "(Untrained RuleListClassifier)"

        detect = ""
        if self.class1label != "class 1":
            detect = "for detecting " + self.class1label
        header = "Trained RuleListClassifier " + detect + "\n"
        separator = "=" * len(header) + "\n"

        def percent(value):
            return str(np.round(value * 100, decimals))

        n_rules = len(self.rules_)
        lines = []
        for i in range(n_rules + 1):
            if i != n_rules:
                condition = "ELSE IF " + str(self.rules_[i]) + " THEN"
            else:
                condition = "ELSE"
            lines.append(
                condition + " probability of " + self.class1label + ": "
                + percent(self.theta[i]) + "% ("
                + percent(self.ci_theta[i][0]) + "%-"
                + percent(self.ci_theta[i][1]) + "%)\n")
        # [5:] drops the leading "ELSE " of the first line.
        return header + separator + "".join(lines)[5:] + separator[1:]

    def _to_itemset_indices(self, X_df_onehot):
        """Return, per stored itemset, the set of row indices that satisfy it.

        Entry 0 belongs to the default rule and holds every row index. The
        input frame is only read, never modified.
        """
        row_items = self._active_items_per_row(X_df_onehot)
        support = [set(range(X_df_onehot.shape[0]))]  # the default rule satisfies all data
        support.extend(
            self._rows_containing(lhs, row_items) for lhs in self.itemsets[1:])
        return support

    def predict_proba(self, X):
        """Compute probabilities of possible outcomes for samples in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        T : array-like, shape = [n_samples, n_classes]
            Returns the probability of the sample for each class in
            the model. The columns correspond to the classes in sorted
            order, as they appear in the attribute `classes_`.
        """
        check_is_fitted(self)
        X = check_array(X)
        D = pd.DataFrame(X, columns=self.feature_placeholders)
        N = len(D)
        X2 = self._to_itemset_indices(D)
        P = preds_d_t(X2, np.zeros((N, 1), dtype=int), self.d_star, self.theta)
        return np.vstack((1 - P, P)).T

    def predict(self, X, threshold=0.1):
        """Perform classification on samples in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
        threshold : float, optional (default=0.1)
            A sample is labelled 1 when its second `predict_proba` column is
            at least this value.

        Returns
        -------
        y_pred : array, shape = [n_samples]
            Class labels (0 or 1) for samples in X.
        """
        # predict_proba performs the fitted-state and input validation.
        return 1 * (self.predict_proba(X)[:, 1] >= threshold)
class BayesianRuleListClassifier (listlengthprior=3, listwidthprior=1, maxcardinality=2, minsupport=0.1, alpha=array([1., 1.]), n_chains=3, max_iter=50000, class1label='class 1', verbose=False, random_state=42)
This is a scikit-learn compatible wrapper for the Bayesian Rule List classifier developed by Benjamin Letham. It produces a highly interpretable model (a list of decision rules) by sampling many different rule lists, trying to optimize for compactness and predictive performance.
listlengthprior : int, optional (default=3)
- Prior hyperparameter for expected list length (excluding null rule)
listwidthprior : int, optional (default=1)
- Prior hyperparameter for expected list width (excluding null rule)
maxcardinality : int, optional (default=2)
- Maximum cardinality of an itemset
minsupport : float, optional (default=0.1)
- Minimum support (fraction between 0 and 1) of an itemset
alpha : array_like, shape = [n_classes]
- Prior hyperparameter for multinomial pseudocounts
n_chains : int, optional (default=3)
- Number of MCMC chains for inference
max_iter : int, optional (default=50000)
- Maximum number of iterations
class1label : str, optional (default="class 1")
- Label or description of what the positive class (with y=1) means
verbose : bool, optional (default=False)
- Verbose output
random_state : int
- Random seed
Expand source code
class BayesianRuleListClassifier(BaseEstimator, RuleList, ClassifierMixin):
    """Scikit-learn compatible wrapper for the Bayesian Rule List classifier.

    This is a scikit-learn compatible wrapper for the Bayesian Rule List
    classifier developed by Benjamin Letham. It produces a highly
    interpretable model (a list of decision rules) by sampling many different
    rule lists, trying to optimize for compactness and predictive performance.

    Parameters
    ----------
    listlengthprior : int, optional (default=3)
        Prior hyperparameter for expected list length (excluding null rule)
    listwidthprior : int, optional (default=1)
        Prior hyperparameter for expected list width (excluding null rule)
    maxcardinality : int, optional (default=2)
        Maximum cardinality of an itemset
    minsupport : float, optional (default=0.1)
        Minimum support (fraction between 0 and 1) of an itemset
    alpha : array_like, shape = [n_classes]
        prior hyperparameter for multinomial pseudocounts
    n_chains : int, optional (default=3)
        Number of MCMC chains for inference
    max_iter : int, optional (default=50000)
        Maximum number of iterations
    class1label : str, optional (default="class 1")
        Label or description of what the positive class (with y=1) means
    verbose : bool, optional (default=False)
        Verbose output
    random_state : int, optional (default=42)
        Random seed. With ``None`` the global generators are left untouched.
    """

    def __init__(self,
                 listlengthprior=3,
                 listwidthprior=1,
                 maxcardinality=2,
                 minsupport=0.1,
                 alpha=np.array([1., 1.]),
                 n_chains=3,
                 max_iter=50000,
                 class1label="class 1",
                 verbose=False,
                 random_state=42):
        self.listlengthprior = listlengthprior
        self.listwidthprior = listwidthprior
        self.maxcardinality = maxcardinality
        self.minsupport = minsupport
        self.alpha = alpha
        self.n_chains = n_chains
        self.max_iter = max_iter
        self.class1label = class1label
        self.verbose = verbose
        self._zmin = 1
        self.thinning = 1  # The thinning rate
        # Number of samples to drop as burn-in in-simulation.
        # NOTE(review): derived once here, so a later change of `max_iter`
        # (e.g. through set_params) does not update it.
        self.burnin = self.max_iter // 2
        self.d_star = None  # stays None until fit() finds a point estimate
        self.random_state = random_state
        self.seed()

    def seed(self):
        """Seed the global `random` and `numpy.random` generators from `random_state`."""
        if self.random_state is not None:
            random.seed(self.random_state)
            np.random.seed(self.random_state)

    def _setlabels(self, X, feature_names=None):
        """Store feature names, deriving defaults from X when none are given.

        String column labels of a DataFrame are used when available;
        otherwise features are enumerated as "ft1", "ft2", ...
        """
        if feature_names is None or len(feature_names) == 0:
            if isinstance(X, pd.DataFrame) and (
                    'object' in str(X.columns.dtype) or 'str' in str(X.columns.dtype)):
                feature_names = X.columns
            else:
                # Count columns through the shape: X[0] on a DataFrame would
                # select the column labelled 0 rather than the first row.
                feature_names = ["ft" + str(i + 1) for i in range(np.shape(X)[1])]
        self.feature_names = feature_names

    @staticmethod
    def _active_items_per_row(X_df_onehot):
        """Return, for every row, the set of column labels whose cell equals 1."""
        labels = list(X_df_onehot.columns)
        active = np.asarray(X_df_onehot.values == 1)
        return [{label for label, on in zip(labels, row) if on} for row in active]

    @staticmethod
    def _rows_containing(lhs, row_items):
        """Return the indices of the rows whose active items include every item of `lhs`."""
        wanted = set(lhs)  # built once per itemset rather than once per row
        return {i for i, items in enumerate(row_items) if wanted <= items}

    def fit(self, X, y, feature_names: list = None, verbose=False):
        """Fit rule lists to data.

        Note: The BRL algorithm requires numeric features to be discretized into bins
        prior to fitting. See imodels.discretization or sklearn.preprocessing for
        helpful utilities.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data; every entry must be 0 or 1.
        y : array_like, shape = [n_samples]
            Labels; exactly two distinct values are required.
            NOTE(review): the label matrix is built as ``(1 - y, y)``, which
            looks like it expects a 0/1 encoding -- confirm.
        feature_names : array_like, shape = [n_features], optional (default: None)
            String labels for each feature, forwarded to ``get_feature_dict``.
        verbose : bool
            Forwarded to ``extract_fpgrowth``; the sampler itself uses
            ``self.verbose``.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If y does not hold exactly two distinct values, or if X has an
            entry other than 0 or 1.
        """
        self.seed()

        if len(set(y)) != 2:
            raise ValueError("Only binary classification is supported at this time!")

        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.n_features_in_ = X.shape[1]
        self.classes_ = unique_labels(y)

        # Check that all features are either categorical or discretized
        if not np.all((X == 1) | (X == 0)):
            raise ValueError("All numeric features must be discretized prior to fitting!")

        self.feature_dict_ = get_feature_dict(X.shape[1], feature_names)
        self.feature_placeholders = np.array(list(self.feature_dict_.keys()))
        self.feature_names = np.array(list(self.feature_dict_.values()))

        X_df = pd.DataFrame(X, columns=self.feature_placeholders)
        itemsets = extract_fpgrowth(X_df, minsupport=self.minsupport,
                                    maxcardinality=self.maxcardinality,
                                    verbose=verbose)

        # Now form the data-vs.-lhs set: entry j is the set of data points
        # that contain itemset j (that is, satisfy rule j).
        row_items = self._active_items_per_row(X_df)
        itemset_support_inds = [set(range(X_df.shape[0]))]  # the default rule satisfies all data
        itemset_support_inds.extend(
            self._rows_containing(lhs, row_items) for lhs in itemsets)

        # now form lhs_len (the leading 0 belongs to the null rule)
        lhs_len = [0] + [len(lhs) for lhs in itemsets]
        nruleslen = Counter(lhs_len)
        lhs_len = np.array(lhs_len)
        self.itemsets = ['null', *itemsets]

        Xtrain = itemset_support_inds
        Ytrain = np.vstack((1 - np.array(y), y)).T.astype(int)
        permsdic = defaultdict(default_permsdic)  # We will store here the MCMC results

        # Do MCMC
        res, _rhat = run_bdl_multichain_serial(
            self.max_iter, self.thinning, self.alpha, self.listlengthprior,
            self.listwidthprior, Xtrain, Ytrain, nruleslen, lhs_len,
            self.maxcardinality, permsdic, self.burnin, self.n_chains,
            [None] * self.n_chains, verbose=self.verbose, seed=self.random_state)

        # Merge the chains
        permsdic = merge_chains(res)

        # The point estimate, BRL-point
        self.d_star = get_point_estimate(permsdic, lhs_len, Xtrain, Ytrain, self.alpha, nruleslen,
                                         self.maxcardinality, self.listlengthprior,
                                         self.listwidthprior, verbose=self.verbose)

        if self.d_star:
            # Compute the rule consequent
            self.theta, self.ci_theta = get_rule_rhs(Xtrain, Ytrain, self.d_star, self.alpha, True)
            self.final_itemsets = np.array(self.itemsets, dtype=object)[self.d_star]
            rule_strs = itemsets_to_rules(self.final_itemsets)
            self.rules_without_feature_names_ = [Rule(r) for r in rule_strs]
            self.rules_ = [
                replace_feature_name(rule, self.feature_dict_)
                for rule in self.rules_without_feature_names_
            ]

        # NOTE(review): reads `final_itemsets`, which is only assigned when a
        # point estimate was found -- confirm the intended behavior otherwise.
        self.complexity_ = self._get_complexity()

        return self

    def _get_complexity(self):
        """Return the number of rule terms in the final list, plus one."""
        # String entries (the 'null' default rule) contribute no terms.
        n_rule_terms = sum(
            len(iset) for iset in self.final_itemsets if not isinstance(iset, str))
        return n_rule_terms + 1

    def __repr__(self, decimals=1):
        """Render the fitted rule list as IF / ELSE IF / ELSE lines with probabilities."""
        if not self.d_star:
            return "(Untrained RuleListClassifier)"

        detect = ""
        if self.class1label != "class 1":
            detect = "for detecting " + self.class1label
        header = "Trained RuleListClassifier " + detect + "\n"
        separator = "=" * len(header) + "\n"

        def percent(value):
            return str(np.round(value * 100, decimals))

        n_rules = len(self.rules_)
        lines = []
        for i in range(n_rules + 1):
            if i != n_rules:
                condition = "ELSE IF " + str(self.rules_[i]) + " THEN"
            else:
                condition = "ELSE"
            lines.append(
                condition + " probability of " + self.class1label + ": "
                + percent(self.theta[i]) + "% ("
                + percent(self.ci_theta[i][0]) + "%-"
                + percent(self.ci_theta[i][1]) + "%)\n")
        # [5:] drops the leading "ELSE " of the first line.
        return header + separator + "".join(lines)[5:] + separator[1:]

    def _to_itemset_indices(self, X_df_onehot):
        """Return, per stored itemset, the set of row indices that satisfy it.

        Entry 0 belongs to the default rule and holds every row index. The
        input frame is only read, never modified.
        """
        row_items = self._active_items_per_row(X_df_onehot)
        support = [set(range(X_df_onehot.shape[0]))]  # the default rule satisfies all data
        support.extend(
            self._rows_containing(lhs, row_items) for lhs in self.itemsets[1:])
        return support

    def predict_proba(self, X):
        """Compute probabilities of possible outcomes for samples in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        T : array-like, shape = [n_samples, n_classes]
            Returns the probability of the sample for each class in
            the model. The columns correspond to the classes in sorted
            order, as they appear in the attribute `classes_`.
        """
        check_is_fitted(self)
        X = check_array(X)
        D = pd.DataFrame(X, columns=self.feature_placeholders)
        N = len(D)
        X2 = self._to_itemset_indices(D)
        P = preds_d_t(X2, np.zeros((N, 1), dtype=int), self.d_star, self.theta)
        return np.vstack((1 - P, P)).T

    def predict(self, X, threshold=0.1):
        """Perform classification on samples in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
        threshold : float, optional (default=0.1)
            A sample is labelled 1 when its second `predict_proba` column is
            at least this value.

        Returns
        -------
        y_pred : array, shape = [n_samples]
            Class labels (0 or 1) for samples in X.
        """
        # predict_proba performs the fitted-state and input validation.
        return 1 * (self.predict_proba(X)[:, 1] >= threshold)
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- RuleList
- sklearn.base.ClassifierMixin
def fit(self, X, y, feature_names: list = None, verbose=False)
Fit rule lists to data. Note: The BRL algorithm requires numeric features to be discretized into bins prior to fitting. See imodels.discretization or sklearn.preprocessing for helpful utilities.
X : array-like, shape = [n_samples, n_features]
- Training data
y : array_like, shape = [n_samples]
- Labels
feature_names : array_like, shape = [n_features], optional (default: None)
- String labels for each feature. If empty and X is a DataFrame, column labels are used. If empty and X is not a DataFrame, then features are simply enumerated
verbose : bool
- Passed on to the frequent-itemset extraction step (extract_fpgrowth)
Returns
self : returns an instance of self.
Expand source code
def fit(self, X, y, feature_names: list = None, verbose=False):
    """Fit rule lists to data.

    Note: The BRL algorithm requires numeric features to be discretized into bins
    prior to fitting. See imodels.discretization or sklearn.preprocessing for
    helpful utilities.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training data; every entry must be 0 or 1.
    y : array_like, shape = [n_samples]
        Labels; exactly two distinct values are required.
        NOTE(review): the label matrix is built as ``(1 - y, y)``, which
        looks like it expects a 0/1 encoding -- confirm.
    feature_names : array_like, shape = [n_features], optional (default: None)
        String labels for each feature, forwarded to ``get_feature_dict``.
    verbose : bool
        Forwarded to ``extract_fpgrowth``; the sampler itself uses
        ``self.verbose``.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    ValueError
        If y does not hold exactly two distinct values, or if X has an
        entry other than 0 or 1.
    """
    self.seed()

    if len(set(y)) != 2:
        raise ValueError("Only binary classification is supported at this time!")

    X, y = check_X_y(X, y)
    check_classification_targets(y)
    self.n_features_in_ = X.shape[1]
    self.classes_ = unique_labels(y)

    # Check that all features are either categorical or discretized
    if not np.all((X == 1) | (X == 0)):
        raise ValueError("All numeric features must be discretized prior to fitting!")

    self.feature_dict_ = get_feature_dict(X.shape[1], feature_names)
    self.feature_placeholders = np.array(list(self.feature_dict_.keys()))
    self.feature_names = np.array(list(self.feature_dict_.values()))

    X_df = pd.DataFrame(X, columns=self.feature_placeholders)
    itemsets = extract_fpgrowth(X_df, minsupport=self.minsupport,
                                maxcardinality=self.maxcardinality,
                                verbose=verbose)

    # Now form the data-vs.-lhs set: entry j is the set of data points that
    # contain itemset j (that is, satisfy rule j). Each row is reduced once
    # to the set of column labels whose cell equals 1, so the subset test
    # per itemset no longer rebuilds any set per row.
    labels = list(X_df.columns)
    row_items = [
        {label for label, on in zip(labels, row) if on}
        for row in np.asarray(X_df.values == 1)
    ]
    itemset_support_inds = [set(range(X_df.shape[0]))]  # the default rule satisfies all data
    for lhs in itemsets:
        wanted = set(lhs)
        itemset_support_inds.append(
            {i for i, items in enumerate(row_items) if wanted <= items})

    # now form lhs_len (the leading 0 belongs to the null rule)
    lhs_len = [0] + [len(lhs) for lhs in itemsets]
    nruleslen = Counter(lhs_len)
    lhs_len = np.array(lhs_len)
    self.itemsets = ['null', *itemsets]

    Xtrain = itemset_support_inds
    Ytrain = np.vstack((1 - np.array(y), y)).T.astype(int)
    permsdic = defaultdict(default_permsdic)  # We will store here the MCMC results

    # Do MCMC
    res, _rhat = run_bdl_multichain_serial(
        self.max_iter, self.thinning, self.alpha, self.listlengthprior,
        self.listwidthprior, Xtrain, Ytrain, nruleslen, lhs_len,
        self.maxcardinality, permsdic, self.burnin, self.n_chains,
        [None] * self.n_chains, verbose=self.verbose, seed=self.random_state)

    # Merge the chains
    permsdic = merge_chains(res)

    # The point estimate, BRL-point
    self.d_star = get_point_estimate(permsdic, lhs_len, Xtrain, Ytrain, self.alpha, nruleslen,
                                     self.maxcardinality, self.listlengthprior,
                                     self.listwidthprior, verbose=self.verbose)

    if self.d_star:
        # Compute the rule consequent
        self.theta, self.ci_theta = get_rule_rhs(Xtrain, Ytrain, self.d_star, self.alpha, True)
        self.final_itemsets = np.array(self.itemsets, dtype=object)[self.d_star]
        rule_strs = itemsets_to_rules(self.final_itemsets)
        self.rules_without_feature_names_ = [Rule(r) for r in rule_strs]
        self.rules_ = [
            replace_feature_name(rule, self.feature_dict_)
            for rule in self.rules_without_feature_names_
        ]

    # NOTE(review): _get_complexity reads `final_itemsets`, which is only
    # assigned when a point estimate was found -- confirm intended behavior.
    self.complexity_ = self._get_complexity()

    return self
def predict(self, X, threshold=0.1)
Perform classification on samples in X.
X : array-like, shape = [n_samples, n_features]
Returns
y_pred : array, shape = [n_samples]
- Class labels for samples in X.
Expand source code
def predict(self, X, threshold=0.1):
    """Perform classification on samples in X.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
    threshold : float, optional (default=0.1)
        A sample is labelled 1 when its second `predict_proba` column is at
        least this value.

    Returns
    -------
    y_pred : array, shape = [n_samples]
        Class labels for samples in X.
    """
    check_is_fitted(self)
    X = check_array(X)
    positive_proba = self.predict_proba(X)[:, 1]
    is_positive = positive_proba >= threshold
    # Multiplying by 1 turns the boolean mask into 0/1 integers.
    return 1 * is_positive
def predict_proba(self, X)
Compute probabilities of possible outcomes for samples in X.
X : array-like, shape = [n_samples, n_features]
Returns
T : array-like, shape = [n_samples, n_classes]
- Returns the probability of the sample for each class in
the model. The columns correspond to the classes in sorted
order, as they appear in the attribute `classes_`.
Expand source code
def predict_proba(self, X):
    """Compute probabilities of possible outcomes for samples in X.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    T : array-like, shape = [n_samples, n_classes]
        Returns the probability of the sample for each class in
        the model. The columns correspond to the classes in sorted
        order, as they appear in the attribute `classes_`.
    """
    check_is_fitted(self)
    X = check_array(X)

    # Label the columns with the placeholder names the itemsets refer to.
    onehot = pd.DataFrame(X, columns=self.feature_placeholders)
    n_samples = len(onehot)
    support_sets = self._to_itemset_indices(onehot)

    dummy_targets = np.zeros((n_samples, 1), dtype=int)
    proba_pos = preds_d_t(support_sets, dummy_targets, self.d_star, self.theta)

    # One row per sample: [1 - P, P].
    return np.vstack((1 - proba_pos, proba_pos)).T
def seed(self)
Expand source code
def seed(self):
    """Seed the global `random` and `numpy.random` generators from `random_state`.

    Does nothing when `random_state` is None.
    """
    state = self.random_state
    if state is None:
        return
    random.seed(state)
    np.random.seed(state)
def set_fit_request(self: BayesianRuleListClassifier, *, feature_names: Union[bool, ForwardRef(None), str] = '$UNCHANGED$', verbose: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> BayesianRuleListClassifier
Request metadata passed to the `fit` method.
Note that this method is only relevant if `enable_metadata_routing=True` (see :func:`sklearn.set_config`). Please see :ref:`User Guide <metadata_routing>` on how the routing mechanism works.
The options for each parameter are:
- `True`: metadata is requested, and passed to `fit` if provided. The request is ignored if metadata is not provided.
- `False`: metadata is not requested and the meta-estimator will not pass it to `fit`.
- `None`: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- `str`: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (`sklearn.utils.metadata_routing.UNCHANGED`) retains the existing request. This allows you to change the request for some parameters and not others.
Added in version: 1.3
Note: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.
Parameters
feature_names : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
- Metadata routing for the `feature_names` parameter in `fit`.
verbose : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
- Metadata routing for the `verbose` parameter in `fit`.
Returns
self : object
- The updated object.
Expand source code
def func(*args, **kw):
    """Updates the request for provided parameters

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    # `self` and `instance` are not parameters of this function; they come
    # from an enclosing scope that is not part of this excerpt.
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Reject keyword names that are not among the accepted keys.
    if self.validate_keys and (set(kw) - set(self.keys)):
        raise TypeError(
            f"Unexpected args: {set(kw) - set(self.keys)} in {self.name}. "
            f"Accepted arguments are: {set(self.keys)}"
        )

    # This makes it possible to use the decorated method as an unbound method,
    # for instance when monkeypatching.
    # https://github.com/scikit-learn/scikit-learn/issues/28632
    if instance is None:
        _instance = args[0]
        args = args[1:]
    else:
        _instance = instance

    # Replicating python's behavior when positional args are given other than
    # `self`, and `self` is only allowed if this method is unbound.
    if args:
        raise TypeError(
            f"set_{self.name}_request() takes 0 positional argument but"
            f" {len(args)} were given"
        )

    requests = _instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    # Only aliases that differ from UNCHANGED are recorded.
    for prop, alias in kw.items():
        if alias is not UNCHANGED:
            method_metadata_request.add_request(param=prop, alias=alias)
    _instance._metadata_request = requests

    return _instance
def set_predict_request(self: BayesianRuleListClassifier, *, threshold: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> BayesianRuleListClassifier
Request metadata passed to the `predict` method.
Note that this method is only relevant if `enable_metadata_routing=True` (see :func:`sklearn.set_config`). Please see :ref:`User Guide <metadata_routing>` on how the routing mechanism works.
The options for each parameter are:
- `True`: metadata is requested, and passed to `predict` if provided. The request is ignored if metadata is not provided.
- `False`: metadata is not requested and the meta-estimator will not pass it to `predict`.
- `None`: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- `str`: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (`sklearn.utils.metadata_routing.UNCHANGED`) retains the existing request. This allows you to change the request for some parameters and not others.
Added in version: 1.3
Note: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.
Parameters
threshold : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
- Metadata routing for the `threshold` parameter in `predict`.
Returns
self : object
- The updated object.
Expand source code
def func(*args, **kw):
    """Updates the request for provided parameters

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    # `self` and `instance` are not parameters of this function; they come
    # from an enclosing scope that is not part of this excerpt.
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Reject keyword names that are not among the accepted keys.
    if self.validate_keys and (set(kw) - set(self.keys)):
        raise TypeError(
            f"Unexpected args: {set(kw) - set(self.keys)} in {self.name}. "
            f"Accepted arguments are: {set(self.keys)}"
        )

    # This makes it possible to use the decorated method as an unbound method,
    # for instance when monkeypatching.
    # https://github.com/scikit-learn/scikit-learn/issues/28632
    if instance is None:
        _instance = args[0]
        args = args[1:]
    else:
        _instance = instance

    # Replicating python's behavior when positional args are given other than
    # `self`, and `self` is only allowed if this method is unbound.
    if args:
        raise TypeError(
            f"set_{self.name}_request() takes 0 positional argument but"
            f" {len(args)} were given"
        )

    requests = _instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    # Only aliases that differ from UNCHANGED are recorded.
    for prop, alias in kw.items():
        if alias is not UNCHANGED:
            method_metadata_request.add_request(param=prop, alias=alias)
    _instance._metadata_request = requests

    return _instance
def set_score_request(self: BayesianRuleListClassifier, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> BayesianRuleListClassifier
Request metadata passed to the `score` method.
Note that this method is only relevant if `enable_metadata_routing=True` (see :func:`sklearn.set_config`). Please see :ref:`User Guide <metadata_routing>` on how the routing mechanism works.
The options for each parameter are:
- `True`: metadata is requested, and passed to `score` if provided. The request is ignored if metadata is not provided.
- `False`: metadata is not requested and the meta-estimator will not pass it to `score`.
- `None`: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- `str`: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (`sklearn.utils.metadata_routing.UNCHANGED`) retains the existing request. This allows you to change the request for some parameters and not others.
Added in version: 1.3
Note: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect.
Parameters
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
- Metadata routing for the `sample_weight` parameter in `score`.
Returns
self : object
- The updated object.
Expand source code
def func(*args, **kw):
    """Updates the request for provided parameters

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    # `self` and `instance` are not parameters of this function; they come
    # from an enclosing scope that is not part of this excerpt.
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    # Reject keyword names that are not among the accepted keys.
    if self.validate_keys and (set(kw) - set(self.keys)):
        raise TypeError(
            f"Unexpected args: {set(kw) - set(self.keys)} in {self.name}. "
            f"Accepted arguments are: {set(self.keys)}"
        )

    # This makes it possible to use the decorated method as an unbound method,
    # for instance when monkeypatching.
    # https://github.com/scikit-learn/scikit-learn/issues/28632
    if instance is None:
        _instance = args[0]
        args = args[1:]
    else:
        _instance = instance

    # Replicating python's behavior when positional args are given other than
    # `self`, and `self` is only allowed if this method is unbound.
    if args:
        raise TypeError(
            f"set_{self.name}_request() takes 0 positional argument but"
            f" {len(args)} were given"
        )

    requests = _instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    # Only aliases that differ from UNCHANGED are recorded.
    for prop, alias in kw.items():
        if alias is not UNCHANGED:
            method_metadata_request.add_request(param=prop, alias=alias)
    _instance._metadata_request = requests

    return _instance