Expand source code
import numpy as np
import pandas as pd
import random
from collections import Counter, defaultdict
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.multiclass import check_classification_targets, unique_labels
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from imodels.rule_list.bayesian_rule_list.brl_util import (
default_permsdic, preds_d_t, run_bdl_multichain_serial, merge_chains, get_point_estimate, get_rule_rhs
)
from imodels.rule_list.rule_list import RuleList
from imodels.util.convert import itemsets_to_rules
from imodels.util.extract import extract_fpgrowth
from imodels.util.rule import get_feature_dict, replace_feature_name, Rule
class BayesianRuleListClassifier(BaseEstimator, RuleList, ClassifierMixin):
"""
This is a scikit-learn compatible wrapper for the Bayesian Rule List
classifier developed by Benjamin Letham. It produces a highly
interpretable model (a list of decision rules) by sampling many different
rule lists, trying to optimize for compactness and predictive performance.
Parameters
----------
listlengthprior : int, optional (default=3)
Prior hyperparameter for expected list length (excluding null rule)
listwidthprior : int, optional (default=1)
Prior hyperparameter for expected list width (excluding null rule)
maxcardinality : int, optional (default=2)
Maximum cardinality of an itemset
minsupport : float, optional (default=0.1)
Minimum support (fraction between 0 and 1) of an itemset
alpha : array_like, shape = [n_classes]
prior hyperparameter for multinomial pseudocounts
n_chains : int, optional (default=3)
Number of MCMC chains for inference
max_iter : int, optional (default=50000)
Maximum number of iterations
class1label: str, optional (default="class 1")
Label or description of what the positive class (with y=1) means
verbose: bool, optional (default=False)
Verbose output
random_state: int
Random seed
"""
def __init__(self,
listlengthprior=3,
listwidthprior=1,
maxcardinality=2,
minsupport=0.1,
alpha=np.array([1., 1.]),
n_chains=3,
max_iter=50000,
class1label="class 1",
verbose=False,
random_state=42):
self.listlengthprior = listlengthprior
self.listwidthprior = listwidthprior
self.maxcardinality = maxcardinality
self.minsupport = minsupport
self.alpha = alpha
self.n_chains = n_chains
self.max_iter = max_iter
self.class1label = class1label
self.verbose = verbose
self._zmin = 1
self.thinning = 1 # The thinning rate
self.burnin = self.max_iter // 2 # the number of samples to drop as burn-in in-simulation
self.d_star = None
self.random_state = random_state
self.seed()
def seed(self):
if self.random_state is not None:
random.seed(self.random_state)
np.random.seed(self.random_state)
def _setlabels(self, X, feature_names=[]):
if len(feature_names) == 0:
if type(X) == pd.DataFrame and ('object' in str(X.columns.dtype) or 'str' in str(X.columns.dtype)):
feature_names = X.columns
else:
feature_names = ["ft" + str(i + 1) for i in range(len(X[0]))]
self.feature_names = feature_names
def fit(self, X, y, feature_names: list = None, verbose=False):
"""Fit rule lists to data.
Note: The BRL algorithm requires numeric features to be discretized into bins
prior to fitting. See imodels.discretization or sklearn.preprocessing for
helpful utilities.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training data
y : array_like, shape = [n_samples]
Labels
feature_names : array_like, shape = [n_features], optional (default: [])
String labels for each feature.
If empty and X is a DataFrame, column labels are used.
If empty and X is not a DataFrame, then features are simply enumerated
verbose : bool
Currently doesn't do anything
Returns
-------
self : returns an instance of self.
"""
self.seed()
if len(set(y)) != 2:
raise ValueError("Only binary classification is supported at this time!")
X, y = check_X_y(X, y)
check_classification_targets(y)
self.n_features_in_ = X.shape[1]
self.classes_ = unique_labels(y)
# Check that all features are either categorical or discretized
if not np.all((X == 1) | (X == 0)):
raise ValueError("All numeric features must be discretized prior to fitting!")
self.feature_dict_ = get_feature_dict(X.shape[1], feature_names)
self.feature_placeholders = np.array(list(self.feature_dict_.keys()))
self.feature_names = np.array(list(self.feature_dict_.values()))
X_df = pd.DataFrame(X, columns=self.feature_placeholders)
itemsets = extract_fpgrowth(X_df, minsupport=self.minsupport,
maxcardinality=self.maxcardinality,
verbose=verbose)
# Now form the data-vs.-lhs set
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
for col in X_df.columns:
# X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
X_df[col] = X_df[col].replace({1: col, 0: ''})
itemset_support_inds = [{}] * (len(itemsets) + 1)
itemset_support_inds[0] = set(range(X_df.shape[0])) # the default rule satisfies all data
for (j, lhs) in enumerate(itemsets):
itemset_support_inds[j + 1] = set(
[i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
# now form lhs_len
lhs_len = [0]
for lhs in itemsets:
lhs_len.append(len(lhs))
nruleslen = Counter(lhs_len)
lhs_len = np.array(lhs_len)
itemsets_all = ['null']
itemsets_all.extend(itemsets)
self.itemsets = itemsets_all
Xtrain = itemset_support_inds
Ytrain = np.vstack((1 - np.array(y), y)).T.astype(int)
permsdic = defaultdict(default_permsdic) # We will store here the MCMC results
# Do MCMC
res, Rhat = run_bdl_multichain_serial(
self.max_iter, self.thinning, self.alpha, self.listlengthprior,
self.listwidthprior, Xtrain, Ytrain, nruleslen, lhs_len,
self.maxcardinality, permsdic, self.burnin, self.n_chains,
[None] * self.n_chains, verbose=self.verbose, seed=self.random_state)
# Merge the chains
permsdic = merge_chains(res)
# The point estimate, BRL-point
self.d_star = get_point_estimate(permsdic, lhs_len, Xtrain, Ytrain, self.alpha, nruleslen, self.maxcardinality,
self.listlengthprior, self.listwidthprior,
verbose=self.verbose) # get the point estimate
if self.d_star:
# Compute the rule consequent
self.theta, self.ci_theta = get_rule_rhs(Xtrain, Ytrain, self.d_star, self.alpha, True)
self.final_itemsets = np.array(self.itemsets, dtype=object)[self.d_star]
rule_strs = itemsets_to_rules(self.final_itemsets)
self.rules_without_feature_names_ = [Rule(r) for r in rule_strs]
self.rules_ = [
replace_feature_name(rule, self.feature_dict_) for rule in self.rules_without_feature_names_
]
self.complexity_ = self._get_complexity()
return self
def _get_complexity(self):
n_rule_terms = sum([len(iset) for iset in self.final_itemsets if type(iset) != str])
return n_rule_terms + 1
# def __repr__(self, decimals=1):
# if self.d_star:
# detect = ""
# if self.class1label != "class 1":
# detect = "for detecting " + self.class1label
# header = "Trained RuleListClassifier " + detect + "\n"
# separator = "".join(["="] * len(header)) + "\n"
# s = ""
# for i, j in enumerate(self.d_star):
# if self.itemsets[j] != 'null':
# condition = "ELSE IF " + (
# " AND ".join([str(self.itemsets[j][k]) for k in range(len(self.itemsets[j]))])) + " THEN"
# else:
# condition = "ELSE"
# s += condition + " probability of " + self.class1label + ": " + str(
# np.round(self.theta[i] * 100, decimals)) + "% (" + str(
# np.round(self.ci_theta[i][0] * 100, decimals)) + "%-" + str(
# np.round(self.ci_theta[i][1] * 100, decimals)) + "%)\n"
# return header + separator + s[5:] + separator[1:]
# else:
# return "(Untrained RuleListClassifier)"
def __repr__(self, decimals=1):
if self.d_star:
detect = ""
if self.class1label != "class 1":
detect = "for detecting " + self.class1label
header = "Trained RuleListClassifier " + detect + "\n"
separator = "".join(["="] * len(header)) + "\n"
s = ""
for i in range(len(self.rules_) + 1):
if i != len(self.rules_):
condition = "ELSE IF " + str(self.rules_[i]) + " THEN"
else:
condition = "ELSE"
s += condition + " probability of " + self.class1label + ": " + str(
np.round(self.theta[i] * 100, decimals)) + "% (" + str(
np.round(self.ci_theta[i][0] * 100, decimals)) + "%-" + str(
np.round(self.ci_theta[i][1] * 100, decimals)) + "%)\n"
return header + separator + s[5:] + separator[1:]
else:
return "(Untrained RuleListClassifier)"
def _to_itemset_indices(self, X_df_onehot):
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
for c in X_df_onehot.columns:
X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [set() for j in range(len(self.itemsets))]
X[0] = set(range(X_df_onehot.shape[0])) # the default rule satisfies all data
for (j, lhs) in enumerate(self.itemsets):
if j > 0:
X[j] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
return X
def predict_proba(self, X):
"""Compute probabilities of possible outcomes for samples in X.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Returns
-------
T : array-like, shape = [n_samples, n_classes]
Returns the probability of the sample for each class in
the model. The columns correspond to the classes in sorted
order, as they appear in the attribute `classes_`.
"""
check_is_fitted(self)
X = check_array(X)
D = pd.DataFrame(X, columns=self.feature_placeholders)
N = len(D)
X2 = self._to_itemset_indices(D)
P = preds_d_t(X2, np.zeros((N, 1), dtype=int), self.d_star, self.theta)
return np.vstack((1 - P, P)).T
def predict(self, X, threshold=0.1):
"""Perform classification on samples in X.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Returns
-------
y_pred : array, shape = [n_samples]
Class labels for samples in X.
"""
check_is_fitted(self)
X = check_array(X)
# print('predicting!')
# print('preds_proba', self.predict_proba(X)[:, 1])
return 1 * (self.predict_proba(X)[:, 1] >= threshold)
Classes
class BayesianRuleListClassifier (listlengthprior=3, listwidthprior=1, maxcardinality=2, minsupport=0.1, alpha=array([1., 1.]), n_chains=3, max_iter=50000, class1label='class 1', verbose=False, random_state=42)
-
This is a scikit-learn compatible wrapper for the Bayesian Rule List classifier developed by Benjamin Letham. It produces a highly interpretable model (a list of decision rules) by sampling many different rule lists, trying to optimize for compactness and predictive performance.
Parameters
listlengthprior : int, optional (default=3)
    Prior hyperparameter for expected list length (excluding null rule)
listwidthprior : int, optional (default=1)
    Prior hyperparameter for expected list width (excluding null rule)
maxcardinality : int, optional (default=2)
    Maximum cardinality of an itemset
minsupport : float, optional (default=0.1)
    Minimum support (fraction between 0 and 1) of an itemset
alpha : array_like, shape = [n_classes]
    Prior hyperparameter for multinomial pseudocounts
n_chains : int, optional (default=3)
    Number of MCMC chains for inference
max_iter : int, optional (default=50000)
    Maximum number of iterations
class1label : str, optional (default="class 1")
    Label or description of what the positive class (with y=1) means
verbose : bool, optional (default=False)
    Verbose output
random_state : int, optional (default=42)
    Random seed
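A minimal usage sketch (the synthetic binary data and feature names below are made up for illustration, and smaller max_iter/n_chains keep the MCMC run short; the top-level imodels import is assumed):

import numpy as np
from imodels import BayesianRuleListClassifier

rng = np.random.default_rng(0)
X = rng.integers(0, 2, size=(200, 5))              # already-binarized 0/1 features
y = (X[:, 0] & X[:, 2]).astype(int)                # label driven by two features
feature_names = ["ft_a" + str(i) for i in range(5)]  # hypothetical names

brl = BayesianRuleListClassifier(max_iter=2000, n_chains=2, random_state=0)
brl.fit(X, y, feature_names=feature_names)
print(brl)                                         # prints the learned rule list
probs = brl.predict_proba(X)                       # shape (200, 2)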
Ancestors
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- RuleList
- sklearn.base.ClassifierMixin
Methods
def fit(self, X, y, feature_names: list = None, verbose=False)
-
Fit rule lists to data. Note: The BRL algorithm requires numeric features to be discretized into bins prior to fitting. See imodels.discretization or sklearn.preprocessing for helpful utilities.
Parameters
X : array-like, shape = [n_samples, n_features]
    Training data
y : array_like, shape = [n_samples]
    Labels
feature_names : array_like, shape = [n_features], optional (default: [])
    String labels for each feature. If empty and X is a DataFrame, column labels are used. If empty and X is not a DataFrame, then features are simply enumerated.
verbose : bool
    Currently doesn't do anything
Returns
self : returns an instance of self.
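Since numeric features must be binarized before calling fit, one possible preprocessing step is sklearn's KBinsDiscretizer with one-hot encoding (a sketch on hypothetical continuous data; imodels.discretization offers similar utilities):

import numpy as np
from sklearn.preprocessing import KBinsDiscretizer

rng = np.random.default_rng(0)
X_cont = rng.normal(size=(100, 3))                       # hypothetical continuous data
y = (X_cont[:, 0] > 0).astype(int)

disc = KBinsDiscretizer(n_bins=3, encode="onehot-dense", strategy="quantile")
X_bin = disc.fit_transform(X_cont)                       # 0/1 matrix, one column per bin
# X_bin now passes the "all features must be 0 or 1" check inside fit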
def predict(self, X, threshold=0.1)
-
Perform classification on samples in X.
Parameters
X : array-like, shape = [n_samples, n_features]
threshold : float, optional (default=0.1)
    Decision threshold applied to the positive-class probability.
Returns
y_pred : array, shape = [n_samples]
    Class labels for samples in X.
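predict simply thresholds the positive-class probability from predict_proba; a short sketch assuming brl has already been fitted as in the example above:

y_pred = brl.predict(X, threshold=0.5)
y_pred_manual = (brl.predict_proba(X)[:, 1] >= 0.5).astype(int)
assert (y_pred == y_pred_manual).all()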
def predict_proba(self, X)
-
Compute probabilities of possible outcomes for samples in X.
Parameters
X : array-like, shape = [n_samples, n_features]
Returns
T : array-like, shape = [n_samples, n_classes]
    Returns the probability of the sample for each class in the model. The columns correspond to the classes in sorted order, as they appear in the attribute classes_.
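The returned array has one row per sample with columns ordered as in classes_, i.e. [P(y=0), P(y=1)] in the binary case; a brief sketch, again assuming a fitted brl:

probs = brl.predict_proba(X)
print(probs.shape)          # (n_samples, 2)
print(brl.classes_)         # e.g. array([0, 1])
print(probs[:3, 1])         # positive-class probability for the first 3 samples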
def seed(self)
-
Seed Python's random module and numpy's global random state with random_state.
def set_fit_request(self: BayesianRuleListClassifier, *, feature_names: Union[bool, None, str] = '$UNCHANGED$', verbose: Union[bool, None, str] = '$UNCHANGED$') -> BayesianRuleListClassifier
-
Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the scikit-learn User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:
- True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to fit.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters
feature_names : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for feature_names parameter in fit.
verbose : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for verbose parameter in fit.
Returns
self : object
    The updated object.
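A sketch of how this request is typically used (assumes scikit-learn >= 1.3; the call only matters when the estimator sits inside a meta-estimator that routes metadata):

import sklearn
sklearn.set_config(enable_metadata_routing=True)

brl = BayesianRuleListClassifier().set_fit_request(feature_names=True, verbose=False)
# A meta-estimator (e.g. a Pipeline) fitted with feature_names=... will now
# forward feature_names to this estimator's fit and will not forward verbose.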
def set_predict_request(self: BayesianRuleListClassifier, *, threshold: Union[bool, None, str] = '$UNCHANGED$') -> BayesianRuleListClassifier
-
Request metadata passed to the predict method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the scikit-learn User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:
- True: metadata is requested, and passed to predict if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to predict.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters
threshold : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for threshold parameter in predict.
Returns
self : object
    The updated object.
def set_score_request(self: BayesianRuleListClassifier, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> BayesianRuleListClassifier
-
Request metadata passed to the score method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the scikit-learn User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:
- True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to score.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for sample_weight parameter in score.
Returns
self : object
    The updated object.