import numpy as np
from typing import List, Tuple

from imodels.rule_set.skope_rules import SkopeRulesClassifier
from imodels.util.rule import Rule
from imodels.util.score import score_precision_recall
from sklearn.base import BaseEstimator

from .util import extract_ensemble


class StableSkopeClassifier(SkopeRulesClassifier):

    def __init__(self,
                 weak_learners: List[BaseEstimator],
                 max_complexity: int,
                 min_mult: int = 1,
                 precision_min=0.5,
                 recall_min=0.4,
                 n_estimators=10,
                 max_samples=.8,
                 max_samples_features=.8,
                 bootstrap=False,
                 bootstrap_features=False,
                 max_depth=3,
                 max_depth_duplication=None,
                 max_features=1.,
                 min_samples_split=2,
                 n_jobs=1,
                 random_state=None):
        super().__init__(precision_min,
                         recall_min,
                         n_estimators,
                         max_samples,
                         max_samples_features,
                         bootstrap,
                         bootstrap_features,
                         max_depth,
                         max_depth_duplication,
                         max_features,
                         min_samples_split,
                         n_jobs,
                         random_state)
        self.weak_learners = weak_learners
        self.max_complexity = max_complexity
        self.min_mult = min_mult

    def fit(self, X, y=None, feature_names=None, sample_weight=None):
        # Delegates to SkopeRulesClassifier.fit; the overridden hooks below
        # replace its tree-based rule extraction and OOB scoring.
        super().fit(X, y, feature_names=feature_names, sample_weight=sample_weight)
        return self

    def _extract_rules(self, X, y) -> Tuple[List[str], List[np.ndarray], List[np.ndarray]]:
        # Instead of growing new trees, pool the rules already found by the
        # fitted weak learners (min_mult controls the required rule
        # multiplicity across learners) and treat the full data and full
        # feature set as the single "estimator".
        return ([extract_ensemble(self.weak_learners, X, y, self.min_mult)],
                [np.arange(X.shape[0])],
                [np.arange(len(self.feature_names))])

    def _score_rules(self, X, y, rules) -> List[Rule]:
        # Score the pooled rules by precision/recall on the training data;
        # oob=False because no out-of-bag samples exist in this setup.
        return score_precision_recall(X, y,
                                      rules,
                                      self.estimators_samples_,
                                      self.estimators_features_,
                                      self.feature_placeholders,
                                      oob=False)
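A minimal usage sketch, not from the source: the dataset, the feature names, and the choice of RuleFitClassifier as weak learner are illustrative assumptions — any fitted rule-based estimator that extract_ensemble supports would do.

import numpy as np
from sklearn.datasets import make_classification
from imodels import RuleFitClassifier  # assumed weak-learner choice

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
feature_names = [f"feat_{i}" for i in range(X.shape[1])]

# Fit a few weak learners up front; extract_ensemble later pools their rules.
weak_learners = [RuleFitClassifier(random_state=s).fit(X, y, feature_names=feature_names)
                 for s in range(3)]

clf = StableSkopeClassifier(weak_learners=weak_learners, max_complexity=10)
clf.fit(X, y, feature_names=feature_names)
print(clf.predict(X[:5]))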
Classes
class StableSkopeClassifier (weak_learners: List[sklearn.base.BaseEstimator], max_complexity: int, min_mult: int = 1, precision_min=0.5, recall_min=0.4, n_estimators=10, max_samples=0.8, max_samples_features=0.8, bootstrap=False, bootstrap_features=False, max_depth=3, max_depth_duplication=None, max_features=1.0, min_samples_split=2, n_jobs=1, random_state=None)
An easily interpretable classifier that optimizes simple logical rules.
Parameters

feature_names : list of str, optional
    The names of each feature, used for returning rules in string format.

precision_min : float, optional (default=0.5)
    The minimal precision of a rule to be selected.

recall_min : float, optional (default=0.4)
    The minimal recall of a rule to be selected.

n_estimators : int, optional (default=10)
    The number of base estimators (rules) to use for prediction. More are built before selection. All are available in the estimators_ attribute.

max_samples : int or float, optional (default=0.8)
    The number of samples to draw from X to train each decision tree, from which rules are generated and selected.
    - If int, then draw max_samples samples.
    - If float, then draw max_samples * X.shape[0] samples.
    If max_samples is larger than the number of samples provided, all samples are used for all trees (no sampling).

max_samples_features : int or float, optional (default=0.8)
    The number of features to draw from X to train each decision tree, from which rules are generated and selected.
    - If int, then draw max_samples_features features.
    - If float, then draw max_samples_features * X.shape[1] features.

bootstrap : bool, optional (default=False)
    Whether samples are drawn with replacement.

bootstrap_features : bool, optional (default=False)
    Whether features are drawn with replacement.

max_depth : int or list or None, optional (default=3)
    The maximum depth of the decision trees. If None, nodes are expanded until all leaves are pure or until all leaves contain fewer than min_samples_split samples. If an iterable is passed, n_estimators trees are trained for each depth, which allows creating and comparing rules of different lengths.

max_depth_duplication : int, optional (default=None)
    The maximum depth of the decision tree used for rule deduplication. If None, no deduplication occurs.

max_features : int, float, str or None, optional (default=1.0)
    The number of features considered (by each decision tree) when looking for the best split:
    - If int, then consider max_features features at each split.
    - If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split.
    - If "auto", then max_features=sqrt(n_features).
    - If "sqrt", then max_features=sqrt(n_features) (same as "auto").
    - If "log2", then max_features=log2(n_features).
    - If None, then max_features=n_features.
    Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires effectively inspecting more than max_features features.

min_samples_split : int or float, optional (default=2)
    The minimum number of samples required to split an internal node of each decision tree.
    - If int, then consider min_samples_split as the minimum number.
    - If float, then min_samples_split is a percentage and ceil(min_samples_split * n_samples) is the minimum number of samples for each split.
    See the sketch after this list for how the int/float conventions resolve to counts.

n_jobs : int, optional (default=1)
    The number of jobs to run in parallel for both fit and predict. If -1, the number of jobs is set to the number of cores.

random_state : int, RandomState instance or None, optional
    - If int, random_state is the seed used by the random number generator.
    - If RandomState instance, random_state is the random number generator.
    - If None, the random number generator is the RandomState instance used by np.random.

verbose : int, optional (default=0)
    Controls the verbosity of the tree-building process.
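As referenced under min_samples_split, a small illustration of the int-vs-float conventions shared by max_samples, max_samples_features, max_features and min_samples_split; this is plain arithmetic implied by the descriptions above, with illustrative values, not class behavior being executed.

import math

n_samples, n_features = 1000, 40
# max_samples=0.8 (float): draw int(0.8 * n_samples) samples per tree
print(int(0.8 * n_samples))           # -> 800
# max_features=0.5 (float): consider int(0.5 * n_features) features per split
print(int(0.5 * n_features))          # -> 20
# min_samples_split=0.01 (float): need ceil(0.01 * n_samples) samples to split
print(math.ceil(0.01 * n_samples))    # -> 10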
Attributes

rules_ : dict of tuples (rule, precision, recall, nb)
    The collection of n_estimators rules used in the predict method. The rules are generated by fitted sub-estimators (decision trees). Each rule satisfies the recall_min and precision_min conditions. The selection is done according to OOB precisions.

estimators_ : list of DecisionTreeClassifier
    The collection of fitted sub-estimators used to generate candidate rules.

estimators_samples_ : list of arrays
    The subset of drawn samples (i.e., the in-bag samples) for each base estimator.

estimators_features_ : list of arrays
    The subset of drawn features for each base estimator.

max_samples_ : int
    The actual number of samples.

n_features_ : int
    The number of features when fit is performed.

classes_ : array, shape (n_classes,)
    The class labels.
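A short sketch of reading the fitted attributes listed above, assuming clf is the fitted StableSkopeClassifier from the earlier usage example.

# Each entry of rules_ pairs a rule with its scores; the exact container
# shape follows the docstring above (rule, precision, recall, nb).
for r in clf.rules_:
    print(r)
print(len(clf.estimators_samples_))   # one in-bag index array per sub-estimator
print(clf.classes_)                   # class labels seen during fit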
Ancestors
- SkopeRulesClassifier
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- RuleSet
- sklearn.base.ClassifierMixin