import numpy as np
from typing import List, Tuple

from imodels.rule_set.skope_rules import SkopeRulesClassifier
from imodels.util.rule import Rule
from imodels.util.score import score_precision_recall
from sklearn.base import BaseEstimator

from .util import extract_ensemble


class StableSkopeClassifier(SkopeRulesClassifier):

    def __init__(self,
                 weak_learners: List[BaseEstimator],
                 max_complexity: int,
                 min_mult: int = 1,
                 precision_min=0.5,
                 recall_min=0.4,
                 n_estimators=10,
                 max_samples=.8,
                 max_samples_features=.8,
                 bootstrap=False,
                 bootstrap_features=False,
                 max_depth=3,
                 max_depth_duplication=None,
                 max_features=1.,
                 min_samples_split=2,
                 n_jobs=1,
                 random_state=None):
        super().__init__(precision_min,
                         recall_min,
                         n_estimators,
                         max_samples,
                         max_samples_features,
                         bootstrap,
                         bootstrap_features,
                         max_depth,
                         max_depth_duplication,
                         max_features,
                         min_samples_split,
                         n_jobs,
                         random_state)
        self.weak_learners = weak_learners
        self.max_complexity = max_complexity
        self.min_mult = min_mult

    def fit(self, X, y=None, feature_names=None, sample_weight=None):
        super().fit(X, y, feature_names=feature_names, sample_weight=sample_weight)
        return self

    def _extract_rules(self, X, y) -> Tuple[List, List, List]:
        # Instead of growing fresh trees, pool the rules already present in the
        # fitted weak learners; every sample and feature index is kept.
        return ([extract_ensemble(self.weak_learners, X, y, self.min_mult)],
                [np.arange(X.shape[0])],
                [np.arange(len(self.feature_names))])

    def _score_rules(self, X, y, rules) -> List[Rule]:
        # Score candidate rules by in-sample precision/recall (oob=False).
        return score_precision_recall(X, y,
                                      rules,
                                      self.estimators_samples_,
                                      self.estimators_features_,
                                      self.feature_placeholders,
                                      oob=False)
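For orientation, a hypothetical usage sketch. The data, feature names, and choice of weak learners here are illustrative assumptions: extract_ensemble pools rules from the fitted weak learners, so rule-based models are fitted before being passed in, and whether these particular learners satisfy its expectations is itself an assumption.

    import numpy as np
    from imodels.rule_set.skope_rules import SkopeRulesClassifier

    # Toy data (assumed for illustration).
    rng = np.random.RandomState(0)
    X = rng.rand(200, 4)
    y = (X[:, 0] + X[:, 1] > 1).astype(int)

    # Hypothetical weak learners: a few fitted rule-based models whose rules
    # are pooled; min_mult presumably keeps rules found by >= min_mult learners.
    weak_learners = [SkopeRulesClassifier(random_state=i).fit(X, y)
                     for i in range(3)]

    clf = StableSkopeClassifier(weak_learners=weak_learners,
                                max_complexity=10,  # illustrative value
                                min_mult=2)
    clf.fit(X, y, feature_names=['f0', 'f1', 'f2', 'f3'])
    preds = clf.predict(X)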
Classes
class StableSkopeClassifier (weak_learners: List[sklearn.base.BaseEstimator], max_complexity: int, min_mult: int = 1, precision_min=0.5, recall_min=0.4, n_estimators=10, max_samples=0.8, max_samples_features=0.8, bootstrap=False, bootstrap_features=False, max_depth=3, max_depth_duplication=None, max_features=1.0, min_samples_split=2, n_jobs=1, random_state=None)
An easily interpretable classifier that optimizes simple logical rules.
Parameters
feature_names : list of str, optional
    The names of each feature, used for returning rules in string format.
precision_min : float, optional (default=0.5)
    The minimal precision of a rule to be selected.
recall_min : float, optional (default=0.4)
    The minimal recall of a rule to be selected.
n_estimators : int, optional (default=10)
    The number of base estimators (rules) to use for prediction. More are built before selection. All are available in the estimators_ attribute.
max_samples : int or float, optional (default=.8)
    The number of samples to draw from X to train each decision tree, from which rules are generated and selected.
    - If int, then draw max_samples samples.
    - If float, then draw max_samples * X.shape[0] samples (see the sketch after this parameter list).
    If max_samples is larger than the number of samples provided, all samples will be used for all trees (no sampling).
max_samples_features : int or float, optional (default=.8)
    The number of features to draw from X to train each decision tree, from which rules are generated and selected.
    - If int, then draw max_samples_features features.
    - If float, then draw max_samples_features * X.shape[1] features.
bootstrap : bool, optional (default=False)
    Whether samples are drawn with replacement.
bootstrap_features : bool, optional (default=False)
    Whether features are drawn with replacement.
max_depth : int, list, or None, optional (default=3)
    The maximum depth of the decision trees. If None, then nodes are expanded until all leaves are pure or until all leaves contain fewer than min_samples_split samples. If an iterable is passed, n_estimators trees are trained for each depth, which allows creating and comparing rules of different lengths.
max_depth_duplication : int, optional (default=None)
    The maximum depth of the decision tree used for rule deduplication. If None, no deduplication occurs.
max_features : int, float, str, or None, optional (default=1.0)
    The number of features considered (by each decision tree) when looking for the best split:
    - If int, then consider max_features features at each split.
    - If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split.
    - If "auto", then max_features=sqrt(n_features).
    - If "sqrt", then max_features=sqrt(n_features) (same as "auto").
    - If "log2", then max_features=log2(n_features).
    - If None, then max_features=n_features.
    Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires effectively inspecting more than max_features features.
min_samples_split : int or float, optional (default=2)
    The minimum number of samples required to split an internal node of each decision tree.
    - If int, then consider min_samples_split as the minimum number.
    - If float, then min_samples_split is a percentage and ceil(min_samples_split * n_samples) is the minimum number of samples for each split.
n_jobs : int, optional (default=1)
    The number of jobs to run in parallel for both fit and predict. If -1, the number of jobs is set to the number of cores.
random_state : int, RandomState instance, or None, optional
    - If int, random_state is the seed used by the random number generator.
    - If RandomState instance, random_state is the random number generator.
    - If None, the random number generator is the RandomState instance used by np.random.
verbose : int, optional (default=0)
    Controls the verbosity of the tree building process.
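To make the int-versus-float conventions above concrete, a short standalone sketch; the sample and feature counts are assumptions for illustration, and the arithmetic mirrors the parameter descriptions rather than calling into the library:

    import math

    n_samples, n_features = 1000, 20

    # Floats are interpreted as fractions of the data.
    print(int(.8 * n_samples))    # max_samples=.8 -> 800 samples per tree
    print(int(.8 * n_features))   # max_samples_features=.8 -> 16 features per tree

    # min_samples_split given as a float is a fraction, rounded up.
    print(math.ceil(0.01 * n_samples))  # min_samples_split=0.01 -> 10 samples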
Attributes
rules_ : dict of tuples (rule, precision, recall, nb)
    The collection of n_estimators rules used in the predict method (inspected in the sketch after this list). The rules are generated by the fitted sub-estimators (decision trees). Each rule satisfies the recall_min and precision_min conditions. The selection is done according to OOB precisions.
estimators_ : list of DecisionTreeClassifier
    The collection of fitted sub-estimators used to generate candidate rules.
estimators_samples_ : list of arrays
    The subset of drawn samples (i.e., the in-bag samples) for each base estimator.
estimators_features_ : list of arrays
    The subset of drawn features for each base estimator.
max_samples_ : int
    The actual number of samples drawn.
n_features_ : int
    The number of features when fit is performed.
classes_ : array, shape (n_classes,)
    The class labels.
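A brief sketch of reading these attributes after fitting, assuming clf is a fitted StableSkopeClassifier as in the earlier example; the exact structure of each rules_ entry follows the description above:

    # Assumes `clf` has been fitted (see the usage sketch near the top).
    for rule in clf.rules_:
        print(rule)                       # each selected rule with its scores
    print(len(clf.estimators_samples_))   # one in-bag index array per estimator
    print(clf.n_features_)                # number of features seen during fit
    print(clf.classes_)                   # class labels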
Ancestors
- SkopeRulesClassifier
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- RuleSet
- sklearn.base.ClassifierMixin
Inherited members