Expand source code
from typing import List, Tuple

import numpy as np
from imodels.rule_set.skope_rules import SkopeRulesClassifier
from imodels.util.rule import Rule
from imodels.util.score import score_precision_recall
from sklearn.base import BaseEstimator

from .util import extract_ensemble


class StableSkopeClassifier(SkopeRulesClassifier):
    """SkopeRules-style rule classifier whose candidate rules are extracted
    from an ensemble of pre-fitted weak learners instead of freshly grown
    decision trees.

    Parameters
    ----------
    weak_learners : list of BaseEstimator
        Pre-fitted estimators from which candidate rules are extracted
        (passed to ``extract_ensemble`` in ``_extract_rules``).
    max_complexity : int
        Stored on the instance; not referenced by any method defined in this
        class — presumably consumed elsewhere (TODO confirm against callers).
    min_mult : int, optional (default=1)
        Multiplicity threshold forwarded to ``extract_ensemble``.

    All remaining parameters are forwarded unchanged to
    :class:`SkopeRulesClassifier`; see its documentation for details.
    """

    def __init__(self,
                 weak_learners: List[BaseEstimator],
                 max_complexity: int,
                 min_mult: int = 1,
                 precision_min=0.5,
                 recall_min=0.4,
                 n_estimators=10,
                 max_samples=.8,
                 max_samples_features=.8,
                 bootstrap=False,
                 bootstrap_features=False,
                 max_depth=3,
                 max_depth_duplication=None,
                 max_features=1.,
                 min_samples_split=2,
                 n_jobs=1,
                 random_state=None):
        # Forward shared hyperparameters by keyword so this call stays
        # correct even if the parent's positional parameter order changes.
        super().__init__(precision_min=precision_min,
                         recall_min=recall_min,
                         n_estimators=n_estimators,
                         max_samples=max_samples,
                         max_samples_features=max_samples_features,
                         bootstrap=bootstrap,
                         bootstrap_features=bootstrap_features,
                         max_depth=max_depth,
                         max_depth_duplication=max_depth_duplication,
                         max_features=max_features,
                         min_samples_split=min_samples_split,
                         n_jobs=n_jobs,
                         random_state=random_state)
        self.weak_learners = weak_learners
        self.max_complexity = max_complexity
        self.min_mult = min_mult

    def fit(self, X, y=None, feature_names=None, sample_weight=None):
        """Fit via the parent class, then return ``self`` for chaining."""
        super().fit(X, y, feature_names=feature_names, sample_weight=sample_weight)
        return self

    def _extract_rules(self, X, y) -> Tuple[List[str], List[np.ndarray], List[np.ndarray]]:
        """Extract one rule set from the pre-fitted weak learners.

        Returns a 3-tuple of singleton lists: the extracted rules, the sample
        indices used (all rows), and the feature indices used (all features).
        Note: the original annotation (``List[str]``) was incorrect — the
        method returns a tuple, matching the parent's extraction contract.
        """
        return ([extract_ensemble(self.weak_learners, X, y, self.min_mult)],
                [np.arange(X.shape[0])],
                [np.arange(len(self.feature_names))])

    def _score_rules(self, X, y, rules) -> List[Rule]:
        """Score candidate rules by precision/recall on in-bag samples (oob=False)."""
        return score_precision_recall(X, y,
                                      rules,
                                      self.estimators_samples_,
                                      self.estimators_features_,
                                      self.feature_placeholders,
                                      oob=False)

Classes

class StableSkopeClassifier (weak_learners: List[sklearn.base.BaseEstimator], max_complexity: int, min_mult: int = 1, precision_min=0.5, recall_min=0.4, n_estimators=10, max_samples=0.8, max_samples_features=0.8, bootstrap=False, bootstrap_features=False, max_depth=3, max_depth_duplication=None, max_features=1.0, min_samples_split=2, n_jobs=1, random_state=None)

An easy-interpretable classifier optimizing simple logical rules.

Parameters

feature_names : list of str, optional
The names of each feature to be used for returning rules in string format.
precision_min : float, optional (default=0.5)
The minimal precision of a rule to be selected.
recall_min : float, optional (default=0.4)
The minimal recall of a rule to be selected.
n_estimators : int, optional (default=10)
The number of base estimators (rules) to use for prediction. More are built before selection. All are available in the estimators_ attribute.
max_samples : int or float, optional (default=.8)
The number of samples to draw from X to train each decision tree, from which rules are generated and selected. - If int, then draw max_samples samples. - If float, then draw max_samples * X.shape[0] samples. If max_samples is larger than the number of samples provided, all samples will be used for all trees (no sampling).
max_samples_features : int or float, optional (default=.8)
The number of features to draw from X to train each decision tree, from which rules are generated and selected. - If int, then draw max_features features. - If float, then draw max_features * X.shape[1] features.
bootstrap : boolean, optional (default=False)
Whether samples are drawn with replacement.
bootstrap_features : boolean, optional (default=False)
Whether features are drawn with replacement.
max_depth : integer or List or None, optional (default=3)
The maximum depth of the decision trees. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples. If an iterable is passed, you will train n_estimators for each tree depth. It allows you to create and compare rules of different length.
max_depth_duplication : integer, optional (default=None)
The maximum depth of the decision tree for rule deduplication, if None then no deduplication occurs.
max_features : int, float, string or None, optional (default=1.0)

The number of features considered (by each decision tree) when looking for the best split:

  • If int, then consider max_features features at each split.
  • If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split.
  • If "auto", then max_features=sqrt(n_features).
  • If "sqrt", then max_features=sqrt(n_features) (same as "auto").
  • If "log2", then max_features=log2(n_features).
  • If None, then max_features=n_features.

Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than max_features features.

min_samples_split : int, float, optional (default=2)
The minimum number of samples required to split an internal node for each decision tree. - If int, then consider min_samples_split as the minimum number. - If float, then min_samples_split is a percentage and ceil(min_samples_split * n_samples) are the minimum number of samples for each split.
n_jobs : integer, optional (default=1)
The number of jobs to run in parallel for both fit and predict. If -1, then the number of jobs is set to the number of cores.
random_state : int, RandomState instance or None, optional
  • If int, random_state is the seed used by the random number generator.
  • If RandomState instance, random_state is the random number generator.
  • If None, the random number generator is the RandomState instance used by np.random.
verbose : int, optional (default=0)
Controls the verbosity of the tree building process.

Attributes

rules_ : dict of tuples (rule, precision, recall, nb). The collection of n_estimators rules used in the predict method. The rules are generated by fitted sub-estimators (decision trees). Each rule satisfies recall_min and precision_min conditions. The selection is done according to OOB precisions.

estimators_ : list of DecisionTreeClassifier
The collection of fitted sub-estimators used to generate candidate rules.
estimators_samples_ : list of arrays
The subset of drawn samples (i.e., the in-bag samples) for each base estimator.
estimators_features_ : list of arrays
The subset of drawn features for each base estimator.
max_samples_ : integer
The actual number of samples
n_features_ : integer
The number of features when fit is performed.
classes_ : array, shape (n_classes,)
The classes labels.
Expand source code
class StableSkopeClassifier(SkopeRulesClassifier):
    """Rule-based classifier built on :class:`SkopeRulesClassifier`, sourcing
    its candidate rules from an ensemble of already-fitted weak learners.

    Parameters
    ----------
    weak_learners : list of BaseEstimator
        Fitted estimators handed to ``extract_ensemble`` for rule extraction.
    max_complexity : int
        Saved as an attribute; no method in this class reads it directly
        (presumably used elsewhere — TODO confirm).
    min_mult : int, optional (default=1)
        Minimum multiplicity passed through to ``extract_ensemble``.

    The remaining parameters mirror and are forwarded to
    :class:`SkopeRulesClassifier` unchanged.
    """

    def __init__(self,
                 weak_learners: List[BaseEstimator],
                 max_complexity: int,
                 min_mult: int = 1,
                 precision_min=0.5,
                 recall_min=0.4,
                 n_estimators=10,
                 max_samples=.8,
                 max_samples_features=.8,
                 bootstrap=False,
                 bootstrap_features=False,
                 max_depth=3,
                 max_depth_duplication=None,
                 max_features=1.,
                 min_samples_split=2,
                 n_jobs=1,
                 random_state=None):
        # Keyword forwarding guards against parent-signature reordering,
        # which the original all-positional call would break on silently.
        super().__init__(precision_min=precision_min,
                         recall_min=recall_min,
                         n_estimators=n_estimators,
                         max_samples=max_samples,
                         max_samples_features=max_samples_features,
                         bootstrap=bootstrap,
                         bootstrap_features=bootstrap_features,
                         max_depth=max_depth,
                         max_depth_duplication=max_depth_duplication,
                         max_features=max_features,
                         min_samples_split=min_samples_split,
                         n_jobs=n_jobs,
                         random_state=random_state)
        self.weak_learners = weak_learners
        self.max_complexity = max_complexity
        self.min_mult = min_mult

    def fit(self, X, y=None, feature_names=None, sample_weight=None):
        """Delegate fitting to the parent class and return ``self``."""
        super().fit(X, y, feature_names=feature_names, sample_weight=sample_weight)
        return self

    def _extract_rules(self, X, y) -> Tuple[List[str], List[np.ndarray], List[np.ndarray]]:
        """Return (rules, sample-index arrays, feature-index arrays), each a
        one-element list: the rules come from the weak-learner ensemble, and
        all rows / all features are marked as used. (The previous
        ``List[str]`` annotation did not match the tuple actually returned.)
        """
        return ([extract_ensemble(self.weak_learners, X, y, self.min_mult)],
                [np.arange(X.shape[0])],
                [np.arange(len(self.feature_names))])

    def _score_rules(self, X, y, rules) -> List[Rule]:
        """Score rules with precision/recall on in-bag data (oob=False)."""
        return score_precision_recall(X, y,
                                      rules,
                                      self.estimators_samples_,
                                      self.estimators_features_,
                                      self.feature_placeholders,
                                      oob=False)

Ancestors

Inherited members