from typing import List

import numpy as np
import pandas as pd

from imodels.rule_set.skope_rules import SkopeRulesClassifier
from imodels.util.convert import itemsets_to_rules
from imodels.util.extract import extract_fpgrowth
from imodels.util.rule import Rule
from imodels.util.score import score_precision_recall


class FPSkopeClassifier(SkopeRulesClassifier):

    def __init__(self,
                 minsupport=0.1,
                 maxcardinality=2,
                 verbose=False,
                 precision_min=0.5,
                 recall_min=0.01,
                 n_estimators=10,
                 max_samples=.8,
                 max_samples_features=1.,
                 bootstrap=False,
                 bootstrap_features=False,
                 max_depth=3,
                 max_depth_duplication=None,
                 max_features=1.,
                 min_samples_split=2,
                 n_jobs=1,
                 random_state=None):
        super().__init__(precision_min,
                         recall_min,
                         n_estimators,
                         max_samples,
                         max_samples_features,
                         bootstrap,
                         bootstrap_features,
                         max_depth,
                         max_depth_duplication,
                         max_features,
                         min_samples_split,
                         n_jobs,
                         random_state,
                         verbose)
        self.minsupport = minsupport
        self.maxcardinality = maxcardinality
        self.verbose = verbose

    def fit(self, X, y=None, feature_names=None, undiscretized_features=[], sample_weight=None):
        # Store which features were passed in undiscretized form, then defer
        # to the SkopeRulesClassifier fitting procedure.
        self.undiscretized_features = undiscretized_features
        super().fit(X, y, feature_names=feature_names, sample_weight=sample_weight)
        return self

    def _extract_rules(self, X, y) -> List[str]:
        # Mine frequent itemsets with FP-growth and convert them into candidate
        # rules. Note: despite the List[str] annotation, this returns a 3-tuple
        # of (rules, sample indices, feature indices), one entry per estimator.
        X = pd.DataFrame(X, columns=self.feature_placeholders)
        itemsets = extract_fpgrowth(X, minsupport=self.minsupport,
                                    maxcardinality=self.maxcardinality,
                                    verbose=self.verbose)
        return [itemsets_to_rules(itemsets)], [np.arange(X.shape[0])], [np.arange(len(self.feature_names))]

    def _score_rules(self, X, y, rules) -> List[Rule]:
        # Score every candidate rule by its precision and recall on the
        # training data (oob=False: no out-of-bag scoring here).
        return score_precision_recall(X, y,
                                      rules,
                                      self.estimators_samples_,
                                      self.estimators_features_,
                                      self.feature_placeholders,
                                      oob=False)
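
To make the extraction step concrete, here is a minimal sketch of FP-growth itemset mining with mlxtend, which extract_fpgrowth wraps in recent imodels versions; the toy transactions and the 0.5 support threshold are illustrative assumptions, not values from this module:

import pandas as pd
from mlxtend.frequent_patterns import fpgrowth

# Boolean one-hot transactions: each column is an item, each row a sample.
X = pd.DataFrame({'a': [1, 1, 0, 1],
                  'b': [1, 0, 1, 1],
                  'c': [0, 1, 1, 0]}).astype(bool)

# Frequent itemsets with support >= 0.5 and at most 2 items per set;
# these correspond to the minsupport and maxcardinality parameters above.
itemsets = fpgrowth(X, min_support=0.5, use_colnames=True, max_len=2)
print(itemsets)  # a DataFrame with 'support' and 'itemsets' columns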

Classes

class FPSkopeClassifier (minsupport=0.1, maxcardinality=2, verbose=False, precision_min=0.5, recall_min=0.01, n_estimators=10, max_samples=0.8, max_samples_features=1.0, bootstrap=False, bootstrap_features=False, max_depth=3, max_depth_duplication=None, max_features=1.0, min_samples_split=2, n_jobs=1, random_state=None)

An easily interpretable classifier that optimizes simple logical rules.

Parameters

feature_names : list of str, optional
The names of each feature to be used for returning rules in string format.
precision_min : float, optional (default=0.5)
The minimal precision of a rule to be selected.
recall_min : float, optional (default=0.01)
The minimal recall of a rule to be selected.
n_estimators : int, optional (default=10)
The number of base estimators (rules) to use for prediction. More rules are built than are kept after selection; all fitted estimators are available in the estimators_ attribute.
max_samples : int or float, optional (default=0.8)
The number of samples to draw from X to train each decision tree, from which rules are generated and selected.

  • If int, then draw max_samples samples.
  • If float, then draw max_samples * X.shape[0] samples.

If max_samples is larger than the number of samples provided, all samples will be used for all trees (no sampling).
max_samples_features : int or float, optional (default=1.0)
The number of features to draw from X to train each decision tree, from which rules are generated and selected.

  • If int, then draw max_samples_features features.
  • If float, then draw max_samples_features * X.shape[1] features.
bootstrap : boolean, optional (default=False)
Whether samples are drawn with replacement.
bootstrap_features : boolean, optional (default=False)
Whether features are drawn with replacement.
max_depth : integer, List, or None, optional (default=3)
The maximum depth of the decision trees. If None, then nodes are expanded until all leaves are pure or until all leaves contain fewer than min_samples_split samples. If an iterable is passed, n_estimators trees are trained for each depth in the iterable, which allows you to create and compare rules of different lengths.
max_depth_duplication : integer, optional (default=None)
The maximum depth of the decision tree for rule deduplication, if None then no deduplication occurs.
max_features : int, float, string or None, optional (default=1.0)

The number of features considered (by each decision tree) when looking for the best split:

  • If int, then consider max_features features at each split.
  • If float, then max_features is a fraction and int(max_features * n_features) features are considered at each split.
  • If "auto", then max_features=sqrt(n_features).
  • If "sqrt", then max_features=sqrt(n_features) (same as "auto").
  • If "log2", then max_features=log2(n_features).
  • If None, then max_features=n_features.

Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires effectively inspecting more than max_features features.

min_samples_split : int or float, optional (default=2)
The minimum number of samples required to split an internal node for each decision tree.

  • If int, then consider min_samples_split as the minimum number.
  • If float, then min_samples_split is a fraction and ceil(min_samples_split * n_samples) is the minimum number of samples for each split.
n_jobs : integer, optional (default=1)
The number of jobs to run in parallel for both fit and predict. If -1, then the number of jobs is set to the number of cores.
random_state : int, RandomState instance or None, optional
  • If int, random_state is the seed used by the random number generator.
  • If RandomState instance, random_state is the random number generator.
  • If None, the random number generator is the RandomState instance used by np.random.
verbose : bool or int, optional (default=False)
Controls the verbosity of the tree building process.

Attributes

rules_ : dict of tuples (rule, precision, recall, nb)
The collection of n_estimators rules used in the predict method. The rules are generated by the fitted sub-estimators (decision trees). Each rule satisfies the recall_min and precision_min conditions. The selection is done according to OOB precisions.

estimators_ : list of DecisionTreeClassifier
The collection of fitted sub-estimators used to generate candidate rules.
estimators_samples_ : list of arrays
The subset of drawn samples (i.e., the in-bag samples) for each base estimator.
estimators_features_ : list of arrays
The subset of drawn features for each base estimator.
max_samples_ : integer
The actual number of samples used to train each base estimator.
n_features_ : integer
The number of features when fit is performed.
classes_ : array, shape (n_classes,)
The class labels.
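
A minimal usage sketch, assuming binary input features (a natural fit for itemset mining) and the package-level import path used by recent imodels releases; the data and feature names below are synthetic:

import numpy as np
from imodels import FPSkopeClassifier

rng = np.random.RandomState(0)
X = rng.randint(0, 2, size=(200, 4))                 # binary feature matrix
y = ((X[:, 0] == 1) & (X[:, 1] == 1)).astype(int)    # rule-like target

clf = FPSkopeClassifier(minsupport=0.1, maxcardinality=2, random_state=0)
clf.fit(X, y, feature_names=['f0', 'f1', 'f2', 'f3'])

print(clf.rules_)       # selected rules with their precision/recall
preds = clf.predict(X)  # binary predictions from the rule set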

Ancestors

  • SkopeRulesClassifier
  • sklearn.base.BaseEstimator
  • sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
  • sklearn.utils._metadata_requests._MetadataRequester
  • RuleSet
  • sklearn.base.ClassifierMixin

Methods

def set_fit_request(self: FPSkopeClassifier, *, feature_names: Union[bool, None, str] = '$UNCHANGED$', sample_weight: Union[bool, None, str] = '$UNCHANGED$', undiscretized_features: Union[bool, None, str] = '$UNCHANGED$') -> FPSkopeClassifier

Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:

  • True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.

  • False: metadata is not requested and the meta-estimator will not pass it to fit.

  • None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.

  • str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g., used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

feature_names : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for feature_names parameter in fit.
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for sample_weight parameter in fit.
undiscretized_features : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for undiscretized_features parameter in fit.

Returns

self : object
The updated object.
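
For illustration, a hedged sketch of requesting fit metadata on this estimator; the routing only matters once the estimator sits inside a meta-estimator such as a pipeline:

import sklearn
from imodels import FPSkopeClassifier

sklearn.set_config(enable_metadata_routing=True)

# Request that sample_weight given to a meta-estimator be routed to this
# estimator's fit; undiscretized_features is explicitly not requested.
clf = FPSkopeClassifier().set_fit_request(sample_weight=True,
                                          undiscretized_features=False)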
Inherited members