from typing import List, Tuple

import numpy as np
import pandas as pd

from imodels.rule_set.skope_rules import SkopeRulesClassifier
from imodels.util.convert import itemsets_to_rules
from imodels.util.extract import extract_fpgrowth
from imodels.util.rule import Rule
from imodels.util.score import score_precision_recall


class FPSkopeClassifier(SkopeRulesClassifier):

    def __init__(self,
                 minsupport=0.1,
                 maxcardinality=2,
                 verbose=False,
                 precision_min=0.5,
                 recall_min=0.01,
                 n_estimators=10,
                 max_samples=.8,
                 max_samples_features=1.,
                 bootstrap=False,
                 bootstrap_features=False,
                 max_depth=3,
                 max_depth_duplication=None,
                 max_features=1.,
                 min_samples_split=2,
                 n_jobs=1,
                 random_state=None):
        super().__init__(precision_min,
                         recall_min,
                         n_estimators,
                         max_samples,
                         max_samples_features,
                         bootstrap,
                         bootstrap_features,
                         max_depth,
                         max_depth_duplication,
                         max_features,
                         min_samples_split,
                         n_jobs,
                         random_state,
                         verbose)
        self.minsupport = minsupport
        self.maxcardinality = maxcardinality
        self.verbose = verbose

    def fit(self, X, y=None, feature_names=None, undiscretized_features=[], sample_weight=None):
        self.undiscretized_features = undiscretized_features
        super().fit(X, y, feature_names=feature_names, sample_weight=sample_weight)
        return self

    def _extract_rules(self, X, y) -> Tuple[List[List[str]], List[np.ndarray], List[np.ndarray]]:
        # Mine frequent itemsets with FP-Growth, then convert them into candidate rules.
        X = pd.DataFrame(X, columns=self.feature_placeholders)
        itemsets = extract_fpgrowth(X, minsupport=self.minsupport,
                                    maxcardinality=self.maxcardinality,
                                    verbose=self.verbose)
        # A single rule set covering all samples and all features (no subsampling here).
        return [itemsets_to_rules(itemsets)], [np.arange(X.shape[0])], [np.arange(len(self.feature_names))]

    def _score_rules(self, X, y, rules) -> List[Rule]:
        # Score candidate rules by precision/recall on the training data (no OOB scoring).
        return score_precision_recall(X, y,
                                      rules,
                                      self.estimators_samples_,
                                      self.estimators_features_,
                                      self.feature_placeholders,
                                      oob=False)
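For orientation, a small sketch of the two-step extraction that _extract_rules wires together, using only the helper calls visible in the source above. The toy data and column names are hypothetical, and the exact return types of the utilities are assumptions inferred from how _extract_rules uses them:

import numpy as np
import pandas as pd
from imodels.util.convert import itemsets_to_rules
from imodels.util.extract import extract_fpgrowth

# Binary toy data, mirroring how _extract_rules wraps X in a DataFrame.
X = pd.DataFrame(np.random.randint(0, 2, size=(50, 3)),
                 columns=['f0', 'f1', 'f2'])

# Step 1: mine frequent itemsets with FP-Growth.
itemsets = extract_fpgrowth(X, minsupport=0.2, maxcardinality=2, verbose=False)

# Step 2: convert the itemsets into string rules for the SkopeRules scorer.
rules = itemsets_to_rules(itemsets)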
Classes
class FPSkopeClassifier (minsupport=0.1, maxcardinality=2, verbose=False, precision_min=0.5, recall_min=0.01, n_estimators=10, max_samples=0.8, max_samples_features=1.0, bootstrap=False, bootstrap_features=False, max_depth=3, max_depth_duplication=None, max_features=1.0, min_samples_split=2, n_jobs=1, random_state=None)
An easily interpretable classifier that optimizes simple logical rules.
Parameters

feature_names : list of str, optional
    The names of each feature to be used for returning rules in string format.
precision_min : float, optional (default=0.5)
    The minimal precision of a rule to be selected.
recall_min : float, optional (default=0.01)
    The minimal recall of a rule to be selected.
n_estimators : int, optional (default=10)
    The number of base estimators (rules) to use for prediction. More are built before selection. All are available in the estimators_ attribute.
max_samples : int or float, optional (default=.8)
    The number of samples to draw from X to train each decision tree, from which rules are generated and selected.
    - If int, then draw max_samples samples.
    - If float, then draw max_samples * X.shape[0] samples.
    If max_samples is larger than the number of samples provided, all samples will be used for all trees (no sampling).
max_samples_features : int or float, optional (default=1.0)
    The number of features to draw from X to train each decision tree, from which rules are generated and selected.
    - If int, then draw max_features features.
    - If float, then draw max_features * X.shape[1] features.
bootstrap : boolean, optional (default=False)
    Whether samples are drawn with replacement.
bootstrap_features : boolean, optional (default=False)
    Whether features are drawn with replacement.
max_depth : integer or List or None, optional (default=3)
    The maximum depth of the decision trees. If None, nodes are expanded until all leaves are pure or until all leaves contain fewer than min_samples_split samples. If an iterable is passed, n_estimators trees are trained for each tree depth, which allows you to create and compare rules of different lengths.
max_depth_duplication : integer, optional (default=None)
    The maximum depth of the decision tree used for rule deduplication. If None, no deduplication occurs.
max_features : int, float, string or None, optional (default="auto")
    The number of features considered (by each decision tree) when looking for the best split:
    - If int, then consider max_features features at each split.
    - If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split.
    - If "auto", then max_features=sqrt(n_features).
    - If "sqrt", then max_features=sqrt(n_features) (same as "auto").
    - If "log2", then max_features=log2(n_features).
    - If None, then max_features=n_features.
    Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires effectively inspecting more than max_features features.
min_samples_split : int or float, optional (default=2)
    The minimum number of samples required to split an internal node for each decision tree.
    - If int, then consider min_samples_split as the minimum number.
    - If float, then min_samples_split is a percentage and ceil(min_samples_split * n_samples) is the minimum number of samples for each split.
n_jobs : integer, optional (default=1)
    The number of jobs to run in parallel for both fit and predict. If -1, then the number of jobs is set to the number of cores.
random_state : int, RandomState instance or None, optional
    - If int, random_state is the seed used by the random number generator.
    - If RandomState instance, random_state is the random number generator.
    - If None, the random number generator is the RandomState instance used by np.random.
verbose : int, optional (default=0)
    Controls the verbosity of the tree building process.
Attributes

rules_ : dict of tuples (rule, precision, recall, nb)
    The collection of n_estimators rules used in the predict method. The rules are generated by the fitted sub-estimators (decision trees). Each rule satisfies the recall_min and precision_min conditions. The selection is done according to OOB precisions.
estimators_ : list of DecisionTreeClassifier
    The collection of fitted sub-estimators used to generate candidate rules.
estimators_samples_ : list of arrays
    The subset of drawn samples (i.e., the in-bag samples) for each base estimator.
estimators_features_ : list of arrays
    The subset of drawn features for each base estimator.
max_samples_ : integer
    The actual number of samples used.
n_features_ : integer
    The number of features when fit is performed.
classes_ : array, shape (n_classes,)
    The class labels.
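A minimal usage sketch. The toy data, labels, and feature names are hypothetical; it assumes the class is exported at the top level of imodels and that X is already discretized into binary features, since rules are mined with FP-Growth:

import numpy as np
from imodels import FPSkopeClassifier

# Hypothetical binary design matrix; FP-Growth mines itemsets,
# so features should be discretized before fitting.
X = np.random.randint(0, 2, size=(200, 5))
y = ((X[:, 0] == 1) & (X[:, 1] == 1)).astype(int)

clf = FPSkopeClassifier(minsupport=0.2, maxcardinality=2,
                        precision_min=0.5, recall_min=0.01)
clf.fit(X, y, feature_names=[f'feat_{i}' for i in range(5)])

print(clf.rules_)       # mined rules with their precision/recall
preds = clf.predict(X)  # binary predictions from the selected rules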
Ancestors
- SkopeRulesClassifier
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- RuleSet
- sklearn.base.ClassifierMixin
Methods
def set_fit_request(self: FPSkopeClassifier, *, feature_names: Union[bool, None, str] = '$UNCHANGED$', sample_weight: Union[bool, None, str] = '$UNCHANGED$', undiscretized_features: Union[bool, None, str] = '$UNCHANGED$') -> FPSkopeClassifier
Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the scikit-learn User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:
- True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to fit.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: this method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

feature_names : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for the feature_names parameter in fit.
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for the sample_weight parameter in fit.
undiscretized_features : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for the undiscretized_features parameter in fit.

Returns

self : object
    The updated object.
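For context, a short sketch of when set_fit_request matters: routing fit-time sample_weight through a scikit-learn meta-estimator. It assumes scikit-learn >= 1.3 with metadata routing explicitly enabled:

import sklearn

# Metadata routing is off by default; set_fit_request raises without this.
sklearn.set_config(enable_metadata_routing=True)

# Ask meta-estimators to route fit-time sample_weight to this estimator.
clf = FPSkopeClassifier().set_fit_request(sample_weight=True)

# When clf is wrapped in a meta-estimator such as sklearn.pipeline.Pipeline,
# a sample_weight passed to the wrapper's fit is now forwarded to clf.fit.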
# Source of the generated set_fit_request method (scikit-learn's
# metadata-request decorator machinery; its docstring is replaced
# at class-creation time with the one shown above).
def func(*args, **kw):
    """Updates the request for provided parameters

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    if self.validate_keys and (set(kw) - set(self.keys)):
        raise TypeError(
            f"Unexpected args: {set(kw) - set(self.keys)} in {self.name}. "
            f"Accepted arguments are: {set(self.keys)}"
        )

    # This makes it possible to use the decorated method as an unbound method,
    # for instance when monkeypatching.
    # https://github.com/scikit-learn/scikit-learn/issues/28632
    if instance is None:
        _instance = args[0]
        args = args[1:]
    else:
        _instance = instance

    # Replicating python's behavior when positional args are given other than
    # `self`, and `self` is only allowed if this method is unbound.
    if args:
        raise TypeError(
            f"set_{self.name}_request() takes 0 positional argument but"
            f" {len(args)} were given"
        )

    requests = _instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        if alias is not UNCHANGED:
            method_metadata_request.add_request(param=prop, alias=alias)
    _instance._metadata_request = requests

    return _instance
Inherited members