Source code
from typing import List
import numpy as np
import pandas as pd
from imodels.rule_set.skope_rules import SkopeRulesClassifier
from imodels.util.convert import itemsets_to_rules
from imodels.util.extract import extract_fpgrowth
from imodels.util.rule import Rule
from imodels.util.score import score_precision_recall
class FPSkopeClassifier(SkopeRulesClassifier):

    def __init__(self,
                 minsupport=0.1,
                 maxcardinality=2,
                 verbose=False,
                 precision_min=0.5,
                 recall_min=0.01,
                 n_estimators=10,
                 max_samples=.8,
                 max_samples_features=1.,
                 bootstrap=False,
                 bootstrap_features=False,
                 max_depth=3,
                 max_depth_duplication=None,
                 max_features=1.,
                 min_samples_split=2,
                 n_jobs=1,
                 random_state=None):
        super().__init__(precision_min,
                         recall_min,
                         n_estimators,
                         max_samples,
                         max_samples_features,
                         bootstrap,
                         bootstrap_features,
                         max_depth,
                         max_depth_duplication,
                         max_features,
                         min_samples_split,
                         n_jobs,
                         random_state,
                         verbose)
        self.minsupport = minsupport
        self.maxcardinality = maxcardinality
        self.verbose = verbose

    def fit(self, X, y=None, feature_names=None, undiscretized_features=[], sample_weight=None):
        self.undiscretized_features = undiscretized_features
        super().fit(X, y, feature_names=feature_names, sample_weight=sample_weight)
        return self

    def _extract_rules(self, X, y) -> List[str]:
        # Mine frequent itemsets with FP-growth over the discretized features,
        # then convert each itemset into a candidate rule string.
        X = pd.DataFrame(X, columns=self.feature_placeholders)
        itemsets = extract_fpgrowth(X, minsupport=self.minsupport,
                                    maxcardinality=self.maxcardinality,
                                    verbose=self.verbose)
        return [itemsets_to_rules(itemsets)], [np.arange(X.shape[0])], [np.arange(len(self.feature_names))]

    def _score_rules(self, X, y, rules) -> List[Rule]:
        # Score each candidate rule by precision and recall on the training data
        # (oob=False: no out-of-bag evaluation).
        return score_precision_recall(X, y,
                                      rules,
                                      self.estimators_samples_,
                                      self.estimators_features_,
                                      self.feature_placeholders,
                                      oob=False)
Classes
class FPSkopeClassifier (minsupport=0.1, maxcardinality=2, verbose=False, precision_min=0.5, recall_min=0.01, n_estimators=10, max_samples=0.8, max_samples_features=1.0, bootstrap=False, bootstrap_features=False, max_depth=3, max_depth_duplication=None, max_features=1.0, min_samples_split=2, n_jobs=1, random_state=None)
An easily interpretable classifier that optimizes simple logical rules.
Parameters
minsupport : float, optional (default=0.1)
    The minimum support threshold passed to FP-growth when mining candidate itemsets.

maxcardinality : int, optional (default=2)
    The maximum number of items per mined itemset, which bounds the number of terms in each candidate rule.

feature_names : list of str, optional
    The names of each feature to be used for returning rules in string format.

precision_min : float, optional (default=0.5)
    The minimal precision of a rule to be selected.

recall_min : float, optional (default=0.01)
    The minimal recall of a rule to be selected.

n_estimators : int, optional (default=10)
    The number of base estimators (rules) to use for prediction. More are built before selection. All are available in the estimators_ attribute.

max_samples : int or float, optional (default=0.8)
    The number of samples to draw from X to train each decision tree, from which rules are generated and selected.
    - If int, then draw max_samples samples.
    - If float, then draw max_samples * X.shape[0] samples.
    If max_samples is larger than the number of samples provided, all samples will be used for all trees (no sampling).

max_samples_features : int or float, optional (default=1.0)
    The number of features to draw from X to train each decision tree, from which rules are generated and selected.
    - If int, then draw max_samples_features features.
    - If float, then draw max_samples_features * X.shape[1] features.

bootstrap : boolean, optional (default=False)
    Whether samples are drawn with replacement.

bootstrap_features : boolean, optional (default=False)
    Whether features are drawn with replacement.

max_depth : integer, List or None, optional (default=3)
    The maximum depth of the decision trees. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples. If an iterable is passed, n_estimators trees are trained for each tree depth, which allows you to create and compare rules of different lengths.

max_depth_duplication : integer, optional (default=None)
    The maximum depth of the decision tree used for rule deduplication. If None, no deduplication occurs.

max_features : int, float, string or None, optional (default=1.0)
    The number of features considered (by each decision tree) when looking for the best split:
    - If int, then consider max_features features at each split.
    - If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split.
    - If "auto", then max_features=sqrt(n_features).
    - If "sqrt", then max_features=sqrt(n_features) (same as "auto").
    - If "log2", then max_features=log2(n_features).
    - If None, then max_features=n_features.
    Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires effectively inspecting more than max_features features.

min_samples_split : int or float, optional (default=2)
    The minimum number of samples required to split an internal node for each decision tree.
    - If int, then consider min_samples_split as the minimum number.
    - If float, then min_samples_split is a percentage and ceil(min_samples_split * n_samples) is the minimum number of samples for each split.

n_jobs : integer, optional (default=1)
    The number of jobs to run in parallel for both fit and predict. If -1, then the number of jobs is set to the number of cores.

random_state : int, RandomState instance or None, optional
    - If int, random_state is the seed used by the random number generator.
    - If RandomState instance, random_state is the random number generator.
    - If None, the random number generator is the RandomState instance used by np.random.

verbose : int or bool, optional (default=False)
    Controls the verbosity of the tree building process.
Attributes
rules_ : dict of tuples (rule, precision, recall, nb)
    The collection of n_estimators rules used in the predict method. The rules are generated by the fitted sub-estimators (decision trees). Each rule satisfies the recall_min and precision_min conditions. The selection is done according to OOB precisions.

estimators_ : list of DecisionTreeClassifier
    The collection of fitted sub-estimators used to generate candidate rules.

estimators_samples_ : list of arrays
    The subset of drawn samples (i.e., the in-bag samples) for each base estimator.

estimators_features_ : list of arrays
    The subset of drawn features for each base estimator.

max_samples_ : integer
    The actual number of samples.

n_features_ : integer
    The number of features when fit is performed.

classes_ : array, shape (n_classes,)
    The class labels.
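A minimal usage sketch (not taken from the library's documentation): it assumes FPSkopeClassifier is exported at the top level of the imodels package, and uses a toy 0/1 matrix, since FP-growth mines itemsets over discrete features.

import numpy as np
from imodels import FPSkopeClassifier  # assumed top-level export

rng = np.random.RandomState(0)
X = rng.randint(0, 2, size=(200, 4))       # toy binarized feature matrix
y = (X[:, 0] & X[:, 1]).astype(int)        # label is a conjunction of two features

clf = FPSkopeClassifier(minsupport=0.2, maxcardinality=2, random_state=0)
clf.fit(X, y, feature_names=['x0', 'x1', 'x2', 'x3'])
print(clf.rules_)          # scored rules that passed the precision/recall thresholds
print(clf.predict(X[:5]))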
Ancestors
- SkopeRulesClassifier
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- RuleSet
- sklearn.base.ClassifierMixin
Methods
def set_fit_request(self: FPSkopeClassifier, *, feature_names: bool | str | None = '$UNCHANGED$', sample_weight: bool | str | None = '$UNCHANGED$', undiscretized_features: bool | str | None = '$UNCHANGED$') -> FPSkopeClassifier
Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:
- True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to fit.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: this method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

feature_names : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for the feature_names parameter in fit.

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for the sample_weight parameter in fit.

undiscretized_features : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for the undiscretized_features parameter in fit.
Returns
self : object
    The updated object.
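For illustration, a hedged sketch of how this request interacts with a meta-estimator (assumes a scikit-learn version whose Pipeline supports metadata routing; the data and weights are placeholders):

import numpy as np
import sklearn
from sklearn.pipeline import Pipeline
from imodels import FPSkopeClassifier  # assumed top-level export

sklearn.set_config(enable_metadata_routing=True)

# Ask meta-estimators to forward sample_weight to this estimator's fit.
clf = FPSkopeClassifier().set_fit_request(sample_weight=True)
pipe = Pipeline([('fpskope', clf)])

rng = np.random.RandomState(0)
X = rng.randint(0, 2, size=(50, 3))
y = rng.randint(0, 2, size=50)
pipe.fit(X, y, sample_weight=np.ones(50))  # routed to FPSkopeClassifier.fit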
Inherited members