Expand source code
from copy import deepcopy
import numpy as np
from matplotlib import pyplot as plt
from sklearn.base import BaseEstimator
try:
from sklearn.feature_selection.base import SelectorMixin
except:
from sklearn.feature_selection._base import SelectorMixin
from ..diagnostics.features import null_feature_split_proportions_distribution, \
local_thresholds, global_thresholds, is_kept, feature_split_proportions, \
plot_feature_proportions_against_thresholds, plot_null_feature_importance_distributions, \
plot_feature_split_proportions
from ..sklearnmodel import SklearnModel
class SelectSplitProportionThreshold(BaseEstimator, SelectorMixin):
    """Select features whose split proportion is above a fixed cutoff.

    A private deep copy of ``model`` is fitted in :meth:`fit`. A feature is
    kept when its split proportion is strictly greater than ``percentile``.

    Parameters
    ----------
    model : SklearnModel
        Model that is deep-copied and later fitted by this selector.
    percentile : float, default=0.2
        Cutoff compared directly against each feature's split proportion.
        NOTE(review): despite the name, the code uses this value as a raw
        threshold, not as a percentile of any distribution.
    """

    def __init__(self, model: SklearnModel, percentile: float = 0.2):
        # Fit a copy so that the caller's model instance is left untouched.
        self.model = deepcopy(model)
        self.percentile = percentile

    def fit(self, X, y):
        """Fit the copied model on ``X``/``y`` and store the split proportions.

        The training data is kept on the instance as ``self.X`` / ``self.y``.
        Returns ``self``.
        """
        self.model.fit(X, y)
        self.X = X
        self.y = y
        self.feature_proportions = feature_split_proportions(self.model)
        return self

    def _get_support_mask(self):
        """Return an array with one flag per stored proportion (True = keep)."""
        cutoff = self.percentile
        # feature_proportions is used as a mapping; only its values matter here.
        flags = [share > cutoff for share in self.feature_proportions.values()]
        return np.array(flags)

    def plot(self):
        """Draw the split proportions of the copied model and show the figure."""
        plot_feature_split_proportions(self.model)
        plt.show()
class SelectNullDistributionThreshold(BaseEstimator, SelectorMixin):
    """Select features by comparing split proportions to null-distribution thresholds.

    A private deep copy of ``model`` is fitted in :meth:`fit`. Thresholds are
    derived from a null distribution of split proportions using either the
    ``local_thresholds`` or the ``global_thresholds`` helper, and ``is_kept``
    decides which features survive.

    Parameters
    ----------
    model : SklearnModel
        Model that is deep-copied and later fitted by this selector.
    percentile : float, default=0.95
        Passed to the threshold helper together with the null distribution.
    method : {"local", "global"}, default="local"
        Which threshold helper to use.
    n_permutations : int, default=10
        Forwarded to ``null_feature_split_proportions_distribution``.
    n_trees : int or None, default=None
        When not None, overrides ``n_trees`` on the copied model.

    Raises
    ------
    NotImplementedError
        If ``method`` is neither ``"local"`` nor ``"global"``.
    """

    def __init__(self,
                 model: SklearnModel,
                 percentile: float = 0.95,
                 method="local",
                 n_permutations=10,
                 n_trees=None):
        if method == "local":
            self.method = local_thresholds
        elif method == "global":
            self.method = global_thresholds
        else:
            # Report the offending argument. self.method is not assigned on
            # this path, so formatting it would raise AttributeError and hide
            # the intended NotImplementedError.
            raise NotImplementedError(
                "Currently only local and global methods are supported, found {}".format(method))
        # Fit a copy so that the caller's model instance is left untouched.
        self.model = deepcopy(model)
        if n_trees is not None:
            self.model.n_trees = n_trees
        self.percentile = percentile
        self.n_permutations = n_permutations

    def fit(self, X, y):
        """Fit the copied model, then compute null distribution, thresholds and proportions.

        The training data is kept on the instance as ``self.X`` / ``self.y``.
        Returns ``self``.
        """
        self.model.fit(X, y)
        self.X, self.y = X, y
        self.null_distribution = null_feature_split_proportions_distribution(
            self.model, X, y, self.n_permutations)
        self.thresholds = self.method(self.null_distribution, self.percentile)
        self.feature_proportions = feature_split_proportions(self.model)
        return self

    def _get_support_mask(self):
        """Return the ``is_kept`` result for the fitted proportions as an array."""
        return np.array(is_kept(self.feature_proportions, self.thresholds))

    def plot(self):
        """Show proportions against thresholds (top) and the null distributions (bottom)."""
        _, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))
        plot_feature_proportions_against_thresholds(self.feature_proportions, self.thresholds, ax1)
        plot_null_feature_importance_distributions(self.null_distribution, ax2)
        plt.show()
Classes
class SelectNullDistributionThreshold (model: SklearnModel, percentile: float = 0.95, method='local', n_permutations=10, n_trees=None)
-
Base class for all estimators in scikit-learn.
Inheriting from this class provides default implementations of:
- setting and getting parameters used by `GridSearchCV` and friends;
- textual and HTML representation displayed in terminals and IDEs;
- estimator serialization;
- parameters validation;
- data validation;
- feature names validation.
Read more in the :ref:`User Guide <rolling_your_own_estimator>`.

Notes
-----
All estimators should specify all the parameters that can be set at the class level in their `__init__` as explicit keyword arguments (no `*args` or `**kwargs`).

Examples
--------
>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
Expand source code
class SelectNullDistributionThreshold(BaseEstimator, SelectorMixin):
    """Select features by comparing split proportions to null-distribution thresholds.

    A private deep copy of ``model`` is fitted in :meth:`fit`. Thresholds are
    derived from a null distribution of split proportions using either the
    ``local_thresholds`` or the ``global_thresholds`` helper, and ``is_kept``
    decides which features survive.

    Raises ``NotImplementedError`` when ``method`` is neither ``"local"`` nor
    ``"global"``.
    """

    def __init__(self,
                 model: SklearnModel,
                 percentile: float = 0.95,
                 method="local",
                 n_permutations=10,
                 n_trees=None):
        if method == "local":
            self.method = local_thresholds
        elif method == "global":
            self.method = global_thresholds
        else:
            # Report the offending argument. self.method is not assigned on
            # this path, so formatting it would raise AttributeError and hide
            # the intended NotImplementedError.
            raise NotImplementedError(
                "Currently only local and global methods are supported, found {}".format(method))
        # Fit a copy so that the caller's model instance is left untouched.
        self.model = deepcopy(model)
        if n_trees is not None:
            # Optional override of the copied model's tree count.
            self.model.n_trees = n_trees
        self.percentile = percentile
        self.n_permutations = n_permutations

    def fit(self, X, y):
        """Fit the copied model, then compute null distribution, thresholds and proportions."""
        self.model.fit(X, y)
        self.X, self.y = X, y
        self.null_distribution = null_feature_split_proportions_distribution(
            self.model, X, y, self.n_permutations)
        self.thresholds = self.method(self.null_distribution, self.percentile)
        self.feature_proportions = feature_split_proportions(self.model)
        return self

    def _get_support_mask(self):
        """Return the ``is_kept`` result for the fitted proportions as an array."""
        return np.array(is_kept(self.feature_proportions, self.thresholds))

    def plot(self):
        """Show proportions against thresholds (top) and the null distributions (bottom)."""
        _, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))
        plot_feature_proportions_against_thresholds(self.feature_proportions, self.thresholds, ax1)
        plot_null_feature_importance_distributions(self.null_distribution, ax2)
        plt.show()
Ancestors
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- sklearn.feature_selection._base.SelectorMixin
- sklearn.base.TransformerMixin
- sklearn.utils._set_output._SetOutputMixin
Methods
def fit(self, X, y)
-
Expand source code
def fit(self, X, y):
    """Fit the copied model, then derive null distribution, thresholds and proportions.

    Stores the training data on the instance and returns ``self``.
    """
    self.model.fit(X, y)
    self.X = X
    self.y = y
    null_dist = null_feature_split_proportions_distribution(
        self.model, X, y, self.n_permutations)
    self.null_distribution = null_dist
    # self.method is the threshold helper selected in __init__.
    self.thresholds = self.method(null_dist, self.percentile)
    self.feature_proportions = feature_split_proportions(self.model)
    return self
def plot(self)
-
Expand source code
def plot(self):
    """Show proportions against thresholds (top) and the null distributions (bottom)."""
    figure, axes = plt.subplots(2, 1, figsize=(10, 10))
    top_axis, bottom_axis = axes
    plot_feature_proportions_against_thresholds(
        self.feature_proportions, self.thresholds, top_axis)
    plot_null_feature_importance_distributions(self.null_distribution, bottom_axis)
    plt.show()
- setting and getting parameters used by
class SelectSplitProportionThreshold (model: SklearnModel, percentile: float = 0.2)
-
Base class for all estimators in scikit-learn.
Inheriting from this class provides default implementations of:
- setting and getting parameters used by `GridSearchCV` and friends;
- textual and HTML representation displayed in terminals and IDEs;
- estimator serialization;
- parameters validation;
- data validation;
- feature names validation.
Read more in the :ref:`User Guide <rolling_your_own_estimator>`.

Notes
-----
All estimators should specify all the parameters that can be set at the class level in their `__init__` as explicit keyword arguments (no `*args` or `**kwargs`).

Examples
--------
>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
Expand source code
class SelectSplitProportionThreshold(BaseEstimator, SelectorMixin):
    """Select features whose split proportion is above a fixed cutoff.

    A private deep copy of ``model`` is fitted in :meth:`fit`. A feature is
    kept when its split proportion is strictly greater than ``percentile``.
    """

    def __init__(self, model: SklearnModel, percentile: float = 0.2):
        # Fit a copy so that the caller's model instance is left untouched.
        self.model = deepcopy(model)
        self.percentile = percentile

    def fit(self, X, y):
        """Fit the copied model on ``X``/``y`` and store the split proportions."""
        self.model.fit(X, y)
        self.X = X
        self.y = y
        self.feature_proportions = feature_split_proportions(self.model)
        return self

    def _get_support_mask(self):
        """Return an array with one flag per stored proportion (True = keep)."""
        cutoff = self.percentile
        flags = [share > cutoff for share in self.feature_proportions.values()]
        return np.array(flags)

    def plot(self):
        """Draw the split proportions of the copied model and show the figure."""
        plot_feature_split_proportions(self.model)
        plt.show()
Ancestors
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- sklearn.feature_selection._base.SelectorMixin
- sklearn.base.TransformerMixin
- sklearn.utils._set_output._SetOutputMixin
Methods
def fit(self, X, y)
-
Expand source code
def fit(self, X, y):
    """Fit the copied model on ``X``/``y`` and store the split proportions.

    Stores the training data on the instance and returns ``self``.
    """
    self.model.fit(X, y)
    self.X = X
    self.y = y
    proportions = feature_split_proportions(self.model)
    self.feature_proportions = proportions
    return self
def plot(self)
-
Expand source code
def plot(self):
    """Draw the split proportions of the copied model and show the figure."""
    fitted_model = self.model
    plot_feature_split_proportions(fitted_model)
    plt.show()
- setting and getting parameters used by