Expand source code
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
import imodels
import imodels.algebraic.gam_multitask
class ResidualBoostingRegressor(BaseEstimator, RegressorMixin):
def __init__(self, estimator, n_estimators=10):
"""
A meta-estimator that fits a base estimator to the residuals of the
previous estimators.
Parameters:
- estimator: The estimator to fit on the residual of the previous step.
- n_estimators: The number of estimators to fit.
"""
self.estimator = estimator
self.n_estimators = n_estimators
def fit(self, X, y):
"""
Fit the ensemble of base estimators on the training data.
Parameters:
- X: array-like of shape (n_samples, n_features)
Training data.
- y: array-like of shape (n_samples,)
Target values.
Returns:
- self: object
"""
# Check that X and y have correct shape
X, y = check_X_y(X, y)
self.estimators_ = []
current_prediction = np.zeros(y.shape)
for _ in range(self.n_estimators):
residual = y - current_prediction
estimator = clone(self.estimator)
estimator.fit(X, residual)
self.estimators_.append(estimator)
current_prediction += estimator.predict(X)
return self
def predict(self, X):
"""
Predict regression target for X.
Parameters:
- X: array-like of shape (n_samples, n_features)
The input samples.
Returns:
- y_pred: ndarray of shape (n_samples,)
The predicted values.
"""
# Check is fit had been called
check_is_fitted(self)
# Input validation
X = check_array(X)
predictions = sum(estimator.predict(X)
for estimator in self.estimators_)
return predictions
class SimpleBaggingRegressor:
def __init__(self, estimator, n_estimators=10, random_state=None):
self.estimator = estimator
self.n_estimators = n_estimators
self.random_state = random_state
    def fit(self, X, y):
        self.estimators_ = []
        rng = np.random.default_rng(self.random_state)
        n_samples = X.shape[0]
        for _ in range(self.n_estimators):
            # Bootstrap sampling: draw n_samples row indices with replacement
            sample_indices = rng.choice(n_samples, size=n_samples, replace=True)
            X_sample = X[sample_indices]
            y_sample = y[sample_indices]
            # Fit a fresh clone of the base estimator on the bootstrap sample
            estimator = clone(self.estimator)
            estimator.fit(X_sample, y_sample)
            self.estimators_.append(estimator)
        return self
def predict(self, X):
# Collect predictions from each base estimator
predictions = np.array([estimator.predict(X)
for estimator in self.estimators_])
# Aggregate predictions
return np.mean(predictions, axis=0)
if __name__ == '__main__':
X, y, feature_names = imodels.get_clean_dataset('california_housing')
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42)
X_train = X_train[:50, :2]
y_train = y_train[:50]
X_test = X_test[:50, :2]
y_test = y_test[:50]
# estimator = DecisionTreeRegressor(max_depth=3)
estimator = imodels.algebraic.gam_multitask.MultiTaskGAMRegressor()
for n_estimators in [1, 3, 5]:
        # Swap in ResidualBoostingRegressor here to compare boosting with bagging:
        # regressor = ResidualBoostingRegressor(
        #     estimator=estimator, n_estimators=n_estimators)
        regressor = SimpleBaggingRegressor(
            estimator=estimator, n_estimators=n_estimators)
        regressor.fit(X_train, y_train)
        y_pred = regressor.predict(X_test)
        mse_train = mean_squared_error(y_train, regressor.predict(X_train))
        mse = mean_squared_error(y_test, y_pred)
        print(f'MSE with {n_estimators} estimators: {mse:.2f} (train: {mse_train:.2f})')
Classes
class ResidualBoostingRegressor (estimator, n_estimators=10)
Base class for all estimators in scikit-learn.
Inheriting from this class provides default implementations of:
- setting and getting parameters used by GridSearchCV and friends;
- textual and HTML representation displayed in terminals and IDEs;
- estimator serialization;
- parameters validation;
- data validation;
- feature names validation.
Read more in the scikit-learn User Guide (rolling_your_own_estimator).
Notes
All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).
Examples
>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
A meta-estimator that fits a base estimator to the residuals of the previous estimators.
Parameters:
- estimator: The estimator to fit on the residual of the previous step.
- n_estimators: The number of estimators to fit.
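A minimal usage sketch (the synthetic data and the shallow DecisionTreeRegressor base are illustrative assumptions, not part of the class):

import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = X[:, 0] + X[:, 1] ** 2 + rng.normal(scale=0.1, size=200)

booster = ResidualBoostingRegressor(
    estimator=DecisionTreeRegressor(max_depth=2), n_estimators=10)
booster.fit(X, y)
y_pred = booster.predict(X)  # sum of the 10 stage-wise predictions

Training error typically keeps falling as stages are added, so held-out error is the better guide for choosing n_estimators.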
Ancestors
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- sklearn.base.RegressorMixin
Methods
def fit(self, X, y)
Fit the ensemble of base estimators on the training data.
Parameters:
- X: array-like of shape (n_samples, n_features). Training data.
- y: array-like of shape (n_samples,). Target values.
Returns:
- self: object
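Concretely, stage m is fit to the residual r_m = y - (h_1(X) + ... + h_{m-1}(X)) of the stages trained so far, so the fitted ensemble predicts h_1(X) + ... + h_M(X), which is exactly the sum that predict returns.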
def predict(self, X)
Predict regression target for X.
Parameters:
- X: array-like of shape (n_samples, n_features). The input samples.
Returns:
- y_pred: ndarray of shape (n_samples,). The predicted values.
def set_score_request(self, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> ResidualBoostingRegressor
Request metadata passed to the score method.
Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the scikit-learn User Guide on metadata routing for how the routing mechanism works.
The options for each parameter are:
- True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to score.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.
The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.
Added in version 1.3.
Note: this method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.
Parameters
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for the sample_weight parameter in score.
Returns
self : object
    The updated object.
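A short sketch of how this request is typically set; assumes scikit-learn >= 1.3, since the call raises a RuntimeError unless metadata routing has been enabled explicitly:

from sklearn import set_config
from sklearn.tree import DecisionTreeRegressor

set_config(enable_metadata_routing=True)
model = ResidualBoostingRegressor(estimator=DecisionTreeRegressor())
# Ask meta-estimators (e.g. a Pipeline) to route sample_weight to score()
model = model.set_score_request(sample_weight=True)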
class SimpleBaggingRegressor (estimator, n_estimators=10, random_state=None)
A minimal bagging ensemble for regression: fit clones the base estimator n_estimators times, trains each clone on a bootstrap resample of the training data, and predict averages their predictions.
Parameters:
- estimator: The base estimator cloned and fit on each bootstrap sample.
- n_estimators: The number of estimators to fit.
- random_state: Seed for the bootstrap resampling.
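A minimal usage sketch (the data and the DecisionTreeRegressor base are illustrative assumptions):

import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.default_rng(1)
X = rng.normal(size=(200, 3))
y = X[:, 0] - X[:, 2] + rng.normal(scale=0.1, size=200)

bagger = SimpleBaggingRegressor(
    estimator=DecisionTreeRegressor(max_depth=3),
    n_estimators=10, random_state=0)
bagger.fit(X, y)
y_pred = bagger.predict(X)  # mean over the 10 bootstrap models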
Methods
def fit(self, X, y)
Fit n_estimators clones of the base estimator, each on a bootstrap resample of (X, y) drawn with replacement, and return self.
def predict(self, X)
Predict regression targets for X by averaging the predictions of all fitted base estimators; returns an ndarray of shape (n_samples,).