Expand source code
import numpy as np
from sklearn.base import RegressorMixin, clone
try:
    from sklearn.linear_model.base import LinearRegression
except:
    from sklearn.linear_model._base import LinearRegression

from ..sklearnmodel import SklearnModel


class ResidualBART(SklearnModel):

    def __init__(self,
                 base_estimator: RegressorMixin = None,
                 **kwargs):
        if base_estimator is not None:
            self.base_estimator = clone(base_estimator)
        else:
            base_estimator = LinearRegression()
        self.base_estimator = base_estimator
        super().__init__(**kwargs)

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'ResidualBART':
        self.base_estimator.fit(X, y)
        SklearnModel.fit(self, X, y - self.base_estimator.predict(X))
        return self

    def predict(self, X: np.ndarray = None) -> np.ndarray:
        if X is None:
            X = self.data.X
        sm_prediction = self.base_estimator.predict(X)
        bart_prediction = SklearnModel.predict(self, X)
        return sm_prediction + bart_prediction
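ResidualBART first fits the base estimator to (X, y), then fits BART to the residuals y - base_estimator.predict(X); predictions are the sum of the two components. The following is a minimal usage sketch rather than part of the listing above: the import path, the synthetic data, and the choice of LinearRegression as base estimator are illustrative assumptions.

import numpy as np
from sklearn.linear_model import LinearRegression
from bartpy.extensions.baseestimator import ResidualBART  # module path assumed

# Toy data: a linear trend plus a non-linear bump for the BART residual model to pick up
rng = np.random.default_rng(0)
X = rng.uniform(-2, 2, size=(200, 3))
y = 2.0 * X[:, 0] + np.sin(3 * X[:, 1]) + rng.normal(scale=0.1, size=200)

model = ResidualBART(base_estimator=LinearRegression())
model.fit(X, y)            # fits LinearRegression, then BART on its residuals
y_hat = model.predict(X)   # base prediction + BART residual prediction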
Classes
class ResidualBART (base_estimator: sklearn.base.RegressorMixin = None, **kwargs)
The main access point to building BART models in BartPy
Parameters
n_trees: int
- the number of trees to use; more trees will make a smoother fit, but slow training and prediction
n_chains: int
- the number of independent chains to run; more chains will improve the quality of the samples, but will require more computation
sigma_a: float
- shape parameter of the prior on sigma
sigma_b: float
- scale parameter of the prior on sigma
n_samples: int
- how many recorded samples to take
n_burn: int
- how many samples to run without recording so the chains can reach convergence
thin: float
- proportion of samples to store; use this to save memory when running large models
p_grow: float
- probability of choosing a grow mutation in tree mutation sampling
p_prune: float
- probability of choosing a prune mutation in tree mutation sampling
alpha: float
- prior parameter on tree structure
beta: float
- prior parameter on tree structure
store_in_sample_predictions: bool
- whether to store full in-sample prediction samples; set to False if you don't need in-sample results, which saves a lot of memory
store_acceptance_trace: bool
- whether to store acceptance rates of the Gibbs samples; useful for diagnostics, so leave it enabled unless you are very memory constrained
tree_sampler: TreeMutationSampler
- method of sampling used on trees; defaults to bartpy.samplers.unconstrainedtree
initializer: Initializer
- class that handles the initialization of tree structure and leaf values
n_jobs: int
- how many cores to use when computing MCMC samples; set to -1 to use all cores
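As a hedged illustration of how the parameters above are forwarded to SklearnModel through **kwargs, the construction sketch below uses arbitrary example values; the Ridge base estimator and the import path are assumptions, not recommendations.

from sklearn.linear_model import Ridge
from bartpy.extensions.baseestimator import ResidualBART  # module path assumed

model = ResidualBART(
    base_estimator=Ridge(alpha=1.0),     # any sklearn regressor; defaults to LinearRegression when omitted
    n_trees=50,                          # fewer trees than the default for faster runs
    n_chains=4,
    n_samples=200,
    n_burn=200,
    thin=0.1,                            # keep roughly 10% of post-burn-in samples
    n_jobs=-1,                           # use all available cores
    store_in_sample_predictions=False,   # saves memory if in-sample results are not needed
)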
Expand source code
class ResidualBART(SklearnModel):

    def __init__(self,
                 base_estimator: RegressorMixin = None,
                 **kwargs):
        if base_estimator is not None:
            self.base_estimator = clone(base_estimator)
        else:
            base_estimator = LinearRegression()
        self.base_estimator = base_estimator
        super().__init__(**kwargs)

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'ResidualBART':
        self.base_estimator.fit(X, y)
        SklearnModel.fit(self, X, y - self.base_estimator.predict(X))
        return self

    def predict(self, X: np.ndarray = None) -> np.ndarray:
        if X is None:
            X = self.data.X
        sm_prediction = self.base_estimator.predict(X)
        bart_prediction = SklearnModel.predict(self, X)
        return sm_prediction + bart_prediction
Ancestors
- SklearnModel
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- sklearn.base.RegressorMixin
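Because ResidualBART ultimately inherits from sklearn.base.BaseEstimator and sklearn.base.RegressorMixin, the standard scikit-learn estimator helpers should also apply; a small sketch, continuing the fitted model from the earlier examples:

r2 = model.score(X, y)        # R^2 via RegressorMixin, computed from the combined prediction
params = model.get_params()   # BaseEstimator introspection; only base_estimator is an explicit __init__ parameter here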
Inherited members