Expand source code
import numpy as np
from sklearn.base import RegressorMixin, clone
try:
    from sklearn.linear_model.base import LinearRegression  # older scikit-learn releases
except ImportError:
    from sklearn.linear_model._base import LinearRegression  # newer scikit-learn releases


from ..sklearnmodel import SklearnModel


class ResidualBART(SklearnModel):

    def __init__(self,
                 base_estimator: RegressorMixin = None,
                 **kwargs):

        # Clone a user-supplied estimator so the caller's object is left untouched;
        # otherwise fall back to a plain LinearRegression.
        if base_estimator is not None:
            base_estimator = clone(base_estimator)
        else:
            base_estimator = LinearRegression()
        self.base_estimator = base_estimator
        super().__init__(**kwargs)

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'ResidualBART':
        # Fit the base estimator to the raw targets, then fit BART to what it misses.
        self.base_estimator.fit(X, y)
        SklearnModel.fit(self, X, y - self.base_estimator.predict(X))
        return self

    def predict(self, X: np.ndarray = None) -> np.ndarray:
        # The final prediction is the base estimator's prediction plus BART's
        # model of the residual.
        if X is None:
            X = self.data.X
        sm_prediction = self.base_estimator.predict(X)
        bart_prediction = SklearnModel.predict(self, X)
        return sm_prediction + bart_prediction
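
The pattern above is generic residual stacking: fit a simple base estimator, then fit a second model to whatever it misses, and predict with the sum. A minimal sketch of the same idea using only scikit-learn, with GradientBoostingRegressor standing in for the BART stage (purely illustrative; not how bartpy is implemented internally):

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.default_rng(0)
X = rng.uniform(-3, 3, size=(200, 1))
y = 2.0 * X[:, 0] + np.sin(3 * X[:, 0]) + rng.normal(0.0, 0.1, size=200)

base = LinearRegression().fit(X, y)                                     # capture the linear trend
resid_model = GradientBoostingRegressor().fit(X, y - base.predict(X))  # fit the leftover structure
y_hat = base.predict(X) + resid_model.predict(X)                        # combined prediction, as in ResidualBART.predict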

Classes

class ResidualBART (base_estimator: sklearn.base.RegressorMixin = None, **kwargs)

The main access point to building BART models in BartPy. ResidualBART first fits a base estimator to the data, then fits a BART model to the residuals; predictions are the sum of the two.

Parameters

n_trees : int
the number of trees to use; more trees make a smoother fit, but slow down training and prediction
n_chains : int
the number of independent chains to run; more chains improve the quality of the samples, but require more computation
sigma_a : float
shape parameter of the prior on sigma
sigma_b : float
scale parameter of the prior on sigma
n_samples : int
how many recorded samples to take
n_burn : int
how many samples to run without recording to reach convergence
thin : float
percentage of samples to store; use this to save memory when running large models
p_grow : float
probability of choosing a grow mutation in tree mutation sampling
p_prune : float
probability of choosing a prune mutation in tree mutation sampling
alpha : float
prior parameter on tree structure
beta : float
prior parameter on tree structure
store_in_sample_predictions : bool
whether to store full prediction samples; set to False if you don't need in-sample results, which saves a lot of memory
store_acceptance_trace : bool
whether to store acceptance rates of the Gibbs samples; useful for diagnostics, so unless you're very memory constrained you wouldn't want to set this to False
tree_sampler : TreeMutationSampler
Method of sampling used on trees; defaults to bartpy.samplers.unconstrainedtree
initializer : Initializer
Class that handles the initialization of tree structure and leaf values
n_jobs : int
how many cores to use when computing MCMC samples; set to -1 to use all cores
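
A minimal usage sketch, assuming ResidualBART is importable as shown (the exact module path may vary between bartpy versions) and using keyword arguments from the parameter list above:

import numpy as np
from sklearn.linear_model import LinearRegression
from bartpy.extensions.baseestimator import ResidualBART  # import path assumed

rng = np.random.default_rng(0)
X = rng.uniform(0, 1, size=(200, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(0.0, 0.1, size=200)

# The base estimator captures the linear trend; BART models the residuals.
model = ResidualBART(base_estimator=LinearRegression(), n_trees=50, n_samples=200, n_burn=200)
model.fit(X, y)
y_hat = model.predict(X)  # base-estimator prediction plus BART residual prediction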

Ancestors

  • SklearnModel
  • sklearn.base.BaseEstimator
  • sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
  • sklearn.utils._metadata_requests._MetadataRequester
  • sklearn.base.RegressorMixin

Inherited members