Expand source code
import numpy as np
from sklearn.base import RegressorMixin, clone
try:
from sklearn.linear_model.base import LinearRegression
except:
from sklearn.linear_model._base import LinearRegression
from ..sklearnmodel import SklearnModel
class ResidualBART(SklearnModel):
    """BART model fitted on the residuals of a first-stage regressor.

    ``fit`` trains the base estimator on the targets and then trains the
    underlying ``SklearnModel`` on whatever the base estimator leaves
    unexplained. ``predict`` returns the sum of the two components.

    Parameters
    ----------
    base_estimator: RegressorMixin
        First-stage regressor. A clone of it is stored rather than the
        object itself. When ``None``, a new ``LinearRegression`` is used.
    **kwargs
        Passed through unchanged to ``SklearnModel.__init__``.
    """

    def __init__(self,
                 base_estimator: RegressorMixin = None,
                 **kwargs):
        if base_estimator is None:
            first_stage = LinearRegression()
        else:
            # Work on a copy so the estimator handed in by the caller is
            # not the object this model fits.
            first_stage = clone(base_estimator)
        self.base_estimator = first_stage
        super().__init__(**kwargs)

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'ResidualBART':
        """Fit the base estimator, then fit BART on its residuals.

        Parameters
        ----------
        X: np.ndarray
            Covariates used by both stages.
        y: np.ndarray
            Target values.

        Returns
        -------
        ResidualBART
            This instance, to allow call chaining.
        """
        self.base_estimator.fit(X, y)
        # The BART stage only models what the first stage could not explain.
        residuals = y - self.base_estimator.predict(X)
        SklearnModel.fit(self, X, residuals)
        return self

    def predict(self, X: np.ndarray = None) -> np.ndarray:
        """Return base-estimator predictions plus BART residual predictions.

        Parameters
        ----------
        X: np.ndarray
            Covariates to predict for. When ``None``, ``self.data.X`` is
            used instead.

        Returns
        -------
        np.ndarray
            Element-wise sum of the two stages' predictions.
        """
        # NOTE(review): ``self.data`` is not assigned anywhere in this class;
        # presumably it is set by ``SklearnModel.fit`` - confirm that
        # ``self.data.X`` is accepted by ``base_estimator.predict``.
        features = self.data.X if X is None else X
        base_part = self.base_estimator.predict(features)
        residual_part = SklearnModel.predict(self, features)
        return base_part + residual_part
Classes
class ResidualBART (base_estimator: sklearn.base.RegressorMixin = None, **kwargs)-
The main access point to building BART models in BartPy
Parameters
n_trees:int- the number of trees to use, more trees will make a smoother fit, but slow training and fitting
n_chains:int- the number of independent chains to run; more chains will improve the quality of the samples, but will require more computation
sigma_a:float- shape parameter of the prior on sigma
sigma_b:float- scale parameter of the prior on sigma
n_samples:int- how many recorded samples to take
n_burn:int- how many samples to run without recording to reach convergence
thin:float- percentage of samples to store. use this to save memory when running large models
p_grow:float- probability of choosing a grow mutation in tree mutation sampling
p_prune:float- probability of choosing a prune mutation in tree mutation sampling
alpha:float- prior parameter on tree structure
beta:float- prior parameter on tree structure
store_in_sample_predictions:bool- whether to store full prediction samples; set to False if you don't need in-sample results - saves a lot of memory
store_acceptance_trace:bool- whether to store acceptance rates of the Gibbs samples; useful for diagnostics - unless you're very memory constrained, you wouldn't want to set this to False
tree_sampler:TreeMutationSampler- Method of sampling used on trees; defaults to bartpy.samplers.unconstrainedtree
initializer:Initializer- Class that handles the initialization of tree structure and leaf values
n_jobs:int- how many cores to use when computing MCMC samples; set to -1 to use all cores
Expand source code
class ResidualBART(SklearnModel):
    def __init__(self,
                 base_estimator: RegressorMixin = None,
                 **kwargs):
        if base_estimator is not None:
            self.base_estimator = clone(base_estimator)
        else:
            base_estimator = LinearRegression()
            self.base_estimator = base_estimator
        super().__init__(**kwargs)

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'ResidualBART':
        self.base_estimator.fit(X, y)
        SklearnModel.fit(self, X, y - self.base_estimator.predict(X))
        return self

    def predict(self, X: np.ndarray=None) -> np.ndarray:
        if X is None:
            X = self.data.X
        sm_prediction = self.base_estimator.predict(X)
        bart_prediction = SklearnModel.predict(self, X)
        return sm_prediction + bart_prediction
Ancestors
- SklearnModel
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- sklearn.base.RegressorMixin
Inherited members