Expand source code
from copy import deepcopy, copy
from typing import List, Generator, Optional
from imodels.util.checks import check_is_fitted
import numpy as np
import pandas as pd
from .data import Data
from .initializers.initializer import Initializer
from .initializers.sklearntreeinitializer import SklearnTreeInitializer
from .sigma import Sigma
from .split import Split
from .tree import Tree, LeafNode, deep_copy_tree
class Model:
def __init__(self,
data: Optional[Data],
sigma: Sigma,
trees: Optional[List[Tree]] = None,
n_trees: int = 50,
alpha: float = 0.95,
beta: float = 2.,
k: int = 2.,
initializer: Initializer = SklearnTreeInitializer(),
classification: bool = False):
self.data = deepcopy(data)
self.alpha = float(alpha)
self.beta = float(beta)
self.k = k
self._sigma = sigma
self._prediction = None
self._initializer = initializer
self._check_initializer()
self.classification = classification
if trees is None:
self.n_trees = n_trees
self._trees = self.initialize_trees()
if self._initializer is not None:
if hasattr(self._initializer._tree,"trees_"):
self.n_trees = len(self._initializer._tree.trees_)
self._trees = self.initialize_trees()
elif hasattr(self._initializer._tree, "figs"):
self.n_trees = len(self._initializer._tree.figs.trees_)
self._trees = self.initialize_trees()
# for tree in self.trees:
self._initializer.initialize_trees(self.refreshed_trees())
# self._initializer.initialize_trees(trees=self._trees)
else:
self.n_trees = len(trees)
self._trees = trees
def _check_initializer(self):
if not hasattr(self._initializer, "_tree"):
return
elif self._initializer._tree is None:
return
if not check_is_fitted(self._initializer._tree):
self._initializer._tree.fit(self.data.X.values, self.data.y.values)
def initialize_trees(self) -> List[Tree]:
trees = [Tree([LeafNode(Split(deepcopy(self.data)))]) for _ in range(self.n_trees)]
for tree in trees:
tree.update_y(tree.update_y(self.data.y.values / self.n_trees))
return trees
def residuals(self) -> np.ndarray:
return self.data.y.values - self.predict()
def unnormalized_residuals(self) -> np.ndarray:
return self.data.y.unnormalized_y - self.data.y.unnormalize_y(self.predict())
def predict(self, X: np.ndarray = None) -> np.ndarray:
if X is not None:
return self._out_of_sample_predict(X)
return np.sum([tree.predict() for tree in self.trees], axis=0)
def _out_of_sample_predict(self, X: np.ndarray) -> np.ndarray:
if type(X) == pd.DataFrame:
X: pd.DataFrame = X
X = X.values
return np.sum([tree.predict(X) for tree in self.trees], axis=0)
@property
def trees(self) -> List[Tree]:
return self._trees
def refreshed_trees(self) -> Generator[Tree, None, None]:
if self._prediction is None:
self._prediction = self.predict()
for tree in self._trees:
self._prediction -= tree.predict()
tree.update_y(self.data.y.values - self._prediction)
yield tree
self._prediction += tree.predict()
def update_z_values(self, y):
if not self.classification:
return
z = np.random.normal(loc=self.predict(self.data.X.values))
one_label = np.maximum(z[y == 1], 0)
zero_label = np.minimum(z[y == 0], 0)
z[y == 1] = one_label
z[y == 0] = zero_label
self.data.update_y(z)
@property
def sigma_m(self) -> float:
if self.classification:
return 3 / (self.k * np.power(self.n_trees, 0.5))
return 0.5 / (self.k * np.power(self.n_trees, 0.5))
@property
def sigma(self) -> Sigma:
return self._sigma
def deep_copy_model(model: Model) -> Model:
copied_model = Model(None, deepcopy(model.sigma), [deep_copy_tree(tree) for tree in model.trees])
return copied_model
Functions
def deep_copy_model(model: Model) ‑> Model
-
Expand source code
def deep_copy_model(model: Model) -> Model: copied_model = Model(None, deepcopy(model.sigma), [deep_copy_tree(tree) for tree in model.trees]) return copied_model
Classes
class Model (data: Optional[Data], sigma: Sigma, trees: Optional[List[Tree]] = None, n_trees: int = 50, alpha: float = 0.95, beta: float = 2.0, k: int = 2.0, initializer: Initializer = <imodels.experimental.bartpy.initializers.sklearntreeinitializer.SklearnTreeInitializer object>, classification: bool = False)
-
Expand source code
class Model: def __init__(self, data: Optional[Data], sigma: Sigma, trees: Optional[List[Tree]] = None, n_trees: int = 50, alpha: float = 0.95, beta: float = 2., k: int = 2., initializer: Initializer = SklearnTreeInitializer(), classification: bool = False): self.data = deepcopy(data) self.alpha = float(alpha) self.beta = float(beta) self.k = k self._sigma = sigma self._prediction = None self._initializer = initializer self._check_initializer() self.classification = classification if trees is None: self.n_trees = n_trees self._trees = self.initialize_trees() if self._initializer is not None: if hasattr(self._initializer._tree,"trees_"): self.n_trees = len(self._initializer._tree.trees_) self._trees = self.initialize_trees() elif hasattr(self._initializer._tree, "figs"): self.n_trees = len(self._initializer._tree.figs.trees_) self._trees = self.initialize_trees() # for tree in self.trees: self._initializer.initialize_trees(self.refreshed_trees()) # self._initializer.initialize_trees(trees=self._trees) else: self.n_trees = len(trees) self._trees = trees def _check_initializer(self): if not hasattr(self._initializer, "_tree"): return elif self._initializer._tree is None: return if not check_is_fitted(self._initializer._tree): self._initializer._tree.fit(self.data.X.values, self.data.y.values) def initialize_trees(self) -> List[Tree]: trees = [Tree([LeafNode(Split(deepcopy(self.data)))]) for _ in range(self.n_trees)] for tree in trees: tree.update_y(tree.update_y(self.data.y.values / self.n_trees)) return trees def residuals(self) -> np.ndarray: return self.data.y.values - self.predict() def unnormalized_residuals(self) -> np.ndarray: return self.data.y.unnormalized_y - self.data.y.unnormalize_y(self.predict()) def predict(self, X: np.ndarray = None) -> np.ndarray: if X is not None: return self._out_of_sample_predict(X) return np.sum([tree.predict() for tree in self.trees], axis=0) def _out_of_sample_predict(self, X: np.ndarray) -> np.ndarray: if type(X) == pd.DataFrame: X: pd.DataFrame = X X = X.values return np.sum([tree.predict(X) for tree in self.trees], axis=0) @property def trees(self) -> List[Tree]: return self._trees def refreshed_trees(self) -> Generator[Tree, None, None]: if self._prediction is None: self._prediction = self.predict() for tree in self._trees: self._prediction -= tree.predict() tree.update_y(self.data.y.values - self._prediction) yield tree self._prediction += tree.predict() def update_z_values(self, y): if not self.classification: return z = np.random.normal(loc=self.predict(self.data.X.values)) one_label = np.maximum(z[y == 1], 0) zero_label = np.minimum(z[y == 0], 0) z[y == 1] = one_label z[y == 0] = zero_label self.data.update_y(z) @property def sigma_m(self) -> float: if self.classification: return 3 / (self.k * np.power(self.n_trees, 0.5)) return 0.5 / (self.k * np.power(self.n_trees, 0.5)) @property def sigma(self) -> Sigma: return self._sigma
Instance variables
var sigma : Sigma
-
Expand source code
@property def sigma(self) -> Sigma: return self._sigma
var sigma_m : float
-
Expand source code
@property def sigma_m(self) -> float: if self.classification: return 3 / (self.k * np.power(self.n_trees, 0.5)) return 0.5 / (self.k * np.power(self.n_trees, 0.5))
var trees : List[Tree]
-
Expand source code
@property def trees(self) -> List[Tree]: return self._trees
Methods
def initialize_trees(self) ‑> List[Tree]
-
Expand source code
def initialize_trees(self) -> List[Tree]: trees = [Tree([LeafNode(Split(deepcopy(self.data)))]) for _ in range(self.n_trees)] for tree in trees: tree.update_y(tree.update_y(self.data.y.values / self.n_trees)) return trees
def predict(self, X: numpy.ndarray = None) ‑> numpy.ndarray
-
Expand source code
def predict(self, X: np.ndarray = None) -> np.ndarray: if X is not None: return self._out_of_sample_predict(X) return np.sum([tree.predict() for tree in self.trees], axis=0)
def refreshed_trees(self) ‑> Generator[Tree, None, None]
-
Expand source code
def refreshed_trees(self) -> Generator[Tree, None, None]: if self._prediction is None: self._prediction = self.predict() for tree in self._trees: self._prediction -= tree.predict() tree.update_y(self.data.y.values - self._prediction) yield tree self._prediction += tree.predict()
def residuals(self) ‑> numpy.ndarray
-
Expand source code
def residuals(self) -> np.ndarray: return self.data.y.values - self.predict()
def unnormalized_residuals(self) ‑> numpy.ndarray
-
Expand source code
def unnormalized_residuals(self) -> np.ndarray: return self.data.y.unnormalized_y - self.data.y.unnormalize_y(self.predict())
def update_z_values(self, y)
-
Expand source code
def update_z_values(self, y): if not self.classification: return z = np.random.normal(loc=self.predict(self.data.X.values)) one_label = np.maximum(z[y == 1], 0) zero_label = np.minimum(z[y == 0], 0) z[y == 1] = one_label z[y == 0] = zero_label self.data.update_y(z)