Expand source code
from typing import Tuple, List
from operator import gt, le

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from imodels.tree.figs import FIGSRegressor, Node, FIGSRegressorCV
from sklearn.exceptions import NotFittedError
from sklearn.tree import DecisionTreeRegressor

from ..data import make_bartpy_data
from ..initializers.initializer import Initializer
from ..mutation import GrowMutation
from ..node import split_node, LeafNode
from ..split import Split
from ..splitcondition import SplitCondition
from ..tree import Tree, mutate


class SklearnTreeInitializer(Initializer):
    """
    Initialize tree structure and leaf node values from a pre-fitted sklearn
    (or imodels FIGS) ensemble.

    Both tree structure and leaf node parameters are copied across into the
    bartpy representation via ``map_sklearn_tree_into_bartpy``.
    """

    def __init__(self,
                 max_depth: int = 4,
                 min_samples_split: int = 2,
                 loss: str = 'squared_error',
                 tree_=None):
        super().__init__()
        # max_depth / min_samples_split / loss are kept for API compatibility;
        # they would parameterize a model fitted here if tree_ is not supplied.
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.loss = loss
        # Pre-fitted estimator whose trees supply the initial structure.
        self._tree = tree_

    def initialize_tree(self,
                        tree: Tree, tree_number: int) -> None:
        """Copy the ``tree_number``-th fitted tree's structure and leaf values into ``tree``.

        Parameters
        ----------
        tree: the bartpy Tree to initialize (mutated in place)
        tree_number: index of the source tree within the fitted estimator
        """
        sklearn_tree = self._get_sklearn_tree(tree_number)
        map_sklearn_tree_into_bartpy(tree, sklearn_tree)

    def _get_sklearn_tree(self, tree_number):
        """Return an sklearn ``tree_``-style object for the requested tree index.

        Raises
        ------
        ValueError: if ``self._tree`` is not one of the supported estimator types.
        """
        if isinstance(self._tree, GradientBoostingRegressor):
            # estimators_ has shape (n_estimators, K) with K == 1 for
            # regression, so the tree_number-th boosting stage lives at
            # estimators_[tree_number][0] (the previous [0][tree_number]
            # only worked for tree_number == 0).
            return self._tree.estimators_[tree_number][0].tree_
        elif isinstance(self._tree, DecisionTreeRegressor):
            return self._tree.tree_
        elif isinstance(self._tree, FIGSRegressor):
            return SkTree(self._tree.trees_[tree_number])
        elif isinstance(self._tree, FIGSRegressorCV):
            return SkTree(self._tree.figs.trees_[tree_number])
        elif isinstance(self._tree, RandomForestRegressor):
            # estimators_[i] is a DecisionTreeRegressor; the array-based
            # representation consumed downstream is its .tree_ attribute.
            return self._tree.estimators_[tree_number].tree_
        raise ValueError(
            f"Unsupported estimator type for tree initialization: {type(self._tree)}"
        )


def get_child(node, direction):
    """Fetch the 'left'/'right' child of *node*, tolerating either naming scheme.

    Some node types expose ``left``/``right`` attributes while others expose
    ``left_child``/``right_child``; returns None when neither attribute exists.
    """
    for attr_name in (direction, f"{direction}_child"):
        if hasattr(node, attr_name):
            return getattr(node, attr_name)
    return None


def enumerate_tree(tree: "Node", num_iter=None):
    """Assign a preorder index to every node, stored on ``node.number``.

    Parameters
    ----------
    tree: root node to number (may be None)
    num_iter: optional iterator of indices; a fresh one is created per call.

    Bug fix: the previous default ``num_iter=iter(range(int(1e+06)))`` was
    evaluated once at function-definition time, so a second call without an
    explicit iterator continued numbering where the first call stopped instead
    of restarting from 0.
    """
    if num_iter is None:
        num_iter = iter(range(int(1e+06)))
    if tree is None:
        return
    tree.number = next(num_iter)
    enumerate_tree(get_child(tree, 'left'), num_iter)
    enumerate_tree(get_child(tree, 'right'), num_iter)


def fill_nodes_dict(tree: Node, node_dict: dict):
    """Populate *node_dict* in preorder, mapping each node's ``number`` to the node.

    Assumes ``enumerate_tree`` has already assigned ``number`` attributes.
    """
    if tree is None:
        return
    node_dict[tree.number] = tree
    for side in ("left", "right"):
        fill_nodes_dict(get_child(tree, side), node_dict)


class SkTree:
    """Adapter exposing an imodels FIGS tree through sklearn's ``tree_`` array interface.

    Builds flat, preorder-indexed parallel arrays (children_left,
    children_right, feature, threshold, n_node_samples, impurity, value) so a
    FIGS tree can be consumed by code written against sklearn's internal
    tree representation.
    """

    def __init__(self, tree: Node):
        # Number nodes in preorder, then collect them keyed by that number;
        # insertion order therefore matches the index order of the arrays.
        enumerate_tree(tree, num_iter=iter(range(int(1e+06))))
        nodes_by_number = {}
        fill_nodes_dict(tree, nodes_by_number)

        self.children_left = []
        self.children_right = []
        self.feature = []
        self.threshold = []
        self.n_node_samples = []
        self.impurity = []
        self.value = []

        for current in nodes_by_number.values():
            left = get_child(current, "left")
            right = get_child(current, "right")
            # sklearn conventions: -1 marks a missing child; -2 marks the
            # feature/threshold slots of a leaf node.
            self.children_left.append(left.number if left else -1)
            self.children_right.append(right.number if right else -1)
            self.feature.append(current.feature if left else -2)
            self.threshold.append(current.threshold if left else -2)
            self.n_node_samples.append(np.sum(current.idxs))
            self.impurity.append(current.impurity_reduction)
            self.value.append(current.value)


class SkFigs:
    """Wrap every tree of a fitted FIGSRegressor as an SkTree adapter."""

    def __init__(self, figs: FIGSRegressor):
        self.trees_ = []
        for figs_tree in figs.trees_:
            self.trees_.append(SkTree(figs_tree))


def get_figs_sklearn(figs: FIGSRegressor):
    """Convert a fitted FIGSRegressor into an sklearn-like SkFigs wrapper."""
    wrapped = SkFigs(figs)
    return wrapped


def map_sklearn_split_into_bartpy_split_conditions(sklearn_tree, index: int) -> Tuple[SplitCondition, SplitCondition]:
    """
    Convert how a split is stored in sklearn's gradient boosted trees library to the bartpy representation

    Parameters
    ----------
    sklearn_tree: The full tree object
    index: The index of the node in the tree object

    Returns
    -------
    A (left, right) pair of conditions: feature <= threshold and feature > threshold.
    """
    feature = sklearn_tree.feature[index]
    threshold = sklearn_tree.threshold[index]
    left_condition = SplitCondition(feature, threshold, le)
    right_condition = SplitCondition(feature, threshold, gt)
    return left_condition, right_condition


def get_bartpy_tree_from_sklearn(sklearn_tree, X, y):
    """Build a bartpy tree mirroring *sklearn_tree* over the data (X, y).

    Starts from a single-leaf bartpy tree, grafts the sklearn structure onto
    it, and returns the first node of the resulting tree.
    """
    data = make_bartpy_data(X, y, normalize=False)
    target_tree = Tree([LeafNode(Split(data))])
    map_sklearn_tree_into_bartpy(target_tree, sklearn_tree)
    return target_tree.nodes[0]


def map_sklearn_tree_into_bartpy(bartpy_tree, sklearn_tree):
    """Graft the structure and leaf values of *sklearn_tree* onto *bartpy_tree*.

    Walks the sklearn tree arrays in preorder; at each internal node the
    corresponding bartpy leaf is split with the same (feature, threshold)
    condition, the new children receive the sklearn leaf values, and the split
    is registered on the tree as a grow mutation.
    """
    # bartpy node standing in for each sklearn node index, filled as we descend.
    bartpy_nodes = [None] * len(sklearn_tree.children_left)
    bartpy_nodes[0] = bartpy_tree.nodes[0]

    def search(index: int = 0):
        left_index = sklearn_tree.children_left[index]
        right_index = sklearn_tree.children_right[index]

        # Leaves are marked with -1; splits are binary, so checking the left
        # child alone is sufficient to detect a leaf.
        if left_index == -1:
            return

        current_leaf: LeafNode = bartpy_nodes[index]

        conditions = map_sklearn_split_into_bartpy_split_conditions(sklearn_tree, index)
        decision_node = split_node(current_leaf, conditions)

        # Copy the sklearn leaf predictions onto the freshly created children.
        decision_node.left_child.set_value(sklearn_tree.value[left_index][0][0])
        decision_node.right_child.set_value(sklearn_tree.value[right_index][0][0])

        # Register the split so the bartpy tree's bookkeeping stays in sync.
        mutate(bartpy_tree, GrowMutation(current_leaf, decision_node))

        bartpy_nodes[index] = decision_node
        bartpy_nodes[left_index] = decision_node.left_child
        bartpy_nodes[right_index] = decision_node.right_child

        search(left_index)
        search(right_index)

    search()

Functions

def enumerate_tree(tree: Node, num_iter=<range_iterator object>)
Expand source code
def enumerate_tree(tree: Node, num_iter=iter(range(int(1e+06)))):
    if tree is None:
        return
    tree.number = next(num_iter)
    # if hasattr(tree, 'left'):
    enumerate_tree(get_child(tree, 'left'), num_iter)
    # if hasattr(tree, 'right'):
    enumerate_tree(get_child(tree, 'right'), num_iter)
def fill_nodes_dict(tree: Node, node_dict: dict)
Expand source code
def fill_nodes_dict(tree: Node, node_dict: dict):
    if tree is None:
        return
    node_dict[tree.number] = tree
    fill_nodes_dict(get_child(tree, 'left'), node_dict)
    fill_nodes_dict(get_child(tree, 'right'), node_dict)

    # return node_dict
def get_bartpy_tree_from_sklearn(sklearn_tree, X, y)
Expand source code
def get_bartpy_tree_from_sklearn(sklearn_tree, X, y):
    data = make_bartpy_data(X, y, normalize=False)
    bartpy_tree = Tree([LeafNode(Split(data))])
    map_sklearn_tree_into_bartpy(bartpy_tree, sklearn_tree)
    return bartpy_tree.nodes[0]
def get_child(node, direction)
Expand source code
def get_child(node, direction):
    if hasattr(node, direction):
        return getattr(node, direction)
    elif hasattr(node, f"{direction}_child"):
        return getattr(node, f"{direction}_child")
    return
def get_figs_sklearn(figs: FIGSRegressor)
Expand source code
def get_figs_sklearn(figs: FIGSRegressor):
    return SkFigs(figs)
def map_sklearn_split_into_bartpy_split_conditions(sklearn_tree, index: int) -> Tuple[SplitCondition, SplitCondition]

Convert how a split is stored in sklearn's gradient boosted trees library to the bartpy representation

Parameters

sklearn_tree : The full tree object
 
index : The index of the node in the tree object
 

Returns

Expand source code
def map_sklearn_split_into_bartpy_split_conditions(sklearn_tree, index: int) -> Tuple[SplitCondition, SplitCondition]:
    """
    Convert how a split is stored in sklearn's gradient boosted trees library to the bartpy representation

    Parameters
    ----------
    sklearn_tree: The full tree object
    index: The index of the node in the tree object

    Returns
    -------

    """
    return (
        SplitCondition(sklearn_tree.feature[index], sklearn_tree.threshold[index], le),
        SplitCondition(sklearn_tree.feature[index], sklearn_tree.threshold[index], gt)

    )
def map_sklearn_tree_into_bartpy(bartpy_tree, sklearn_tree)
Expand source code
def map_sklearn_tree_into_bartpy(bartpy_tree, sklearn_tree):
    nodes = [None for x in sklearn_tree.children_left]
    nodes[0] = bartpy_tree.nodes[0]

    def search(index: int = 0):
        left_child_index, right_child_index = sklearn_tree.children_left[index], sklearn_tree.children_right[index]

        if left_child_index == -1:  # Trees are binary splits, so only need to check left tree
            return

        searched_node: LeafNode = nodes[index]

        split_conditions = map_sklearn_split_into_bartpy_split_conditions(sklearn_tree, index)
        decision_node = split_node(searched_node, split_conditions)

        left_child: LeafNode = decision_node.left_child
        right_child: LeafNode = decision_node.right_child
        left_child.set_value(sklearn_tree.value[left_child_index][0][0])
        right_child.set_value(sklearn_tree.value[right_child_index][0][0])

        mutation = GrowMutation(searched_node, decision_node)
        mutate(bartpy_tree, mutation)

        nodes[index] = decision_node
        nodes[left_child_index] = decision_node.left_child
        nodes[right_child_index] = decision_node.right_child

        search(left_child_index)
        search(right_child_index)

    search()

Classes

class SkFigs (figs: FIGSRegressor)
Expand source code
class SkFigs:
    def __init__(self, figs: FIGSRegressor):
        self.trees_ = [SkTree(t) for t in figs.trees_]
class SkTree (tree: Node)
Expand source code
class SkTree:
    def __init__(self, tree: Node):
        nodes_dict = {}
        enumerate_tree(tree, num_iter=iter(range(int(1e+06))))
        fill_nodes_dict(tree, nodes_dict)
        self.children_left = []
        self.children_right = []
        self.feature = []
        self.threshold = []
        self.n_node_samples = []
        self.impurity = []
        self.value = []
        for node in nodes_dict.values():
            r_c = get_child(node, "right")
            l_c = get_child(node, "left")
            right_number = r_c.number if r_c else -1
            left_number = l_c.number if l_c else -1
            f_node = node.feature if l_c else -2
            t_node = node.threshold if l_c else -2

            self.children_right.append(right_number)
            self.children_left.append(left_number)
            self.feature.append(f_node)
            self.threshold.append(t_node)
            self.n_node_samples.append(np.sum(node.idxs))
            self.impurity.append(node.impurity_reduction)
            self.value.append(node.value)
    #
    # def predict(self, *args, **kwargs):
    #     """ Predict target for X. """
    #     self._figs.predict(*args, **kwargs)
class SklearnTreeInitializer (max_depth: int = 4, min_samples_split: int = 2, loss: str = 'squared_error', tree_=None)

Initialize tree structure and leaf node values by fitting a single Sklearn GBR tree

Both tree structure and leaf node parameters are copied across

Expand source code
class SklearnTreeInitializer(Initializer):
    """
    Initialize tree structure and leaf node values by fitting a single Sklearn GBR tree

    Both tree structure and leaf node parameters are copied across
    """

    def __init__(self,
                 max_depth: int = 4,
                 min_samples_split: int = 2,
                 loss: str = 'squared_error',
                 tree_=None):
        super().__init__()
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.loss = loss
        self._tree = tree_

    def initialize_tree(self,
                        tree: Tree, tree_number: int) -> None:
        # # trees = list(trees)
        # if not self._tree:
        #     params = {
        #         'n_estimators': 1,
        #         'max_depth': self.max_depth,
        #         'min_samples_split': self.min_samples_split,
        #         'learning_rate': 0.8,
        #         'loss': self.loss
        #     }
        #     # for tree in trees:
        #
        #     self._tree = GradientBoostingRegressor(**params)
        #     # clf = FIGSRegressor()
        #
        #     self._tree.fit(tree.nodes[0].data.X.values, tree.nodes[0].data.y.values)

        # for i, tree in enumerate(trees):
        # sklearn_tree = self._tree.estimators_[0][0].tree_
        sklearn_tree = self._get_sklearn_tree(tree_number)
        map_sklearn_tree_into_bartpy(tree, sklearn_tree)
        pass

    def _get_sklearn_tree(self, tree_number):
        if isinstance(self._tree, GradientBoostingRegressor):
            return self._tree.estimators_[0][tree_number].tree_
        elif isinstance(self._tree, DecisionTreeRegressor):
            return self._tree.tree_
        elif isinstance(self._tree, FIGSRegressor):
            return SkTree(self._tree.trees_[tree_number])
        elif isinstance(self._tree, FIGSRegressorCV):
            return SkTree(self._tree.figs.trees_[tree_number])
        elif isinstance(self._tree, RandomForestRegressor):
            return self._tree.estimators_[tree_number]

Ancestors

Methods

def initialize_tree(self, tree: Tree, tree_number: int) ‑> None
Expand source code
def initialize_tree(self,
                    tree: Tree, tree_number: int) -> None:
    # # trees = list(trees)
    # if not self._tree:
    #     params = {
    #         'n_estimators': 1,
    #         'max_depth': self.max_depth,
    #         'min_samples_split': self.min_samples_split,
    #         'learning_rate': 0.8,
    #         'loss': self.loss
    #     }
    #     # for tree in trees:
    #
    #     self._tree = GradientBoostingRegressor(**params)
    #     # clf = FIGSRegressor()
    #
    #     self._tree.fit(tree.nodes[0].data.X.values, tree.nodes[0].data.y.values)

    # for i, tree in enumerate(trees):
    # sklearn_tree = self._tree.estimators_[0][0].tree_
    sklearn_tree = self._get_sklearn_tree(tree_number)
    map_sklearn_tree_into_bartpy(tree, sklearn_tree)
    pass