Expand source code
import numpy as np
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.tree import _tree
from typing import Union, List, Tuple
def tree_to_rules(tree: Union[DecisionTreeClassifier, DecisionTreeRegressor],
feature_names: List[str],
prediction_values: bool = False, round_thresholds=True) -> List[str]:
"""
Return a list of rules from a tree
Parameters
----------
tree : Decision Tree Classifier/Regressor
feature_names: list of variable names
Returns
-------
rules : list of rules.
"""
# XXX todo: check the case where tree is build on subset of features,
# ie max_features != None
tree_ = tree.tree_
feature_name = [
feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
for i in tree_.feature
]
rules = []
def recurse(node, base_name):
if tree_.feature[node] != _tree.TREE_UNDEFINED:
name = feature_name[node]
symbol = '<='
symbol2 = '>'
threshold = tree_.threshold[node]
if round_thresholds:
threshold = np.round(threshold, decimals=5)
text = base_name + ["{} {} {}".format(name, symbol, threshold)]
recurse(tree_.children_left[node], text)
text = base_name + ["{} {} {}".format(name, symbol2,
threshold)]
recurse(tree_.children_right[node], text)
else:
rule = str.join(' and ', base_name)
rule = (rule if rule != ''
else ' == '.join([feature_names[0]] * 2))
# a rule selecting all is set to "c0==c0"
if prediction_values:
rules.append((rule, tree_.value[node][0].tolist()))
else:
rules.append(rule)
recurse(0, [])
return rules if len(rules) > 0 else 'True'
def tree_to_code(clf, feature_names):
'''Prints a tree with a single split
'''
n_nodes = clf.tree_.node_count
children_left = clf.tree_.children_left
children_right = clf.tree_.children_right
feature = clf.tree_.feature
threshold = clf.tree_.threshold
node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
stack = [(0, 0)] # start with the root node id (0) and its depth (0)
s = ''
while len(stack) > 0:
# `pop` ensures each node is only visited once
node_id, depth = stack.pop()
node_depth[node_id] = depth
# If the left and right child of a node is not the same we have a split
# node
is_split_node = children_left[node_id] != children_right[node_id]
# If a split node, append left and right children and depth to `stack`
# so we can loop through them
if is_split_node:
stack.append((children_left[node_id], depth + 1))
stack.append((children_right[node_id], depth + 1))
else:
is_leaves[node_id] = True
# print("The binary tree structure has {n} nodes and has "
# "the following tree structure:\n".format(n=n_nodes))
for i in range(n_nodes):
if is_leaves[i]:
pass
# print("{space}node={node} is a leaf node.".format(
# space=node_depth[i] * "\t", node=i))
else:
s += f"{feature_names[feature[i]]} <= {threshold[i]}"
return f"\033[96m{s}\033[00m\n"
def itemsets_to_rules(itemsets: List[Tuple]) -> List[str]:
itemsets_clean = list(filter(lambda it: it != 'null' and 'All' not in ''.join(it), itemsets))
f = lambda itemset: ' and '.join([single_discretized_feature_to_rule(item) for item in itemset])
return list(map(f, itemsets_clean))
def dict_to_rule(rule, clf_feature_dict):
"""
Function to accept rule dict and convert to Rule object
Parameters:
rule: list of dict of schema
[
{
'feature': int,
'operator': str,
'value': float
},
]
"""
output = ''
for condition in rule:
output += '{} {} {} and '.format(
clf_feature_dict[int(condition['feature'])],
condition['operator'],
condition['pivot']
)
return output[:-5]
def single_discretized_feature_to_rule(feat: str) -> str:
# categorical feature
if '_to_' not in feat:
return f'{feat} > 0.5'
# discretized numeric feature
feat_split = feat.split('_to_')
upper_value = feat_split[-1]
lower_value = feat_split[-2].split('_')[-1]
lower_to_upper_len = 1 + len(lower_value) + 4 + len(upper_value)
feature_name = feat[:-lower_to_upper_len]
if lower_value == '-inf':
rule = f'{feature_name} <= {upper_value}'
elif upper_value == 'inf':
rule = f'{feature_name} > {lower_value}'
else:
rule = f'{feature_name} > {lower_value} and {feature_name} <= {upper_value}'
return rule
Functions
def dict_to_rule(rule, clf_feature_dict)
-
Function to accept rule dict and convert to Rule object
Parameters: rule: list of dict of schema [ { 'feature': int, 'operator': str, 'value': float }, ]
Expand source code
def dict_to_rule(rule, clf_feature_dict): """ Function to accept rule dict and convert to Rule object Parameters: rule: list of dict of schema [ { 'feature': int, 'operator': str, 'value': float }, ] """ output = '' for condition in rule: output += '{} {} {} and '.format( clf_feature_dict[int(condition['feature'])], condition['operator'], condition['pivot'] ) return output[:-5]
def itemsets_to_rules(itemsets: List[Tuple]) ‑> List[str]
-
Expand source code
def itemsets_to_rules(itemsets: List[Tuple]) -> List[str]: itemsets_clean = list(filter(lambda it: it != 'null' and 'All' not in ''.join(it), itemsets)) f = lambda itemset: ' and '.join([single_discretized_feature_to_rule(item) for item in itemset]) return list(map(f, itemsets_clean))
def single_discretized_feature_to_rule(feat: str) ‑> str
-
Expand source code
def single_discretized_feature_to_rule(feat: str) -> str: # categorical feature if '_to_' not in feat: return f'{feat} > 0.5' # discretized numeric feature feat_split = feat.split('_to_') upper_value = feat_split[-1] lower_value = feat_split[-2].split('_')[-1] lower_to_upper_len = 1 + len(lower_value) + 4 + len(upper_value) feature_name = feat[:-lower_to_upper_len] if lower_value == '-inf': rule = f'{feature_name} <= {upper_value}' elif upper_value == 'inf': rule = f'{feature_name} > {lower_value}' else: rule = f'{feature_name} > {lower_value} and {feature_name} <= {upper_value}' return rule
def tree_to_code(clf, feature_names)
-
Prints a tree with a single split
Expand source code
def tree_to_code(clf, feature_names): '''Prints a tree with a single split ''' n_nodes = clf.tree_.node_count children_left = clf.tree_.children_left children_right = clf.tree_.children_right feature = clf.tree_.feature threshold = clf.tree_.threshold node_depth = np.zeros(shape=n_nodes, dtype=np.int64) is_leaves = np.zeros(shape=n_nodes, dtype=bool) stack = [(0, 0)] # start with the root node id (0) and its depth (0) s = '' while len(stack) > 0: # `pop` ensures each node is only visited once node_id, depth = stack.pop() node_depth[node_id] = depth # If the left and right child of a node is not the same we have a split # node is_split_node = children_left[node_id] != children_right[node_id] # If a split node, append left and right children and depth to `stack` # so we can loop through them if is_split_node: stack.append((children_left[node_id], depth + 1)) stack.append((children_right[node_id], depth + 1)) else: is_leaves[node_id] = True # print("The binary tree structure has {n} nodes and has " # "the following tree structure:\n".format(n=n_nodes)) for i in range(n_nodes): if is_leaves[i]: pass # print("{space}node={node} is a leaf node.".format( # space=node_depth[i] * "\t", node=i)) else: s += f"{feature_names[feature[i]]} <= {threshold[i]}" return f"\033[96m{s}\033[00m\n"
def tree_to_rules(tree: Union[sklearn.tree._classes.DecisionTreeClassifier, sklearn.tree._classes.DecisionTreeRegressor], feature_names: List[str], prediction_values: bool = False, round_thresholds=True) ‑> List[str]
-
Return a list of rules from a tree
Parameters
tree : Decision Tree Classifier/Regressor feature_names: list of variable names
Returns
rules : list of rules.
Expand source code
def tree_to_rules(tree: Union[DecisionTreeClassifier, DecisionTreeRegressor], feature_names: List[str], prediction_values: bool = False, round_thresholds=True) -> List[str]: """ Return a list of rules from a tree Parameters ---------- tree : Decision Tree Classifier/Regressor feature_names: list of variable names Returns ------- rules : list of rules. """ # XXX todo: check the case where tree is build on subset of features, # ie max_features != None tree_ = tree.tree_ feature_name = [ feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" for i in tree_.feature ] rules = [] def recurse(node, base_name): if tree_.feature[node] != _tree.TREE_UNDEFINED: name = feature_name[node] symbol = '<=' symbol2 = '>' threshold = tree_.threshold[node] if round_thresholds: threshold = np.round(threshold, decimals=5) text = base_name + ["{} {} {}".format(name, symbol, threshold)] recurse(tree_.children_left[node], text) text = base_name + ["{} {} {}".format(name, symbol2, threshold)] recurse(tree_.children_right[node], text) else: rule = str.join(' and ', base_name) rule = (rule if rule != '' else ' == '.join([feature_names[0]] * 2)) # a rule selecting all is set to "c0==c0" if prediction_values: rules.append((rule, tree_.value[node][0].tolist())) else: rules.append(rule) recurse(0, []) return rules if len(rules) > 0 else 'True'