Expand source code
import copy
import re
from collections import OrderedDict
from typing import Dict, Iterable


class Rule:
    """A logical rule that is simplified (factorized) on construction.

    The rule string is split on ``' and '`` into terms, and each term is split
    on single spaces into ``feature symbol value``. Terms sharing the same
    feature and comparison symbol are merged, so that equivalent rules compare
    equal and hash alike, which allows them to be deduplicated.

    Parameters
    ----------

    rule : str
        The logical rule that is interpretable by a pandas query.

    args : object, optional
        Arguments associated with the rule. They are not used for
        factorization, but they are yielded after the rule string when the
        rule is iterated (e.g. converted to an array).

    support : object, optional
        Stored as-is on the instance; it is not used by this class.
    """

    def __init__(self, rule, args=None, support=None):
        self.rule = rule
        self.args = args
        self.support = support
        # Each term becomes a [feature, symbol, value] list.
        self.terms = [t.split(' ') for t in self.rule.split(' and ')]
        # Maps (feature, symbol) -> value string after merging duplicates.
        self.agg_dict = {}
        self.factorize()
        # Replace the raw input by its canonical (sorted, merged) form.
        self.rule = str(self)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing ``agg_dict`` attribute.
        if not isinstance(other, Rule):
            return NotImplemented
        return self.agg_dict == other.agg_dict

    def __hash__(self):
        # Order-independent hash, consistent with the dict comparison in __eq__.
        return hash(frozenset(self.agg_dict.items()))

    def factorize(self) -> None:
        """Merge ``self.terms`` into ``self.agg_dict``.

        Values of ``==`` terms are kept verbatim. Every other value is
        normalized through ``float`` so that ``1`` and ``1.0`` are the same
        rule. Repeated ``<``/``<=`` terms keep the smallest bound and repeated
        ``>``/``>=`` terms keep the largest one.
        """
        for feature, symbol, value in self.terms:
            key = (feature, symbol)
            if symbol == '==':
                # Handle the c0 == c0 case: a repeated term simply overwrites.
                self.agg_dict[key] = value
                continue

            bound = float(value)
            if key in self.agg_dict:
                current = float(self.agg_dict[key])
                if symbol[0] == '<':
                    bound = min(current, bound)
                elif symbol[0] == '>':
                    bound = max(current, bound)
                # Any other symbol (e.g. '!='): the last occurrence wins, but
                # it is normalized like the first one so duplicates still
                # compare equal.
            self.agg_dict[key] = str(bound)

    def __iter__(self):
        yield str(self)
        yield self.args

    def __repr__(self):
        # Sorted keys give a canonical ordering independent of the input order.
        return ' and '.join(
            ' '.join([feature, symbol, str(self.agg_dict[(feature, symbol)])])
            for feature, symbol in sorted(self.agg_dict)
        )


def replace_feature_name(rule: Rule, replace_dict: Dict[str, str]) -> Rule:
    """Return a shallow copy of ``rule`` with feature names substituted.

    Parameters
    ----------
    rule : Rule
        The rule to rename. It is not modified.
    replace_dict : dict of str to str
        Maps current feature names to their replacements. Names that are not
        keys of this mapping are left unchanged.

    Returns
    -------
    Rule
        A copy whose ``rule`` string and ``agg_dict`` keys use the new names.
        Other attributes are shared with the original (shallow copy).
    """
    rule_replaced = copy.copy(rule)

    if not replace_dict:
        # An empty alternation would match the empty string at every position
        # and make the lookup below fail, so there is nothing to substitute.
        rule_replaced.agg_dict = dict(rule.agg_dict)
        return rule_replaced

    def replace(match):
        return replace_dict[match.group(0)]

    # Word boundaries keep e.g. ``X_1`` from matching inside ``X_10``.
    pattern = '|'.join(r'\b%s\b' % re.escape(s) for s in replace_dict)
    rule_replaced.rule = re.sub(pattern, replace, rule.rule)

    # Build a fresh dict so the original rule's ``agg_dict`` is untouched.
    # ``.get`` mirrors the regex above, which also leaves unmapped names alone.
    rule_replaced.agg_dict = {
        (replace_dict.get(feature, feature), symbol): value
        for (feature, symbol), value in rule.agg_dict.items()
    }
    return rule_replaced


def get_feature_dict(num_features: int, feature_names: Iterable[str] = None) -> Dict[str, str]:
    """Map the placeholder names ``X_0 .. X_{num_features-1}`` to feature names.

    Parameters
    ----------
    num_features : int
        Number of placeholder entries to create.
    feature_names : optional
        Names looked up by integer position (``feature_names[i]``). When it is
        ``None`` every placeholder maps to itself.

    Returns
    -------
    OrderedDict
        Placeholder name -> feature name, in increasing index order.
    """
    placeholders = [f'X_{i}' for i in range(num_features)]
    if feature_names is None:
        return OrderedDict((name, name) for name in placeholders)
    return OrderedDict(
        (name, feature_names[position])
        for position, name in enumerate(placeholders)
    )

Functions

def get_feature_dict(num_features: int, feature_names: Iterable[str] = None) ‑> Dict[str, str]
Expand source code
def get_feature_dict(num_features: int, feature_names: Iterable[str] = None) -> Dict[str, str]:
    """Map the placeholder names ``X_0 .. X_{num_features-1}`` to feature names.

    Parameters
    ----------
    num_features : int
        Number of placeholder entries to create.
    feature_names : optional
        Names looked up by integer position (``feature_names[i]``). When it is
        ``None`` every placeholder maps to itself.

    Returns
    -------
    OrderedDict
        Placeholder name -> feature name, in increasing index order.
    """
    placeholders = [f'X_{i}' for i in range(num_features)]
    if feature_names is None:
        return OrderedDict((name, name) for name in placeholders)
    return OrderedDict(
        (name, feature_names[position])
        for position, name in enumerate(placeholders)
    )
def replace_feature_name(rule: Rule, replace_dict: Dict[str, str]) ‑> Rule
Expand source code
def replace_feature_name(rule: Rule, replace_dict: Dict[str, str]) -> Rule:
    """Return a shallow copy of ``rule`` with feature names substituted.

    Parameters
    ----------
    rule : Rule
        The rule to rename. It is not modified.
    replace_dict : dict of str to str
        Maps current feature names to their replacements. Names that are not
        keys of this mapping are left unchanged.

    Returns
    -------
    Rule
        A copy whose ``rule`` string and ``agg_dict`` keys use the new names.
        Other attributes are shared with the original (shallow copy).
    """
    rule_replaced = copy.copy(rule)

    if not replace_dict:
        # An empty alternation would match the empty string at every position
        # and make the lookup below fail, so there is nothing to substitute.
        rule_replaced.agg_dict = dict(rule.agg_dict)
        return rule_replaced

    def replace(match):
        return replace_dict[match.group(0)]

    # Word boundaries keep e.g. ``X_1`` from matching inside ``X_10``.
    pattern = '|'.join(r'\b%s\b' % re.escape(s) for s in replace_dict)
    rule_replaced.rule = re.sub(pattern, replace, rule.rule)

    # Build a fresh dict so the original rule's ``agg_dict`` is untouched.
    # ``.get`` mirrors the regex above, which also leaves unmapped names alone.
    rule_replaced.agg_dict = {
        (replace_dict.get(feature, feature), symbol): value
        for (feature, symbol), value in rule.agg_dict.items()
    }
    return rule_replaced

Classes

class Rule (rule, args=None, support=None)

An object that models a logical rule and adds factorization methods. It is used to simplify rules and deduplicate them.

Parameters

rule : str
The logical rule that is interpretable by a pandas query.
args : object, optional
Arguments associated with the rule. They are not used for factorization, but they are part of the output when the rule is converted to an array.
Expand source code
class Rule:
    """A logical rule that is simplified (factorized) on construction.

    The rule string is split on ``' and '`` into terms, and each term is split
    on single spaces into ``feature symbol value``. Terms sharing the same
    feature and comparison symbol are merged, so that equivalent rules compare
    equal and hash alike, which allows them to be deduplicated.

    Parameters
    ----------

    rule : str
        The logical rule that is interpretable by a pandas query.

    args : object, optional
        Arguments associated with the rule. They are not used for
        factorization, but they are yielded after the rule string when the
        rule is iterated (e.g. converted to an array).

    support : object, optional
        Stored as-is on the instance; it is not used by this class.
    """

    def __init__(self, rule, args=None, support=None):
        self.rule = rule
        self.args = args
        self.support = support
        # Each term becomes a [feature, symbol, value] list.
        self.terms = [t.split(' ') for t in self.rule.split(' and ')]
        # Maps (feature, symbol) -> value string after merging duplicates.
        self.agg_dict = {}
        self.factorize()
        # Replace the raw input by its canonical (sorted, merged) form.
        self.rule = str(self)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing ``agg_dict`` attribute.
        if not isinstance(other, Rule):
            return NotImplemented
        return self.agg_dict == other.agg_dict

    def __hash__(self):
        # Order-independent hash, consistent with the dict comparison in __eq__.
        return hash(frozenset(self.agg_dict.items()))

    def factorize(self) -> None:
        """Merge ``self.terms`` into ``self.agg_dict``.

        Values of ``==`` terms are kept verbatim. Every other value is
        normalized through ``float`` so that ``1`` and ``1.0`` are the same
        rule. Repeated ``<``/``<=`` terms keep the smallest bound and repeated
        ``>``/``>=`` terms keep the largest one.
        """
        for feature, symbol, value in self.terms:
            key = (feature, symbol)
            if symbol == '==':
                # Handle the c0 == c0 case: a repeated term simply overwrites.
                self.agg_dict[key] = value
                continue

            bound = float(value)
            if key in self.agg_dict:
                current = float(self.agg_dict[key])
                if symbol[0] == '<':
                    bound = min(current, bound)
                elif symbol[0] == '>':
                    bound = max(current, bound)
                # Any other symbol (e.g. '!='): the last occurrence wins, but
                # it is normalized like the first one so duplicates still
                # compare equal.
            self.agg_dict[key] = str(bound)

    def __iter__(self):
        yield str(self)
        yield self.args

    def __repr__(self):
        # Sorted keys give a canonical ordering independent of the input order.
        return ' and '.join(
            ' '.join([feature, symbol, str(self.agg_dict[(feature, symbol)])])
            for feature, symbol in sorted(self.agg_dict)
        )

Methods

def factorize(self) ‑> None
Expand source code
def factorize(self) -> None:
    """Merge ``self.terms`` into ``self.agg_dict``.

    Each term is a ``(feature, symbol, value)`` triple and the result is keyed
    by ``(feature, symbol)``. Values of ``==`` terms are kept verbatim. Every
    other value is normalized through ``float`` so that ``1`` and ``1.0`` are
    stored identically. Repeated ``<``/``<=`` terms keep the smallest bound
    and repeated ``>``/``>=`` terms keep the largest one.
    """
    for feature, symbol, value in self.terms:
        key = (feature, symbol)
        if symbol == '==':
            # Handle the c0 == c0 case: a repeated term simply overwrites.
            self.agg_dict[key] = value
            continue

        bound = float(value)
        if key in self.agg_dict:
            current = float(self.agg_dict[key])
            if symbol[0] == '<':
                bound = min(current, bound)
            elif symbol[0] == '>':
                bound = max(current, bound)
            # Any other symbol (e.g. '!='): the last occurrence wins, but it
            # is normalized like the first one so duplicates stay identical.
        self.agg_dict[key] = str(bound)