Wrapper for sparse, integer linear models.

minimizes sum_squares(X @ w - y) + alpha * norm(w, 1)

with integer coefficients in w

Requires installation of a solver for mixed-integer programs, e.g. gurobi, mosek, or cplex (the regressor yields a mixed-integer quadratic program; the classifier a mixed-integer exponential-cone program)
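A minimal usage sketch (the import path below is hypothetical; adjust it to wherever this module lives in your package):

>>> import numpy as np
>>> from slim import SLIMRegressor  # hypothetical import path
>>> X = np.random.randn(100, 5)
>>> y = X @ np.array([2., 0., -1., 0., 3.]) + 0.1 * np.random.randn(100)
>>> model = SLIMRegressor(alpha=0.01).fit(X, y)  # falls back to rounded Lasso without a MIP solver
>>> preds = model.predict(X)
>>> model.model_.coef_  # integer-valued coefficients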

Expand source code
'''
Wrapper for sparse, integer linear models.

minimizes sum_squares(X @ w - y) + alpha * norm(w, 1)

with integer coefficients in w

Requires installation of a solver for mixed-integer programs, e.g. gurobi, mosek, or cplex (the regressor yields a mixed-integer quadratic program; the classifier a mixed-integer exponential-cone program)
'''

import warnings

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

class SLIMRegressor(BaseEstimator, RegressorMixin):
    '''Sparse integer linear model.

    Params
    ------
    alpha: float
        weight for sparsity penalty
    '''

    def __init__(self, alpha=0.01):
        self.alpha = alpha

    def fit(self, X, y, sample_weight=None):
        '''Fit a linear model with integer coefficients and L1 regularization.
        If the optimization fails, fall back to Lasso and round the coefficients.

        Params
        ------
        sample_weight: np.ndarray (n,), optional
            weight for each individual sample
        '''

        X, y = check_X_y(X, y)
        self.n_features_in_ = X.shape[1]
        self.model_ = LinearRegression()

        try:
            import cvxpy as cp  # package for optimization, import here to make it optional
            from cvxpy.error import SolverError

            # declare the integer-valued optimization variable
            w = cp.Variable(X.shape[1], integer=True)

            # set up the minimization problem
            residuals = X @ w - y
            if sample_weight is not None:
                residuals = cp.multiply(sample_weight, residuals)
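                # note: weights scale the residuals before squaring, so each
                # sample's squared error is weighted by sample_weight ** 2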

            mse = cp.sum_squares(residuals)
            l1_penalty = self.alpha * cp.norm(w, 1)
            obj = cp.Minimize(mse + l1_penalty)
            prob = cp.Problem(obj)

            try:
                # solve the problem using an appropriate solver
                prob.solve()
                self.model_.coef_ = w.value.astype(int)
                self.model_.intercept_ = 0

            except SolverError:
                warnings.warn("gurobi, mosek, or cplex solver required for mixed-integer "
                              "quadratic programming. Rounding non-integer coefficients instead.")
                self._fit_backup(X, y, sample_weight)
        
        except ImportError:
            warnings.warn("Should install cvxpy with pip install cvxpy. Rounding non-integer "
                          "coefficients instead.")
            self._fit_backup(X, y, sample_weight)

        return self
    
    def _fit_backup(self, X, y, sample_weight):
        m = Lasso(alpha=self.alpha)
        m.fit(X, y, sample_weight=sample_weight)
        self.model_.coef_ = np.round(m.coef_).astype(int)
        self.model_.intercept_ = m.intercept_

    def predict(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return self.model_.predict(X)


class SLIMClassifier(BaseEstimator, ClassifierMixin):

    def __init__(self, alpha=1):
        '''Model is initialized during fitting

        Params
        ------
        alpha: float
            weight for sparsity penalty
        '''
        self.alpha = alpha

    def fit(self, X, y, sample_weight=None):
        '''Fit a logistic model with integer coefficients and L1 regularization.
        If the optimization fails, fall back to a regularized logistic regression
        and round the coefficients.

        Params
        ------
        sample_weight: np.ndarray (n,), optional
            weight for each individual sample
        '''
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.n_features_in_ = X.shape[1]
        self.classes_, y = np.unique(y, return_inverse=True)  # deals with str inputs
        self.model_ = LogisticRegression()
        self.model_.classes_ = self.classes_

        try:
            import cvxpy as cp  # package for optimization, import here to make it optional
            from cvxpy.error import SolverError

            # declare the integer-valued optimization variable
            w = cp.Variable(X.shape[1], integer=True)

            # set up the minimization problem
            logits = -X @ w
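            # note: with z = X @ w, negating the sum of `residuals` below gives
            # sum(log(1 + exp(z)) - y * z), the logistic negative log-likelihood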
            residuals = cp.multiply(1 - y, logits) - cp.logistic(logits)
            if sample_weight is not None:
                residuals = cp.multiply(sample_weight, residuals)

            celoss = -cp.sum(residuals)
            l1_penalty = self.alpha * cp.norm(w, 1)
            obj = cp.Minimize(celoss + l1_penalty)
            prob = cp.Problem(obj)

            try:
                # solve the problem using an appropriate solver
                prob.solve()
                self.model_.coef_ = np.array([w.value.astype(int)])
                self.model_.intercept_ = 0

            except SolverError:
                warnings.warn("mosek solver required for mixed-integer exponential cone "
                              "programming. Rounding non-integer coefficients instead")
                self._fit_backup(X, y, sample_weight)

        except ImportError:
            warnings.warn("Should install cvxpy with pip install cvxpy. Rounding non-integer "
                          "coefficients instead.")
            self._fit_backup(X, y, sample_weight)
        
        return self

    def _fit_backup(self, X, y, sample_weight=None):
        m = LogisticRegression(C=1 / self.alpha)
        m.fit(X, y, sample_weight=sample_weight)
        self.model_.coef_ = np.round(m.coef_).astype(int)
        self.model_.intercept_ = m.intercept_

    def predict(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return self.model_.predict(X)

    def predict_proba(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return self.model_.predict_proba(X)

Classes

class SLIMClassifier (alpha=1)

Base class for all estimators in scikit-learn.

Inheriting from this class provides default implementations of:

  • setting and getting parameters used by GridSearchCV and friends;
  • textual and HTML representation displayed in terminals and IDEs;
  • estimator serialization;
  • parameters validation;
  • data validation;
  • feature names validation.

Read more in the scikit-learn User Guide (rolling_your_own_estimator).

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

Examples

>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])

Model is initialized during fitting

Params

alpha: float
    weight for sparsity penalty
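
A short usage sketch (the import path is hypothetical):

>>> import numpy as np
>>> from slim import SLIMClassifier  # hypothetical import path
>>> X = np.random.randn(200, 4)
>>> y = (X[:, 0] - X[:, 1] > 0).astype(int)
>>> clf = SLIMClassifier(alpha=1).fit(X, y)  # falls back to rounded logistic regression without mosek
>>> probs = clf.predict_proba(X)  # delegates to the wrapped LogisticRegression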

Expand source code
class SLIMClassifier(BaseEstimator, ClassifierMixin):

    def __init__(self, alpha=1):
        '''Model is initialized during fitting

        Params
        ------
        alpha: float
            weight for sparsity penalty
        '''
        self.alpha = alpha

    def fit(self, X, y, sample_weight=None):
        '''Fit a logistic model with integer coefficients and L1 regularization.
        If the optimization fails, fall back to a regularized logistic regression
        and round the coefficients.

        Params
        ------
        sample_weight: np.ndarray (n,), optional
            weight for each individual sample
        '''
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.n_features_in_ = X.shape[1]
        self.classes_, y = np.unique(y, return_inverse=True)  # deals with str inputs
        self.model_ = LogisticRegression()
        self.model_.classes_ = self.classes_

        try:
            import cvxpy as cp  # package for optimization, import here to make it optional
            from cvxpy.error import SolverError

            # declare the integer-valued optimization variable
            w = cp.Variable(X.shape[1], integer=True)

            # set up the minimization problem
            logits = -X @ w
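            # note: with z = X @ w, negating the sum of `residuals` below gives
            # sum(log(1 + exp(z)) - y * z), the logistic negative log-likelihood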
            residuals = cp.multiply(1 - y, logits) - cp.logistic(logits)
            if sample_weight is not None:
                residuals = cp.multiply(sample_weight, residuals)

            celoss = -cp.sum(residuals)
            l1_penalty = self.alpha * cp.norm(w, 1)
            obj = cp.Minimize(celoss + l1_penalty)
            prob = cp.Problem(obj)

            try:
                # solve the problem using an appropriate solver
                prob.solve()
                self.model_.coef_ = np.array([w.value.astype(int)])
                self.model_.intercept_ = 0

            except SolverError:
                warnings.warn("mosek solver required for mixed-integer exponential cone "
                              "programming. Rounding non-integer coefficients instead")
                self._fit_backup(X, y, sample_weight)

        except ImportError:
            warnings.warn("Should install cvxpy with pip install cvxpy. Rounding non-integer "
                          "coefficients instead.")
            self._fit_backup(X, y, sample_weight)
        
        return self

    def _fit_backup(self, X, y, sample_weight=None):
        m = LogisticRegression(C=1 / self.alpha)
        m.fit(X, y, sample_weight=sample_weight)
        self.model_.coef_ = np.round(m.coef_).astype(int)
        self.model_.intercept_ = m.intercept_

    def predict(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return self.model_.predict(X)

    def predict_proba(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return self.model_.predict_proba(X)

Ancestors

  • sklearn.base.BaseEstimator
  • sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
  • sklearn.utils._metadata_requests._MetadataRequester
  • sklearn.base.ClassifierMixin

Methods

def fit(self, X, y, sample_weight=None)

Fit a logistic model with integer coefficients and L1 regularization. If the optimization fails, fall back to a regularized logistic regression and round the coefficients.

Params

sample_weight: np.ndarray (n,), optional
    weight for each individual sample

Expand source code
def fit(self, X, y, sample_weight=None):
    '''Fit a logistic model with integer coefficients and L1 regularization.
    If the optimization fails, fall back to a regularized logistic regression
    and round the coefficients.

    Params
    ------
    sample_weight: np.ndarray (n,), optional
        weight for each individual sample
    '''
    X, y = check_X_y(X, y)
    check_classification_targets(y)
    self.n_features_in_ = X.shape[1]
    self.classes_, y = np.unique(y, return_inverse=True)  # deals with str inputs
    self.model_ = LogisticRegression()
    self.model_.classes_ = self.classes_

    try:
        import cvxpy as cp  # package for optimization, import here to make it optional
        from cvxpy.error import SolverError

        # declare the integer-valued optimization variable
        w = cp.Variable(X.shape[1], integer=True)

        # set up the minimization problem
        logits = -X @ w
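        # note: with z = X @ w, negating the sum of `residuals` below gives
        # sum(log(1 + exp(z)) - y * z), the logistic negative log-likelihood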
        residuals = cp.multiply(1 - y, logits) - cp.logistic(logits)
        if sample_weight is not None:
            residuals = cp.multiply(sample_weight, residuals)

        celoss = -cp.sum(residuals)
        l1_penalty = self.alpha * cp.norm(w, 1)
        obj = cp.Minimize(celoss + l1_penalty)
        prob = cp.Problem(obj)

        try:
            # solve the problem using an appropriate solver
            prob.solve()
            self.model_.coef_ = np.array([w.value.astype(int)])
            self.model_.intercept_ = 0

        except SolverError:
            warnings.warn("mosek solver required for mixed-integer exponential cone "
                          "programming. Rounding non-integer coefficients instead")
            self._fit_backup(X, y, sample_weight)

    except ImportError:
        warnings.warn("Should install cvxpy with pip install cvxpy. Rounding non-integer "
                      "coefficients instead.")
        self._fit_backup(X, y, sample_weight)
    
    return self
def predict(self, X)
Expand source code
def predict(self, X):
    check_is_fitted(self)
    X = check_array(X)
    return self.model_.predict(X)
def predict_proba(self, X)
Expand source code
def predict_proba(self, X):
    check_is_fitted(self)
    X = check_array(X)
    return self.model_.predict_proba(X)
def set_fit_request(self: SLIMClassifier, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> SLIMClassifier

Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the User Guide (metadata_routing) for how the routing mechanism works.

The options for each parameter are:

  • True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.

  • False: metadata is not requested and the meta-estimator will not pass it to fit.

  • None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.

  • str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for sample_weight parameter in fit.

Returns

self : object
The updated object.
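
As an illustration, routing sample_weight through a pipeline might look like the sketch below. This is standard scikit-learn (>= 1.3) metadata-routing usage, not behavior specific to this class; the import path for SLIMClassifier is hypothetical.

>>> import numpy as np
>>> import sklearn
>>> from sklearn.pipeline import Pipeline
>>> from sklearn.preprocessing import StandardScaler
>>> from slim import SLIMClassifier  # hypothetical import path
>>> sklearn.set_config(enable_metadata_routing=True)
>>> clf = SLIMClassifier(alpha=1).set_fit_request(sample_weight=True)
>>> scaler = StandardScaler().set_fit_request(sample_weight=False)  # scaler ignores the weights
>>> pipe = Pipeline([("scale", scaler), ("slim", clf)])
>>> X = np.random.randn(100, 3)
>>> y = (X[:, 0] > 0).astype(int)
>>> pipe = pipe.fit(X, y, sample_weight=np.ones(100))  # weights routed to SLIMClassifier.fit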
Expand source code
def func(**kw):
    """Updates the request for provided parameters

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    if self.validate_keys and (set(kw) - set(self.keys)):
        raise TypeError(
            f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
            f" are: {set(self.keys)}"
        )

    requests = instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        if alias is not UNCHANGED:
            method_metadata_request.add_request(param=prop, alias=alias)
    instance._metadata_request = requests

    return instance
def set_score_request(self: SLIMClassifier, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> SLIMClassifier

Request metadata passed to the score method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the User Guide (metadata_routing) for how the routing mechanism works.

The options for each parameter are:

  • True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.

  • False: metadata is not requested and the meta-estimator will not pass it to score.

  • None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.

  • str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for sample_weight parameter in score.

Returns

self : object
The updated object.
Expand source code
def func(**kw):
    """Updates the request for provided parameters

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    if self.validate_keys and (set(kw) - set(self.keys)):
        raise TypeError(
            f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
            f" are: {set(self.keys)}"
        )

    requests = instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        if alias is not UNCHANGED:
            method_metadata_request.add_request(param=prop, alias=alias)
    instance._metadata_request = requests

    return instance
class SLIMRegressor (alpha=0.01)

Sparse integer linear model.

Params

alpha: float
    weight for sparsity penalty
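
A sketch of fitting with per-sample weights (hypothetical import path; note that the weights scale the residuals before squaring, see fit):

>>> import numpy as np
>>> from slim import SLIMRegressor  # hypothetical import path
>>> X = np.random.randn(50, 3)
>>> y = X @ np.array([1., -2., 0.]) + 0.05 * np.random.randn(50)
>>> w = np.ones(50)
>>> w[:10] = 5.0  # emphasize the first ten samples
>>> reg = SLIMRegressor(alpha=0.01).fit(X, y, sample_weight=w)
>>> reg.model_.coef_, reg.model_.intercept_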

Expand source code
class SLIMRegressor(BaseEstimator, RegressorMixin):
    '''Sparse integer linear model.

    Params
    ------
    alpha: float
        weight for sparsity penalty
    '''

    def __init__(self, alpha=0.01):
        self.alpha = alpha

    def fit(self, X, y, sample_weight=None):
        '''Fit a linear model with integer coefficients and L1 regularization.
        If the optimization fails, fall back to Lasso and round the coefficients.

        Params
        ------
        sample_weight: np.ndarray (n,), optional
            weight for each individual sample
        '''

        X, y = check_X_y(X, y)
        self.n_features_in_ = X.shape[1]
        self.model_ = LinearRegression()

        try:
            import cvxpy as cp  # package for optimization, import here to make it optional
            from cvxpy.error import SolverError

            # declare the integer-valued optimization variable
            w = cp.Variable(X.shape[1], integer=True)

            # set up the minimization problem
            residuals = X @ w - y
            if sample_weight is not None:
                residuals = cp.multiply(sample_weight, residuals)
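                # note: weights scale the residuals before squaring, so each
                # sample's squared error is weighted by sample_weight ** 2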

            mse = cp.sum_squares(residuals)
            l1_penalty = self.alpha * cp.norm(w, 1)
            obj = cp.Minimize(mse + l1_penalty)
            prob = cp.Problem(obj)

            try:
                # solve the problem using an appropriate solver
                prob.solve()
                self.model_.coef_ = w.value.astype(int)
                self.model_.intercept_ = 0

            except SolverError:
                warnings.warn("gurobi, mosek, or cplex solver required for mixed-integer "
                              "quadratic programming. Rounding non-integer coefficients instead.")
                self._fit_backup(X, y, sample_weight)
        
        except ImportError:
            warnings.warn("Should install cvxpy with pip install cvxpy. Rounding non-integer "
                          "coefficients instead.")
            self._fit_backup(X, y, sample_weight)

        return self
    
    def _fit_backup(self, X, y, sample_weight):
        m = Lasso(alpha=self.alpha)
        m.fit(X, y, sample_weight=sample_weight)
        self.model_.coef_ = np.round(m.coef_).astype(int)
        self.model_.intercept_ = m.intercept_

    def predict(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return self.model_.predict(X)

Ancestors

  • sklearn.base.BaseEstimator
  • sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
  • sklearn.utils._metadata_requests._MetadataRequester
  • sklearn.base.RegressorMixin

Methods

def fit(self, X, y, sample_weight=None)

Fit a linear model with integer coefficients and L1 regularization. If the optimization fails, fall back to Lasso and round the coefficients.

Params

sample_weight: np.ndarray (n,), optional
    weight for each individual sample

Expand source code
def fit(self, X, y, sample_weight=None):
    '''Fit a linear model with integer coefficients and L1 regularization.
    If the optimization fails, fall back to Lasso and round the coefficients.

    Params
    ------
    sample_weight: np.ndarray (n,), optional
        weight for each individual sample
    '''

    X, y = check_X_y(X, y)
    self.n_features_in_ = X.shape[1]
    self.model_ = LinearRegression()

    try:
        import cvxpy as cp  # package for optimization, import here to make it optional
        from cvxpy.error import SolverError

        # declare the integer-valued optimization variable
        w = cp.Variable(X.shape[1], integer=True)

        # set up the minimization problem
        residuals = X @ w - y
        if sample_weight is not None:
            residuals = cp.multiply(sample_weight, residuals)
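            # note: weights scale the residuals before squaring, so each
            # sample's squared error is weighted by sample_weight ** 2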

        mse = cp.sum_squares(residuals)
        l1_penalty = self.alpha * cp.norm(w, 1)
        obj = cp.Minimize(mse + l1_penalty)
        prob = cp.Problem(obj)

        try:
            # solve the problem using an appropriate solver
            prob.solve()
            self.model_.coef_ = w.value.astype(int)
            self.model_.intercept_ = 0

        except SolverError:
            warnings.warn("gurobi, mosek, or cplex solver required for mixed-integer "
                          "quadratic programming. Rounding non-integer coefficients instead.")
            self._fit_backup(X, y, sample_weight)
    
    except ImportError:
        warnings.warn("Should install cvxpy with pip install cvxpy. Rounding non-integer "
                      "coefficients instead.")
        self._fit_backup(X, y, sample_weight)

    return self
def predict(self, X)
Expand source code
def predict(self, X):
    check_is_fitted(self)
    X = check_array(X)
    return self.model_.predict(X)
def set_fit_request(self: SLIMRegressor, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> SLIMRegressor

Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the User Guide (metadata_routing) for how the routing mechanism works.

The options for each parameter are:

  • True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.

  • False: metadata is not requested and the meta-estimator will not pass it to fit.

  • None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.

  • str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for sample_weight parameter in fit.

Returns

self : object
The updated object.
Expand source code
def func(**kw):
    """Updates the request for provided parameters

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    if self.validate_keys and (set(kw) - set(self.keys)):
        raise TypeError(
            f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
            f" are: {set(self.keys)}"
        )

    requests = instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        if alias is not UNCHANGED:
            method_metadata_request.add_request(param=prop, alias=alias)
    instance._metadata_request = requests

    return instance
def set_score_request(self: SLIMRegressor, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> SLIMRegressor

Request metadata passed to the score method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). Please see the User Guide (metadata_routing) for how the routing mechanism works.

The options for each parameter are:

  • True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.

  • False: metadata is not requested and the meta-estimator will not pass it to score.

  • None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.

  • str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version: 1.3

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters

sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
Metadata routing for sample_weight parameter in score.

Returns

self : object
The updated object.
Expand source code
def func(**kw):
    """Updates the request for provided parameters

    This docstring is overwritten below.
    See REQUESTER_DOC for expected functionality
    """
    if not _routing_enabled():
        raise RuntimeError(
            "This method is only available when metadata routing is enabled."
            " You can enable it using"
            " sklearn.set_config(enable_metadata_routing=True)."
        )

    if self.validate_keys and (set(kw) - set(self.keys)):
        raise TypeError(
            f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
            f" are: {set(self.keys)}"
        )

    requests = instance._get_metadata_request()
    method_metadata_request = getattr(requests, self.name)

    for prop, alias in kw.items():
        if alias is not UNCHANGED:
            method_metadata_request.add_request(param=prop, alias=alias)
    instance._metadata_request = requests

    return instance