Wrapper for sparse, integer linear models (SLIM).
Minimizes ||X @ w - y||_2^2 + alpha * ||w||_1 with integer coefficients in w.
Requires installation of a solver for mixed-integer programs, e.g. Gurobi, MOSEK, or CPLEX.
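A minimal usage sketch (not part of the module source; it assumes SLIMRegressor is imported from this module and that cvxpy plus a mixed-integer solver are available, otherwise fit falls back to rounding lasso coefficients):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))
    w_true = np.array([3, 0, -2, 0, 1])         # sparse integer ground truth
    y = X @ w_true + 0.1 * rng.normal(size=100)

    model = SLIMRegressor(alpha=0.01).fit(X, y)  # assumes SLIMRegressor is in scope
    print(model.model_.coef_)                    # integer coefficients, ideally close to w_true
    print(model.predict(X[:3]))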
'''
Wrapper for sparse, integer linear models (SLIM).
Minimizes ||X @ w - y||_2^2 + alpha * ||w||_1 with integer coefficients in w.
Requires installation of a solver for mixed-integer programs, e.g. Gurobi, MOSEK, or CPLEX.
'''
import warnings

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted


class SLIMRegressor(BaseEstimator, RegressorMixin):
    '''Sparse integer linear model

    Params
    ------
    alpha: float
        weight for sparsity penalty
    '''

    def __init__(self, alpha=0.01):
        self.alpha = alpha

    def fit(self, X, y, sample_weight=None):
        '''Fit a linear model with integer coefficients and L1 regularization.
        If the mixed-integer optimization fails, fit a lasso and round its coefs.

        Params
        ------
        sample_weight: np.ndarray (n,), optional
            weight for each individual sample
        '''
        X, y = check_X_y(X, y)
        self.n_features_in_ = X.shape[1]
        self.model_ = LinearRegression()
        try:
            import cvxpy as cp  # package for optimization, imported here to keep it optional
            from cvxpy.error import SolverError

            # declare the integer-valued optimization variable
            w = cp.Variable(X.shape[1], integer=True)

            # set up the minimization problem: squared error plus L1 sparsity penalty
            residuals = X @ w - y
            if sample_weight is not None:
                residuals = cp.multiply(sample_weight, residuals)
            mse = cp.sum_squares(residuals)
            l1_penalty = self.alpha * cp.norm(w, 1)
            obj = cp.Minimize(mse + l1_penalty)
            prob = cp.Problem(obj)

            try:
                # solve the problem using an appropriate solver
                prob.solve()
                self.model_.coef_ = w.value.astype(int)
                self.model_.intercept_ = 0
            except SolverError:
                warnings.warn("gurobi, mosek, or cplex solver required for mixed-integer "
                              "quadratic programming. Rounding non-integer coefficients instead.")
                self._fit_backup(X, y, sample_weight)
        except ImportError:
            warnings.warn("Should install cvxpy with pip install cvxpy. Rounding non-integer "
                          "coefficients instead.")
            self._fit_backup(X, y, sample_weight)
        return self

    def _fit_backup(self, X, y, sample_weight):
        # fallback: fit a lasso and round its real-valued coefficients to integers
        m = Lasso(alpha=self.alpha)
        m.fit(X, y, sample_weight=sample_weight)
        self.model_.coef_ = np.round(m.coef_).astype(int)
        self.model_.intercept_ = m.intercept_

    def predict(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return self.model_.predict(X)


class SLIMClassifier(BaseEstimator, ClassifierMixin):

    def __init__(self, alpha=1):
        '''Model is initialized during fitting

        Params
        ------
        alpha: float
            weight for sparsity penalty
        '''
        self.alpha = alpha

    def fit(self, X, y, sample_weight=None):
        '''Fit a logistic model with integer coefficients and L1 regularization.
        If the mixed-integer optimization fails, fit a penalized logistic
        regression and round its coefs.

        Params
        ------
        sample_weight: np.ndarray (n,), optional
            weight for each individual sample
        '''
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.n_features_in_ = X.shape[1]
        self.classes_, y = np.unique(y, return_inverse=True)  # deals with str inputs
        self.model_ = LogisticRegression()
        self.model_.classes_ = self.classes_
        try:
            import cvxpy as cp  # package for optimization, imported here to keep it optional
            from cvxpy.error import SolverError

            # declare the integer-valued optimization variable
            w = cp.Variable(X.shape[1], integer=True)

            # set up the minimization problem: logistic loss plus L1 sparsity penalty
            logits = -X @ w
            residuals = cp.multiply(1 - y, logits) - cp.logistic(logits)
            if sample_weight is not None:
                residuals = cp.multiply(sample_weight, residuals)
            celoss = -cp.sum(residuals)
            l1_penalty = self.alpha * cp.norm(w, 1)
            obj = cp.Minimize(celoss + l1_penalty)
            prob = cp.Problem(obj)

            try:
                # solve the problem using an appropriate solver
                prob.solve()
                self.model_.coef_ = np.array([w.value.astype(int)])
                self.model_.intercept_ = 0
            except SolverError:
                warnings.warn("mosek solver required for mixed-integer exponential cone "
                              "programming. Rounding non-integer coefficients instead.")
                self._fit_backup(X, y, sample_weight)
        except ImportError:
            warnings.warn("Should install cvxpy with pip install cvxpy. Rounding non-integer "
                          "coefficients instead.")
            self._fit_backup(X, y, sample_weight)
        return self

    def _fit_backup(self, X, y, sample_weight=None):
        # fallback: fit a regularized logistic regression and round its coefficients
        m = LogisticRegression(C=1 / self.alpha)
        m.fit(X, y, sample_weight=sample_weight)
        self.model_.coef_ = np.round(m.coef_).astype(int)
        self.model_.intercept_ = m.intercept_

    def predict(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return self.model_.predict(X)

    def predict_proba(self, X):
        check_is_fitted(self)
        X = check_array(X)
        return self.model_.predict_proba(X)
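Note that fit calls prob.solve() with whatever solver cvxpy picks by default. Here is a hedged standalone sketch of the same regression problem with an explicitly chosen mixed-integer-capable solver (cp.GUROBI assumes the gurobipy package is installed; cp.MOSEK or cp.CPLEX are alternatives):

    import cvxpy as cp
    import numpy as np

    X = np.random.randn(50, 4)
    y = X @ np.array([2, 0, -1, 0]) + 0.1 * np.random.randn(50)
    alpha = 0.01

    # same MIQP as SLIMRegressor.fit: squared error + L1 penalty over integer weights
    w = cp.Variable(X.shape[1], integer=True)
    prob = cp.Problem(cp.Minimize(cp.sum_squares(X @ w - y) + alpha * cp.norm(w, 1)))

    print(cp.installed_solvers())    # list the solvers cvxpy can see
    prob.solve(solver=cp.GUROBI)     # assumes gurobipy is installed
    print(w.value.astype(int))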
Classes
class SLIMClassifier (alpha=1)
Model is initialized during fitting.

Params
------
alpha: float
    weight for sparsity penalty
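A hedged usage sketch (not from the documentation): string labels are accepted, since fit maps them through np.unique and stores them in classes_:

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 3))
    y = np.where(X[:, 0] + 2 * X[:, 1] > 0, "pos", "neg")  # string targets

    clf = SLIMClassifier(alpha=1).fit(X, y)   # assumes SLIMClassifier is in scope
    print(clf.classes_)                       # ['neg' 'pos']
    print(clf.predict(X[:5]))                 # predictions come back as the original string labels
    print(clf.predict_proba(X[:2]))           # columns ordered as clf.classes_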
Expand source code
class SLIMClassifier(BaseEstimator, ClassifierMixin): def __init__(self, alpha=1): '''Model is initialized during fitting Params ------ alpha: float weight for sparsity penalty ''' self.alpha = alpha def fit(self, X, y, sample_weight=None): '''fit a logistic model with integer coefficient and L1 regularization. In case the optimization fails, fit lasso and round coefs. Params ------ _sample_weight: np.ndarray (n,), optional weight for each individual sample ''' X, y = check_X_y(X, y) check_classification_targets(y) self.n_features_in_ = X.shape[1] self.classes_, y = np.unique(y, return_inverse=True) # deals with str inputs self.model_ = LogisticRegression() self.model_.classes_ = self.classes_ try: import cvxpy as cp # package for optimization, import here to make it optional from cvxpy.error import SolverError # declare the integer-valued optimization variable w = cp.Variable(X.shape[1], integer=True) # set up the minimization problem logits = -X @ w residuals = cp.multiply(1 - y, logits) - cp.logistic(logits) if sample_weight is not None: residuals = cp.multiply(sample_weight, residuals) celoss = -cp.sum(residuals) l1_penalty = self.alpha * cp.norm(w, 1) obj = cp.Minimize(celoss + l1_penalty) prob = cp.Problem(obj) try: # solve the problem using an appropriate solver prob.solve() self.model_.coef_ = np.array([w.value.astype(int)]) self.model_.intercept_ = 0 except SolverError: warnings.warn("mosek solver required for mixed-integer exponential cone " "programming. Rounding non-integer coefficients instead") self._fit_backup(X, y, sample_weight) except ImportError: warnings.warn("Should install cvxpy with pip install cvxpy. Rounding non-integer " "coefficients instead.") self._fit_backup(X, y, sample_weight) return self def _fit_backup(self, X, y, sample_weight=None): m = LogisticRegression(C=1 / self.alpha) m.fit(X, y, sample_weight=sample_weight) self.model_.coef_ = np.round(m.coef_).astype(int) self.model_.intercept_ = m.intercept_ def predict(self, X): check_is_fitted(self) X = check_array(X) return self.model_.predict(X) def predict_proba(self, X): check_is_fitted(self) X = check_array(X) return self.model_.predict_proba(X)
Ancestors
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- sklearn.base.ClassifierMixin
Methods
def fit(self, X, y, sample_weight=None)
Fit a logistic model with integer coefficients and L1 regularization. If the mixed-integer optimization fails, fit a penalized logistic regression and round its coefficients.

Params
------
sample_weight: np.ndarray (n,), optional
    weight for each individual sample
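For example (a hedged sketch; X and y as in the class-level example above), per-sample weights scale the loss in the solver path and are forwarded to the logistic-regression fallback otherwise:

    import numpy as np

    sw = np.where(y == "neg", 2.0, 1.0)   # upweight one class
    clf = SLIMClassifier(alpha=1).fit(X, y, sample_weight=sw)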
def predict(self, X)
def predict_proba(self, X)
def set_fit_request(self: SLIMClassifier, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> SLIMClassifier
Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). See the User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:

- True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to fit.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: this method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters
----------
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for sample_weight parameter in fit.

Returns
-------
self : object
    The updated object.
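A hedged sketch of where set_fit_request matters (requires scikit-learn >= 1.3 with metadata routing enabled): routing sample_weight through a Pipeline to SLIMClassifier.fit while telling the scaler to ignore it:

    import numpy as np
    import sklearn
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    sklearn.set_config(enable_metadata_routing=True)

    X = np.random.randn(100, 3)
    y = (X[:, 0] > 0).astype(int)

    pipe = make_pipeline(
        StandardScaler().set_fit_request(sample_weight=False),        # scaler ignores the weights
        SLIMClassifier(alpha=1).set_fit_request(sample_weight=True),  # classifier receives them
    )
    pipe.fit(X, y, sample_weight=np.ones(len(y)))  # weights are routed to SLIMClassifier.fit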
def set_score_request(self: SLIMClassifier, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> SLIMClassifier
Request metadata passed to the score method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). See the User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:

- True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to score.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: this method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters
----------
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for sample_weight parameter in score.

Returns
-------
self : object
    The updated object.
class SLIMRegressor (alpha=0.01)
Sparse integer linear model.

Params
------
alpha: float
    weight for sparsity penalty
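A hedged sketch of the alpha trade-off (synthetic data; exact counts depend on the solver and noise): larger alpha typically drives more integer coefficients to exactly zero:

    import numpy as np

    rng = np.random.default_rng(1)
    X = rng.normal(size=(100, 6))
    y = X @ np.array([4, 0, 0, -1, 0, 0]) + rng.normal(size=100)

    for alpha in [0.01, 1.0, 10.0]:
        m = SLIMRegressor(alpha=alpha).fit(X, y)  # assumes SLIMRegressor is in scope
        print(alpha, m.model_.coef_, "nonzeros:", np.count_nonzero(m.model_.coef_))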
Ancestors
- sklearn.base.BaseEstimator
- sklearn.utils._estimator_html_repr._HTMLDocumentationLinkMixin
- sklearn.utils._metadata_requests._MetadataRequester
- sklearn.base.RegressorMixin
Methods
def fit(self, X, y, sample_weight=None)
Fit a linear model with integer coefficients and L1 regularization. If the mixed-integer optimization fails, fit a lasso and round its coefficients.

Params
------
sample_weight: np.ndarray (n,), optional
    weight for each individual sample
def predict(self, X)
def set_fit_request(self: SLIMRegressor, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> SLIMRegressor
Request metadata passed to the fit method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). See the User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:

- True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to fit.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: this method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters
----------
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for sample_weight parameter in fit.

Returns
-------
self : object
    The updated object.
def set_score_request(self: SLIMRegressor, *, sample_weight: Union[bool, None, str] = '$UNCHANGED$') -> SLIMRegressor
Request metadata passed to the score method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config). See the User Guide on metadata routing for how the routing mechanism works.

The options for each parameter are:

- True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.
- False: metadata is not requested and the meta-estimator will not pass it to score.
- None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
- str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

Added in version 1.3.

Note: this method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a sklearn.pipeline.Pipeline. Otherwise it has no effect.

Parameters
----------
sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED
    Metadata routing for sample_weight parameter in score.

Returns
-------
self : object
    The updated object.