Module imodelsx.augtree.ensemble
Expand source code
from copy import deepcopy
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.utils import resample
from tqdm import tqdm
class BaggingEstimatorText:
    """Use this class because sklearn's class doesn't support passing X_text.

    Fits ``n_estimators`` deep copies of ``estimator`` on resampled versions
    of the training data and averages their ``predict_proba`` outputs.

    Parameters
    ----------
    estimator
        Base model. It is deep-copied for each ensemble member and must
        expose ``fit(X=..., y=..., X_text=..., feature_names=...)`` and
        ``predict_proba(X_text)``.
    n_estimators : int
        Number of copies of ``estimator`` to fit.
    max_samples : float
        When below 1.0, each copy is fit on ``int(max_samples * n_rows)``
        resampled rows.
    bootstrap : bool
        When true, the rows are first redrawn with replacement.
    random_state : int, RandomState instance or None
        Seed / generator used for the ``resample`` calls in ``fit``.
    """

    def __init__(self, estimator, n_estimators=10, max_samples=1.0,
                 bootstrap=True, random_state=None):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.bootstrap = bootstrap
        self.random_state = random_state

    def fit(self, X, y, X_text, feature_names=None):
        """Fit ``n_estimators`` copies of the base estimator on resampled data.

        ``X``, ``y`` and ``X_text`` are resampled together so their rows stay
        aligned. The fitted copies are stored in ``self.estimators_``.

        Returns
        -------
        self
        """
        # Passing the same integer seed to ``resample`` on every iteration
        # reproduces the exact same draw each time, which would train every
        # ensemble member on identical rows. Turn an integer seed into one
        # generator up front so that its state advances between draws.
        # ``None`` and RandomState instances are forwarded untouched.
        rng = self.random_state
        if isinstance(rng, (int, np.integer)):
            rng = np.random.RandomState(rng)

        self.estimators_ = []
        for _ in tqdm(range(self.n_estimators)):
            estimator = deepcopy(self.estimator)
            if self.bootstrap:
                X_, y_, X_text_ = resample(
                    X, y, X_text, replace=True, random_state=rng)
            else:
                X_, y_, X_text_ = X, y, X_text
            if self.max_samples < 1.0:
                # NOTE(review): no ``replace`` argument is passed here, so
                # ``resample``'s default applies even when ``bootstrap`` is
                # false -- confirm that this is intended.
                X_, y_, X_text_ = resample(
                    X_, y_, X_text_,
                    n_samples=int(self.max_samples * X_.shape[0]),
                    random_state=rng)
            # Only densify inputs that offer ``toarray`` (e.g. scipy sparse
            # matrices); a dense array has no such method and is used as is.
            if hasattr(X_, "toarray"):
                X_ = X_.toarray()
            estimator.fit(X=X_, y=y_, X_text=X_text_,
                          feature_names=feature_names)
            self.estimators_.append(estimator)
        return self

    def predict(self, X_text):
        """Return 0/1 labels: 1 where the averaged column-1 probability > 0.5."""
        return (self.predict_proba(X_text)[:, 1] > 0.5).astype(int)

    def predict_proba(self, X_text):
        """Return the mean of the fitted estimators' ``predict_proba`` outputs.

        The accumulator has shape ``(n, 2)``, so every estimator is expected
        to return two probability columns per sample.
        """
        if hasattr(X_text, 'shape'):
            n = X_text.shape[0]
        else:
            n = len(X_text)
        y_pred = np.zeros((n, 2))
        for estimator in self.estimators_:
            y_pred += estimator.predict_proba(X_text)
        y_pred /= len(self.estimators_)
        return y_pred

    def score(self, X, y):
        """Return the accuracy of ``self.predict(X)`` against ``y``.

        ``X`` is handed to ``predict`` unchanged, i.e. it plays the role of
        ``X_text`` there.
        """
        return accuracy_score(y, self.predict(X))

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict.

        Only parameters that ``__init__`` accepts and stores are reported, so
        the result can be passed back as ``BaggingEstimatorText(**params)``.
        ``deep`` is accepted but not used.
        """
        return {
            "estimator": self.estimator,
            "n_estimators": self.n_estimators,
            "max_samples": self.max_samples,
            "bootstrap": self.bootstrap,
            "random_state": self.random_state,
        }
Classes
class BaggingEstimatorText (estimator, n_estimators=10, max_samples=1.0, bootstrap=True, random_state=None)
-
Use this class because sklearn's class doesn't support passing X_text
Expand source code
class BaggingEstimatorText:
    """Use this class because sklearn's class doesn't support passing X_text.

    Fits ``n_estimators`` deep copies of ``estimator`` on resampled versions
    of the training data and averages their ``predict_proba`` outputs.

    Parameters
    ----------
    estimator
        Base model. It is deep-copied for each ensemble member and must
        expose ``fit(X=..., y=..., X_text=..., feature_names=...)`` and
        ``predict_proba(X_text)``.
    n_estimators : int
        Number of copies of ``estimator`` to fit.
    max_samples : float
        When below 1.0, each copy is fit on ``int(max_samples * n_rows)``
        resampled rows.
    bootstrap : bool
        When true, the rows are first redrawn with replacement.
    random_state : int, RandomState instance or None
        Seed / generator used for the ``resample`` calls in ``fit``.
    """

    def __init__(self, estimator, n_estimators=10, max_samples=1.0,
                 bootstrap=True, random_state=None):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.bootstrap = bootstrap
        self.random_state = random_state

    def fit(self, X, y, X_text, feature_names=None):
        """Fit ``n_estimators`` copies of the base estimator on resampled data.

        ``X``, ``y`` and ``X_text`` are resampled together so their rows stay
        aligned. The fitted copies are stored in ``self.estimators_``.

        Returns
        -------
        self
        """
        # Passing the same integer seed to ``resample`` on every iteration
        # reproduces the exact same draw each time, which would train every
        # ensemble member on identical rows. Turn an integer seed into one
        # generator up front so that its state advances between draws.
        # ``None`` and RandomState instances are forwarded untouched.
        rng = self.random_state
        if isinstance(rng, (int, np.integer)):
            rng = np.random.RandomState(rng)

        self.estimators_ = []
        for _ in tqdm(range(self.n_estimators)):
            estimator = deepcopy(self.estimator)
            if self.bootstrap:
                X_, y_, X_text_ = resample(
                    X, y, X_text, replace=True, random_state=rng)
            else:
                X_, y_, X_text_ = X, y, X_text
            if self.max_samples < 1.0:
                # NOTE(review): no ``replace`` argument is passed here, so
                # ``resample``'s default applies even when ``bootstrap`` is
                # false -- confirm that this is intended.
                X_, y_, X_text_ = resample(
                    X_, y_, X_text_,
                    n_samples=int(self.max_samples * X_.shape[0]),
                    random_state=rng)
            # Only densify inputs that offer ``toarray`` (e.g. scipy sparse
            # matrices); a dense array has no such method and is used as is.
            if hasattr(X_, "toarray"):
                X_ = X_.toarray()
            estimator.fit(X=X_, y=y_, X_text=X_text_,
                          feature_names=feature_names)
            self.estimators_.append(estimator)
        return self

    def predict(self, X_text):
        """Return 0/1 labels: 1 where the averaged column-1 probability > 0.5."""
        return (self.predict_proba(X_text)[:, 1] > 0.5).astype(int)

    def predict_proba(self, X_text):
        """Return the mean of the fitted estimators' ``predict_proba`` outputs.

        The accumulator has shape ``(n, 2)``, so every estimator is expected
        to return two probability columns per sample.
        """
        if hasattr(X_text, 'shape'):
            n = X_text.shape[0]
        else:
            n = len(X_text)
        y_pred = np.zeros((n, 2))
        for estimator in self.estimators_:
            y_pred += estimator.predict_proba(X_text)
        y_pred /= len(self.estimators_)
        return y_pred

    def score(self, X, y):
        """Return the accuracy of ``self.predict(X)`` against ``y``.

        ``X`` is handed to ``predict`` unchanged, i.e. it plays the role of
        ``X_text`` there.
        """
        return accuracy_score(y, self.predict(X))

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict.

        Only parameters that ``__init__`` accepts and stores are reported, so
        the result can be passed back as ``BaggingEstimatorText(**params)``.
        ``deep`` is accepted but not used.
        """
        return {
            "estimator": self.estimator,
            "n_estimators": self.n_estimators,
            "max_samples": self.max_samples,
            "bootstrap": self.bootstrap,
            "random_state": self.random_state,
        }
Methods
def fit(self, X, y, X_text, feature_names=None)
-
Expand source code
def fit(self, X, y, X_text, feature_names=None):
    """Fit ``n_estimators`` copies of the base estimator on resampled data.

    ``X``, ``y`` and ``X_text`` are resampled together so their rows stay
    aligned. The fitted copies are stored in ``self.estimators_``.

    Returns
    -------
    self
    """
    # Passing the same integer seed to ``resample`` on every iteration
    # reproduces the exact same draw each time, which would train every
    # ensemble member on identical rows. Turn an integer seed into one
    # generator up front so that its state advances between draws.
    # ``None`` and RandomState instances are forwarded untouched.
    rng = self.random_state
    if isinstance(rng, (int, np.integer)):
        rng = np.random.RandomState(rng)

    self.estimators_ = []
    for _ in tqdm(range(self.n_estimators)):
        estimator = deepcopy(self.estimator)
        if self.bootstrap:
            X_, y_, X_text_ = resample(
                X, y, X_text, replace=True, random_state=rng)
        else:
            X_, y_, X_text_ = X, y, X_text
        if self.max_samples < 1.0:
            # NOTE(review): no ``replace`` argument is passed here, so
            # ``resample``'s default applies even when ``bootstrap`` is
            # false -- confirm that this is intended.
            X_, y_, X_text_ = resample(
                X_, y_, X_text_,
                n_samples=int(self.max_samples * X_.shape[0]),
                random_state=rng)
        # Only densify inputs that offer ``toarray`` (e.g. scipy sparse
        # matrices); a dense array has no such method and is used as is.
        if hasattr(X_, "toarray"):
            X_ = X_.toarray()
        estimator.fit(X=X_, y=y_, X_text=X_text_,
                      feature_names=feature_names)
        self.estimators_.append(estimator)
    return self
def get_params(self, deep=True)
-
Expand source code
def get_params(self, deep=True):
    """Return the constructor arguments as a dict.

    Only the five parameters that ``__init__`` accepts and stores are
    reported. ``max_features`` and ``bootstrap_features`` are never set on
    the instance, so reading them raised ``AttributeError`` on every call.
    ``deep`` is accepted but not used.
    """
    return {
        "estimator": self.estimator,
        "n_estimators": self.n_estimators,
        "max_samples": self.max_samples,
        "bootstrap": self.bootstrap,
        "random_state": self.random_state,
    }
def predict(self, X_text)
-
Expand source code
def predict(self, X_text):
    """Return hard 0/1 labels for ``X_text``.

    A sample gets label 1 when column 1 of ``predict_proba`` is strictly
    greater than 0.5, and label 0 otherwise.
    """
    positive_proba = self.predict_proba(X_text)[:, 1]
    is_positive = positive_proba > 0.5
    return is_positive.astype(int)
def predict_proba(self, X_text)
-
Expand source code
def predict_proba(self, X_text):
    """Average the ``predict_proba`` outputs of all fitted estimators.

    The number of rows comes from ``X_text.shape[0]`` when the input has a
    ``shape`` attribute and from ``len(X_text)`` otherwise. The running sum
    has two columns, so each estimator is expected to return two
    probability columns per sample.
    """
    n_rows = X_text.shape[0] if hasattr(X_text, 'shape') else len(X_text)
    proba_sum = np.zeros((n_rows, 2))
    for member in self.estimators_:
        member_proba = member.predict_proba(X_text)
        proba_sum += member_proba
    n_members = len(self.estimators_)
    proba_sum /= n_members
    return proba_sum
def score(self, X, y)
-
Expand source code
def score(self, X, y):
    """Return the accuracy of ``self.predict(X)`` measured against ``y``.

    ``X`` is passed to ``predict`` unchanged.
    """
    predicted_labels = self.predict(X)
    return accuracy_score(y, predicted_labels)