"""
Main Author: Will LeVine
Corresponding Email: levinewill@icloud.com
"""
import numpy as np
from sklearn.exceptions import NotFittedError
from .base import BaseClassificationProgressiveLearner, BaseProgressiveLearner
class ProgressiveLearner(BaseProgressiveLearner):
"""
A (mostly) internal class for progressive learning. Most users who desire to
utilize ProgLearn should use the classes defined in {network, forest}.py instead
of this class.
Parameters
----------
default_transformer_class : BaseTransformer, default=None
The class of transformer to which the progressive learner defaults
if None is provided in any of the functions which add or set
transformers.
default_transformer_kwargs : dict, default=None
A dictionary with keys of type string and values of type obj corresponding
to the given string kwarg. This determines to which type of transformer the
progressive learner defaults if None is provided in any of the functions
which add or set transformers.
default_voter_class : BaseVoter, default=None
The class of voter to which the progressive learner defaults
if None is provided in any of the functions which add or set
voters.
default_voter_kwargs : dict, default=None
A dictionary with keys of type string and values of type obj corresponding
to the given string kwarg. This determines to which type of voter the
progressive learner defaults if None is provided in any of the functions
which add or set voters.
default_decider_class : BaseDecider, default=None
The class of decider to which the progressive learner defaults
if None is provided in any of the functions which add or set
deciders.
default_decider_kwargs : dict, default=None
A dictionary with keys of type string and values of type obj corresponding
to the given string kwarg. This determines to which type of decider the
progressive learner defaults if None is provided in any of the functions
which add or set deciders.
Attributes
----------
task_id_to_X : dict
A dictionary with keys of type obj corresponding to task ids
and values of type ndarray corresponding to the input data matrix X.
This dictionary thus maps input data matrix to the task where posteriors
are to be estimated.
task_id_to_y : dict
A dictionary with keys of type obj corresponding to task ids
and values of type ndarray corresponding to output data matrix y.
This dictionary thus maps output data matrix to the task where posteriors
are to be estimated.
transformer_id_to_X : dict
A dictionary with keys of type obj corresponding to transformer ids
and values of type ndarray corresponding to the input data matrix X.
This dictionary thus maps input data matrix to a particular transformer.
transformer_id_to_y : dict
A dictionary with keys of type obj corresponding to transformer ids
and values of type ndarray corresponding to the output data matrix y.
This dictionary thus maps output data matrix to a particular transformer.
transformer_id_to_transformers : dict
A dictionary with keys of type obj corresponding to transformer ids
and values of type obj corresponding to a transformer. This dictionary thus
maps transformer ids to the corresponding transformers.
task_id_to_transformer_id_to_voters : dict
A nested dictionary with outer key of type obj, corresponding to task ids
inner key of type obj, corresponding to transformer ids,
and values of type obj, corresponding to a voter. This dictionary thus maps
voters to a corresponding transformer assigned to a particular task.
task_id_to_decider : dict
A dictionary with keys of type obj, corresponding to task ids,
and values of type obj corresponding to a decider. This dictionary thus
maps deciders to a particular task.
task_id_to_decider_class : dict
A dictionary with keys of type obj corresponding to task ids
and values of type obj corresponding to a decider class. This dictionary
thus maps decider classes to a particular task id.
task_id_to_voter_class : dict
A dictionary with keys of type obj corresponding to task ids
and values of type obj corresponding to a voter class. This dictionary thus
maps voter classes to a particular task id.
task_id_to_voter_kwargs : dict
A dictionary with keys of type obj corresponding to task ids
and values of type obj corresponding to a voter kwargs. This dictionary thus
maps voter kwargs to a particular task id.
task_id_to_decider_kwargs : dict
A dictionary with keys of type obj corresponding to task ids
and values of type obj corresponding to a decider kwargs. This dictionary
thus maps decider kwargs to a particular task id.
task_id_to_bag_id_to_voter_data_idx : dict
A nested dictionary with outer keys of type obj corresponding to task ids
inner keys of type obj corresponding to bag ids
and values of type obj corresponding to voter data indices.
This dictionary thus maps voter data indices to particular bags
for particular tasks.
task_id_to_decider_idx : dict
A dictionary with keys of type obj corresponding to task ids
and values of type obj corresponding to decider indices. This dictionary
thus maps decider indices to particular tasks.
"""
def __init__(
    self,
    default_transformer_class=None,
    default_transformer_kwargs=None,
    default_voter_class=None,
    default_voter_kwargs=None,
    default_decider_class=None,
    default_decider_kwargs=None,
):
    """
    Create an empty learner and remember its fallback component settings.

    Parameters
    ----------
    default_transformer_class, default_voter_class, default_decider_class
        Classes stored as fallbacks; default=None.
    default_transformer_kwargs, default_voter_kwargs, default_decider_kwargs
        Keyword-argument dictionaries stored as fallbacks; default=None.
    """
    # Fallbacks consulted by set_transformer / set_voter / set_decider.
    self.default_transformer_class = default_transformer_class
    self.default_transformer_kwargs = default_transformer_kwargs
    self.default_voter_class = default_voter_class
    self.default_voter_kwargs = default_voter_kwargs
    self.default_decider_class = default_decider_class
    self.default_decider_kwargs = default_decider_kwargs

    # Training data, keyed by task id and by transformer id.
    self.task_id_to_X = {}
    self.task_id_to_y = {}
    self.transformer_id_to_X = {}
    self.transformer_id_to_y = {}

    # Fitted components.
    self.transformer_id_to_transformers = {}
    self.task_id_to_transformer_id_to_voters = {}
    self.task_id_to_decider = {}

    # Per-task component classes / kwargs remembered from earlier calls.
    self.task_id_to_decider_class = {}
    self.task_id_to_decider_kwargs = {}
    self.task_id_to_voter_class = {}
    self.task_id_to_voter_kwargs = {}

    # Sample-index bookkeeping for voters (per bag) and deciders.
    self.task_id_to_bag_id_to_voter_data_idx = {}
    self.task_id_to_decider_idx = {}
def get_transformer_ids(self):
    """
    Return the ids of every transformer registered with this learner.

    Returns
    -------
    transformer_ids : ndarray
        1d array built from the keys of ``transformer_id_to_transformers``.
    """
    registered_ids = [*self.transformer_id_to_transformers]
    return np.array(registered_ids)
def get_task_ids(self):
    """
    Return the ids of every task that currently has a decider.

    Returns
    -------
    task_ids : ndarray
        1d array built from the keys of ``task_id_to_decider``.
    """
    known_task_ids = [*self.task_id_to_decider]
    return np.array(known_task_ids)
def _append_transformer(self, transformer_id, transformer):
    """
    Register ``transformer`` under ``transformer_id``.

    Several transformers may share one id (one per bag); they are kept in a
    list in the order they were added.

    Parameters
    ----------
    transformer_id : obj
        Hashable id under which the transformer is stored.
    transformer : obj
        The transformer to store.
    """
    # Look the id up in the dict itself rather than in the ndarray returned
    # by get_transformer_ids(): NumPy coerces mixed-type ids (e.g. 0 and "a")
    # to a common dtype, so an existing id could be missed and its list of
    # transformers silently overwritten.
    self.transformer_id_to_transformers.setdefault(transformer_id, []).append(
        transformer
    )
def _append_voter(self, transformer_id, task_id, voter):
    """
    Register ``voter`` for the (``task_id``, ``transformer_id``) pair.

    Voters for the same pair accumulate in a list, in insertion order.

    Parameters
    ----------
    transformer_id : obj
        Id of the transformer the voter was fitted on.
    task_id : obj
        Id of the task the voter belongs to.
    voter : obj
        The voter to store.
    """
    voters_for_task = self.task_id_to_transformer_id_to_voters.setdefault(
        task_id, {}
    )
    voters_for_task.setdefault(transformer_id, []).append(voter)
def _append_voter_data_idx(self, task_id, bag_id, voter_data_idx):
    """
    Record the sample indices reserved for voters of one bag of one task.

    An existing entry for the same (``task_id``, ``bag_id``) is replaced.

    Parameters
    ----------
    task_id : obj
        Id of the task.
    bag_id : obj
        Id of the bag within that task.
    voter_data_idx : obj
        Indices to store.
    """
    per_bag = self.task_id_to_bag_id_to_voter_data_idx.setdefault(task_id, {})
    per_bag[bag_id] = voter_data_idx
def _append_decider_idx(self, task_id, decider_idx):
    """
    Record the sample indices reserved for fitting the decider of a task.

    An earlier entry for the same ``task_id`` is replaced.

    Parameters
    ----------
    task_id : obj
        Id of the task.
    decider_idx : obj
        Indices to store.
    """
    self.task_id_to_decider_idx.update({task_id: decider_idx})
def _bifurcate_decider_idxs(self, ra, transformer_voter_decider_split):
    """
    Split candidate sample indices into transformer/voter and decider parts.

    Parameters
    ----------
    ra : array-like
        1d collection of candidate sample indices.
    transformer_voter_decider_split : array-like of length 3 or None
        Proportions for transformer, voter and decider data. The first two
        entries are summed into a single transformer/voter proportion.

    Returns
    -------
    transformer_voter_idx, decider_idx
        * If the split is None, ``ra`` is returned for both parts.
        * If the two proportions sum to more than 1, each part is sampled
          independently from ``ra`` (the parts may overlap) and the pair is
          returned as a list.
        * Otherwise the two parts are disjoint and returned as a tuple.

    Notes
    -----
    Sampling uses the global ``np.random`` state. Part sizes are
    ``int(len(ra) * proportion)``, i.e. truncated.
    """
    if transformer_voter_decider_split is None:
        return ra, ra

    split = [
        np.sum(np.array(transformer_voter_decider_split)[:2]),
        transformer_voter_decider_split[2],
    ]
    n_candidates = len(ra)

    if np.sum(split) > 1:
        return [
            np.random.choice(ra, int(n_candidates * p), replace=False)
            for p in split
        ]

    first_idx = np.random.choice(ra, int(n_candidates * split[0]), replace=False)
    # first_idx holds *values* taken from ra, so they must be removed by value.
    # np.delete would treat them as positions, which is only right when ra is
    # exactly 0..n-1. For that input setdiff1d yields the same sorted
    # remainder, so sampling is unchanged.
    remaining = np.setdiff1d(ra, first_idx)
    second_idx = np.random.choice(
        remaining, int(n_candidates * split[1]), replace=False
    )
    return first_idx, second_idx
def _add_transformer(
    self,
    X,
    y,
    transformer_data_proportion,
    transformer_voter_data_idx,
    transformer_id,
    num_transformers,
    transformer_class,
    transformer_kwargs,
    backward_task_ids,
):
    """
    Fit ``num_transformers`` transformers and connect them to existing tasks.

    Parameters
    ----------
    X : ndarray
        Input data matrix.
    y : ndarray
        Output (response) data matrix.
    transformer_data_proportion : float
        Each transformer is fitted on ``int(transformer_data_proportion * n)``
        indices drawn from ``transformer_voter_data_idx``, where ``n`` is
        ``len(X)`` (or ``len(y)`` if X is None).
    transformer_voter_data_idx : array-like or None
        Indices available to transformers and voters; None means
        ``range(len(X))``.
    transformer_id : obj or None
        Id of the new transformer(s); None means the current number of
        transformer ids.
    num_transformers : int
        Number of transformers (bags) to fit.
    transformer_class, transformer_kwargs
        Forwarded to ``set_transformer``.
    backward_task_ids : array-like or None
        Existing tasks that receive a voter from the new transformer(s) and
        a refitted decider; None means all existing tasks.

    Returns
    -------
    self : ProgressiveLearner
        The object itself.
    """
    if transformer_id is None:
        transformer_id = len(self.get_transformer_ids())
    if backward_task_ids is None:
        backward_task_ids = self.get_task_ids()
    if transformer_voter_data_idx is None:
        transformer_voter_data_idx = range(len(X))

    # Data is stored under the transformer id only when no task with the
    # same id already holds it.
    if transformer_id not in self.task_id_to_X:
        self.transformer_id_to_X[transformer_id] = X
    if transformer_id not in self.task_id_to_y:
        self.transformer_id_to_y[transformer_id] = y

    # The sample count does not change between bags.
    if X is not None:
        n = len(X)
    elif y is not None:
        n = len(y)
    else:
        n = None

    # train new transformers
    for transformer_num in range(num_transformers):
        if n is not None:
            transformer_data_idx = np.random.choice(
                transformer_voter_data_idx,
                int(transformer_data_proportion * n),
                replace=False,
            )
        else:
            transformer_data_idx = None
        self.set_transformer(
            transformer_id=transformer_id,
            transformer_data_idx=transformer_data_idx,
            transformer_class=transformer_class,
            transformer_kwargs=transformer_kwargs,
        )
        if transformer_data_idx is None:
            # Nothing was used to fit the transformer; all indices remain.
            voter_data_idx = np.asarray(transformer_voter_data_idx)
        else:
            # Remove by value: transformer_data_idx holds sample indices, not
            # positions inside transformer_voter_data_idx. np.delete here
            # leaked transformer samples into the voter data whenever the
            # index array was shuffled or was not 0..n-1.
            voter_data_idx = np.setdiff1d(
                transformer_voter_data_idx, transformer_data_idx
            )
        self._append_voter_data_idx(
            task_id=transformer_id,
            bag_id=transformer_num,
            voter_data_idx=voter_data_idx,
        )

    # train voters and deciders from new transformer to previous tasks
    for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()):
        self.set_voter(transformer_id=transformer_id, task_id=existing_task_id)
        self.set_decider(
            task_id=existing_task_id,
            transformer_ids=list(
                self.task_id_to_transformer_id_to_voters[existing_task_id].keys()
            ),
        )
    return self
# make sure the below granular functions work without add_{transformer, task}
def set_transformer(
    self,
    transformer_id=None,
    transformer=None,
    transformer_data_idx=None,
    transformer_class=None,
    transformer_kwargs=None,
):
    """
    Register one transformer under ``transformer_id``.

    The training data is looked up by id, first among the transformer data
    and then among the task data. If both X and y are None, the supplied
    ``transformer`` is stored as-is provided it reports being fitted.
    Otherwise a new ``transformer_class(**transformer_kwargs)`` is fitted and
    stored.

    Parameters
    ----------
    transformer_id : obj, default=None
        Id of the transformer; None means the current number of transformer
        ids.
    transformer : obj, default=None
        Already fitted transformer, used only when no data is stored.
    transformer_data_idx : ndarray, default=None
        Indices of the stored data used for fitting; None means all of it.
    transformer_class : default=None
        Class to instantiate; None falls back to the learner default.
    transformer_kwargs : dict, default=None
        Constructor kwargs; None falls back to the learner default.

    Raises
    ------
    ValueError
        If no data is stored and ``transformer`` is not fitted, or if a
        class / kwargs is missing and no default is available.
    """
    if transformer_id is None:
        transformer_id = len(self.get_transformer_ids())

    if transformer_id in self.transformer_id_to_X:
        X = self.transformer_id_to_X[transformer_id]
    else:
        X = self.task_id_to_X[transformer_id]
    if transformer_id in self.transformer_id_to_y:
        y = self.transformer_id_to_y[transformer_id]
    else:
        y = self.task_id_to_y[transformer_id]

    if transformer_data_idx is not None:
        X, y = X[transformer_data_idx], y[transformer_data_idx]

    # No data at all: only a pre-fitted transformer can be registered.
    if X is None and y is None:
        if not transformer.is_fitted():
            raise ValueError(
                "transformer_class is not fitted and X is None and y is None."
            )
        self._append_transformer(transformer_id, transformer)
        return

    if transformer_class is None:
        transformer_class = self.default_transformer_class
        if transformer_class is None:
            raise ValueError(
                "transformer_class is None and 'default_transformer_class' is None."
            )
    if transformer_kwargs is None:
        transformer_kwargs = self.default_transformer_kwargs
        if transformer_kwargs is None:
            raise ValueError(
                "transformer_kwargs is None and\n"
                "'default_transformer_kwargs' is None."
            )

    # y is omitted from the fit call when it is None.
    fit_args = (X,) if y is None else (X, y)
    fitted = transformer_class(**transformer_kwargs).fit(*fit_args)
    self._append_transformer(transformer_id, fitted)
def set_voter(
    self,
    transformer_id,
    task_id=None,
    voter_class=None,
    voter_kwargs=None,
    bag_id=None,
):
    """
    Fit and register voter(s) from one transformer id to one task.

    One voter is fitted per transformer stored under ``transformer_id``, or
    only for the transformer of bag ``bag_id`` when that is given.

    Parameters
    ----------
    transformer_id : obj
        Id of the transformer(s) whose representation the voter(s) use.
    task_id : obj, default=None
        Id of the task; None means the current number of tasks.
    voter_class : default=None
        Voter class. None falls back to the class last used for this task,
        then to the learner default.
    voter_kwargs : dict, default=None
        Voter constructor kwargs, with the same fallback chain.
    bag_id : int, default=None
        Index of a single transformer (bag) under ``transformer_id``.

    Raises
    ------
    ValueError
        If no voter class or no voter kwargs can be resolved.

    Notes
    -----
    When ``transformer_id == task_id`` each voter is fitted on the indices
    recorded for its own bag. Otherwise it is fitted on every sample of the
    task except the decider indices.
    """
    if task_id is None:
        task_id = len(self.get_task_ids())

    if voter_class is None:
        voter_class = self.task_id_to_voter_class.get(task_id)
        if voter_class is None:
            voter_class = self.default_voter_class
        if voter_class is None:
            raise ValueError(
                "voter_class is None, the default voter class for the overall\n"
                "learner is None, and the default voter class\n"
                "for this transformer is None."
            )
    if voter_kwargs is None:
        voter_kwargs = self.task_id_to_voter_kwargs.get(task_id)
        if voter_kwargs is None:
            voter_kwargs = self.default_voter_kwargs
        if voter_kwargs is None:
            raise ValueError(
                "voter_kwargs is None, the default voter kwargs for the overall\n"
                "learner is None, and the default voter kwargs\n"
                "for this transformer is None."
            )

    X = self.task_id_to_X[task_id]
    y = self.task_id_to_y[task_id]

    bagged = self.transformer_id_to_transformers[transformer_id]
    if bag_id is None:
        numbered = list(enumerate(bagged))
    else:
        # Keep the real bag number next to the transformer. Enumerating a
        # one-element list always yielded 0, so the voter for bag k was
        # fitted on bag 0's held-out indices. Indexing a range normalises
        # negative ids and raises IndexError exactly like list indexing.
        bag_num = range(len(bagged))[bag_id]
        numbered = [(bag_num, bagged[bag_num])]

    for transformer_num, transformer in numbered:
        if transformer_id == task_id:
            voter_data_idx = self.task_id_to_bag_id_to_voter_data_idx[task_id][
                transformer_num
            ]
        else:
            # Positional delete is valid here: the array is exactly 0..n-1.
            voter_data_idx = np.delete(
                range(len(X)), self.task_id_to_decider_idx[task_id]
            )
        self._append_voter(
            transformer_id,
            task_id,
            voter_class(**voter_kwargs).fit(
                transformer.transform(X[voter_data_idx]), y[voter_data_idx]
            ),
        )

    # Remember the choices so later calls for this task can default to them.
    self.task_id_to_voter_class[task_id] = voter_class
    self.task_id_to_voter_kwargs[task_id] = voter_kwargs
def set_decider(
    self, task_id, transformer_ids, decider_class=None, decider_kwargs=None
):
    """
    Create, fit and register the decider of ``task_id``.

    Parameters
    ----------
    task_id : obj
        Id of the task whose decider is (re)built.
    transformer_ids : iterable
        Ids of the transformers (and their voters for this task) handed to
        the decider.
    decider_class : default=None
        Decider class. None falls back to the class last used for this task,
        then to the learner default.
    decider_kwargs : dict, default=None
        Decider constructor kwargs, with the same fallback chain.

    Raises
    ------
    ValueError
        If no decider class or no decider kwargs can be resolved.
    """
    if decider_class is None:
        if task_id in self.task_id_to_decider_class:
            decider_class = self.task_id_to_decider_class[task_id]
        elif self.default_decider_class is None:
            raise ValueError(
                "decider_class is None and 'default_decider_class' is None."
            )
        else:
            decider_class = self.default_decider_class

    if decider_kwargs is None:
        if task_id in self.task_id_to_decider_kwargs:
            decider_kwargs = self.task_id_to_decider_kwargs[task_id]
        elif self.default_decider_kwargs is None:
            raise ValueError(
                "decider_kwargs is None and 'default_decider_kwargs' is None."
            )
        else:
            decider_kwargs = self.default_decider_kwargs

    # Restrict both registries to the requested transformer ids.
    chosen_transformers = {}
    for tid in transformer_ids:
        chosen_transformers[tid] = self.transformer_id_to_transformers[tid]
    chosen_voters = {}
    for tid in transformer_ids:
        chosen_voters[tid] = self.task_id_to_transformer_id_to_voters[task_id][tid]

    X = self.task_id_to_X[task_id]
    y = self.task_id_to_y[task_id]

    # The decider is registered before it is fitted.
    decider = decider_class(**decider_kwargs)
    self.task_id_to_decider[task_id] = decider
    rows = self.task_id_to_decider_idx[task_id]
    decider.fit(X[rows], y[rows], chosen_transformers, chosen_voters)

    # Remember the choices so later calls for this task can default to them.
    self.task_id_to_decider_class[task_id] = decider_class
    self.task_id_to_decider_kwargs[task_id] = decider_kwargs
def add_transformer(
    self,
    X,
    y,
    transformer_data_proportion=1.0,
    transformer_voter_data_idx=None,
    transformer_id=None,
    num_transformers=1,
    transformer_class=None,
    transformer_kwargs=None,
    backward_task_ids=None,
):
    """
    Add transformer(s) and train voters and deciders from them to the
    tasks listed in ``backward_task_ids``.

    Parameters
    ----------
    X : ndarray
        Input data matrix.
    y : ndarray
        Output (response) data matrix.
    transformer_data_proportion : float, default=1.0
        Proportion of the data used to train each transformer; the rest is
        left for voters. Mostly for internal use (bagging on disjoint
        subsets).
    transformer_voter_data_idx : ndarray, default=None
        1d int array of the indices that may be used for transformers and
        voters, e.g. to exclude decider data. Mostly for internal use.
    transformer_id : obj, default=None
        Id of the transformer being added.
    num_transformers : int, default=1
        Number of transformers to add for the given inputs.
    transformer_class : BaseTransformer, default=None
        Class of the transformer(s) being added.
    transformer_kwargs : dict, default=None
        Keyword arguments used to construct the transformer(s).
    backward_task_ids : ndarray, default=None
        1d array of existing task ids that get voters and deciders trained
        from the new transformer(s).

    Returns
    -------
    self : ProgressiveLearner
        The object itself.
    """
    # Pure pass-through to the private implementation.
    options = dict(
        transformer_data_proportion=transformer_data_proportion,
        transformer_voter_data_idx=transformer_voter_data_idx,
        transformer_id=transformer_id,
        num_transformers=num_transformers,
        transformer_class=transformer_class,
        transformer_kwargs=transformer_kwargs,
        backward_task_ids=backward_task_ids,
    )
    return self._add_transformer(X, y, **options)
def add_task(
    self,
    X,
    y,
    task_id=None,
    transformer_voter_decider_split=[0.67, 0.33, 0],
    num_transformers=1,
    transformer_class=None,
    transformer_kwargs=None,
    voter_class=None,
    voter_kwargs=None,
    decider_class=None,
    decider_kwargs=None,
    backward_task_ids=None,
    forward_transformer_ids=None,
):
    """
    Add a task to the progressive learner.

    Optionally trains one or more transformers from the input data (if
    num_transformers > 0), adds voters and deciders from the new
    transformer(s) to the tasks in ``backward_task_ids``, and adds voters
    and a decider for the new task from the transformers in
    ``forward_transformer_ids`` (plus the newly added transformer(s) if
    num_transformers > 0).

    Parameters
    ----------
    X : ndarray
        Input data matrix.
    y : ndarray
        Output (response) data matrix.
    task_id : obj, default=None
        Id of the task being added. None means the larger of the current
        number of transformer ids and task ids.
    transformer_voter_decider_split : array-like, default=[0.67, 0.33, 0]
        Length-3 proportions of the input data. Index 0 is used to train
        the new transformer(s); index 1 is set aside to train the voter(s)
        from the new transformer(s) to this task; index 2 is set aside to
        train the decider. The decider indices are saved and reused for
        every later decider of this task. Voters between this task and
        other transformers or tasks use the aggregate transformer and voter
        data. The default list is never mutated.
    num_transformers : int, default=1
        Number of transformers to add for the given inputs.
    transformer_class : BaseTransformer, default=None
        Class of the transformer(s) being added.
    transformer_kwargs : dict, default=None
        Keyword arguments used to construct the transformer(s).
    voter_class : BaseVoter, default=None
        Class of the voter(s) being added.
    voter_kwargs : dict, default=None
        Keyword arguments used to construct the voter(s).
    decider_class : BaseDecider, default=None
        Class of the decider being added.
    decider_kwargs : dict, default=None
        Keyword arguments used to construct the decider.
    backward_task_ids : ndarray, default=None
        1d array of existing task ids that get voters and deciders trained
        from the new transformer(s).
    forward_transformer_ids : ndarray, default=None
        1d array of existing transformer ids from which voters and the
        decider of the new task are trained. None or empty means all
        transformers. If num_transformers > 0, ``task_id`` is appended
        automatically.

    Returns
    -------
    self : ProgressiveLearner
        The object itself.
    """
    if task_id is None:
        task_id = max(
            len(self.get_transformer_ids()), len(self.get_task_ids())
        )  # come up with something that has fewer collisions

    self.task_id_to_X[task_id] = X
    self.task_id_to_y[task_id] = y

    # split into transformer/voter and decider data
    transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs(
        range(len(X)), transformer_voter_decider_split
    )
    self._append_decider_idx(task_id, decider_idx)

    # add new transformer and train voters and decider
    # from new transformer to previous tasks
    if num_transformers > 0:
        # Compare against None: the split may be an ndarray, whose truth
        # value is ambiguous.
        if transformer_voter_decider_split is not None:
            transformer_data_proportion = transformer_voter_decider_split[0]
        else:
            transformer_data_proportion = 1
        self._add_transformer(
            X,
            y,
            transformer_data_proportion=transformer_data_proportion,
            transformer_voter_data_idx=transformer_voter_data_idx,
            transformer_id=task_id,
            num_transformers=num_transformers,
            transformer_class=transformer_class,
            transformer_kwargs=transformer_kwargs,
            backward_task_ids=backward_task_ids,
        )

    # Resolve once which transformers feed the new task, so the voter loop
    # and the decider use the same ids. Converting to a list avoids ndarray
    # truthiness and keeps ids of mixed types intact.
    requested = (
        None if forward_transformer_ids is None else list(forward_transformer_ids)
    )
    if not requested:
        transformer_ids = self.get_transformer_ids()
    else:
        transformer_ids = requested
        # The freshly trained transformer(s) live under task_id and must
        # also vote on the new task; otherwise the decider below would look
        # up voters that were never created.
        if num_transformers > 0 and task_id not in transformer_ids:
            transformer_ids.append(task_id)

    # train voters and decider from previous (and current) transformers to new task
    for transformer_id in transformer_ids:
        self.set_voter(
            transformer_id=transformer_id,
            task_id=task_id,
            voter_class=voter_class,
            voter_kwargs=voter_kwargs,
        )

    # train decider of new task
    self.set_decider(
        task_id=task_id,
        transformer_ids=transformer_ids,
        decider_class=decider_class,
        decider_kwargs=decider_kwargs,
    )
    return self
def predict(self, X, task_id, transformer_ids=None):
    """
    Predict labels under ``task_id`` for each example in ``X`` using the
    given transformer ids.

    Parameters
    ----------
    X : ndarray
        The input data matrix.
    task_id : obj
        The id of the task whose decider makes the prediction.
    transformer_ids : list, default=None
        Transformer ids through which X is sent (together with their
        voters); passed on to the decider unchanged.

    Returns
    -------
    y_hat : ndarray of shape [n_samples]
        Predicted class label per example.

    Raises
    ------
    NotFittedError
        If no decider has been registered for any task.
    """
    deciders = self.task_id_to_decider
    if not deciders:
        raise NotFittedError
    return deciders[task_id].predict(X, transformer_ids=transformer_ids)
class ClassificationProgressiveLearner(
    ProgressiveLearner, BaseClassificationProgressiveLearner
):
    """
    A (mostly) internal class for progressive learning in the classification
    setting. Most users who desire to utilize ProgLearn should use the classes
    defined in {network, forest}.py instead of this class.
    """

    def predict_proba(self, X, task_id, transformer_ids=None):
        """
        Predict posteriors under ``task_id`` for each example in ``X`` using
        the given transformer ids.

        Parameters
        ----------
        X : ndarray
            The input data matrix.
        task_id : obj
            The id of the task whose decider estimates the posteriors.
        transformer_ids : list, default=None
            Transformer ids through which X is sent (together with their
            voters); passed on to the decider unchanged.

        Returns
        -------
        y_proba_hat : ndarray of shape [n_samples, n_classes]
            Posteriors per example.

        Raises
        ------
        NotFittedError
            If no decider has been registered for any task.
        """
        if self.task_id_to_decider == {}:
            raise NotFittedError
        decider = self.task_id_to_decider[task_id]
        return decider.predict_proba(X, transformer_ids=transformer_ids)