"""Gradient-based explainer.
This module implements the gradient-based explainers grad-dot and grad-cos.
"""
import copy
import warnings
from enum import Enum
from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple,
Union)
import numpy as np
from alibi.api.defaults import DEFAULT_DATA_SIM, DEFAULT_META_SIM
from alibi.api.interfaces import Explainer, Explanation
from alibi.explainers.similarity.base import BaseSimilarityExplainer
from alibi.explainers.similarity.metrics import asym_dot, cos, dot
from alibi.utils import _get_options_string
from alibi.utils.frameworks import Framework
from typing_extensions import Literal
if TYPE_CHECKING:
import tensorflow
import torch
[docs]
class Task(str, Enum):
    """Kinds of learning task the explainer can be configured for.

    Every member is also a ``str``, so it compares equal to its plain string value
    (e.g. ``Task.REGRESSION == "regression"``).
    """

    CLASSIFICATION = "classification"
    REGRESSION = "regression"
[docs]
class GradientSimilarity(BaseSimilarityExplainer):
    """Explainer that ranks training instances by gradient similarity to the instances being explained.

    The gradients of the loss for the test instance(s) are compared with the gradients of the loss for the training
    data using the similarity function selected by ``sim_fn``.
    """

    def __init__(self,
                 predictor: 'Union[tensorflow.keras.Model, torch.nn.Module]',
                 loss_fn: '''Union[Callable[[tensorflow.Tensor, tensorflow.Tensor], tensorflow.Tensor],
                                   Callable[[torch.Tensor, torch.Tensor], torch.Tensor]]''',
                 sim_fn: Literal['grad_dot', 'grad_cos', 'grad_asym_dot'] = 'grad_dot',
                 task: Literal['classification', 'regression'] = 'classification',
                 precompute_grads: bool = False,
                 backend: Literal['tensorflow', 'pytorch'] = 'tensorflow',
                 device: 'Union[int, str, torch.device, None]' = None,
                 verbose: bool = False,
                 ):
        """`GradientSimilarity` explainer.

        The gradient similarity explainer is used to find examples in the training data that the predictor considers
        similar to test instances the user wants to explain. It uses the gradients of the loss between the model output
        and the training data labels. These are compared using the similarity function specified by ``sim_fn``. The
        `GradientSimilarity` explainer can be applied to models trained for both classification and regression tasks.

        Parameters
        ----------
        predictor
            Model to explain.
        loss_fn
            Loss function used. The gradient of the loss function is used to compute the similarity between the test
            instances and the training set.
        sim_fn
            Similarity function to use. The ``'grad_dot'`` similarity function computes the dot product of the
            gradients, see :py:func:`alibi.explainers.similarity.metrics.dot`. The ``'grad_cos'`` similarity function
            computes the cosine similarity between the gradients, see
            :py:func:`alibi.explainers.similarity.metrics.cos`. The ``'grad_asym_dot'`` similarity function is similar
            to ``'grad_dot'`` but is asymmetric, see :py:func:`alibi.explainers.similarity.metrics.asym_dot`.
        task
            Type of task performed by the model. If the task is ``'classification'``, the target value passed to the
            explain method of the test instance can be specified either directly or left as ``None``, if left ``None``
            we use the model's maximum prediction. If the task is ``'regression'``, the target value of the test
            instance must be specified directly.
        precompute_grads
            Whether to precompute the gradients. If ``False``, gradients are computed on the fly otherwise we
            precompute them which can be faster when it comes to computing explanations. Note this option may be memory
            intensive if the model is large.
        backend
            Backend to use.
        device
            Device to use. If ``None``, the default device for the backend is used. If using `pytorch` backend see
            `pytorch device docs <https://pytorch.org/docs/stable/tensor_attributes.html#torch-device>`_ for correct
            options. Note that in the `pytorch` backend case this parameter can be a ``torch.device``. If using
            `tensorflow` backend see `tensorflow docs <https://www.tensorflow.org/api_docs/python/tf/device>`_ for
            correct options.
        verbose
            Whether to print the progress of the explainer.

        Raises
        ------
        ValueError
            If the ``task`` is not ``'classification'`` or ``'regression'``.
        ValueError
            If the ``sim_fn`` is not ``'grad_dot'``, ``'grad_cos'`` or ``'grad_asym_dot'``.
        ValueError
            If the ``backend`` is not ``'tensorflow'`` or ``'pytorch'``.
        TypeError
            If the device is not an ``int``, ``str``, ``torch.device`` or ``None`` for the torch backend option or if
            the device is not ``str`` or ``None`` for the tensorflow backend option.
        """
        # TODO: add link to docs page for GradientSimilarity explainer in the docstring once written

        # Map the public similarity names onto the metric implementations.
        sim_fn_opts: Dict[str, Callable] = {
            'grad_dot': dot,
            'grad_cos': cos,
            'grad_asym_dot': asym_dot
        }

        if sim_fn not in sim_fn_opts:
            raise ValueError(f"""Unknown method {sim_fn}. Consider using: '{"' | '".join(sim_fn_opts.keys())}'.""")

        resolved_sim_fn = sim_fn_opts[sim_fn]

        # `Task` and `Framework` members are compared against the plain string arguments here.
        if task not in Task.__members__.values():
            raise ValueError(f"Unknown task {task}. Consider using: {_get_options_string(Task)}.")

        self.task = task

        if backend not in Framework.__members__.values():
            raise ValueError(f"Unknown backend {backend}. Consider using: {_get_options_string(Framework)}.")

        super().__init__(predictor, loss_fn, resolved_sim_fn, precompute_grads, Framework(backend), device=device,
                         meta=copy.deepcopy(DEFAULT_META_SIM), verbose=verbose)

        # Record the user-facing configuration in the explainer metadata.
        self.meta['params'].update(
            sim_fn_name=sim_fn,
            store_grads=precompute_grads,
            backend_name=backend,
            task_name=task
        )

        num_non_trainable = self.backend._count_non_trainable(self.predictor)
        if num_non_trainable:
            warning_msg = (f"Found {num_non_trainable} non-trainable parameters in the model. These parameters "
                           "don't have gradients and will not be included in the computation of gradient similarity."
                           " This might be because your model has layers that track statistics using non-trainable "
                           "parameters such as batch normalization layers. In this case, you don't need to worry. "
                           "Otherwise it's because you have set some parameters to be non-trainable and alibi is "
                           "letting you know.")
            warnings.warn(warning_msg)

    def fit(self,
            X_train: Union[np.ndarray, List[Any]],
            Y_train: np.ndarray) -> "Explainer":
        """Fit the explainer.

        The `GradientSimilarity` explainer requires the model gradients over the training data. In the explain method
        it compares them to the model gradients for the test instance(s). If ``precompute_grads=True`` on
        initialization then the gradients are precomputed here and stored. This will speed up the explain method call
        but storing the gradients may not be feasible for large models.

        Parameters
        ----------
        X_train
            Training data.
        Y_train
            Training labels.

        Returns
        -------
        self
            Returns self.
        """
        return super().fit(X_train, Y_train)

    def _preprocess_args(
            self,
            X: 'Union[np.ndarray, tensorflow.Tensor, torch.Tensor, Any, List[Any]]',
            Y: 'Optional[Union[np.ndarray, tensorflow.Tensor, torch.Tensor]]' = None) \
            -> 'Union[Tuple[torch.Tensor, torch.Tensor], Tuple[tensorflow.Tensor, tensorflow.Tensor]]':
        """Formats `X`, `Y` for explain method.

        Parameters
        ----------
        X
            Input data requiring formatting.
        Y
            Target data requiring formatting.

        Returns
        -------
        X
            Input data formatted for explain method.
        Y
            Target data formatted for explain method.

        Raises
        ------
        ValueError
            If `Y` is ``None`` and the task is ``'regression'``.
        """
        X = self._match_shape_to_data(X, 'X')
        if isinstance(X, np.ndarray):
            X = self.backend.to_tensor(X)

        if self.task == Task.REGRESSION and Y is None:
            err_msg = "Regression task requires a target value. 'Y' must be provided."
            raise ValueError(err_msg)

        if Y is None:
            # No target supplied: use the arg-max of the predictor output as the target.
            Y = self.predictor(X)
            Y = self.backend.argmax(Y)  # type: ignore

        Y = self._match_shape_to_data(Y, 'Y')
        if isinstance(Y, np.ndarray):
            Y = self.backend.to_tensor(Y)

        return X, Y

    def explain(
            self,
            X: 'Union[np.ndarray, tensorflow.Tensor, torch.Tensor, Any, List[Any]]',
            Y: 'Optional[Union[np.ndarray, tensorflow.Tensor, torch.Tensor]]' = None) -> "Explanation":
        """Explain the predictor's predictions for a given input.

        Computes the similarity score between the inputs and the training set. Returns an explanation object
        containing the scores, the indices of the training set instances sorted by descending similarity and the
        most similar and least similar instances of the data set for the input. Note that the input may be a single
        instance or a batch of instances.

        Parameters
        ----------
        X
            `X` can be a `numpy` array, `tensorflow` tensor, `pytorch` tensor of the same shape as the training data
            or a list of objects, with or without a leading batch dimension. If the batch dimension is missing it's
            added.
        Y
            `Y` can be a `numpy` array, `tensorflow` tensor or a `pytorch` tensor. In the case of a regression task, the
            `Y` argument must be present. If the task is classification then `Y` defaults to the model prediction.

        Returns
        -------
        `Explanation` object containing the ordered similarity scores for the test instance(s) with additional \
        metadata as attributes. Contains the following data-related attributes

            -  `scores`: ``np.ndarray`` - similarity scores for each pair of instances in the training and test set \
            sorted in descending order.

            -  `ordered_indices`: ``np.ndarray`` - indices of the paired training and test set instances sorted by the \
            similarity score in descending order.

            -  `most_similar`: ``np.ndarray`` - 5 most similar instances in the training set for each test instance. \
            The first element is the most similar instance.

            -  `least_similar`: ``np.ndarray`` - 5 least similar instances in the training set for each test instance. \
            The first element is the least similar instance.

        Raises
        ------
        ValueError
            If `Y` is ``None`` and the `task` is ``'regression'``.
        ValueError
            If the shape of `X` or `Y` does not match the shape of the training or target data.
        ValueError
            If the fit method has not been called prior to calling this method.
        """
        self._verify_fit()
        X, Y = self._preprocess_args(X, Y)

        # One gradient per test instance; the extra leading axis lets them be stacked along axis 0 below.
        test_grads = [self._compute_grad(self._format(x), y[None])[None] for x, y in zip(X, Y)]

        # Concatenate the list directly: wrapping it in `np.array` first would copy every gradient an extra time.
        grads_X_test = np.concatenate(test_grads, axis=0)

        if not self.precompute_grads:
            scores = self._compute_adhoc_similarity(grads_X_test)
        else:
            scores = self.sim_fn(grads_X_test, self.grad_X_train)

        return self._build_explanation(scores)

    def _build_explanation(self, scores: np.ndarray) -> "Explanation":
        """Builds an explanation object.

        Parameters
        ----------
        scores
            The scores for each of the instances in the data set computed by the similarity method.

        Returns
        -------
        `Explanation` object holding the sorted scores, the sorted indices and the 5 most and 5 least similar
        training instances for each row of `scores`, together with a copy of the explainer metadata.
        """
        data = copy.deepcopy(DEFAULT_DATA_SIM)

        # Ascending argsort reversed along the last axis gives indices ordered by descending score.
        sorted_score_indices = np.argsort(scores)[:, ::-1]

        most_similar: Union[np.ndarray, List[Any]]
        least_similar: Union[np.ndarray, List[Any]]

        if isinstance(self.X_train, np.ndarray):
            # Append singleton axes so the index array broadcasts against `X_train[None]` in `take_along_axis`.
            broadcast_indices = np.expand_dims(
                sorted_score_indices,
                axis=tuple(range(2, self.X_train.ndim + 1))
            )
            most_similar = np.take_along_axis(self.X_train[None], broadcast_indices[:, :5], axis=1)
            # Reverse slice of the tail so the first element is the least similar instance.
            least_similar = np.take_along_axis(self.X_train[None], broadcast_indices[:, -1:-6:-1], axis=1)
        else:
            most_similar = [[self.X_train[i] for i in ssi[:5]] for ssi in sorted_score_indices]
            least_similar = [[self.X_train[i] for i in ssi[-1:-6:-1]] for ssi in sorted_score_indices]

        data.update(
            scores=np.take_along_axis(scores, sorted_score_indices, axis=1),
            ordered_indices=sorted_score_indices,
            most_similar=most_similar,
            least_similar=least_similar
        )

        # Hand out a copy of the metadata so changes made to one explanation cannot leak back into the explainer
        # or into other explanations.
        return Explanation(meta=copy.deepcopy(self.meta), data=data)