import copy
import logging
import math
import numbers
import sys
from abc import ABC, abstractmethod
from enum import Enum
from typing import (Callable, Dict, Iterable, List, Optional, Tuple, Union,
no_type_check)
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats.mstats import mquantiles
from sklearn.base import BaseEstimator, is_classifier, is_regressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble._gb import BaseGradientBoosting
from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import \
BaseHistGradientBoosting
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils.extmath import cartesian
from sklearn.utils.validation import check_is_fitted
from tqdm import tqdm
from alibi.api.defaults import DEFAULT_DATA_PD, DEFAULT_META_PD
from alibi.api.interfaces import Explainer, Explanation
from alibi.explainers.ale import get_quantiles
from alibi.utils import _get_options_string
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
logger = logging.getLogger(__name__)
[docs]
class Kind(str, Enum):
    """
    Enumeration of the supported explanation kinds.

    Every member is also a `str`, hence it compares equal to its plain string value
    (e.g. ``Kind.AVERAGE == 'average'``).
    """
    AVERAGE = "average"  # partial dependence averaged over the reference dataset
    INDIVIDUAL = "individual"  # individual conditional expectation, one curve per instance
    BOTH = "both"  # both of the above
[docs]
class PartialDependenceBase(Explainer, ABC):
[docs]
def __init__(self,
             predictor: Union[BaseEstimator, Callable[[np.ndarray], np.ndarray]],
             feature_names: Optional[List[str]] = None,
             categorical_names: Optional[Dict[int, List[str]]] = None,
             target_names: Optional[List[str]] = None,
             verbose: bool = False):
    """
    Base class of the partial dependence for tabular datasets. Supports multiple feature interactions.

    Parameters
    ----------
    predictor
        Either a `sklearn` estimator or a prediction function. A prediction function takes a `numpy` array
        of size `N x F` and returns a `numpy` array of size `N` (i.e. `(N, )`) or `N x T`, with `N` the number
        of instances, `F` the number of features and `T` the number of targets.
    feature_names
        Names of the features, used when displaying results.
    categorical_names
        Mapping from a feature column index to the list of categories of that feature. A feature is treated
        as categorical only if its index is a key of this dictionary. For example::

            category_map = {0: ["married", "divorced"], 3: ["high school diploma", "master's degree"]}

    target_names
        Names of the targets/outputs, used when displaying results.
    verbose
        Whether to print the progress of the explainer.
    """
    # every explainer instance gets its own copy of the default metadata
    default_meta = copy.deepcopy(DEFAULT_META_PD)
    super().__init__(meta=default_meta)

    # what is being explained and how chatty the explainer is
    self.predictor = predictor
    self.verbose = verbose

    # naming information; missing entries are filled in lazily by `explain`
    self.feature_names = feature_names
    self.categorical_names = categorical_names
    self.target_names = target_names
[docs]
def explain(self,
            X: np.ndarray,
            features: Optional[List[Union[int, Tuple[int, int]]]] = None,
            kind: Literal['average', 'individual', 'both'] = 'average',
            percentiles: Tuple[float, float] = (0., 1.),
            grid_resolution: int = 100,
            grid_points: Optional[Dict[int, Union[List, np.ndarray]]] = None) -> Explanation:
    """
    Calculates the partial dependence for each feature and/or tuples of features with respect to all
    targets and the reference dataset `X`.

    Parameters
    ----------
    X
        A `N x F` tabular dataset used to calculate partial dependence curves. This is typically the
        training dataset or a representative sample.
    features
        An optional list of features or tuples of features for which to calculate the partial dependence.
        If not provided (or empty), the partial dependence is computed for every single feature in the
        dataset. Examples: ``[0, 2]``, ``[0, 2, (0, 2)]``, ``[(0, 2)]``, where ``0`` and ``2`` correspond
        to column 0 and 2 in `X`, respectively.
    kind
        ``'average'`` returns only the partial dependence (PD) averaged across all samples of the dataset,
        ``'individual'`` returns only the individual conditional expectation (ICE) of each data point, and
        ``'both'`` returns both the PD and the ICE.
    percentiles
        Lower and upper percentiles used to limit the feature values, e.g. to remove outliers from
        low-density regions where the PD estimates are less reliable. The values must be in [0, 1].
        Only used with `grid_resolution`.
    grid_resolution
        Number of equidistant points used to split the range of each target feature. Only applies if the
        number of unique values of a target feature in `X` is greater than `grid_resolution`. For example,
        for a feature taking the values
        ``[0.1, 0.3, 0.35, 0.351, 0.4, 0.41, 0.44, ..., 0.5, 0.54, 0.56, 0.6, 0.65, 0.7, 0.9]``, setting
        ``grid_resolution=5`` evaluates the marginal effect only at ``[0.1, 0.3, 0.5, 0.7, 0.9]``, which is
        cheaper and gives similar insights. The extreme values of the grid are controlled by `percentiles`.
    grid_points
        Custom grid points. Must be a `dict` whose keys are target feature indices and whose values are
        monotonically increasing arrays for numerical features, or a subset of the categorical values for
        categorical features. When not given for a feature, the grid is built from the unique values of the
        feature in `X`, or from `grid_resolution` and `percentiles` (see `grid_resolution`). Categorical
        values can be given as strings or as integer label encodings; the label encoding must match the
        ordering of the values in `categorical_names`. Note that the dictionary is normalised in place
        by the sanity checks.

    Returns
    -------
    explanation
        An `Explanation` object containing the data and the metadata of the calculated partial dependence
        curves. See usage at `Partial dependence examples`_ for details

        .. _Partial dependence examples:
            https://docs.seldon.io/projects/alibi/en/stable/methods/PartialDependence.html
    """
    if X.ndim != 2:
        raise ValueError('The array X must be 2-dimensional.')

    num_columns = X.shape[1]

    # fall back to generic names / an empty category mapping when the user supplied none
    if self.feature_names is None:
        self.feature_names = [f'f_{i}' for i in range(num_columns)]
    if self.categorical_names is None:
        self.categorical_names = {}

    # validate the user input before doing any heavy lifting
    self._grid_points_sanity_checks(grid_points=grid_points, n_features=num_columns)
    self._features_sanity_checks(features=features)

    if not features:
        # nothing requested explicitly: explain every single feature of the dataset
        feature_names = self.feature_names  # type: ignore[assignment]
        features = list(range(num_columns))
    else:
        # translate every requested index (or tuple of indices) into the matching name(s)
        feature_names = []
        for entry in features:
            if isinstance(entry, tuple):
                feature_names.append(tuple([self.feature_names[f] for f in entry]))
            else:
                feature_names.append(self.feature_names[entry])

    # compute partial dependencies for every requested feature (or tuple of features).
    # TODO: implement parallel version - future work as it can be done for ALE too
    pds = [
        self._partial_dependence(X=X,
                                 features=target,
                                 kind=kind,
                                 percentiles=percentiles,
                                 grid_resolution=grid_resolution,
                                 grid_points=grid_points)
        for target in tqdm(features, disable=not self.verbose)
    ]

    # the leading axis of the stored arrays is the number of targets the PD/ICE was computed for
    key = Kind.AVERAGE if kind in (Kind.AVERAGE, Kind.BOTH) else Kind.INDIVIDUAL
    n_targets = pds[0][key].shape[0]

    if self.target_names is None:
        # the target names are generated here, once the number of outputs is known, to avoid
        # inspecting the model's type, prediction function etc.
        self.target_names = [f'c_{i}' for i in range(n_targets)]
    elif len(self.target_names) != n_targets:
        logger.warning('The length of `target_names` does not match the number of predicted outputs. '
                       'Ensure that the lengths match, otherwise a call to the `plot_pd` method might '
                       'raise an error or produce undesired labeling.')

    # the metadata can only be completed at this point because `target_names` was unknown before
    self.meta['params'].update(kind=kind,
                               percentiles=percentiles,
                               grid_resolution=grid_resolution,
                               feature_names=self.feature_names,
                               categorical_names=self.categorical_names,
                               target_names=self.target_names)

    return self._build_explanation(kind=kind,
                                   feature_names=feature_names,  # type: ignore[arg-type]
                                   pds=pds)
def _grid_points_sanity_checks(self, grid_points: Optional[Dict[int, Union[List, np.ndarray]]], n_features: int):
"""
Grid points sanity checks.
Parameters
----------
grid_points
See :py:meth:`alibi.explainers.partial_dependence.PartialDependenceBase.explain`.
n_features
Number of features in the dataset.
"""
if grid_points is None:
return
if not np.all(np.isin(list(grid_points.keys()), np.arange(n_features))):
raise ValueError('The features provided in `grid_points` are not a subset of the dataset features.')
for f in grid_points:
if self._is_numerical(f):
grid_points[f] = np.sort(grid_points[f]) # from this point onward, `grid_points[f]` is `np.ndarray`
else:
grid_points[f] = np.unique(grid_points[f]) # from this point onward, `grid_points[f]` is `np.ndarray`
message = "The grid points provided for the categorical feature {} are invalid. "\
"For categorical features, the grid points must be a subset of the features "\
"values defined in `categorical_names`. Received an unknown value of '{}'."
# convert to label encoding if the grid is provided as strings
if grid_points[f].dtype.type is np.str_: # type: ignore[union-attr]
int_values = []
for str_val in grid_points[f]:
try:
# `self.categorical_names` cannot be empty because of the check in `self._is_numerical`
index = self.categorical_names[f].index(str_val) # type: ignore[index]
except ValueError:
raise ValueError(message.format(f, str_val))
int_values.append(index)
grid_points[f] = np.array(int_values)
# `self.categorical_names` cannot be empty because of the check in `self._is numerical`
mask = np.isin(grid_points[f], np.arange(len(self.categorical_names[f]))) # type: ignore[index]
if not np.all(mask):
index = np.where(not mask)[0][0]
raise ValueError(message.format(f, grid_points[f][index]))
def _features_sanity_checks(self, features: Optional[List[Union[int, Tuple[int, int]]]]) -> None:
"""
Features sanity checks.
Parameters
----------
features
List of feature indices or tuples of feature indices to compute the partial dependence for.
"""
if features is None:
return
def check_feature(f):
if not isinstance(f, numbers.Integral):
raise ValueError(f'All feature entries must be integers. Got a feature value of {type(f)} type.')
if f >= len(self.feature_names):
raise ValueError(f'All feature entries must be less than '
f'``len(feature_names)={len(self.feature_names)}``. Got a feature value of {f}.')
if f < 0:
raise ValueError(f'All feature entries must be greater or equal to 0. Got a feature value of {f}.')
for feats in features:
if not isinstance(feats, tuple):
feats = (feats, ) # type: ignore[assignment]
for f in feats: # type: ignore[union-attr]
check_feature(f)
def _partial_dependence(self,
                        X: np.ndarray,
                        features: Union[int, Tuple[int, int]],
                        kind: Literal['average', 'individual', 'both'] = 'average',
                        percentiles: Tuple[float, float] = (0.05, 0.95),
                        grid_resolution: int = 100,
                        grid_points: Optional[Dict[int, Union[List, np.ndarray]]] = None
                        ) -> Dict[str, np.ndarray]:
    """
    Computes partial dependence for a feature or a tuple of features.

    Parameters
    ----------
    X, kind, percentiles, grid_resolution, grid_points
        See :py:meth:`alibi.explainers.partial_dependence.PartialDependenceBase.explain` method.
    features
        A feature or tuples of features for which to calculate the partial dependence.

    Returns
    -------
    A dictionary containing the feature(s) values (``'values'``), the feature(s) deciles (``'deciles'``), \
    and the ``'average'`` and/or ``'individual'`` predictions (i.e. partial dependence or individual \
    conditional expectation) for the given (tuple of) feature(s).
    """
    # NOTE(review): the default `percentiles` here differs from the `(0., 1.)` default of `explain`;
    # the `explain` method in this class always passes the value explicitly - confirm which is intended.
    if isinstance(features, numbers.Integral):
        features = (features, )

    if grid_points is None:
        grid_points = {}

    deciles, values, features_indices = [], [], []

    for f in features:  # type: ignore[union-attr]
        # extract column. TODO _safe_indexing in the future to support more input types.
        column = X[:, f]
        numerical = self._is_numerical(f)

        # deciles are only meaningful for numerical features
        deciles.append(get_quantiles(column, num_quantiles=11) if numerical else None)
        features_indices.append(f)

        if f in grid_points:
            # user-defined grid for this feature
            values.append(grid_points[f])
        else:
            # construct the grid from the data. For categorical features the resolution is set to
            # infinity because otherwise we risk to apply `linspace` to categorical values, which
            # does not make sense.
            resolution = grid_resolution if numerical else np.inf
            values.extend(
                self._grid_from_X(X=column.reshape(-1, 1),
                                  percentiles=percentiles,
                                  grid_resolution=resolution)  # type: ignore[arg-type]
            )

    # perform cartesian product between feature values. Covers also the case of a single feature.
    features_indices = np.array(features_indices, dtype=np.int32)  # type: ignore[assignment]
    grid = cartesian([axis.reshape(-1) for axis in values])

    # compute the PD and ICE - separate implementation for `PartialDependence` and `TreePartialDependence`
    averaged_predictions, predictions = self._compute_pd(grid=grid,
                                                         features=features_indices,  # type: ignore[arg-type]
                                                         X=X)

    grid_shape = [axis.shape[0] for axis in values]

    # reshape `averaged_predictions` to (n_outputs, n_values_feature_0, n_values_feature_1, ...)
    averaged_predictions = averaged_predictions.reshape(-1, *grid_shape)

    if predictions is not None:
        # reshape `predictions` to (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)
        predictions = predictions.reshape(-1, X.shape[0], *grid_shape)

    # define feature values (i.e. grid values) and the corresponding deciles. Note that the deciles
    # were computed on the raw (i.e. unprocessed) feature value as provided in the reference dataset `X`.
    # A single feature is stored unwrapped, several features are stored as lists.
    several = len(values) > 1
    pd = {
        'values': values if several else values[0],
        'deciles': deciles if several else deciles[0],
    }

    # 'average' is left out only for the individual kind, 'individual' only for the average kind
    if kind != Kind.INDIVIDUAL:
        pd['average'] = averaged_predictions
    if kind != Kind.AVERAGE:
        pd['individual'] = predictions
    return pd
@abstractmethod
def _compute_pd(self,
grid: np.ndarray,
features: np.ndarray,
X: np.ndarray) -> Tuple[np.ndarray, Optional[np.ndarray]]:
"""
Computes the PD and ICE.
Parameters
----------
grid
Cartesian product between feature values. Covers also the case of a single feature.
features
Feature column indices.
X
See :py:meth:`alibi.explainers.partial_dependence.PartialDependenceBase.explain`.
Returns
-------
Tuple consisting of the PD and optionally the ICE.
"""
raise NotImplementedError()
def _grid_from_X(self, X: np.ndarray, percentiles: Tuple[float, float], grid_resolution: int):
"""
Generate a grid of points based on the percentiles of `X`. If `grid_resolution` is bigger than the number
of unique values in the jth column of `X`, then those unique values will be used instead.
Code borrowed from:
https://github.com/scikit-learn/scikit-learn/blob/baf0ea25d/sklearn/inspection/_partial_dependence.py
Parameters
----------
X
Array to generate the grid for.
percentiles
The percentiles which are used to construct the extreme values of the grid. Must be in [0, 1].
grid_resolution
The number of equally spaced points to be placed on the grid for each feature.
Returns
-------
The values with which the grid has been created. The size of each array `values[j]` is either
`grid_resolution`, or the number of unique values in `X[:, j]`, whichever is smaller.
"""
if not isinstance(percentiles, Iterable) or len(percentiles) != 2:
raise ValueError("`percentiles` must be a sequence of 2 elements.")
if not all(0 <= x <= 1 for x in percentiles):
raise ValueError("`percentiles` values must be in [0, 1].")
if percentiles[0] >= percentiles[1]:
raise ValueError("`percentiles[0]` must be strictly less than `percentiles[1]`.")
if grid_resolution <= 1:
raise ValueError("`grid_resolution` must be strictly greater than 1.")
values = []
for feature in range(X.shape[1]):
uniques = np.unique(X[:, feature])
if uniques.shape[0] < grid_resolution:
# feature has low resolution use unique vals
axis = uniques
else:
# create axis based on percentiles and grid resolution
emp_percentiles = mquantiles(X[:, feature], prob=percentiles, axis=0).data
if np.allclose(emp_percentiles[0], emp_percentiles[1]):
raise ValueError("`percentiles` are too close to each other, unable to build the grid. "
"Please choose percentiles that are further apart.")
# construct equidistant grid points
axis = np.linspace(emp_percentiles[0], emp_percentiles[1], num=grid_resolution, endpoint=True)
values.append(axis)
return values
def _is_numerical(self, feature):
"""
Checks if the given feature is numerical.
Parameters
----------
feature
Feature to be checked.
Returns
-------
``True`` if the feature is numerical. ``False`` otherwise.
"""
return feature not in self.categorical_names
def _build_explanation(self,
                       kind: str,
                       feature_names: List[Union[int, Tuple[int, int]]],
                       pds: List[Dict[str, np.ndarray]]) -> Explanation:
    """
    Helper method to build `Explanation` object.

    Parameters
    ----------
    kind
        See :py:meth:`alibi.explainers.partial_dependence.PartialDependenceBase.explain` method.
    feature_names
        List of feature or tuples of features for which the partial dependencies/individual conditional
        expectation were computed.
    pds
        List of dictionary containing the partial dependencies/individual conditional expectation.

    Returns
    -------
    `Explanation` object.
    """
    # grid values and deciles are always reported, one entry per explained feature (or tuple of features)
    feature_values = [entry['values'] for entry in pds]
    feature_deciles = [entry['deciles'] for entry in pds]

    # the PD/ICE containers are ``None`` when the requested `kind` does not cover them
    pd_values: Optional[List[np.ndarray]] = None
    if kind in (Kind.AVERAGE, Kind.BOTH):
        pd_values = [entry[Kind.AVERAGE] for entry in pds if Kind.AVERAGE in entry]

    ice_values: Optional[List[np.ndarray]] = None
    if kind in (Kind.INDIVIDUAL, Kind.BOTH):
        ice_values = [entry[Kind.INDIVIDUAL] for entry in pds if Kind.INDIVIDUAL in entry]

    # start from a private copy of the default data layout and fill in the computed fields
    data = copy.deepcopy(DEFAULT_DATA_PD)
    data.update(
        feature_names=feature_names,
        feature_values=feature_values,
        ice_values=ice_values,
        pd_values=pd_values,
        feature_deciles=feature_deciles,
    )
    return Explanation(meta=copy.deepcopy(self.meta), data=data)
[docs]
def reset_predictor(self, predictor: Union[Callable[[np.ndarray], np.ndarray], BaseEstimator]) -> None:
"""
Resets the predictor function or tree-based `sklearn` estimator.
Parameters
----------
predictor
New predictor function or tree-based `sklearn` estimator.
"""
self.predictor = predictor
[docs]
class PartialDependence(PartialDependenceBase):
""" Black-box implementation of partial dependence for tabular datasets.
Supports multiple feature interactions. """
[docs]
def __init__(self,
predictor: Callable[[np.ndarray], np.ndarray],
feature_names: Optional[List[str]] = None,
categorical_names: Optional[Dict[int, List[str]]] = None,
target_names: Optional[List[str]] = None,
verbose: bool = False):
"""
Initialize black-box model implementation of partial dependence.
Parameters
----------
predictor
A prediction function which receives as input a `numpy` array of size `N x F` and outputs a
`numpy` array of size `N` (i.e. `(N, )`) or `N x T`, where `N` is the number of input
instances, `F` is the number of features and `T` is the number of targets.
feature_names
A list of feature names used for displaying results.
categorical_names
Dictionary where keys are feature columns and values are the categories for the feature. Necessary to
identify the categorical features in the dataset. An example for `categorical_names` would be::
category_map = {0: ["married", "divorced"], 3: ["high school diploma", "master's degree"]}
target_names
A list of target/output names used for displaying results.
verbose
Whether to print the progress of the explainer.
Notes
-----
The length of the `target_names` should match the number of columns returned by a call to the `predictor`.
For example, in the case of a binary classifier, if the predictor outputs a decision score (i.e. uses
the `decision_function` method) which returns one column, then the length of the `target_names` should be one.
On the other hand, if the predictor outputs a prediction probability (i.e. uses the `predict_proba` method)
which returns two columns (one for the negative class and one for the positive class), then the length of
the `target_names` should be two.
"""
if not callable(predictor):
raise ValueError("The predictor must be a callable.")
super().__init__(predictor=predictor,
feature_names=feature_names,
categorical_names=categorical_names,
target_names=target_names,
verbose=verbose)
[docs]
def explain(self,
            X: np.ndarray,
            features: Optional[List[Union[int, Tuple[int, int]]]] = None,
            kind: Literal['average', 'individual', 'both'] = 'average',
            percentiles: Tuple[float, float] = (0., 1.),
            grid_resolution: int = 100,
            grid_points: Optional[Dict[int, Union[List, np.ndarray]]] = None) -> Explanation:
    """
    Calculates the partial dependence for each feature and/or tuples of features with respect to all
    targets and the reference dataset `X`.

    Parameters
    ----------
    X
        A `N x F` tabular dataset used to calculate partial dependence curves. This is typically the
        training dataset or a representative sample.
    features
        An optional list of features or tuples of features for which to calculate the partial dependence.
        If not provided, the partial dependence will be computed for every single feature in the dataset.
        Examples: ``[0, 2]``, ``[0, 2, (0, 2)]``, ``[(0, 2)]``, where ``0`` and ``2`` correspond to
        column 0 and 2 in `X`, respectively.
    kind
        ``'average'`` returns only the partial dependence (PD) averaged across all samples of the dataset,
        ``'individual'`` returns only the individual conditional expectation (ICE) of each data point, and
        ``'both'`` returns both the PD and the ICE.
    percentiles
        Lower and upper percentiles used to limit the feature values, e.g. to remove outliers from
        low-density regions where the PD estimates are less reliable. The values must be in [0, 1].
        Only used with `grid_resolution`.
    grid_resolution
        Number of equidistant points used to split the range of each target feature. Only applies if the
        number of unique values of a target feature in `X` is greater than `grid_resolution`. For example,
        for a feature taking the values
        ``[0.1, 0.3, 0.35, 0.351, 0.4, 0.41, 0.44, ..., 0.5, 0.54, 0.56, 0.6, 0.65, 0.7, 0.9]``, setting
        ``grid_resolution=5`` evaluates the marginal effect only at ``[0.1, 0.3, 0.5, 0.7, 0.9]``, which is
        cheaper and gives similar insights. The extreme values of the grid are controlled by `percentiles`.
    grid_points
        Custom grid points. Must be a `dict` whose keys are target feature indices and whose values are
        monotonically increasing arrays for numerical features, or a subset of the categorical values for
        categorical features. When not given for a feature, the grid is built from the unique values of the
        feature in `X`, or from `grid_resolution` and `percentiles` (see `grid_resolution`). Categorical
        values can be given as strings or as integer label encodings; the label encoding must match the
        ordering of the values in `categorical_names`.

    Returns
    -------
    explanation
        An `Explanation` object containing the data and the metadata of the calculated partial dependence
        curves. See usage at `Partial dependence examples`_ for details

        .. _Partial dependence examples:
            https://docs.seldon.io/projects/alibi/en/stable/methods/PartialDependence.html

    Raises
    ------
    ValueError
        If `kind` is not one of the supported values.
    """
    # `kind` param sanity check.
    supported_kinds = Kind.__members__.values()
    if kind not in supported_kinds:
        raise ValueError(f"``kind='{kind}'`` is invalid. "
                         f"Accepted `kind` names are: {_get_options_string(Kind)}.")

    # everything else is handled by the shared implementation
    forwarded = dict(X=X,
                     features=features,
                     kind=kind,
                     percentiles=percentiles,
                     grid_resolution=grid_resolution,
                     grid_points=grid_points)
    return super().explain(**forwarded)
def _compute_pd(self,
grid: np.ndarray,
features: np.ndarray,
X: np.ndarray) -> Tuple[np.ndarray, Optional[np.ndarray]]:
"""
Computes the partial dependence using the brute method. Code borrowed from:
https://github.com/scikit-learn/scikit-learn/blob/baf0ea25d/sklearn/inspection/_partial_dependence.py
Parameters
--------
grid
Cartesian product between feature values. Covers also the case of a single feature.
features
Feature column indices.
X
See :py:meth:`alibi.explainers.partial_dependence.PartialDependence.explain` method.
Returns
-------
Partial dependence for the given features.
"""
predictions = []
averaged_predictions = []
X_eval = X.copy()
for grid_values in grid:
X_eval[:, features] = grid_values
# Note: predictions is of shape
# (n_points,) for non-multioutput regressors
# (n_points, n_tasks) for multioutput regressors
# (n_points, 1) for the regressors in cross_decomposition (I think)
# (n_points, 2) for binary classification
# (n_points, n_classes) for multiclass classification
pred = self.predictor(X_eval)
predictions.append(pred)
# average over samples
averaged_predictions.append(np.mean(pred, axis=0))
# cast to `np.ndarray` and transpose
predictions = np.array(predictions).T # type: ignore[assignment]
averaged_predictions = np.array(averaged_predictions).T # type: ignore[assignment]
return averaged_predictions, predictions # type: ignore[return-value]
[docs]
class TreePartialDependence(PartialDependenceBase):
""" Tree-based model `sklearn` implementation of the partial dependence for tabular datasets.
Supports multiple feature interactions. This method is faster than the general black-box implementation
but is only supported by some tree-based estimators. The computation is based on a weighted tree traversal.
For more details on the computation, check the `sklearn documentation page`_. The supported `sklearn`
models are: `GradientBoostingClassifier`, `GradientBoostingRegressor`, `HistGradientBoostingClassifier`,
`HistGradientBoostingRegressor`, `DecisionTreeRegressor`, `RandomForestRegressor`.
.. _sklearn documentation page:
https://scikit-learn.org/stable/modules/partial_dependence.html#computation-methods
"""
[docs]
def __init__(self,
             predictor: BaseEstimator,
             feature_names: Optional[List[str]] = None,
             categorical_names: Optional[Dict[int, List[str]]] = None,
             target_names: Optional[List[str]] = None,
             verbose: bool = False):
    """
    Initialize tree-based model `sklearn` implementation of partial dependence.

    Parameters
    ----------
    predictor
        A tree-based `sklearn` estimator.
    feature_names
        A list of feature names used for displaying results.
    categorical_names
        Dictionary where keys are feature columns and values are the categories for the feature. Necessary to
        identify the categorical features in the dataset. An example for `categorical_names` would be::

            category_map = {0: ["married", "divorced"], 3: ["high school diploma", "master's degree"]}

    target_names
        A list of target/output names used for displaying results.
    verbose
        Whether to print the progress of the explainer.

    Notes
    -----
    The length of the `target_names` should match the number of columns returned by a call to the
    `predictor.decision_function`. In the case of a binary classifier, the decision score consists
    of a single column. Thus, the length of the `target_names` should be one.
    """
    base_kwargs = dict(predictor=predictor,
                       feature_names=feature_names,
                       categorical_names=categorical_names,
                       target_names=target_names,
                       verbose=verbose)
    super().__init__(**base_kwargs)

    # the stored predictor is validated only after the base class has been initialised
    self._sanity_check()
def _sanity_check(self):
    """
    Model sanity checks.

    Verifies, in order, that the stored predictor is a `sklearn` estimator, that it is fitted, that it is
    a classifier or a regressor, that it is not a multiclass-multioutput classifier and that its type is
    one of the supported tree-based estimators.

    Raises
    ------
    ValueError
        If any of the checks implemented in this method fails. `check_is_fitted` may raise its own error
        for an estimator that is not fitted.
    """
    model = self.predictor

    if not isinstance(model, BaseEstimator):
        raise ValueError('`TreePartialDependence` only supports `sklearn` models. '
                         'Try using the `PartialDependence` black-box alternative.')

    check_is_fitted(model)

    classifier = is_classifier(model)
    if not classifier and not is_regressor(model):
        raise ValueError('The predictor must be a fitted regressor or a fitted classifier.')

    # an array as first entry of `classes_` is how a multiclass-multioutput classifier is detected
    if classifier and isinstance(model.classes_[0], np.ndarray):
        raise ValueError('Multiclass-multioutput predictors are not supported.')

    supported_types = (BaseGradientBoosting,
                       BaseHistGradientBoosting,
                       DecisionTreeRegressor,
                       RandomForestRegressor)
    if isinstance(model, supported_types):
        return

    supported_classes_recursion = (
        "GradientBoostingClassifier",
        "GradientBoostingRegressor",
        "HistGradientBoostingClassifier",
        "HistGradientBoostingRegressor",
        "HistGradientBoostingRegressor",
        "DecisionTreeRegressor",
        "RandomForestRegressor",
    )
    raise ValueError(f'`TreePartialDependence` only supports by the following estimators: '
                     f'{supported_classes_recursion}. Try using the `PartialDependence` black-box alternative.')
[docs]
def explain(self,  # type: ignore[override]
            X: np.ndarray,
            features: Optional[List[Union[int, Tuple[int, int]]]] = None,
            percentiles: Tuple[float, float] = (0., 1.),
            grid_resolution: int = 100,
            grid_points: Optional[Dict[int, Union[List, np.ndarray]]] = None) -> Explanation:
    """
    Calculates the partial dependence for each feature and/or tuples of features with respect to all
    targets and the reference dataset `X`. Only the averaged partial dependence is computed.

    Parameters
    ----------
    X
        A `N x F` tabular dataset used to calculate partial dependence curves. This is typically the
        training dataset or a representative sample.
    features
        An optional list of features or tuples of features for which to calculate the partial dependence.
        If not provided, the partial dependence will be computed for every single feature in the dataset.
        Examples: ``[0, 2]``, ``[0, 2, (0, 2)]``, ``[(0, 2)]``, where ``0`` and ``2`` correspond to
        column 0 and 2 in `X`, respectively.
    percentiles
        Lower and upper percentiles used to limit the feature values, e.g. to remove outliers from
        low-density regions where the PD estimates are less reliable. The values must be in [0, 1].
        Only used with `grid_resolution`.
    grid_resolution
        Number of equidistant points used to split the range of each target feature. Only applies if the
        number of unique values of a target feature in `X` is greater than `grid_resolution`. For example,
        for a feature taking the values
        ``[0.1, 0.3, 0.35, 0.351, 0.4, 0.41, 0.44, ..., 0.5, 0.54, 0.56, 0.6, 0.65, 0.7, 0.9]``, setting
        ``grid_resolution=5`` evaluates the marginal effect only at ``[0.1, 0.3, 0.5, 0.7, 0.9]``, which is
        cheaper and gives similar insights. The extreme values of the grid are controlled by `percentiles`.
    grid_points
        Custom grid points. Must be a `dict` whose keys are target feature indices and whose values are
        monotonically increasing arrays for numerical features, or a subset of the categorical values for
        categorical features. When not given for a feature, the grid is built from the unique values of the
        feature in `X`, or from `grid_resolution` and `percentiles` (see `grid_resolution`). Categorical
        values can be given as strings or as integer label encodings; the label encoding must match the
        ordering of the values in `categorical_names`.

    Returns
    -------
    explanation
        The `Explanation` object returned by the base class `explain` method, computed with
        ``kind='average'``.
    """
    # only `'average'` is supported for `'recursion'` method.
    averaged_only = Kind.AVERAGE.value

    forwarded = dict(X=X,
                     features=features,
                     kind=averaged_only,
                     percentiles=percentiles,
                     grid_resolution=grid_resolution,
                     grid_points=grid_points)
    return super().explain(**forwarded)
def _compute_pd(self, # type: ignore[override]
grid: np.ndarray,
features: np.ndarray,
**kwargs) -> Tuple[np.ndarray, Optional[np.ndarray]]:
"""
Computes the PD.
Parameters
----------
grid
Cartesian product between feature values. Covers also the case of a single feature.
features
Feature column indices.
**kwargs
Other arguments. Not used.
Returns
-------
Tuple consisting of the PD and ``None``.
"""
avg_preds = self.predictor._compute_partial_dependence_recursion(grid, features) # type: ignore[union-attr]
return avg_preds, None
# No type check due to the generic explanation object
[docs]
@no_type_check
def plot_pd(exp: Explanation,
            features: Union[List[int], Literal['all']] = 'all',
            target: Union[str, int] = 0,
            n_cols: int = 3,
            n_ice: Union[Literal['all'], int, List[int]] = 100,
            center: bool = False,
            pd_limits: Optional[Tuple[float, float]] = None,
            levels: int = 8,
            ax: Optional[Union['plt.Axes', np.ndarray]] = None,
            sharey: Optional[Literal['all', 'row']] = 'all',
            pd_num_kw: Optional[dict] = None,
            ice_num_kw: Optional[dict] = None,
            pd_cat_kw: Optional[dict] = None,
            ice_cat_kw: Optional[dict] = None,
            pd_num_num_kw: Optional[dict] = None,
            pd_num_cat_kw: Optional[dict] = None,
            pd_cat_cat_kw: Optional[dict] = None,
            fig_kw: Optional[dict] = None) -> 'np.ndarray':
    """
    Plot partial dependence curves on matplotlib axes.

    Parameters
    ----------
    exp
        An `Explanation` object produced by a call to the
        :py:meth:`alibi.explainers.partial_dependence.PartialDependence.explain` method.
    features
        A list of features entries in the `exp.data['feature_names']` to plot the partial dependence curves for,
        or ``'all'`` to plot all the explained feature or tuples of features. This includes tuples of features.
        For example, if ``exp.data['feature_names'] = ['temp', 'hum', ('temp', 'windspeed')]`` and we want to plot
        the partial dependence only for the ``'temp'`` and ``('temp', 'windspeed')``, then we would set
        ``features=[0, 2]``. Defaults to ``'all'``.
    target
        The target name or index for which to plot the partial dependence (PD) curves. Can be a mix of integers
        denoting target index or strings denoting entries in `exp.meta['params']['target_names']`.
    n_cols
        Number of columns to organize the resulting plot into.
    n_ice
        Number of ICE plots to be displayed. Can be

         - a string taking the value ``'all'`` to display the ICE curves for every instance in the reference dataset.

         - an integer for which `n_ice` instances from the reference dataset will be sampled uniformly at random to \
         display their ICE curves.

         - a list of integers, where each integer represents an index of an instance in the reference dataset to \
         display their ICE curves.

    center
        Boolean flag to center the individual conditional expectation (ICE) curves. As mentioned in
        `Goldstein et al. (2014)`_, the heterogeneity in the model can be difficult to discern when the intercepts
        of the ICE curves cover a wide range. Centering the ICE curves removes the level effects and helps
        to visualise the heterogeneous effect.

        .. _Goldstein et al. (2014):
            https://arxiv.org/abs/1309.6392

    pd_limits
        Minimum and maximum y-limits for all the one-way PD plots. If ``None`` will be automatically inferred.
    levels
        Number of levels in the contour plot.
    ax
        A `matplotlib` axes object or a `numpy` array of `matplotlib` axes to plot on.
    sharey
        A parameter specifying whether the y-axis of the PD and ICE curves should be on the same scale
        for several features. Possible values are: ``'all'`` | ``'row'`` | ``None``. For ``'row'``, the rows
        are the rows of the axes grid actually used for plotting (including a user-provided array of axes).
    pd_num_kw
        Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the PD for a
        numerical feature.
    ice_num_kw
        Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the ICE for a
        numerical feature.
    pd_cat_kw
        Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the PD for a
        categorical feature.
    ice_cat_kw
        Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the ICE for a
        categorical feature.
    pd_num_num_kw
        Keyword arguments passed to the `matplotlib.pyplot.contourf`_ function when plotting the PD for two
        numerical features.
    pd_num_cat_kw
        Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the PD for a numerical and a
        categorical feature.
    pd_cat_cat_kw
        Keyword arguments passed to the :py:meth:`alibi.utils.visualization.heatmap` function when plotting the PD for
        two categorical features.
    fig_kw
        Keyword arguments passed to the `matplotlib.figure.set`_ function.

        .. _matplotlib.pyplot.plot:
            https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html

        .. _matplotlib.pyplot.contourf:
            https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.contourf.html

        .. _matplotlib.figure.set:
            https://matplotlib.org/stable/api/figure_api.html

    Returns
    -------
    An array of `plt.Axes` with the resulting partial dependence plots.

    Raises
    ------
    IndexError
        If an entry of `features` or an integer `target` is out of range.
    ValueError
        If `target` is an unknown target name, or if the provided `ax` array has fewer axes than
        the number of features to plot.
    """
    import matplotlib.pyplot as plt
    from matplotlib.gridspec import GridSpec

    default_fig_kw = {'tight_layout': 'tight'}
    if fig_kw is None:
        fig_kw = {}

    fig_kw = {**default_fig_kw, **fig_kw}

    if features == 'all':
        features = range(0, len(exp.data['feature_names']))
    else:
        for ifeatures in features:
            if ifeatures >= len(exp.data['feature_names']):
                raise IndexError(f"The `features` indices must be less than the "
                                 f"``len(feature_names) = {len(exp.data['feature_names'])}``. "
                                 f"Received {ifeatures}.")

    # set target index
    if isinstance(target, str):
        try:
            target_idx = exp.meta['params']['target_names'].index(target)
        except ValueError:
            raise ValueError(f"Unknown `target` name. Received {target}. "
                             f"Available values are: {exp.meta['params']['target_names']}.")
    else:
        target_idx = target
        if target_idx >= len(exp.meta['params']['target_names']):
            raise IndexError(f"Target index out of range. Received {target_idx}. "
                             f"The number of targets is {len(exp.meta['params']['target_names'])}.")

    # corresponds to the number of subplots
    n_features = len(features)

    # create axes
    if ax is None:
        fig, ax = plt.subplots()

    def _is_categorical(feature):
        feature_idx = exp.meta['params']['feature_names'].index(feature)
        return feature_idx in exp.meta['params']['categorical_names']

    if isinstance(ax, plt.Axes) and n_features != 1:
        ax.set_axis_off()  # treat passed axis as a canvas for subplots
        fig = ax.figure
        n_cols = min(n_cols, n_features)
        n_rows = math.ceil(n_features / n_cols)

        axes = np.empty((n_rows, n_cols), dtype=object)
        axes_ravel = axes.ravel()
        gs = GridSpec(n_rows, n_cols)

        for i, spec in enumerate(list(gs)[:n_features]):
            axes_ravel[i] = fig.add_subplot(spec)
    else:  # array-like
        if isinstance(ax, plt.Axes):
            ax = np.array(ax)

        if ax.size < n_features:
            raise ValueError(f"Expected ax to have {n_features} axes, got {ax.size}")

        axes = np.atleast_2d(ax)
        axes_ravel = axes.ravel()
        fig = axes_ravel[0].figure

    # Number of columns of the grid that is actually plotted on. For a user-provided array of axes
    # this can differ from the `n_cols` argument, so row membership must be derived from `axes`.
    grid_n_cols = axes.shape[1]

    # create plots
    one_way_axs = {}

    for i, (ifeatures, ax_ravel) in enumerate(zip(features, axes_ravel)):
        # extract the feature names
        feature_names = exp.data['feature_names'][ifeatures]

        # if it is tuple, then we need a 2D plot and address 4 cases: (num, num), (num, cat), (cat, num), (cat, cat)
        if isinstance(feature_names, tuple):
            f0, f1 = feature_names

            if (not _is_categorical(f0)) and (not _is_categorical(f1)):
                plotted_ax, ax_pd_limits = _plot_two_pd_num_num(exp=exp,
                                                                feature=ifeatures,
                                                                target_idx=target_idx,
                                                                levels=levels,
                                                                ax=ax_ravel,
                                                                pd_num_num_kw=pd_num_num_kw)

            elif _is_categorical(f0) and _is_categorical(f1):
                plotted_ax, ax_pd_limits = _plot_two_pd_cat_cat(exp=exp,
                                                                feature=ifeatures,
                                                                target_idx=target_idx,
                                                                ax=ax_ravel,
                                                                pd_cat_cat_kw=pd_cat_cat_kw)

            else:
                plotted_ax, ax_pd_limits = _plot_two_pd_num_cat(exp=exp,
                                                                feature=ifeatures,
                                                                target_idx=target_idx,
                                                                pd_limits=pd_limits,
                                                                ax=ax_ravel,
                                                                pd_num_cat_kw=pd_num_cat_kw)
        else:
            if _is_categorical(feature_names):
                plotted_ax, ax_pd_limits = _plot_one_pd_cat(exp=exp,
                                                            feature=ifeatures,
                                                            target_idx=target_idx,
                                                            center=center,
                                                            pd_limits=pd_limits,
                                                            n_ice=n_ice,
                                                            ax=ax_ravel,
                                                            pd_cat_kw=pd_cat_kw,
                                                            ice_cat_kw=ice_cat_kw)
            else:
                plotted_ax, ax_pd_limits = _plot_one_pd_num(exp=exp,
                                                            feature=ifeatures,
                                                            target_idx=target_idx,
                                                            center=center,
                                                            pd_limits=pd_limits,
                                                            n_ice=n_ice,
                                                            ax=ax_ravel,
                                                            pd_num_kw=pd_num_kw,
                                                            ice_num_kw=ice_num_kw)

        # group the axes that share the appropriate y axes. Plots returning no limits are not grouped.
        if ax_pd_limits is not None:
            if sharey == 'all':
                # add them all in the same group
                group_key = 'all'
            elif sharey == 'row':
                # identify the row of the plotted grid to which the axis belongs
                group_key = i // grid_n_cols
            else:
                # if no axis are shared, each axis will have its own group
                group_key = i

            one_way_axs.setdefault(group_key, []).append((plotted_ax, ax_pd_limits))

    # share the y-axis for the axes within the same group and set the `ymin`, `ymax` values.
    # This step is necessary and applied here because `vlines` overwrites the `ylim`.
    for ax_group in one_way_axs.values():
        min_val = min(ax_pd_lim[0] for _, ax_pd_lim in ax_group)
        max_val = max(ax_pd_lim[1] for _, ax_pd_lim in ax_group)

        axs = [group_ax for group_ax, _ in ax_group]
        for group_ax in axs[1:]:
            group_ax.sharey(axs[0])

        axs[0].set_ylim(min_val, max_val)

    fig.set(**fig_kw)
    return axes
def _sample_ice(ice_values: np.ndarray, n_ice: Union[Literal['all'], int, List[int]]) -> np.ndarray:
"""
Samples `ice_values` based on the `n_ice` argument.
Parameters
----------
ice_values
Array of ice_values of dimension `V x N`, where `V` is the number of feature values where the PD is computed,
and `N` is the number of instances in the reference dataset.
n_ice
See :py:meth:`alibi.explainers.partial_dependence.plot_pd`.
"""
if n_ice == 'all':
return ice_values
_, N = ice_values.shape
if isinstance(n_ice, numbers.Integral):
if n_ice >= N:
logger.warning('`n_ice` is greater than the number of instances in the reference dataset. '
'Automatically setting `n_ice` to the number of instances in the reference dataset.')
return ice_values
if n_ice <= 0:
raise ValueError('`n_ice` must be an integer grater than 0.')
indices = np.random.choice(a=N, size=n_ice, replace=False)
return ice_values[:, indices]
if isinstance(n_ice, list):
n_ice = np.unique(n_ice) # type: ignore[assignment]
if not np.all(n_ice < N) or not np.all(n_ice >= 0): # type: ignore[operator]
raise ValueError(f'Some indices in `n_ice` are out of bounds. Ensure that all indices are '
f'greater or equal than 0 and less than {N}.')
return ice_values[:, n_ice]
raise ValueError(f"Unknown `n_ice` values. `n_ice` can be a string taking value 'all', "
f"an integer, or a list of integers. Received {n_ice}.")
def _process_pd_ice(exp: Explanation,
                    pd_values: Optional[np.ndarray] = None,
                    ice_values: Optional[np.ndarray] = None,
                    n_ice: Union[Literal['all'], int, List[int]] = 'all',
                    center: bool = False) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
    """
    Prepare the `pd_values` and `ice_values` for plotting: the ICE values are sub-sampled for
    visualization purposes and, if requested, the curves are centered on their first grid value.

    Parameters
    ----------
    exp, n_ice, center
        See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method.
    pd_values
        Array of PD values of dimension `V` (i.e. `(V, )`), where V is the number of feature values where
        the PD is computed.
    ice_values
        Array of ice_values of dimension `V x N`, where `V` is the number of feature values where the PD is computed,
        and `N` is the number of instances in the reference dataset.

    Returns
    -------
    Tuple containing the processed `pd_values` and `ice_values`.
    """
    kind = exp.meta['params']['kind']

    # The PD curve is centered only when it is displayed together with the ICE curves.
    if kind == Kind.BOTH and center:
        pd_values = pd_values - pd_values[0]  # type: ignore[index]

    # Nothing else to do when no ICE curves are going to be displayed.
    if kind not in [Kind.INDIVIDUAL, Kind.BOTH]:
        return pd_values, ice_values

    # sub-sample first, then subtract the first row so that every ICE curve starts at zero
    ice_values = _sample_ice(ice_values=ice_values, n_ice=n_ice)  # type: ignore[arg-type]
    if center:
        ice_values = ice_values - ice_values[0:1, :]

    return pd_values, ice_values
# No type check due to the generic explanation object
@no_type_check
def _plot_one_pd_num(exp: Explanation,
                     feature: int,
                     target_idx: int,
                     center: bool = False,
                     pd_limits: Optional[Tuple[float, float]] = None,
                     n_ice: Union[Literal['all'], int, List[int]] = 100,
                     ax: Optional['plt.Axes'] = None,
                     pd_num_kw: Optional[dict] = None,
                     ice_num_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]:
    """
    Draws the one-way partial dependence (and/or ICE) curves of a single numerical feature.

    Parameters
    ----------
    exp, feature, center, pd_limits, n_ice, pd_num_kw, ice_num_kw
        See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method.
    target_idx
        The target index for which to plot the partial dependence (PD) curves. An integer
        denoting target index in `exp.meta['params']['target_names']`.
    ax
        Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally.

    Returns
    -------
    `matplotlib` axes and a tuple containing the minimum and maximum y-limits.
    """
    import matplotlib.pyplot as plt
    from matplotlib import transforms

    if ax is None:
        ax = plt.gca()

    x_values = exp.data['feature_values'][feature]
    raw_pd = None if exp.data['pd_values'] is None else exp.data['pd_values'][feature][target_idx]
    raw_ice = None if exp.data['ice_values'] is None else exp.data['ice_values'][feature][target_idx].T

    # sample / center the values before drawing them
    pd_values, ice_values = _process_pd_ice(exp=exp,
                                            pd_values=raw_pd,
                                            ice_values=raw_ice,
                                            n_ice=n_ice,
                                            center=center)

    kind = exp.meta['params']['kind']

    if kind == Kind.AVERAGE:
        pd_style = {'markersize': 2, 'marker': 'o', 'label': None}
        if pd_num_kw is not None:
            pd_style.update(pd_num_kw)
        ax.plot(x_values, pd_values, **pd_style)

    elif kind == Kind.INDIVIDUAL:
        ice_style = {'color': 'lightsteelblue', 'label': None}
        if ice_num_kw is not None:
            ice_style.update(ice_num_kw)
        ax.plot(x_values, ice_values, **ice_style)

    else:
        pd_style = {'linestyle': '--', 'linewidth': 2, 'color': 'tab:orange', 'label': 'average'}
        if pd_num_kw is not None:
            pd_style.update(pd_num_kw)

        ice_style = {'alpha': 0.6, 'color': 'lightsteelblue', 'label': None}
        if ice_num_kw is not None:
            ice_style.update(ice_num_kw)

        # ICE curves go first so that the averaged curve is drawn on top of them
        ax.plot(x_values, ice_values, **ice_style)
        ax.plot(x_values, pd_values, **pd_style)
        ax.legend()

    # remember the y-limits now, since `ax.vlines` below overwrites them
    if pd_limits is None:
        ylim = ax.get_ylim()
    else:
        ylim = pd_limits

    # deciles markers at the bottom of the plot: x in data coordinates, y in axes coordinates
    deciles = exp.data['feature_deciles'][feature][1:-1]
    blended = transforms.blended_transform_factory(ax.transData, ax.transAxes)
    ax.vlines(deciles, 0, 0.05, transform=blended)

    ax.set_xlabel(exp.data['feature_names'][feature])
    ax.set_ylabel(exp.meta['params']['target_names'][target_idx])
    return ax, ylim
# No type check due to the generic explanation object
@no_type_check
def _plot_one_pd_cat(exp: Explanation,
                     feature: int,
                     target_idx: int,
                     pd_limits: Optional[Tuple[float, float]] = None,
                     center: bool = False,
                     n_ice: Union[Literal['all'], int, List[int]] = 100,
                     ax: Optional['plt.Axes'] = None,
                     pd_cat_kw: Optional[dict] = None,
                     ice_cat_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]:
    """
    Plots one way partial dependence curve for a single categorical feature.

    Parameters
    ----------
    exp, feature, center, pd_limits, n_ice, pd_cat_kw, ice_cat_kw
        See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method.
    target_idx
        The target index for which to plot the partial dependence (PD) curves. An integer
        denoting target index in `exp.meta['params']['target_names']`.
    ax
        Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally.

    Returns
    -------
    `matplotlib` axes and a tuple containing the minimum and maximum y-limits.
    """
    import matplotlib.pyplot as plt

    if ax is None:
        ax = plt.gca()

    feature_names = exp.data['feature_names'][feature]
    feature_values = exp.data['feature_values'][feature]
    pd_values = exp.data['pd_values'][feature][target_idx] if (exp.data['pd_values'] is not None) else None
    ice_values = exp.data['ice_values'][feature][target_idx].T if (exp.data['ice_values'] is not None) else None

    # process `pd_values` and `ice_values`
    pd_values, ice_values = _process_pd_ice(exp=exp,
                                            pd_values=pd_values,
                                            ice_values=ice_values,
                                            n_ice=n_ice,
                                            center=center)

    # map the grid values (cast to integers) to the category names used as x tick labels
    feature_index = exp.meta['params']['feature_names'].index(feature_names)
    labels = [exp.meta['params']['categorical_names'][feature_index][i] for i in feature_values.astype(np.int32)]

    kind = exp.meta['params']['kind']

    if kind == Kind.AVERAGE:
        default_pd_cat_kw = {'markersize': 8, 'marker': 's', 'color': 'tab:blue'}
        pd_cat_kw = default_pd_cat_kw if pd_cat_kw is None else {**default_pd_cat_kw, **pd_cat_kw}
        ax.plot(labels, pd_values, **pd_cat_kw)

    elif kind == Kind.INDIVIDUAL:
        default_ice_cat_kw = {'markersize': 4, 'marker': 's', 'color': 'lightsteelblue'}
        ice_cat_kw = default_ice_cat_kw if ice_cat_kw is None else {**default_ice_cat_kw, **ice_cat_kw}
        ax.plot(labels, ice_values, **ice_cat_kw)

    else:
        default_pd_cat_kw = {'markersize': 8, 'marker': 's', 'color': 'tab:orange', 'label': 'average'}
        pd_cat_kw = default_pd_cat_kw if pd_cat_kw is None else {**default_pd_cat_kw, **pd_cat_kw}

        default_ice_cat_kw = {'alpha': 0.6, 'markersize': 4, 'marker': 's', 'color': 'lightsteelblue'}
        ice_cat_kw = default_ice_cat_kw if ice_cat_kw is None else {**default_ice_cat_kw, **ice_cat_kw}

        # ICE curves first, so that the averaged curve is drawn on top of them
        ax.plot(labels, ice_values, **ice_cat_kw)
        ax.plot(labels, pd_values, **pd_cat_kw)
        ax.legend()

    # save `ylim`
    ylim = ax.get_ylim() if pd_limits is None else pd_limits

    # rotate xticks labels
    ax.tick_params(axis='x', rotation=90)

    # set axis labels
    ax.set_xlabel(feature_names)
    ax.set_ylabel(exp.meta['params']['target_names'][target_idx])
    return ax, ylim
# No type check due to the generic explanation object
@no_type_check
def _plot_two_pd_num_num(exp: Explanation,
                         feature: int,
                         target_idx: int,
                         levels: int = 8,
                         ax: Optional['plt.Axes'] = None,
                         pd_num_num_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]:
    """
    Draws the two-way partial dependence of a pair of numerical features as a contour plot.

    Parameters
    ----------
    exp, feature, pd_num_num_kw
        See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method.
    target_idx
        The target index for which to plot the partial dependence (PD) curves. An integer
        denoting target index in `exp.meta['params']['target_names']`.
    ax
        Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally.

    Returns
    -------
    `matplotlib` axes and ``None``.
    """
    import matplotlib.pyplot as plt
    from matplotlib import transforms

    if exp.meta['params']['kind'] not in [Kind.AVERAGE, Kind.BOTH]:
        raise ValueError("Can only plot partial dependence for `kind` in `['average', 'both']`.")

    if ax is None:
        ax = plt.gca()

    # contour plot parameters: defaults first, user overrides on top
    contour_kw = {"alpha": 0.75}
    if pd_num_num_kw is not None:
        contour_kw.update(pd_num_num_kw)

    grid_values = exp.data['feature_values'][feature]
    pd_values = exp.data['pd_values'][feature][target_idx]

    grid_x, grid_y = np.meshgrid(grid_values[0], grid_values[1])
    surface = pd_values.T
    z_low, z_high = pd_values.min(), pd_values.max()

    if z_high > z_low:
        contour_levels = np.linspace(z_low, z_high, levels)
    else:
        # this covers the case when `z_low` equals `z_high`, for which the levels would be constant.
        # Note that `ax.contourf` accepts only increasing levels, otherwise it throws an error.
        contour_levels = z_low = z_high = None

    contour_set = ax.contour(grid_x, grid_y, surface, levels=contour_levels, linewidths=0.5, colors="k")
    ax.contourf(grid_x, grid_y, surface, levels=contour_levels, vmax=z_high, vmin=z_low, **contour_kw)
    ax.clabel(contour_set, fmt="%2.2f", colors="k", fontsize=10, inline=True)

    # remember the limits: they are overwritten by the `vlines` and `hlines` calls below
    saved_xlim = ax.get_xlim()
    saved_ylim = ax.get_ylim()

    deciles = exp.data['feature_deciles'][feature]

    # deciles of the first feature on the horizontal axis
    vertical_trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
    ax.vlines(deciles[0][1:-1], 0, 0.05, transform=vertical_trans)

    # deciles of the second feature on the vertical axis
    # (the horizontal lines do not display, same for the sklearn)
    horizontal_trans = transforms.blended_transform_factory(ax.transAxes, ax.transData)
    ax.hlines(deciles[1][1:-1], 0, 0.05, transform=horizontal_trans)

    # restore the limits saved before drawing the deciles
    ax.set_xlim(saved_xlim)
    ax.set_ylim(saved_ylim)

    # set x & y labels
    name_x, name_y = exp.data['feature_names'][feature][0], exp.data['feature_names'][feature][1]
    ax.set_xlabel(name_x)
    ax.set_ylabel(name_y)
    return ax, None
# No type check due to the generic explanation object
@no_type_check
def _plot_two_pd_num_cat(exp: Explanation,
                         feature: int,
                         target_idx: int,
                         pd_limits: Optional[Tuple[float, float]] = None,
                         ax: Optional['plt.Axes'] = None,
                         pd_num_cat_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]:
    """
    Draws the two-way partial dependence of a numerical and a categorical feature, as one curve
    over the numerical feature per category.

    Parameters
    ----------
    exp, feature, pd_limits, pd_num_cat_kw
        See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method.
    target_idx
        The target index for which to plot the partial dependence (PD) curves. An integer
        denoting target index in `exp.meta['params']['target_names']`.
    ax
        Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally.

    Returns
    -------
    `matplotlib` axes and a tuple containing the minimum and maximum y-limits.
    """
    import matplotlib.pyplot as plt
    from matplotlib import transforms

    if exp.meta['params']['kind'] not in [Kind.AVERAGE, Kind.BOTH]:
        raise ValueError("Can only plot partial dependence for `kind` in `['average', 'both']`.")

    if ax is None:
        ax = plt.gca()

    # extract feature values and partial dependence values
    grid_values = exp.data['feature_values'][feature]
    deciles = exp.data['feature_deciles'][feature]
    pd_values = exp.data['pd_values'][feature][target_idx]
    pair_names = exp.data['feature_names'][feature]

    # reorder everything as (numerical, categorical) if the categorical feature comes first
    first_index = exp.meta['params']['feature_names'].index(pair_names[0])
    if first_index in exp.meta['params']['categorical_names']:
        pair_names = pair_names[::-1]
        grid_values = grid_values[::-1]
        deciles = deciles[::-1]
        pd_values = pd_values.T

    num_name, cat_name = pair_names[0], pair_names[1]

    # category names used as legend entries
    cat_index = exp.meta['params']['feature_names'].index(cat_name)
    labels = [exp.meta['params']['categorical_names'][cat_index][i]
              for i in grid_values[1].astype(np.int32)]

    # line parameters: defaults first, user overrides on top
    line_kw = {'markersize': 2, 'marker': 'o'}
    if pd_num_cat_kw is not None:
        line_kw.update(pd_num_cat_kw)

    # invisible entry carrying the categorical feature name, shown first in the legend
    ax.plot([], [], ' ', label=cat_name)

    # one curve per category; the label always identifies the category
    for i, curve in enumerate(pd_values.T):
        line_kw['label'] = labels[i]
        ax.plot(grid_values[0], curve, **line_kw)

    # remember the y-limits now, since `ax.vlines` below overwrites them
    if pd_limits is None:
        ylim = ax.get_ylim()
    else:
        ylim = pd_limits

    # deciles markers at the bottom of the plot: x in data coordinates, y in axes coordinates
    blended = transforms.blended_transform_factory(ax.transData, ax.transAxes)
    ax.vlines(deciles[0][1:-1], 0, 0.05, transform=blended)

    ax.set_ylabel(exp.meta['params']['target_names'][target_idx])
    ax.set_xlabel(num_name)
    ax.legend()
    return ax, ylim
# No type check due to the generic explanation object
@no_type_check
def _plot_two_pd_cat_cat(exp: Explanation,
                         feature: int,
                         target_idx: int,
                         ax: Optional['plt.Axes'] = None,
                         pd_cat_cat_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]:
    """
    Draws the two-way partial dependence of a pair of categorical features as a heatmap.

    Parameters
    ----------
    exp, feature, pd_cat_cat_kw
        See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method.
    target_idx
        The target index for which to plot the partial dependence (PD) curves. An integer
        denoting target index in `exp.meta['params']['target_names']`.
    ax
        Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally.

    Returns
    -------
    `matplotlib` axes and ``None``.
    """
    import matplotlib.pyplot as plt
    from alibi.utils.visualization import heatmap

    if ax is None:
        ax = plt.gca()

    params = exp.meta['params']
    if params['kind'] not in [Kind.AVERAGE, Kind.BOTH]:
        raise ValueError("Can only plot partial dependence for `kind` in `['average', 'both']`.")

    pair_names = exp.data['feature_names'][feature]
    grid_values = exp.data['feature_values'][feature]
    pd_values = exp.data['pd_values'][feature][target_idx]

    # column index of each categorical feature, then the category names for its grid values
    row_index = params['feature_names'].index(pair_names[0])
    col_index = params['feature_names'].index(pair_names[1])
    row_labels = [params['categorical_names'][row_index][i]
                  for i in grid_values[0].astype(np.int32)]
    col_labels = [params['categorical_names'][col_index][i]
                  for i in grid_values[1].astype(np.int32)]

    # heatmap parameters: defaults first, user overrides on top
    heatmap_kw = {
        'annot': True,
        'fmt': '{x:.2f}',
        'linewidths': 1.5,
        'yticklabels': row_labels,
        'xticklabels': col_labels,
        'aspect': 'auto'
    }
    if pd_cat_cat_kw is not None:
        heatmap_kw = {**heatmap_kw, **pd_cat_cat_kw}

    heatmap(pd_values, ax=ax, **heatmap_kw)

    # set ticks labels
    ax.set_xticklabels(col_labels)
    ax.set_yticklabels(row_labels)

    # set axis labels: first feature on the vertical axis, second on the horizontal one
    ax.set_xlabel(exp.data['feature_names'][feature][1])
    ax.set_ylabel(exp.data['feature_names'][feature][0])
    return ax, None