Source code for alibi.explainers.partial_dependence

import copy
import logging
import math
import numbers
import sys
from abc import ABC, abstractmethod
from enum import Enum
from typing import (Callable, Dict, Iterable, List, Optional, Tuple, Union,
                    no_type_check)

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats.mstats import mquantiles
from sklearn.base import BaseEstimator, is_classifier, is_regressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble._gb import BaseGradientBoosting
from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import \
    BaseHistGradientBoosting
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils.extmath import cartesian
from sklearn.utils.validation import check_is_fitted
from tqdm import tqdm

from alibi.api.defaults import DEFAULT_DATA_PD, DEFAULT_META_PD
from alibi.api.interfaces import Explainer, Explanation
from alibi.explainers.ale import get_quantiles
from alibi.utils import _get_options_string

if sys.version_info >= (3, 8):
    from typing import Literal
else:
    from typing_extensions import Literal


logger = logging.getLogger(__name__)


class Kind(str, Enum):
    """ Enumeration of supported kind. """
    AVERAGE = 'average'
    INDIVIDUAL = 'individual'
    BOTH = 'both'
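# Minimal sketch (not part of the library source): because `Kind` mixes in `str`, its members
# compare equal to plain strings, which is what allows string arguments such as ``kind='average'``
# to be checked against `Kind` members throughout this module.
def _example_kind_membership():
    assert Kind.AVERAGE == 'average'
    assert 'both' in [Kind.AVERAGE, Kind.BOTH]
    return Kind('individual')  # plain strings can also be converted to the enum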
class PartialDependenceBase(Explainer, ABC):
    def __init__(self,
                 predictor: Union[BaseEstimator, Callable[[np.ndarray], np.ndarray]],
                 feature_names: Optional[List[str]] = None,
                 categorical_names: Optional[Dict[int, List[str]]] = None,
                 target_names: Optional[List[str]] = None,
                 verbose: bool = False):
        """
        Base class of the partial dependence for tabular datasets. Supports multiple feature interactions.

        Parameters
        ----------
        predictor
            A `sklearn` estimator or a prediction function which receives as input a `numpy` array of size `N x F`
            and outputs a `numpy` array of size `N` (i.e. `(N, )`) or `N x T`, where `N` is the number of input
            instances, `F` is the number of features and `T` is the number of targets.
        feature_names
            A list of feature names used for displaying results.
        categorical_names
            Dictionary where keys are feature columns and values are the categories for the feature. Necessary to
            identify the categorical features in the dataset. An example for `categorical_names` would be::

                category_map = {0: ["married", "divorced"], 3: ["high school diploma", "master's degree"]}

        target_names
            A list of target/output names used for displaying results.
        verbose
            Whether to print the progress of the explainer.
        """
        super().__init__(meta=copy.deepcopy(DEFAULT_META_PD))
        self.predictor = predictor
        self.feature_names = feature_names
        self.categorical_names = categorical_names
        self.target_names = target_names
        self.verbose = verbose
    def explain(self,
                X: np.ndarray,
                features: Optional[List[Union[int, Tuple[int, int]]]] = None,
                kind: Literal['average', 'individual', 'both'] = 'average',
                percentiles: Tuple[float, float] = (0., 1.),
                grid_resolution: int = 100,
                grid_points: Optional[Dict[int, Union[List, np.ndarray]]] = None) -> Explanation:
        """
        Calculates the partial dependence for each feature and/or tuples of features with respect to all targets
        and the reference dataset `X`.

        Parameters
        ----------
        X
            A `N x F` tabular dataset used to calculate partial dependence curves. This is typically the training
            dataset or a representative sample.
        features
            An optional list of features or tuples of features for which to calculate the partial dependence.
            If not provided, the partial dependence will be computed for every single feature in the dataset.
            Some examples for `features` would be: ``[0, 2]``, ``[0, 2, (0, 2)]``, ``[(0, 2)]``, where
            ``0`` and ``2`` correspond to column 0 and 2 in `X`, respectively.
        kind
            If set to ``'average'``, then only the partial dependence (PD) averaged across all samples from the
            dataset is returned. If set to ``'individual'``, then only the individual conditional expectation (ICE)
            is returned for each data point from the dataset. Otherwise, if set to ``'both'``, then both the PD and
            the ICE are returned.
        percentiles
            Lower and upper percentiles used to limit the feature values to potentially remove outliers from
            low-density regions. Note that for features with few data points with large/low values, the PD
            estimates are less reliable in those extreme regions. The values must be in [0, 1]. Only used
            with `grid_resolution`.
        grid_resolution
            Number of equidistant points to split the range of each target feature. Only applies if the number of
            unique values of a target feature in the reference dataset `X` is greater than the `grid_resolution`
            value. For example, consider a case where a feature can take the following values:
            ``[0.1, 0.3, 0.35, 0.351, 0.4, 0.41, 0.44, ..., 0.5, 0.54, 0.56, 0.6, 0.65, 0.7, 0.9]``, and we are
            not interested in evaluating the marginal effect at every single point as it can become computationally
            costly (assume hundreds/thousands of points) without providing any additional information for nearby
            points (e.g., 0.35 and 0.351). By setting ``grid_resolution=5``, the marginal effect is computed for
            the values ``[0.1, 0.3, 0.5, 0.7, 0.9]`` instead, which is less computationally demanding and can
            provide similar insights regarding the model's behaviour. Note that the extreme values of the grid can
            be controlled using the `percentiles` argument.
        grid_points
            Custom grid points. Must be a `dict` where the keys are the target feature indices and the values are
            monotonically increasing arrays defining the grid points for a numerical feature, and a subset of
            categorical feature values for a categorical feature. If the `grid_points` are not specified, then the
            grid will be constructed based on the unique target feature values available in the dataset `X`, or
            based on the `grid_resolution` and `percentiles` (check `grid_resolution` to see when it applies).
            For categorical features, the corresponding value in the `grid_points` can be specified either as an
            array of strings or an array of integers corresponding to the label encodings. Note that the label
            encoding must match the ordering of the values provided in the `categorical_names`.

        Returns
        -------
        explanation
            An `Explanation` object containing the data and the metadata of the calculated partial dependence
            curves. See usage at `Partial dependence examples`_ for details.

            .. _Partial dependence examples:
                https://docs.seldon.io/projects/alibi/en/stable/methods/PartialDependence.html
        """
        if X.ndim != 2:
            raise ValueError('The array X must be 2-dimensional.')

        # extract number of features
        n_features = X.shape[1]

        # set the `feature_names` when the user did not provide the feature names
        if self.feature_names is None:
            self.feature_names = [f'f_{i}' for i in range(n_features)]

        # set `categorical_names` when the user did not provide the category mapping
        if self.categorical_names is None:
            self.categorical_names = {}

        # sanity checks
        self._grid_points_sanity_checks(grid_points=grid_points, n_features=n_features)
        self._features_sanity_checks(features=features)

        # construct `feature_names` based on the `features`. If `features` is ``None``, then initialize
        # `features` with all single features available in the dataset.
        if features:
            feature_names = [tuple([self.feature_names[f] for f in features])
                             if isinstance(features, tuple)
                             else self.feature_names[features]
                             for features in features]
        else:
            feature_names = self.feature_names  # type: ignore[assignment]
            features = list(range(n_features))

        # compute partial dependencies for every feature.
        # TODO: implement parallel version - future work as it can be done for ALE too
        pds = []
        for ifeatures in tqdm(features, disable=not self.verbose):
            pds.append(
                self._partial_dependence(
                    X=X,
                    features=ifeatures,
                    kind=kind,
                    percentiles=percentiles,
                    grid_resolution=grid_resolution,
                    grid_points=grid_points
                )
            )

        # extract the number of targets that the PD/ICE was computed for
        key = Kind.AVERAGE if kind in [Kind.AVERAGE, Kind.BOTH] else Kind.INDIVIDUAL
        n_targets = pds[0][key].shape[0]

        if self.target_names is None:
            # set the `target_names` when the user did not provide the target names
            # we do it here to avoid checking model's type, prediction function etc.
            self.target_names = [f'c_{i}' for i in range(n_targets)]
        elif len(self.target_names) != n_targets:
            logger.warning('The length of `target_names` does not match the number of predicted outputs. '
                           'Ensure that the lengths match, otherwise a call to the `plot_pd` method might '
                           'raise an error or produce undesired labeling.')

        # update `meta['params']` here because until this point we don't have the `target_names`
        self.meta['params'].update(kind=kind,
                                   percentiles=percentiles,
                                   grid_resolution=grid_resolution,
                                   feature_names=self.feature_names,
                                   categorical_names=self.categorical_names,
                                   target_names=self.target_names)

        return self._build_explanation(kind=kind,
                                       feature_names=feature_names,  # type: ignore[arg-type]
                                       pds=pds)
    def _grid_points_sanity_checks(self, grid_points: Optional[Dict[int, Union[List, np.ndarray]]],
                                   n_features: int):
        """
        Grid points sanity checks.

        Parameters
        ----------
        grid_points
            See :py:meth:`alibi.explainers.partial_dependence.PartialDependenceBase.explain`.
        n_features
            Number of features in the dataset.
        """
        if grid_points is None:
            return

        if not np.all(np.isin(list(grid_points.keys()), np.arange(n_features))):
            raise ValueError('The features provided in `grid_points` are not a subset of the dataset features.')

        for f in grid_points:
            if self._is_numerical(f):
                grid_points[f] = np.sort(grid_points[f])  # from this point onward, `grid_points[f]` is `np.ndarray`
            else:
                grid_points[f] = np.unique(grid_points[f])  # from this point onward, `grid_points[f]` is `np.ndarray`

                message = "The grid points provided for the categorical feature {} are invalid. "\
                          "For categorical features, the grid points must be a subset of the features "\
                          "values defined in `categorical_names`. Received an unknown value of '{}'."

                # convert to label encoding if the grid is provided as strings
                if grid_points[f].dtype.type is np.str_:  # type: ignore[union-attr]
                    int_values = []
                    for str_val in grid_points[f]:
                        try:
                            # `self.categorical_names` cannot be empty because of the check in `self._is_numerical`
                            index = self.categorical_names[f].index(str_val)  # type: ignore[index]
                        except ValueError:
                            raise ValueError(message.format(f, str_val))
                        int_values.append(index)
                    grid_points[f] = np.array(int_values)

                # `self.categorical_names` cannot be empty because of the check in `self._is_numerical`
                mask = np.isin(grid_points[f], np.arange(len(self.categorical_names[f])))  # type: ignore[index]
                if not np.all(mask):
                    index = np.where(~mask)[0][0]
                    raise ValueError(message.format(f, grid_points[f][index]))

    def _features_sanity_checks(self, features: Optional[List[Union[int, Tuple[int, int]]]]) -> None:
        """
        Features sanity checks.

        Parameters
        ----------
        features
            List of feature indices or tuples of feature indices to compute the partial dependence for.
        """
        if features is None:
            return

        def check_feature(f):
            if not isinstance(f, numbers.Integral):
                raise ValueError(f'All feature entries must be integers. Got a feature value of {type(f)} type.')
            if f >= len(self.feature_names):
                raise ValueError(f'All feature entries must be less than '
                                 f'``len(feature_names)={len(self.feature_names)}``. Got a feature value of {f}.')
            if f < 0:
                raise ValueError(f'All feature entries must be greater or equal to 0. Got a feature value of {f}.')

        for feats in features:
            if not isinstance(feats, tuple):
                feats = (feats, )  # type: ignore[assignment]
            for f in feats:  # type: ignore[union-attr]
                check_feature(f)

    def _partial_dependence(self,
                            X: np.ndarray,
                            features: Union[int, Tuple[int, int]],
                            kind: Literal['average', 'individual', 'both'] = 'average',
                            percentiles: Tuple[float, float] = (0.05, 0.95),
                            grid_resolution: int = 100,
                            grid_points: Optional[Dict[int, Union[List, np.ndarray]]] = None
                            ) -> Dict[str, np.ndarray]:
        """
        Computes partial dependence for a feature or a tuple of features.

        Parameters
        ----------
        X, kind, percentiles, grid_resolution, grid_points
            See :py:meth:`alibi.explainers.partial_dependence.PartialDependenceBase.explain` method.
        features
            A feature or tuple of features for which to calculate the partial dependence.

        Returns
        -------
        A dictionary containing the feature(s) values, feature(s) deciles, average and/or individual values (i.e.
partial dependence or individual conditional expectation) for the given (tuple of) feature(s)) """ if isinstance(features, numbers.Integral): features = (features, ) if grid_points is None: grid_points = {} deciles, values, features_indices = [], [], [], for f in features: # type: ignore[union-attr] # extract column. TODO _safe_indexing in the future to support more input types. X_f = X[:, f] # get deciles for the current feature if the feature is numerical deciles_f = get_quantiles(X_f, num_quantiles=11) if self._is_numerical(f) else None if f not in grid_points: # construct grid for feature `f`. Note that for categorical features we pass the # grid resolution to be infinity because otherwise we risk to apply `linspace` to # categorical values, which does not make sense. values_f = self._grid_from_X( X=X_f.reshape(-1, 1), percentiles=percentiles, grid_resolution=grid_resolution if self._is_numerical(f) else np.inf # type: ignore[arg-type] ) else: values_f = [grid_points[f]] features_indices.append(f) deciles.append(deciles_f) values += values_f # perform cartesian product between feature values. Covers also the case of a single feature. features_indices = np.array(features_indices, dtype=np.int32) # type: ignore[assignment] grid = cartesian([v.reshape(-1) for v in values]) # compute the PD and ICE - separate implementation for `PartialDependence` and `TreePartialDependence` averaged_predictions, predictions = self._compute_pd(grid=grid, features=features_indices, # type: ignore[arg-type] X=X) # reshape `averaged_predictions` to (n_outputs, n_values_feature_0, n_values_feature_1, ...) averaged_predictions = averaged_predictions.reshape(-1, *[val.shape[0] for val in values]) if predictions is not None: # reshape `predictions` to (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...) predictions = predictions.reshape(-1, X.shape[0], *[val.shape[0] for val in values]) # define feature values (i.e. grid values) and the corresponding deciles. Note that the deciles # were computed on the raw (i.e. unprocessed) feature value as provided in the reference dataset `X` pd = { 'values': values if len(values) > 1 else values[0], 'deciles': deciles if len(deciles) > 1 else deciles[0], } if kind == Kind.AVERAGE: pd.update({'average': averaged_predictions}) elif kind == Kind.INDIVIDUAL: pd.update({'individual': predictions}) else: pd.update({ 'average': averaged_predictions, 'individual': predictions }) return pd @abstractmethod def _compute_pd(self, grid: np.ndarray, features: np.ndarray, X: np.ndarray) -> Tuple[np.ndarray, Optional[np.ndarray]]: """ Computes the PD and ICE. Parameters ---------- grid Cartesian product between feature values. Covers also the case of a single feature. features Feature column indices. X See :py:meth:`alibi.explainers.partial_dependence.PartialDependenceBase.explain`. Returns ------- Tuple consisting of the PD and optionally the ICE. """ raise NotImplementedError() def _grid_from_X(self, X: np.ndarray, percentiles: Tuple[float, float], grid_resolution: int): """ Generate a grid of points based on the percentiles of `X`. If `grid_resolution` is bigger than the number of unique values in the jth column of `X`, then those unique values will be used instead. Code borrowed from: https://github.com/scikit-learn/scikit-learn/blob/baf0ea25d/sklearn/inspection/_partial_dependence.py Parameters ---------- X Array to generate the grid for. percentiles The percentiles which are used to construct the extreme values of the grid. Must be in [0, 1]. 
grid_resolution The number of equally spaced points to be placed on the grid for each feature. Returns ------- The values with which the grid has been created. The size of each array `values[j]` is either `grid_resolution`, or the number of unique values in `X[:, j]`, whichever is smaller. """ if not isinstance(percentiles, Iterable) or len(percentiles) != 2: raise ValueError("`percentiles` must be a sequence of 2 elements.") if not all(0 <= x <= 1 for x in percentiles): raise ValueError("`percentiles` values must be in [0, 1].") if percentiles[0] >= percentiles[1]: raise ValueError("`percentiles[0]` must be strictly less than `percentiles[1]`.") if grid_resolution <= 1: raise ValueError("`grid_resolution` must be strictly greater than 1.") values = [] for feature in range(X.shape[1]): uniques = np.unique(X[:, feature]) if uniques.shape[0] < grid_resolution: # feature has low resolution use unique vals axis = uniques else: # create axis based on percentiles and grid resolution emp_percentiles = mquantiles(X[:, feature], prob=percentiles, axis=0).data if np.allclose(emp_percentiles[0], emp_percentiles[1]): raise ValueError("`percentiles` are too close to each other, unable to build the grid. " "Please choose percentiles that are further apart.") # construct equidistant grid points axis = np.linspace(emp_percentiles[0], emp_percentiles[1], num=grid_resolution, endpoint=True) values.append(axis) return values def _is_numerical(self, feature): """ Checks if the given feature is numerical. Parameters ---------- feature Feature to be checked. Returns ------- ``True`` if the feature is numerical. ``False`` otherwise. """ return feature not in self.categorical_names def _build_explanation(self, kind: str, feature_names: List[Union[int, Tuple[int, int]]], pds: List[Dict[str, np.ndarray]]) -> Explanation: """ Helper method to build `Explanation` object. Parameters ---------- kind See :py:meth:`alibi.explainers.partial_dependence.PartialDependenceBase.explain` method. feature_names List of feature or tuples of features for which the partial dependencies/individual conditional expectation were computed. pds List of dictionary containing the partial dependencies/individual conditional expectation. Returns ------- `Explanation` object. """ feature_deciles, feature_values = [], [] pd_values: Optional[List[np.ndarray]] = [] if kind in [Kind.AVERAGE, Kind.BOTH] else None ice_values: Optional[List[np.ndarray]] = [] if kind in [Kind.INDIVIDUAL, Kind.BOTH] else None for pd in pds: feature_values.append(pd['values']) feature_deciles.append(pd['deciles']) if (pd_values is not None) and (Kind.AVERAGE in pd): pd_values.append(pd[Kind.AVERAGE]) if (ice_values is not None) and Kind.INDIVIDUAL in pd: ice_values.append(pd[Kind.INDIVIDUAL]) data = copy.deepcopy(DEFAULT_DATA_PD) data.update( feature_names=feature_names, feature_values=feature_values, ice_values=ice_values, pd_values=pd_values, feature_deciles=feature_deciles, ) return Explanation(meta=copy.deepcopy(self.meta), data=data)
    def reset_predictor(self, predictor: Union[Callable[[np.ndarray], np.ndarray], BaseEstimator]) -> None:
        """
        Resets the predictor function or tree-based `sklearn` estimator.

        Parameters
        ----------
        predictor
            New predictor function or tree-based `sklearn` estimator.
        """
        self.predictor = predictor
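# Illustrative sketch (not part of the library source): how the `features` and `grid_points`
# arguments described in `PartialDependenceBase.explain` might be constructed. The column
# indices, value ranges and categories below are hypothetical placeholders.
def _example_explain_arguments():
    # single features 0 and 3, plus the interaction between features 0 and 3
    features = [0, 3, (0, 3)]

    # custom grid: monotonically increasing values for the numerical feature 0, and a subset of
    # categories for the categorical feature 3 (given here as label encodings)
    grid_points = {
        0: np.linspace(0., 100., num=25),
        3: np.array([0, 1]),  # assuming feature 3 was label-encoded consistently with `categorical_names`
    }
    return features, grid_points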
class PartialDependence(PartialDependenceBase):
    """
    Black-box implementation of partial dependence for tabular datasets. Supports multiple feature interactions.
    """
    def __init__(self,
                 predictor: Callable[[np.ndarray], np.ndarray],
                 feature_names: Optional[List[str]] = None,
                 categorical_names: Optional[Dict[int, List[str]]] = None,
                 target_names: Optional[List[str]] = None,
                 verbose: bool = False):
        """
        Initialize black-box model implementation of partial dependence.

        Parameters
        ----------
        predictor
            A prediction function which receives as input a `numpy` array of size `N x F` and outputs a
            `numpy` array of size `N` (i.e. `(N, )`) or `N x T`, where `N` is the number of input instances,
            `F` is the number of features and `T` is the number of targets.
        feature_names
            A list of feature names used for displaying results.
        categorical_names
            Dictionary where keys are feature columns and values are the categories for the feature. Necessary to
            identify the categorical features in the dataset. An example for `categorical_names` would be::

                category_map = {0: ["married", "divorced"], 3: ["high school diploma", "master's degree"]}

        target_names
            A list of target/output names used for displaying results.
        verbose
            Whether to print the progress of the explainer.

        Notes
        -----
        The length of the `target_names` should match the number of columns returned by a call to the `predictor`.
        For example, in the case of a binary classifier, if the predictor outputs a decision score (i.e. uses
        the `decision_function` method) which returns one column, then the length of the `target_names` should be
        one. On the other hand, if the predictor outputs a prediction probability (i.e. uses the `predict_proba`
        method) which returns two columns (one for the negative class and one for the positive class), then the
        length of the `target_names` should be two.
        """
        if not callable(predictor):
            raise ValueError("The predictor must be a callable.")

        super().__init__(predictor=predictor,
                         feature_names=feature_names,
                         categorical_names=categorical_names,
                         target_names=target_names,
                         verbose=verbose)
[docs] def explain(self, X: np.ndarray, features: Optional[List[Union[int, Tuple[int, int]]]] = None, kind: Literal['average', 'individual', 'both'] = 'average', percentiles: Tuple[float, float] = (0., 1.), grid_resolution: int = 100, grid_points: Optional[Dict[int, Union[List, np.ndarray]]] = None) -> Explanation: """ Calculates the partial dependence for each feature and/or tuples of features with respect to the all targets and the reference dataset `X`. Parameters ---------- X A `N x F` tabular dataset used to calculate partial dependence curves. This is typically the training dataset or a representative sample. features An optional list of features or tuples of features for which to calculate the partial dependence. If not provided, the partial dependence will be computed for every single features in the dataset. Some example for `features` would be: ``[0, 2]``, ``[0, 2, (0, 2)]``, ``[(0, 2)]``, where ``0`` and ``2`` correspond to column 0 and 2 in `X`, respectively. kind If set to ``'average'``, then only the partial dependence (PD) averaged across all samples from the dataset is returned. If set to ``'individual'``, then only the individual conditional expectation (ICE) is returned for each data point from the dataset. Otherwise, if set to ``'both'``, then both the PD and the ICE are returned. percentiles Lower and upper percentiles used to limit the feature values to potentially remove outliers from low-density regions. Note that for features with not many data points with large/low values, the PD estimates are less reliable in those extreme regions. The values must be in [0, 1]. Only used with `grid_resolution`. grid_resolution Number of equidistant points to split the range of each target feature. Only applies if the number of unique values of a target feature in the reference dataset `X` is greater than the `grid_resolution` value. For example, consider a case where a feature can take the following values: ``[0.1, 0.3, 0.35, 0.351, 0.4, 0.41, 0.44, ..., 0.5, 0.54, 0.56, 0.6, 0.65, 0.7, 0.9]``, and we are not interested in evaluating the marginal effect at every single point as it can become computationally costly (assume hundreds/thousands of points) without providing any additional information for nearby points (e.g., 0.35 and 351). By setting ``grid_resolution=5``, the marginal effect is computed for the values ``[0.1, 0.3, 0.5, 0.7, 0.9]`` instead, which is less computationally demanding and can provide similar insights regarding the model's behaviour. Note that the extreme values of the grid can be controlled using the `percentiles` argument. grid_points Custom grid points. Must be a `dict` where the keys are the target features indices and the values are monotonically increasing arrays defining the grid points for a numerical feature, and a subset of categorical feature values for a categorical feature. If the `grid_points` are not specified, then the grid will be constructed based on the unique target feature values available in the dataset `X`, or based on the `grid_resolution` and `percentiles` (check `grid_resolution` to see when it applies). For categorical features, the corresponding value in the `grid_points` can be specified either as array of strings or array of integers corresponding the label encodings. Note that the label encoding must match the ordering of the values provided in the `categorical_names`. Returns ------- explanation An `Explanation` object containing the data and the metadata of the calculated partial dependence curves. 
        See usage at `Partial dependence examples`_ for details.

        .. _Partial dependence examples:
            https://docs.seldon.io/projects/alibi/en/stable/methods/PartialDependence.html
        """
        # `kind` param sanity check.
        if kind not in Kind.__members__.values():
            raise ValueError(f"``kind='{kind}'`` is invalid. "
                             f"Accepted `kind` names are: {_get_options_string(Kind)}.")

        return super().explain(X=X,
                               features=features,
                               kind=kind,
                               percentiles=percentiles,
                               grid_resolution=grid_resolution,
                               grid_points=grid_points)
    def _compute_pd(self,
                    grid: np.ndarray,
                    features: np.ndarray,
                    X: np.ndarray) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Computes the partial dependence using the brute method. Code borrowed from:
        https://github.com/scikit-learn/scikit-learn/blob/baf0ea25d/sklearn/inspection/_partial_dependence.py

        Parameters
        ----------
        grid
            Cartesian product between feature values. Covers also the case of a single feature.
        features
            Feature column indices.
        X
            See :py:meth:`alibi.explainers.partial_dependence.PartialDependence.explain` method.

        Returns
        -------
        Partial dependence for the given features.
        """
        predictions = []
        averaged_predictions = []

        X_eval = X.copy()
        for grid_values in grid:
            X_eval[:, features] = grid_values

            # Note: predictions is of shape
            # (n_points,) for non-multioutput regressors
            # (n_points, n_tasks) for multioutput regressors
            # (n_points, 1) for the regressors in cross_decomposition (I think)
            # (n_points, 2) for binary classification
            # (n_points, n_classes) for multiclass classification
            pred = self.predictor(X_eval)
            predictions.append(pred)

            # average over samples
            averaged_predictions.append(np.mean(pred, axis=0))

        # cast to `np.ndarray` and transpose
        predictions = np.array(predictions).T  # type: ignore[assignment]
        averaged_predictions = np.array(averaged_predictions).T  # type: ignore[assignment]
        return averaged_predictions, predictions  # type: ignore[return-value]
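# Illustrative usage sketch (not part of the library source), assuming a scikit-learn classifier
# wrapped through its `predict_proba` method; the dataset and model below are placeholders.
def _example_partial_dependence_usage():
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=500, n_features=4, random_state=0)
    clf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)

    # black-box PD/ICE on the prediction probabilities (two output columns, hence two target names)
    explainer = PartialDependence(predictor=clf.predict_proba,
                                  feature_names=[f'f_{i}' for i in range(4)],
                                  target_names=['class_0', 'class_1'])
    exp = explainer.explain(X=X, features=[0, 1, (0, 1)], kind='both', grid_resolution=20)
    return exp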
class TreePartialDependence(PartialDependenceBase):
    """
    Tree-based model `sklearn` implementation of the partial dependence for tabular datasets.
    Supports multiple feature interactions. This method is faster than the general black-box implementation
    but is only supported by some tree-based estimators. The computation is based on a weighted tree traversal.
    For more details on the computation, check the `sklearn documentation page`_. The supported `sklearn`
    models are: `GradientBoostingClassifier`, `GradientBoostingRegressor`, `HistGradientBoostingClassifier`,
    `HistGradientBoostingRegressor`, `DecisionTreeRegressor`, `RandomForestRegressor`.

    .. _sklearn documentation page:
        https://scikit-learn.org/stable/modules/partial_dependence.html#computation-methods
    """
    def __init__(self,
                 predictor: BaseEstimator,
                 feature_names: Optional[List[str]] = None,
                 categorical_names: Optional[Dict[int, List[str]]] = None,
                 target_names: Optional[List[str]] = None,
                 verbose: bool = False):
        """
        Initialize tree-based model `sklearn` implementation of partial dependence.

        Parameters
        ----------
        predictor
            A tree-based `sklearn` estimator.
        feature_names
            A list of feature names used for displaying results.
        categorical_names
            Dictionary where keys are feature columns and values are the categories for the feature. Necessary to
            identify the categorical features in the dataset. An example for `categorical_names` would be::

                category_map = {0: ["married", "divorced"], 3: ["high school diploma", "master's degree"]}

        target_names
            A list of target/output names used for displaying results.
        verbose
            Whether to print the progress of the explainer.

        Notes
        -----
        The length of the `target_names` should match the number of columns returned by a call to the
        `predictor.decision_function`. In the case of a binary classifier, the decision score consists of a single
        column. Thus, the length of the `target_names` should be one.
        """
        super().__init__(predictor=predictor,
                         feature_names=feature_names,
                         categorical_names=categorical_names,
                         target_names=target_names,
                         verbose=verbose)

        # perform sanity checks on the `sklearn` predictor
        self._sanity_check()
    def _sanity_check(self):
        """ Model sanity checks. """
        if not isinstance(self.predictor, BaseEstimator):
            raise ValueError('`TreePartialDependence` only supports `sklearn` models. '
                             'Try using the `PartialDependence` black-box alternative.')

        check_is_fitted(self.predictor)

        if not (is_classifier(self.predictor) or is_regressor(self.predictor)):
            raise ValueError('The predictor must be a fitted regressor or a fitted classifier.')

        if is_classifier(self.predictor) and isinstance(self.predictor.classes_[0], np.ndarray):
            raise ValueError('Multiclass-multioutput predictors are not supported.')

        if not isinstance(self.predictor, (BaseGradientBoosting, BaseHistGradientBoosting,
                                           DecisionTreeRegressor, RandomForestRegressor)):
            supported_classes_recursion = (
                "GradientBoostingClassifier",
                "GradientBoostingRegressor",
                "HistGradientBoostingClassifier",
                "HistGradientBoostingRegressor",
                "DecisionTreeRegressor",
                "RandomForestRegressor",
            )
            raise ValueError(f'`TreePartialDependence` only supports the following estimators: '
                             f'{supported_classes_recursion}. Try using the `PartialDependence` '
                             f'black-box alternative.')
[docs] def explain(self, # type: ignore[override] X: np.ndarray, features: Optional[List[Union[int, Tuple[int, int]]]] = None, percentiles: Tuple[float, float] = (0., 1.), grid_resolution: int = 100, grid_points: Optional[Dict[int, Union[List, np.ndarray]]] = None) -> Explanation: """ Calculates the partial dependence for each feature and/or tuples of features with respect to the all targets and the reference dataset `X`. Parameters ---------- X A `N x F` tabular dataset used to calculate partial dependence curves. This is typically the training dataset or a representative sample. features An optional list of features or tuples of features for which to calculate the partial dependence. If not provided, the partial dependence will be computed for every single features in the dataset. Some example for `features` would be: ``[0, 2]``, ``[0, 2, (0, 2)]``, ``[(0, 2)]``, where ``0`` and ``2`` correspond to column 0 and 2 in `X`, respectively. percentiles Lower and upper percentiles used to limit the feature values to potentially remove outliers from low-density regions. Note that for features with not many data points with large/low values, the PD estimates are less reliable in those extreme regions. The values must be in [0, 1]. Only used with `grid_resolution`. grid_resolution Number of equidistant points to split the range of each target feature. Only applies if the number of unique values of a target feature in the reference dataset `X` is greater than the `grid_resolution` value. For example, consider a case where a feature can take the following values: ``[0.1, 0.3, 0.35, 0.351, 0.4, 0.41, 0.44, ..., 0.5, 0.54, 0.56, 0.6, 0.65, 0.7, 0.9]``, and we are not interested in evaluating the marginal effect at every single point as it can become computationally costly (assume hundreds/thousands of points) without providing any additional information for nearby points (e.g., 0.35 and 351). By setting ``grid_resolution=5``, the marginal effect is computed for the values ``[0.1, 0.3, 0.5, 0.7, 0.9]`` instead, which is less computationally demanding and can provide similar insights regarding the model's behaviour. Note that the extreme values of the grid can be controlled using the `percentiles` argument. grid_points Custom grid points. Must be a `dict` where the keys are the target features indices and the values are monotonically increasing arrays defining the grid points for a numerical feature, and a subset of categorical feature values for a categorical feature. If the `grid_points` are not specified, then the grid will be constructed based on the unique target feature values available in the dataset `X`, or based on the `grid_resolution` and `percentiles` (check `grid_resolution` to see when it applies). For categorical features, the corresponding value in the `grid_points` can be specified either as array of strings or array of integers corresponding the label encodings. Note that the label encoding must match the ordering of the values provided in the `categorical_names`. """ return super().explain(X=X, features=features, kind=Kind.AVERAGE.value, # only `'average'` is supported for `'recursion'` method. percentiles=percentiles, grid_resolution=grid_resolution, grid_points=grid_points)
    def _compute_pd(self,  # type: ignore[override]
                    grid: np.ndarray,
                    features: np.ndarray,
                    **kwargs) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Computes the PD.

        Parameters
        ----------
        grid
            Cartesian product between feature values. Covers also the case of a single feature.
        features
            Feature column indices.
        **kwargs
            Other arguments. Not used.

        Returns
        -------
        Tuple consisting of the PD and ``None``.
        """
        avg_preds = self.predictor._compute_partial_dependence_recursion(grid, features)  # type: ignore[union-attr]
        return avg_preds, None
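# Illustrative usage sketch (not part of the library source), assuming a fitted scikit-learn
# gradient boosting regressor; the regression dataset below is a placeholder.
def _example_tree_partial_dependence_usage():
    from sklearn.datasets import make_regression
    from sklearn.ensemble import GradientBoostingRegressor

    X, y = make_regression(n_samples=500, n_features=5, n_informative=5, random_state=0)
    reg = GradientBoostingRegressor(random_state=0).fit(X, y)

    # the tree-based explainer receives the estimator itself (not a prediction function)
    # and only computes the averaged PD; no ICE curves are available with this method
    explainer = TreePartialDependence(predictor=reg)
    exp = explainer.explain(X=X, features=[0, 2, (0, 2)], grid_resolution=30)
    return exp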
# No type check due to the generic explanation object
[docs] @no_type_check def plot_pd(exp: Explanation, features: Union[List[int], Literal['all']] = 'all', target: Union[str, int] = 0, n_cols: int = 3, n_ice: Union[Literal['all'], int, List[int]] = 100, center: bool = False, pd_limits: Optional[Tuple[float, float]] = None, levels: int = 8, ax: Optional[Union['plt.Axes', np.ndarray]] = None, sharey: Optional[Literal['all', 'row']] = 'all', pd_num_kw: Optional[dict] = None, ice_num_kw: Optional[dict] = None, pd_cat_kw: Optional[dict] = None, ice_cat_kw: Optional[dict] = None, pd_num_num_kw: Optional[dict] = None, pd_num_cat_kw: Optional[dict] = None, pd_cat_cat_kw: Optional[dict] = None, fig_kw: Optional[dict] = None) -> 'np.ndarray': """ Plot partial dependence curves on matplotlib axes. Parameters ---------- exp An `Explanation` object produced by a call to the :py:meth:`alibi.explainers.partial_dependence.PartialDependence.explain` method. features A list of features entries in the `exp.data['feature_names']` to plot the partial dependence curves for, or ``'all'`` to plot all the explained feature or tuples of features. This includes tuples of features. For example, if ``exp.data['feature_names'] = ['temp', 'hum', ('temp', 'windspeed')]`` and we want to plot the partial dependence only for the ``'temp'`` and ``('temp', 'windspeed')``, then we would set ``features=[0, 2]``. Defaults to ``'all'``. target The target name or index for which to plot the partial dependence (PD) curves. Can be a mix of integers denoting target index or strings denoting entries in `exp.meta['params']['target_names']`. n_cols Number of columns to organize the resulting plot into. n_ice Number of ICE plots to be displayed. Can be - a string taking the value ``'all'`` to display the ICE curves for every instance in the reference dataset. - an integer for which `n_ice` instances from the reference dataset will be sampled uniformly at random to \ display their ICE curves. - a list of integers, where each integer represents an index of an instance in the reference dataset to \ display their ICE curves. center Boolean flag to center the individual conditional expectation (ICE) curves. As mentioned in `Goldstein et al. (2014)`_, the heterogeneity in the model can be difficult to discern when the intercepts of the ICE curves cover a wide range. Centering the ICE curves removes the level effects and helps to visualise the heterogeneous effect. .. _Goldstein et al. (2014): https://arxiv.org/abs/1309.6392 pd_limits Minimum and maximum y-limits for all the one-way PD plots. If ``None`` will be automatically inferred. levels Number of levels in the contour plot. ax A `matplotlib` axes object or a `numpy` array of `matplotlib` axes to plot on. sharey A parameter specifying whether the y-axis of the PD and ICE curves should be on the same scale for several features. Possible values are: ``'all'`` | ``'row'`` | ``None``. pd_num_kw Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the PD for a numerical feature. ice_num_kw Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the ICE for a numerical feature. pd_cat_kw Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the PD for a categorical feature. ice_cat_kw Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the ICE for a categorical feature. pd_num_num_kw Keyword arguments passed to the `matplotlib.pyplot.contourf`_ function when plotting the PD for two numerical features. 
pd_num_cat_kw Keyword arguments passed to the `matplotlib.pyplot.plot`_ function when plotting the PD for a numerical and a categorical feature. pd_cat_cat_kw Keyword arguments passed to the :py:meth:`alibi.utils.visualization.heatmap` functon when plotting the PD for two categorical features. fig_kw Keyword arguments passed to the `matplotlib.figure.set`_ function. .. _matplotlib.pyplot.plot: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html .. _matplotlib.pyplot.contourf: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.contourf.html .. _matplotlib.figure.set: https://matplotlib.org/stable/api/figure_api.html Returns ------- An array of `plt.Axes` with the resulting partial dependence plots. """ import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec default_fig_kw = {'tight_layout': 'tight'} if fig_kw is None: fig_kw = {} fig_kw = {**default_fig_kw, **fig_kw} if features == 'all': features = range(0, len(exp.data['feature_names'])) else: for ifeatures in features: if ifeatures >= len(exp.data['feature_names']): raise IndexError(f"The `features` indices must be less than the " f"``len(feature_names) = {len(exp.data['feature_names'])}``. " f"Received {ifeatures}.") # set target index if isinstance(target, str): try: target_idx = exp.meta['params']['target_names'].index(target) except ValueError: raise ValueError(f"Unknown `target` name. Received {target}. " f"Available values are: {exp.meta['params']['target_names']}.") else: target_idx = target if target_idx >= len(exp.meta['params']['target_names']): raise IndexError(f"Target index out of range. Received {target_idx}. " f"The number of targets is {len(exp.meta['params']['target_names'])}.") # corresponds to the number of subplots n_features = len(features) # create axes if ax is None: fig, ax = plt.subplots() def _is_categorical(feature): feature_idx = exp.meta['params']['feature_names'].index(feature) return feature_idx in exp.meta['params']['categorical_names'] if isinstance(ax, plt.Axes) and n_features != 1: ax.set_axis_off() # treat passed axis as a canvas for subplots fig = ax.figure n_cols = min(n_cols, n_features) n_rows = math.ceil(n_features / n_cols) axes = np.empty((n_rows, n_cols), dtype=object) axes_ravel = axes.ravel() gs = GridSpec(n_rows, n_cols) for i, spec in enumerate(list(gs)[:n_features]): axes_ravel[i] = fig.add_subplot(spec) else: # array-like if isinstance(ax, plt.Axes): ax = np.array(ax) if ax.size < n_features: raise ValueError(f"Expected ax to have {n_features} axes, got {ax.size}") axes = np.atleast_2d(ax) axes_ravel = axes.ravel() fig = axes_ravel[0].figure # create plots one_way_axs = {} for i, (ifeatures, ax_ravel) in enumerate(zip(features, axes_ravel)): # extract the feature names feature_names = exp.data['feature_names'][ifeatures] # if it is tuple, then we need a 2D plot and address 4 cases: (num, num), (num, cat), (cat, num), (cat, cat) if isinstance(feature_names, tuple): f0, f1 = feature_names if (not _is_categorical(f0)) and (not _is_categorical(f1)): ax, ax_pd_limits = _plot_two_pd_num_num(exp=exp, feature=ifeatures, target_idx=target_idx, levels=levels, ax=ax_ravel, pd_num_num_kw=pd_num_num_kw) elif _is_categorical(f0) and _is_categorical(f1): ax, ax_pd_limits = _plot_two_pd_cat_cat(exp=exp, feature=ifeatures, target_idx=target_idx, ax=ax_ravel, pd_cat_cat_kw=pd_cat_cat_kw) else: ax, ax_pd_limits = _plot_two_pd_num_cat(exp=exp, feature=ifeatures, target_idx=target_idx, pd_limits=pd_limits, ax=ax_ravel, pd_num_cat_kw=pd_num_cat_kw) else: if 
_is_categorical(feature_names): ax, ax_pd_limits = _plot_one_pd_cat(exp=exp, feature=ifeatures, target_idx=target_idx, center=center, pd_limits=pd_limits, n_ice=n_ice, ax=ax_ravel, pd_cat_kw=pd_cat_kw, ice_cat_kw=ice_cat_kw) else: ax, ax_pd_limits = _plot_one_pd_num(exp=exp, feature=ifeatures, target_idx=target_idx, center=center, pd_limits=pd_limits, n_ice=n_ice, ax=ax_ravel, pd_num_kw=pd_num_kw, ice_num_kw=ice_num_kw) # group the `ax_ravel` that share the appropriate y axes. if ax_pd_limits is not None: if sharey == 'all': if one_way_axs.get('all', None) is None: one_way_axs['all'] = [] # add them all in the same group one_way_axs['all'].append((ax, ax_pd_limits)) elif sharey == 'row': # identify the row to which they belong row = i // n_cols if one_way_axs.get(row, None) is None: one_way_axs[row] = [] # add them the `row` group one_way_axs[row].append((ax, ax_pd_limits)) else: # if no axis are share, each `ax_ravel` will have its own group one_way_axs[i] = [(ax, ax_pd_limits)] # share the y-axis for the axes within the same group and set the `ymin`, `ymax` values. # This step is necessary and applied here because `vlines` overwrites the `ylim`. for ax_group in one_way_axs.values(): min_val = min([ax_pd_lim[0] for _, ax_pd_lim in ax_group]) max_val = max([ax_pd_lim[1] for _, ax_pd_lim in ax_group]) axs = [ax[0] for ax in ax_group] for ax in axs[1:]: ax.sharey(axs[0]) axs[0].set_ylim(min_val, max_val) fig.set(**fig_kw) return axes
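# Illustrative plotting sketch (not part of the library source); `exp` is assumed to be an
# `Explanation` returned by `PartialDependence.explain` with ``kind='both'``.
def _example_plot_pd_usage(exp):
    import matplotlib.pyplot as plt

    # plot every explained feature, 2 plots per row, centering the ICE curves and
    # sub-sampling 50 ICE curves per plot for readability
    axes = plot_pd(exp=exp, features='all', target=0, n_cols=2, n_ice=50, center=True,
                   fig_kw={'figheight': 6, 'figwidth': 9})
    plt.show()
    return axes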
def _sample_ice(ice_values: np.ndarray, n_ice: Union[Literal['all'], int, List[int]]) -> np.ndarray: """ Samples `ice_values` based on the `n_ice` argument. Parameters ---------- ice_values Array of ice_values of dimension `V x N`, where `V` is the number of feature values where the PD is computed, and `N` is the number of instances in the reference dataset. n_ice See :py:meth:`alibi.explainers.partial_dependence.plot_pd`. """ if n_ice == 'all': return ice_values _, N = ice_values.shape if isinstance(n_ice, numbers.Integral): if n_ice >= N: logger.warning('`n_ice` is greater than the number of instances in the reference dataset. ' 'Automatically setting `n_ice` to the number of instances in the reference dataset.') return ice_values if n_ice <= 0: raise ValueError('`n_ice` must be an integer grater than 0.') indices = np.random.choice(a=N, size=n_ice, replace=False) return ice_values[:, indices] if isinstance(n_ice, list): n_ice = np.unique(n_ice) # type: ignore[assignment] if not np.all(n_ice < N) or not np.all(n_ice >= 0): # type: ignore[operator] raise ValueError(f'Some indices in `n_ice` are out of bounds. Ensure that all indices are ' f'greater or equal than 0 and less than {N}.') return ice_values[:, n_ice] raise ValueError(f"Unknown `n_ice` values. `n_ice` can be a string taking value 'all', " f"an integer, or a list of integers. Received {n_ice}.") def _process_pd_ice(exp: Explanation, pd_values: Optional[np.ndarray] = None, ice_values: Optional[np.ndarray] = None, n_ice: Union[Literal['all'], int, List[int]] = 'all', center: bool = False) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]: """ Process the `pd_values` and `ice_values` before plotting. Centers the plots if necessary and samples the `ice_values` for visualization purposes. Parameters ---------- exp, n_ice, center See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method. pd_values Array of ice_values of dimension `V` (i.e. `(V, )`), where V is the number of feature values where the PD is computed. ice_values Array of ice_values of dimension `V x N`, where `V` is the number of feature values where the PD is computed, and `N` is the number of instances in the reference dataset. Returns ------- Tuple containing the processed `pd_values` and `ice_values`. """ # pdp processing if exp.meta['params']['kind'] == Kind.BOTH and center: pd_values = pd_values - pd_values[0] # type: ignore[index] # ice processing if exp.meta['params']['kind'] in [Kind.INDIVIDUAL, Kind.BOTH]: # sample ice values for visualization purposes ice_values = _sample_ice(ice_values=ice_values, n_ice=n_ice) # type: ignore[arg-type] # center ice values if necessary if center: ice_values = ice_values - ice_values[0:1, :] return pd_values, ice_values # No type check due to the generic explanation object @no_type_check def _plot_one_pd_num(exp: Explanation, feature: int, target_idx: int, center: bool = False, pd_limits: Optional[Tuple[float, float]] = None, n_ice: Union[Literal['all'], int, List[int]] = 100, ax: Optional['plt.Axes'] = None, pd_num_kw: Optional[dict] = None, ice_num_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]: """ Plots one way partial dependence curve for a single numerical feature. Parameters ---------- exp, feature, center, pd_limits, n_ice, pd_num_kw, ice_num_kw See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method. target_idx The target index for which to plot the partial dependence (PD) curves. 
An integer denoting target index in `exp.meta['params]['target_names']` ax Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally. Returns ------- `matplotlib` axes and a tuple containing the minimum and maximum y-limits. """ import matplotlib.pyplot as plt from matplotlib import transforms if ax is None: ax = plt.gca() feature_values = exp.data['feature_values'][feature] pd_values = exp.data['pd_values'][feature][target_idx] if (exp.data['pd_values'] is not None) else None ice_values = exp.data['ice_values'][feature][target_idx].T if (exp.data['ice_values'] is not None) else None # process `pd_values` and `ice_values` pd_values, ice_values = _process_pd_ice(exp=exp, pd_values=pd_values, ice_values=ice_values, n_ice=n_ice, center=center) if exp.meta['params']['kind'] == Kind.AVERAGE: default_pd_num_kw = {'markersize': 2, 'marker': 'o', 'label': None} pd_num_kw = default_pd_num_kw if pd_num_kw is None else {**default_pd_num_kw, **pd_num_kw} ax.plot(feature_values, pd_values, **pd_num_kw) elif exp.meta['params']['kind'] == Kind.INDIVIDUAL: default_ice_graph_kw = {'color': 'lightsteelblue', 'label': None} ice_num_kw = default_ice_graph_kw if ice_num_kw is None else {**default_ice_graph_kw, **ice_num_kw} ax.plot(feature_values, ice_values, **ice_num_kw) else: default_pd_num_kw = {'linestyle': '--', 'linewidth': 2, 'color': 'tab:orange', 'label': 'average'} pd_num_kw = default_pd_num_kw if pd_num_kw is None else {**default_pd_num_kw, **pd_num_kw} default_ice_graph_kw = {'alpha': 0.6, 'color': 'lightsteelblue', 'label': None} ice_num_kw = default_ice_graph_kw if ice_num_kw is None else {**default_ice_graph_kw, **ice_num_kw} ax.plot(feature_values, ice_values, **ice_num_kw) ax.plot(feature_values, pd_values, **pd_num_kw) ax.legend() # save the `ylim` as they will be overwritten by `ax.vlines` ylim = ax.get_ylim() if pd_limits is None else pd_limits # add deciles markers to the bottom of the plot trans = transforms.blended_transform_factory(ax.transData, ax.transAxes) ax.vlines(exp.data['feature_deciles'][feature][1:-1], 0, 0.05, transform=trans) ax.set_xlabel(exp.data['feature_names'][feature]) ax.set_ylabel(exp.meta['params']['target_names'][target_idx]) return ax, ylim # No type check due to the generic explanation object @no_type_check def _plot_one_pd_cat(exp: Explanation, feature: int, target_idx: int, pd_limits: Optional[Tuple[float, float]] = None, center: bool = False, n_ice: Union[Literal['all'], int, List[str]] = 100, ax: Optional['plt.Axes'] = None, pd_cat_kw: Optional[dict] = None, ice_cat_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]: """ Plots one way partial dependence curve for a single categorical feature. Parameters ---------- exp, feature, center, pd_limits, n_ice, pd_cat_kw, ice_cat_kw See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method. target_idx The target index for which to plot the partial dependence (PD) curves. An integer denoting target index in `exp.meta['params'].target_names` ax Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally. Returns ------- `matplotlib` axes and a tuple containing the minimum and maximum y-limits. 
""" import matplotlib.pyplot as plt if ax is None: ax = plt.gca() feature_names = exp.data['feature_names'][feature] feature_values = exp.data['feature_values'][feature] pd_values = exp.data['pd_values'][feature][target_idx] if (exp.data['pd_values'] is not None) else None ice_values = exp.data['ice_values'][feature][target_idx].T if (exp.data['ice_values'] is not None) else None # process `pd_values` and `ice_values` pd_values, ice_values = _process_pd_ice(exp=exp, pd_values=pd_values, ice_values=ice_values, n_ice=n_ice, center=center) feature_index = exp.meta['params']['feature_names'].index(feature_names) labels = [exp.meta['params']['categorical_names'][feature_index][i] for i in feature_values.astype(np.int32)] if exp.meta['params']['kind'] == Kind.AVERAGE: default_pd_graph_kw = {'markersize': 8, 'marker': 's', 'color': 'tab:blue'} pd_cat_kw = default_pd_graph_kw if pd_cat_kw is None else {**default_pd_graph_kw, **pd_cat_kw} ax.plot(labels, pd_values, **pd_cat_kw) elif exp.meta['params']['kind'] == Kind.INDIVIDUAL: default_ice_cat_kw = {'markersize': 4, 'marker': 's', 'color': 'lightsteelblue'} ice_cat_kw = default_ice_cat_kw if ice_cat_kw is None else {**default_ice_cat_kw, **ice_cat_kw} ax.plot(labels, ice_values, **ice_cat_kw) else: default_pd_cat_kw = {'markersize': 8, 'marker': 's', 'color': 'tab:orange', 'label': 'average'} pd_cat_kw = default_pd_cat_kw if pd_cat_kw is None else {**default_pd_cat_kw, **pd_cat_kw} default_ice_cat_kw = {'alpha': 0.6, 'markersize': 4, 'marker': 's', 'color': 'lightsteelblue'} ice_cat_kw = default_ice_cat_kw if ice_cat_kw is None else {**default_ice_cat_kw, **ice_cat_kw} ax.plot(labels, ice_values, **ice_cat_kw) ax.plot(labels, pd_values, **pd_cat_kw) ax.legend() # save `ylim` ylim = ax.get_ylim() if pd_limits is None else pd_limits # rotate xticks labels ax.tick_params(axis='x', rotation=90) # set axis labels ax.set_xlabel(feature_names) ax.set_ylabel(exp.meta['params']['target_names'][target_idx]) return ax, ylim # No type check due to the generic explanation object @no_type_check def _plot_two_pd_num_num(exp: Explanation, feature: int, target_idx: int, levels: int = 8, ax: Optional['plt.Axes'] = None, pd_num_num_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]: """ Plots two ways partial dependence curve for two numerical features. Parameters ---------- exp, feature, pd_num_num_kw See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method. target_idx The target index for which to plot the partial dependence (PD) curves. An integer denoting target index in `exp.meta['params']['target_names']` ax Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally. Returns ------- `matplotlib` axes and ``None``. 
""" import matplotlib.pyplot as plt from matplotlib import transforms if exp.meta['params']['kind'] not in [Kind.AVERAGE, Kind.BOTH]: raise ValueError("Can only plot partial dependence for `kind` in `['average', 'both']`.") if ax is None: ax = plt.gca() # set contour plot default params default_pd_num_num_kw = {"alpha": 0.75} pd_num_num_kw = default_pd_num_num_kw if pd_num_num_kw is None else {**default_pd_num_num_kw, **pd_num_num_kw} feature_values = exp.data['feature_values'][feature] pd_values = exp.data['pd_values'][feature][target_idx] X, Y = np.meshgrid(feature_values[0], feature_values[1]) Z, Z_min, Z_max = pd_values.T, pd_values.min(), pd_values.max() if Z_max > Z_min: Z_level = np.linspace(Z_min, Z_max, levels) else: # this covers the case when `Z_min` equals `Z_max`, for which `Z_level` will be constant. # Note that `ax.contourf` accepts only increasing `Z_levels`, otherwise it throws an error. Z_level, Z_min, Z_max = None, None, None CS = ax.contour(X, Y, Z, levels=Z_level, linewidths=0.5, colors="k") ax.contourf(X, Y, Z, levels=Z_level, vmax=Z_max, vmin=Z_min, **pd_num_num_kw) ax.clabel(CS, fmt="%2.2f", colors="k", fontsize=10, inline=True) # create the deciles line for the vertical & horizontal axis xlim, ylim = ax.get_xlim(), ax.get_ylim() # the horizontal lines do not display (same for the sklearn) trans = transforms.blended_transform_factory(ax.transData, ax.transAxes) ax.vlines(exp.data['feature_deciles'][feature][0][1:-1], 0, 0.05, transform=trans) trans = transforms.blended_transform_factory(ax.transAxes, ax.transData) ax.hlines(exp.data['feature_deciles'][feature][1][1:-1], 0, 0.05, transform=trans) # reset xlim and ylim since they are overwritten by hlines and vlines ax.set_xlim(xlim) ax.set_ylim(ylim) # set x & y labels ax.set_xlabel(exp.data['feature_names'][feature][0]) ax.set_ylabel(exp.data['feature_names'][feature][1]) return ax, None # No type check due to the generic explanation object @no_type_check def _plot_two_pd_num_cat(exp: Explanation, feature: int, target_idx: int, pd_limits: Optional[Tuple[float, float]] = None, ax: Optional['plt.Axes'] = None, pd_num_cat_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]: """ Plots two ways partial dependence curve for a numerical feature and a categorical feature. Parameters ---------- exp, feature, pd_num_cat_kw See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method. target_idx The target index for which to plot the partial dependence (PD) curves. An integer denoting target index in `exp.meta['params']['target_names'].` ax Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally. Returns ------- `matplotlib` axes and a tuple containing the minimum and maximum y-limits. 
""" import matplotlib.pyplot as plt from matplotlib import transforms if exp.meta['params']['kind'] not in [Kind.AVERAGE, Kind.BOTH]: raise ValueError("Can only plot partial dependence for `kind` in `['average', 'both']`.") if ax is None: ax = plt.gca() def _is_categorical(feature): feature_idx = exp.meta['params']['feature_names'].index(feature) return feature_idx in exp.meta['params']['categorical_names'] # extract feature values and partial dependence values feature_values = exp.data['feature_values'][feature] feature_deciles = exp.data['feature_deciles'][feature] pd_values = exp.data['pd_values'][feature][target_idx] # find which feature is categorical and which one is numerical feature_names = exp.data['feature_names'][feature] if _is_categorical(feature_names[0]): feature_names = feature_names[::-1] feature_values = feature_values[::-1] feature_deciles = feature_deciles[::-1] pd_values = pd_values.T # define labels cat_feature_index = exp.meta['params']['feature_names'].index(feature_names[1]) labels = [exp.meta['params']['categorical_names'][cat_feature_index][i] for i in feature_values[1].astype(np.int32)] # plot lines default_pd_num_cat_kw = {'markersize': 2, 'marker': 'o'} pd_num_cat_kw = default_pd_num_cat_kw if pd_num_cat_kw is None else {**default_pd_num_cat_kw, **pd_num_cat_kw} ax.plot([], [], ' ', label=feature_names[1]) for i in range(pd_values.shape[1]): x, y = feature_values[0], pd_values[:, i] pd_num_cat_kw.update({'label': labels[i]}) ax.plot(x, y, **pd_num_cat_kw) # save `ylim` as they will be overwritten by `ax.vlines` ylim = ax.get_ylim() if pd_limits is None else pd_limits # add deciles markers to the bottom of the plot trans = transforms.blended_transform_factory(ax.transData, ax.transAxes) ax.vlines(feature_deciles[0][1:-1], 0, 0.05, transform=trans) ax.set_ylabel(exp.meta['params']['target_names'][target_idx]) ax.set_xlabel(feature_names[0]) ax.legend() return ax, ylim # No type check due to the generic explanation object @no_type_check def _plot_two_pd_cat_cat(exp: Explanation, feature: int, target_idx: int, ax: Optional['plt.Axes'] = None, pd_cat_cat_kw: Optional[dict] = None) -> Tuple['plt.Axes', Optional[Tuple[float, float]]]: """ Plots two ways partial dependence curve for two categorical features. Parameters ---------- exp, feature, pd_cat_cat_kw See :py:meth:`alibi.explainers.partial_dependence.plot_pd` method. target_idx The target index for which to plot the partial dependence (PD) curves. An integer denoting target index in `exp.meta['params']['target_names']`. ax Pre-existing axes for the plot. Otherwise, call `matplotlib.pyplot.gca()` internally. Return ------ `matplotlib` axes and ``None``. 
""" import matplotlib.pyplot as plt from alibi.utils.visualization import heatmap if ax is None: ax = plt.gca() if exp.meta['params']['kind'] not in [Kind.AVERAGE, Kind.BOTH]: raise ValueError("Can only plot partial dependence for `kind` in `['average', 'both']`.") feature_names = exp.data['feature_names'][feature] feature_values = exp.data['feature_values'][feature] pd_values = exp.data['pd_values'][feature][target_idx] # extract labels for each categorical features feature0_index = exp.meta['params']['feature_names'].index(feature_names[0]) feature1_index = exp.meta['params']['feature_names'].index(feature_names[1]) labels0 = [exp.meta['params']['categorical_names'][feature0_index][i] for i in feature_values[0].astype(np.int32)] labels1 = [exp.meta['params']['categorical_names'][feature1_index][i] for i in feature_values[1].astype(np.int32)] # plot heatmap default_pd_cat_cat_kw = { 'annot': True, 'fmt': '{x:.2f}', 'linewidths': 1.5, 'yticklabels': labels0, 'xticklabels': labels1, 'aspect': 'auto' } pd_cat_cat_kw = default_pd_cat_cat_kw if pd_cat_cat_kw is None else {**default_pd_cat_cat_kw, **pd_cat_cat_kw} heatmap(pd_values, ax=ax, **pd_cat_cat_kw) # set ticks labels ax.set_xticklabels(labels1) ax.set_yticklabels(labels0) # set axis labels ax.set_xlabel(exp.data['feature_names'][feature][1]) ax.set_ylabel(exp.data['feature_names'][feature][0]) return ax, None