Source code for alibi_detect.utils.visualize

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve, auc
from typing import Dict, Union
import warnings



[docs]
def plot_instance_score(preds: Dict,
                        target: np.ndarray,
                        labels: np.ndarray,
                        threshold: float,
                        ylim: tuple = (None, None)) -> None:
    """
    Draw the instance level scores of a batch as a scatter plot together with the threshold line.

    Parameters
    ----------
    preds
        Prediction dictionary of an outlier or adversarial detector. The scores are read from
        `preds['data']['instance_score']`.
    target
        Ground truth value of each instance. The points are grouped by these values and each
        value is used to index `labels`.
    labels
        Names of the classification labels, indexed by the values found in `target`.
    threshold
        Threshold used to classify outliers or adversarial instances, drawn as a horizontal line.
    ylim
        Min and max y-axis values.
    """
    instance_scores = preds['data']['instance_score']
    n_scores = len(instance_scores)
    positions = np.arange(n_scores)
    frame = pd.DataFrame({'idx': positions, 'score': instance_scores, 'label': target})

    fig, ax = plt.subplots()
    # one marker series per ground truth value so that every class gets its own legend entry
    for label_id, members in frame.groupby('label'):
        ax.plot(members['idx'], members['score'], marker='o', linestyle='', ms=6, label=labels[label_id])
    ax.plot(positions, np.ones(n_scores) * threshold, color='g', label='Threshold')
    ax.set_ylim(ylim)
    ax.set_xlabel('Number of Instances')
    ax.set_ylabel('Instance Level Score')
    ax.legend()
    plt.show()




[docs]
def plot_feature_outlier_image(od_preds: Dict,
                               X: np.ndarray,
                               X_recon: np.ndarray = None,
                               instance_ids: list = None,
                               max_instances: int = 5,
                               outliers_only: bool = False,
                               n_channels: int = 3,
                               figsize: tuple = (20, 20)) -> None:
    """
    Plot feature (pixel) wise outlier scores for images.

    Every displayed instance takes one row of the figure: the original image, optionally its
    reconstruction, and the outlier score of channel 0. If `n_channels` equals 3, the scores
    of channels 1 and 2 are shown as well. If there is nothing to display, a `UserWarning` is
    issued and no figure is created.

    Parameters
    ----------
    od_preds
        Output of an outlier detector's prediction. `od_preds['data']['feature_score']` is plotted
        and `od_preds['data']['is_outlier']` is used to select instances if `instance_ids` is None.
    X
        Batch of instances to apply outlier detection to.
    X_recon
        Reconstructed instances of X.
    instance_ids
        List with indices of instances to display. If None, either all instances or only the
        outliers are used, depending on `outliers_only`.
    max_instances
        Maximum number of instances to display.
    outliers_only
        Whether to only show outliers or not. Only used if `instance_ids` is None.
    n_channels
        Number of channels of the images.
    figsize
        Tuple for the figure size.
    """
    scores = od_preds['data']['feature_score']
    if instance_ids is None:
        is_outlier = od_preds['data']['is_outlier']
        if outliers_only:
            instance_ids = list(np.where(is_outlier)[0])
        else:
            instance_ids = list(range(len(is_outlier)))
    # clamp at 0 so that a negative `max_instances` is treated as "show nothing"
    n_instances = max(min(max_instances, len(instance_ids)), 0)
    instance_ids = instance_ids[:n_instances]

    if n_instances == 0:
        # a subplot grid needs at least one row, so bail out before creating the figure
        message = 'No outliers found!' if outliers_only else 'No instances to display!'
        warnings.warn(message, UserWarning)
        return

    score_channels = (0, 1, 2) if n_channels == 3 else (0,)
    # original image + optional reconstruction + one panel per displayed score channel
    n_cols = 1 + len(score_channels)
    if X_recon is not None:
        n_cols += 1

    plt.subplots(nrows=n_instances, ncols=n_cols, figsize=figsize)

    n_subplot = 1
    for row, idx in enumerate(instance_ids):
        panels = [('Original', X[idx])]
        if X_recon is not None:
            panels.append(('Reconstruction', X_recon[idx]))
        for channel in score_channels:
            panels.append(('Outlier Score Channel {}'.format(channel), scores[idx][:, :, channel]))

        for title, image in panels:
            plt.subplot(n_instances, n_cols, n_subplot)
            plt.axis('off')
            if row == 0:  # column titles only on the first row
                plt.title(title)
            plt.imshow(image)
            n_subplot += 1

    plt.show()




[docs]
def plot_feature_outlier_tabular(od_preds: Dict,
                                 X: np.ndarray,
                                 X_recon: np.ndarray = None,
                                 threshold: float = None,
                                 instance_ids: list = None,
                                 max_instances: int = 5,
                                 top_n: int = int(1e12),
                                 outliers_only: bool = False,
                                 feature_names: list = None,
                                 width: float = .2,
                                 figsize: tuple = (20, 10)) -> None:
    """
    Plot feature wise outlier scores for tabular data.

    Every displayed instance takes one row of the figure with two bar charts: the feature values
    (optionally next to their reconstruction) and the feature level outlier scores (optionally
    with the threshold). If there is nothing to display, a `UserWarning` is issued and no figure
    is created.

    Parameters
    ----------
    od_preds
        Output of an outlier detector's prediction. `od_preds['data']['feature_score']` is plotted
        and `od_preds['data']['is_outlier']` is used to select instances if `instance_ids` is None.
    X
        Batch of instances to apply outlier detection to.
    X_recon
        Reconstructed instances of X.
    threshold
        Threshold used for outlier score to determine outliers.
    instance_ids
        List with indices of instances to display. If None, either all instances or only the
        outliers are used, depending on `outliers_only`.
    max_instances
        Maximum number of instances to display.
    top_n
        Maximum number of features to display, ordered by outlier score.
    outliers_only
        Whether to only show outliers or not. Only used if `instance_ids` is None.
    feature_names
        List with feature names.
    width
        Column width for bar charts.
    figsize
        Tuple for the figure size.
    """
    if instance_ids is None:
        is_outlier = od_preds['data']['is_outlier']
        if outliers_only:
            instance_ids = list(np.where(is_outlier)[0])
        else:
            instance_ids = list(range(len(is_outlier)))
    # clamp at 0 so that a negative `max_instances` is treated as "show nothing"
    n_instances = max(min(max_instances, len(instance_ids)), 0)
    instance_ids = instance_ids[:n_instances]

    if n_instances == 0:
        # a subplot grid needs at least one row, so bail out before creating the figure
        message = 'No outliers found!' if outliers_only else 'No instances to display!'
        warnings.warn(message, UserWarning)
        return

    n_features = X.shape[1]
    n_cols = 2

    labels_values = ['Original']
    if X_recon is not None:
        labels_values += ['Reconstructed']
    labels_scores = ['Outlier Score']
    if threshold is not None:
        labels_scores = ['Threshold'] + labels_scores

    plt.subplots(nrows=n_instances, ncols=n_cols, figsize=figsize)
    feature_scores = od_preds['data']['feature_score']

    n_subplot = 1
    for idx in instance_ids:

        fscore = feature_scores[idx]
        if top_n >= n_features:
            keep_cols = np.arange(n_features)
        else:
            # indices of the `top_n` features with the highest outlier score, highest first
            keep_cols = np.argsort(fscore)[::-1][:top_n]
        fscore = fscore[keep_cols]
        X_idx = X[idx][keep_cols]
        ticks = np.arange(len(keep_cols))
        if feature_names is not None:
            tick_labels = list(np.array(feature_names)[keep_cols])
        else:
            tick_labels = None

        # left panel: feature values, optionally side by side with the reconstruction
        plt.subplot(n_instances, n_cols, n_subplot)
        if X_recon is not None:
            X_recon_idx = X_recon[idx][keep_cols]
            plt.bar(ticks - width, X_idx, width=width, color='b', align='center')
            plt.bar(ticks, X_recon_idx, width=width, color='g', align='center')
        else:
            plt.bar(ticks, X_idx, width=width, color='b', align='center')
        if tick_labels is not None:
            plt.xticks(ticks=ticks, labels=tick_labels, rotation=45)
        plt.title('Feature Values')
        plt.xlabel('Features')
        plt.ylabel('Feature Values')
        plt.legend(labels_values)
        n_subplot += 1

        # right panel: feature level outlier scores, optionally with the threshold line
        plt.subplot(n_instances, n_cols, n_subplot)
        plt.bar(ticks, fscore)
        if threshold is not None:
            plt.plot(np.ones(len(ticks)) * threshold, 'r')
        if tick_labels is not None:
            plt.xticks(ticks=ticks, labels=tick_labels, rotation=45)
        plt.title('Feature Level Outlier Score')
        plt.xlabel('Features')
        plt.ylabel('Outlier Score')
        plt.legend(labels_scores)
        n_subplot += 1

    plt.tight_layout()
    plt.show()




[docs]
def plot_feature_outlier_ts(od_preds: Dict,
                            X: np.ndarray,
                            threshold: Union[float, int, list, np.ndarray],
                            window: tuple = None,
                            t: np.ndarray = None,
                            X_orig: np.ndarray = None,
                            width: float = .2,
                            figsize: tuple = (20, 8),
                            ylim: tuple = (None, None)
                            ) -> None:
    """
    Plot feature wise outlier scores for time series data.

    Every feature takes one row of the figure: the observations on the left (optionally together
    with `X_orig`) and the outlier score per timestep with the threshold on the right.

    Parameters
    ----------
    od_preds
        Output of an outlier detector's prediction. `od_preds['data']['feature_score']` is used
        if it is a numpy array, otherwise `od_preds['data']['instance_score']` is used as a
        single score column.
    X
        Time series to apply outlier detection to. A 1D array is treated as a single feature.
    threshold
        Threshold used to classify outliers or adversarial instances. A scalar (including numpy
        scalars) is shared by all features, otherwise it is indexed per feature.
    window
        Start and end timestep to plot.
    t
        Timesteps.
    X_orig
        Optional original time series without outliers.
    width
        Column width for bar charts.
    figsize
        Tuple for the figure size.
    ylim
        Min and max y-axis values for the outlier scores.
    """
    # a univariate series passed as a 1D array becomes one feature column, so that the
    # per-feature column indexing below works for it too
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    if X_orig is not None and len(X_orig.shape) == 1:
        X_orig = X_orig.reshape(-1, 1)
    n_features = X.shape[1]

    if window is not None:
        t_start, t_end = window
    else:
        t_start, t_end = 0, X.shape[0]

    if t is None:
        t = np.arange(X.shape[0])
    ticks = t[t_start:t_end]

    # check if feature level scores available
    feature_score = od_preds['data']['feature_score']
    if isinstance(feature_score, np.ndarray):
        scores = feature_score
        if scores.ndim == 1:
            scores = scores.reshape(-1, 1)
    else:
        # NOTE: only one score column is available here, so this fallback can only serve a
        # single feature
        scores = od_preds['data']['instance_score'].reshape(-1, 1)

    # np.ndim also recognises numpy scalars (e.g. np.float32), which are neither float nor int
    shared_threshold = np.ndim(threshold) == 0

    n_cols = 2

    plt.subplots(nrows=n_features, ncols=n_cols, figsize=figsize)

    for i in range(n_features):
        # left panel: observations
        plt.subplot(n_features, n_cols, 2 * i + 1)
        if i == 0 and X_orig is not None:
            plt.title('Original vs. perturbed data')
        elif i == 0:
            plt.title('Data')

        plt.plot(ticks, X[t_start:t_end, i], marker='*', markersize=4, label='Data with Outliers')
        if X_orig is not None:
            plt.plot(ticks, X_orig[t_start:t_end, i], marker='o', markersize=4, label='Data without Outliers')
        plt.xlabel('Time')
        plt.ylabel('Observation')
        plt.legend()

        # right panel: outlier scores and threshold
        plt.subplot(n_features, n_cols, 2 * i + 2)
        if i == 0:
            plt.title('Outlier Score per Timestep')

        plt.bar(ticks, scores[t_start:t_end, i], width=width, color='g', align='center', label='Outlier Score')
        thr = threshold if shared_threshold else threshold[i]
        plt.plot(ticks, np.ones(len(ticks)) * thr, 'r', label='Threshold')
        plt.xlabel('Time')
        plt.ylabel('Outlier Score')
        plt.legend()
        plt.ylim(ylim)

    plt.show()




[docs]
def plot_roc(roc_data: Dict[str, Dict[str, np.ndarray]], figsize: tuple = (10, 5)) -> None:
    """
    Plot one ROC curve per entry of `roc_data`, with the AUC of each curve shown in the legend.

    Parameters
    ----------
    roc_data
        Dictionary mapping the legend label of a curve to another dictionary which holds the
        outlier scores under the key `scores` and the outlier labels under the key `labels`.
    figsize
        Figure size.
    """
    plt.subplots(nrows=1, ncols=1, figsize=figsize)

    for legend_name, entry in roc_data.items():
        false_pos_rate, true_pos_rate, _ = roc_curve(entry['labels'], entry['scores'])
        area = auc(false_pos_rate, true_pos_rate)
        curve_label = '{}: AUC={:.4f}'.format(legend_name, area)
        plt.plot(false_pos_rate, true_pos_rate, lw=1, label=curve_label)

    # dashed diagonal from (0, 0) to (1, 1) as a visual reference
    plt.plot([0, 1], [0, 1], color='black', lw=1, linestyle='--')

    plt.xlim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylim([0.0, 1.05])
    plt.ylabel('True Positive Rate')

    plt.title('{}'.format('ROC curve'))
    plt.legend(loc="lower right", ncol=1)
    plt.grid()
    plt.show()