Source code for pyod.utils.example

# -*- coding: utf-8 -*-
"""Utility functions for running examples
"""
# Author: Yue Zhao <zhaoy@cmu.edu>
# License: BSD 2 clause


from __future__ import division
from __future__ import print_function

import matplotlib.pyplot as plt
from .data import check_consistent_shape
from .data import get_outliers_inliers


def visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
              y_test_pred, show_figure=True,
              save_figure=False):  # pragma: no cover
    """Plot ground truth next to detector predictions for 2-d example data.

    A 2x2 grid is drawn: the top row shows the training set (ground truth,
    then prediction) and the bottom row shows the test set in the same
    order. Internal use only.

    Parameters
    ----------
    clf_name : str
        The name of the detector. Used in the figure title and, when the
        figure is saved, as the stem of the output file name.

    X_train : numpy array of shape (n_samples, n_features)
        The training samples. Must have exactly two features.

    y_train : list or array of shape (n_samples,)
        The ground truth of training samples.

    X_test : numpy array of shape (n_samples, n_features)
        The test samples.

    y_test : list or array of shape (n_samples,)
        The ground truth of test samples.

    y_train_pred : numpy array of shape (n_samples,)
        The predicted binary labels of the training samples.

    y_test_pred : numpy array of shape (n_samples,)
        The predicted binary labels of the test samples.

    show_figure : bool, optional (default=True)
        If set to True, show the figure.

    save_figure : bool, optional (default=False)
        If set to True, save the figure as ``<clf_name>.png`` (300 dpi),
        using a path relative to the current working directory.

    Raises
    ------
    ValueError
        If ``X_train`` does not have exactly two columns.
    """

    def _draw_panel(inlier_points, outlier_points, panel_title,
                    inlier_color='blue', outlier_color='orange'):
        """Scatter inliers and outliers onto the currently active subplot.

        Parameters
        ----------
        inlier_points : numpy array of shape (n_samples, n_features)
            Inliers; only the first two columns are plotted.

        outlier_points : numpy array of shape (n_samples, n_features)
            Outliers; only the first two columns are plotted.

        panel_title : str
            Subplot title.

        inlier_color : str, optional (default='blue')
            The color of inliers.

        outlier_color : str, optional (default='orange')
            The color of outliers.
        """
        plt.axis("equal")

        plt.scatter(inlier_points[:, 0], inlier_points[:, 1],
                    label='inliers', color=inlier_color, s=40)
        plt.scatter(outlier_points[:, 0], outlier_points[:, 1],
                    label='outliers', color=outlier_color, s=50, marker='^')

        plt.title(panel_title, fontsize=15)
        # Tick marks carry no meaning for the synthetic demo data.
        plt.xticks([])
        plt.yticks([])
        plt.legend(loc=3, prop={'size': 10})

    # check input data shapes are consistent
    checked = check_consistent_shape(X_train, y_train, X_test, y_test,
                                     y_train_pred, y_test_pred)
    X_train, y_train, X_test, y_test, y_train_pred, y_test_pred = checked

    # Only the first two columns can be drawn, so reject anything not 2-d.
    if X_train.shape[1] != 2:
        raise ValueError("Input data has to be 2-d for visualization. The "
                         "input data has {shape}.".format(shape=X_train.shape))

    # get_outliers_inliers hands back the outliers first, then the inliers.
    train_out_true, train_in_true = get_outliers_inliers(X_train, y_train)
    train_out_pred, train_in_pred = get_outliers_inliers(X_train,
                                                         y_train_pred)
    test_out_true, test_in_true = get_outliers_inliers(X_test, y_test)
    test_out_pred, test_in_pred = get_outliers_inliers(X_test, y_test_pred)

    # (subplot position, inliers, outliers, title, inlier color, outlier color)
    panels = (
        (221, train_in_true, train_out_true,
         'Train Set Ground Truth', 'blue', 'orange'),
        (222, train_in_pred, train_out_pred,
         'Train Set Prediction', 'blue', 'orange'),
        (223, test_in_true, test_out_true,
         'Test Set Ground Truth', 'green', 'red'),
        (224, test_in_pred, test_out_pred,
         'Test Set Prediction', 'green', 'red'),
    )

    # plot ground truth vs. predicted results
    figure = plt.figure(figsize=(12, 10))
    plt.suptitle("Demo of {clf_name} Detector".format(clf_name=clf_name),
                 fontsize=15)

    for position, inliers, outliers, title, in_color, out_color in panels:
        # add_subplot makes the new axes current, which _draw_panel relies on.
        figure.add_subplot(position)
        _draw_panel(inliers, outliers, title,
                    inlier_color=in_color, outlier_color=out_color)

    if save_figure:
        plt.savefig('{clf_name}.png'.format(clf_name=clf_name), dpi=300)

    if show_figure:
        plt.show()
def data_visualize(X_train, y_train, show_figure=True,
                   save_figure=False):  # pragma: no cover
    """Utility function for visualizing the synthetic samples generated by
    generate_data_cluster function.

    All clusters are drawn onto a single set of axes, each cluster with its
    own inlier/outlier color pair.

    Parameters
    ----------
    X_train : list (or array) of numpy arrays
        The training samples, grouped by cluster: ``X_train[i]`` holds the
        samples of the i-th cluster, of which the first two columns are
        plotted. At most 5 clusters are supported.
        NOTE(review): presumably the per-cluster output of the data
        generator named above -- confirm against the caller.

    y_train : list (or array) of label arrays
        The ground truth of training samples; ``y_train[i]`` labels the
        samples in ``X_train[i]``.

    show_figure : bool, optional (default=True)
        If set to True, show the figure.

    save_figure : bool, optional (default=False)
        If set to True, save the figure as ``data_clusters.png`` (300 dpi),
        using a path relative to the current working directory.

    Raises
    ------
    AssertionError
        If more than 5 clusters are passed.
    """

    def _plot(X_inliers, X_outliers, inlier_color='blue',
              outlier_color='orange'):
        """Internal method to scatter inliers and outliers on the current
        axes.

        Parameters
        ----------
        X_inliers : numpy array of shape (n_samples, n_features)
            Inliers.

        X_outliers : numpy array of shape (n_samples, n_features)
            Outliers.

        inlier_color : str, optional (default='blue')
            The color of inliers.

        outlier_color : str, optional (default='orange')
            The color of outliers.
        """
        plt.axis("equal")
        plt.scatter(X_inliers[:, 0], X_inliers[:, 1], label='inliers',
                    color=inlier_color, s=40)
        plt.scatter(X_outliers[:, 0], X_outliers[:, 1],
                    label='outliers', color=outlier_color, s=50, marker='^')
        plt.xticks([])
        plt.yticks([])
        plt.legend(loc='best', prop={'size': 10})

    in_colors = ['blue', 'green', 'purple', 'brown', 'black']
    out_colors = ['red', 'orange', 'grey', 'violet', 'pink']

    # Each cluster needs its own color pair and only five pairs are defined.
    assert len(X_train) <= 5, \
        "data_visualize supports at most 5 clusters, got {n}.".format(
            n=len(X_train))

    plt.figure(figsize=(13, 10))
    plt.suptitle("Demo of Generating Data in Clusters", fontsize=15)

    for i, cluster in enumerate(X_train):
        # get_outliers_inliers hands back the outliers first, then inliers.
        X_train_outliers, X_train_inliers = get_outliers_inliers(cluster,
                                                                 y_train[i])
        _plot(X_train_inliers, X_train_outliers,
              inlier_color=in_colors[i], outlier_color=out_colors[i])

    if save_figure:
        # savefig requires a target file name; calling it with no arguments
        # raises TypeError, so a fixed name is supplied here.
        plt.savefig('data_clusters.png', dpi=300)

    if show_figure:
        plt.show()