# -*- coding: utf-8 -*-
"""Utility functions for running examples
"""
# Author: Yue Zhao <zhaoy@cmu.edu>
# License: BSD 2 clause
from __future__ import division
from __future__ import print_function
import matplotlib.pyplot as plt
from .data import check_consistent_shape
from .data import get_outliers_inliers
def visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
              y_test_pred, show_figure=True,
              save_figure=False):  # pragma: no cover
    """Draw a 2x2 comparison of ground truth vs. predicted outlier labels.

    The top row shows the training set (ground truth, then prediction) and
    the bottom row shows the test set in the same order. Internal use only.

    Parameters
    ----------
    clf_name : str
        The name of the detector. Used in the figure title and, when
        ``save_figure`` is set, as the stem of the output file name.

    X_train : numpy array of shape (n_samples, n_features)
        The training samples. Must have exactly two features.

    y_train : list or array of shape (n_samples,)
        The ground truth of training samples.

    X_test : numpy array of shape (n_samples, n_features)
        The test samples.

    y_test : list or array of shape (n_samples,)
        The ground truth of test samples.

    y_train_pred : list or array of shape (n_samples,)
        The predicted binary labels of the training samples.

    y_test_pred : list or array of shape (n_samples,)
        The predicted binary labels of the test samples.

    show_figure : bool, optional (default=True)
        If set to True, show the figure.

    save_figure : bool, optional (default=False)
        If set to True, save the figure as ``<clf_name>.png`` in the
        current working directory.

    Raises
    ------
    ValueError
        If ``X_train`` does not have exactly two columns.
    """

    def _draw_panel(inlier_pts, outlier_pts, panel_title, inlier_color,
                    outlier_color):
        """Scatter inliers and outliers on the current axes.

        Parameters
        ----------
        inlier_pts : numpy array of shape (n_samples, n_features)
            Inliers; only the first two columns are plotted.

        outlier_pts : numpy array of shape (n_samples, n_features)
            Outliers; only the first two columns are plotted.

        panel_title : str
            Subplot title.

        inlier_color : str
            The color of inliers.

        outlier_color : str
            The color of outliers.
        """
        plt.axis("equal")
        plt.scatter(inlier_pts[:, 0], inlier_pts[:, 1], label='inliers',
                    color=inlier_color, s=40)
        plt.scatter(outlier_pts[:, 0], outlier_pts[:, 1], label='outliers',
                    color=outlier_color, s=50, marker='^')
        plt.title(panel_title, fontsize=15)
        # Tick values carry no meaning in these demo plots, so hide them.
        plt.xticks([])
        plt.yticks([])
        plt.legend(loc=3, prop={'size': 10})

    # check input data shapes are consistent
    checked = check_consistent_shape(X_train, y_train, X_test, y_test,
                                     y_train_pred, y_test_pred)
    X_train, y_train, X_test, y_test, y_train_pred, y_test_pred = checked

    n_features = X_train.shape[1]
    if n_features != 2:
        raise ValueError("Input data has to be 2-d for visualization. The "
                         "input data has {shape}.".format(shape=X_train.shape))

    # One row per panel: (subplot position, samples, labels, title,
    # inlier color, outlier color).
    panel_specs = (
        (221, X_train, y_train, 'Train Set Ground Truth', 'blue', 'orange'),
        (222, X_train, y_train_pred, 'Train Set Prediction', 'blue',
         'orange'),
        (223, X_test, y_test, 'Test Set Ground Truth', 'green', 'red'),
        (224, X_test, y_test_pred, 'Test Set Prediction', 'green', 'red'),
    )

    # Split every panel's samples before any figure is created, so a failure
    # in the split does not leave a half-built figure behind.
    panels = []
    for position, samples, labels, title, in_color, out_color in panel_specs:
        outlier_pts, inlier_pts = get_outliers_inliers(samples, labels)
        panels.append((position, inlier_pts, outlier_pts, title, in_color,
                       out_color))

    # plot ground truth vs. predicted results
    fig = plt.figure(figsize=(12, 10))
    plt.suptitle("Demo of {clf_name} Detector".format(clf_name=clf_name),
                 fontsize=15)

    for position, inlier_pts, outlier_pts, title, in_color, out_color in panels:
        fig.add_subplot(position)
        _draw_panel(inlier_pts, outlier_pts, title, in_color, out_color)

    if save_figure:
        plt.savefig('{clf_name}.png'.format(clf_name=clf_name), dpi=300)

    if show_figure:
        plt.show()
def data_visualize(X_train, y_train, show_figure=True,
                   save_figure=False):  # pragma: no cover
    """Utility function for visualizing the synthetic samples generated by
    generate_data_cluster function.

    Every cluster is drawn on the same axes. Cluster ``i`` uses the i-th
    inlier/outlier color pair, which is why at most five clusters are
    supported.

    Parameters
    ----------
    X_train : sequence of numpy arrays
        One array of samples per cluster. Only the first two columns of
        each array are plotted.

    y_train : sequence of lists or arrays
        The ground truth of the training samples; ``y_train[i]`` holds the
        labels for ``X_train[i]``.

    show_figure : bool, optional (default=True)
        If set to True, show the figure.

    save_figure : bool, optional (default=False)
        If set to True, save the figure as ``data_visualization.png`` in
        the current working directory.

    Raises
    ------
    ValueError
        If more clusters are passed than there are color pairs (five).
    """
    in_colors = ('blue', 'green', 'purple', 'brown', 'black')
    out_colors = ('red', 'orange', 'grey', 'violet', 'pink')
    max_clusters = len(in_colors)

    # Validate with an explicit exception rather than ``assert``: assertions
    # are stripped under ``python -O``, which would turn an oversized input
    # into an opaque IndexError on the color tuples below.
    n_clusters = len(X_train)
    if n_clusters > max_clusters:
        raise ValueError(
            "data_visualize supports at most {max_clusters} clusters, "
            "got {n_clusters}.".format(max_clusters=max_clusters,
                                       n_clusters=n_clusters))

    def _plot(X_inliers, X_outliers, inlier_color='blue',
              outlier_color='orange'):
        """Internal method to scatter inliers and outliers on the current
        axes.

        Parameters
        ----------
        X_inliers : numpy array of shape (n_samples, n_features)
            Inliers.

        X_outliers : numpy array of shape (n_samples, n_features)
            Outliers.

        inlier_color : str, optional (default='blue')
            The color of inliers.

        outlier_color : str, optional (default='orange')
            The color of outliers.
        """
        plt.axis("equal")
        plt.scatter(X_inliers[:, 0], X_inliers[:, 1], label='inliers',
                    color=inlier_color, s=40)
        plt.scatter(X_outliers[:, 0], X_outliers[:, 1],
                    label='outliers', color=outlier_color, s=50, marker='^')
        plt.xticks([])
        plt.yticks([])
        # Rebuilt after every cluster so the legend always lists everything
        # drawn so far.
        plt.legend(loc='best', prop={'size': 10})

    plt.figure(figsize=(13, 10))
    plt.suptitle("Demo of Generating Data in Clusters", fontsize=15)

    for i, cluster in enumerate(X_train):
        X_train_outliers, X_train_inliers = get_outliers_inliers(cluster,
                                                                 y_train[i])
        _plot(X_train_inliers, X_train_outliers,
              inlier_color=in_colors[i],
              outlier_color=out_colors[i])

    if save_figure:
        # savefig requires a target; calling it with no arguments raised
        # TypeError, so saving never worked.
        plt.savefig('data_visualization.png', dpi=300)

    if show_figure:
        plt.show()