is_assignments/a2/code/.venv/lib/python3.10/site-packages/scikitplot/decomposition.py

195 lines
7.9 KiB
Python

"""
The :mod:`scikitplot.decomposition` module includes plots built specifically
for scikit-learn estimators that are used for dimensionality reduction
e.g. PCA. You can use your own estimators, but these plots assume specific
properties shared by scikit-learn estimators. The specific requirements are
documented per function.
"""
from __future__ import absolute_import, division, print_function, \
unicode_literals
import matplotlib.pyplot as plt
import numpy as np
def plot_pca_component_variance(clf, title='PCA Component Explained Variances',
target_explained_variance=0.75, ax=None,
figsize=None, title_fontsize="large",
text_fontsize="medium"):
"""Plots PCA components' explained variance ratios. (new in v0.2.2)
Args:
clf: PCA instance that has the ``explained_variance_ratio_`` attribute.
title (string, optional): Title of the generated plot. Defaults to
"PCA Component Explained Variances"
target_explained_variance (float, optional): Looks for the minimum
number of principal components that satisfies this value and
emphasizes it on the plot. Defaults to 0.75
ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
plot the curve. If None, the plot is drawn on a new set of axes.
figsize (2-tuple, optional): Tuple denoting figure size of the plot
e.g. (6, 6). Defaults to ``None``.
title_fontsize (string or int, optional): Matplotlib-style fontsizes.
Use e.g. "small", "medium", "large" or integer-values. Defaults to
"large".
text_fontsize (string or int, optional): Matplotlib-style fontsizes.
Use e.g. "small", "medium", "large" or integer-values. Defaults to
"medium".
Returns:
ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
drawn.
Example:
>>> import scikitplot as skplt
>>> pca = PCA(random_state=1)
>>> pca.fit(X)
>>> skplt.decomposition.plot_pca_component_variance(pca)
<matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
>>> plt.show()
.. image:: _static/examples/plot_pca_component_variance.png
:align: center
:alt: PCA Component variances
"""
if not hasattr(clf, 'explained_variance_ratio_'):
raise TypeError('"clf" does not have explained_variance_ratio_ '
'attribute. Has the PCA been fitted?')
if ax is None:
fig, ax = plt.subplots(1, 1, figsize=figsize)
ax.set_title(title, fontsize=title_fontsize)
cumulative_sum_ratios = np.cumsum(clf.explained_variance_ratio_)
# Magic code for figuring out closest value to target_explained_variance
idx = np.searchsorted(cumulative_sum_ratios, target_explained_variance)
ax.plot(range(len(clf.explained_variance_ratio_) + 1),
np.concatenate(([0], np.cumsum(clf.explained_variance_ratio_))),
'*-')
ax.grid(True)
ax.set_xlabel('First n principal components', fontsize=text_fontsize)
ax.set_ylabel('Explained variance ratio of first n components',
fontsize=text_fontsize)
ax.set_ylim([-0.02, 1.02])
if idx < len(cumulative_sum_ratios):
ax.plot(idx+1, cumulative_sum_ratios[idx], 'ro',
label='{0:0.3f} Explained variance ratio for '
'first {1} components'.format(cumulative_sum_ratios[idx],
idx+1),
markersize=4, markeredgewidth=4)
ax.axhline(cumulative_sum_ratios[idx],
linestyle=':', lw=3, color='black')
ax.tick_params(labelsize=text_fontsize)
ax.legend(loc="best", fontsize=text_fontsize)
return ax
def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection',
biplot=False, feature_labels=None,
ax=None, figsize=None, cmap='Spectral',
title_fontsize="large", text_fontsize="medium"):
"""Plots the 2-dimensional projection of PCA on a given dataset.
Args:
clf: Fitted PCA instance that can ``transform`` given data set into 2
dimensions.
X (array-like, shape (n_samples, n_features)):
Feature set to project, where n_samples is the number of samples
and n_features is the number of features.
y (array-like, shape (n_samples) or (n_samples, n_features)):
Target relative to X for labeling.
title (string, optional): Title of the generated plot. Defaults to
"PCA 2-D Projection"
biplot (bool, optional): If True, the function will generate and plot
biplots. If false, the biplots are not generated.
feature_labels (array-like, shape (n_classes), optional): List of labels
that represent each feature of X. Its index position must also be
relative to the features. If ``None`` is given, then labels will be
automatically generated for each feature.
e.g. "variable1", "variable2", "variable3" ...
ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
plot the curve. If None, the plot is drawn on a new set of axes.
figsize (2-tuple, optional): Tuple denoting figure size of the plot
e.g. (6, 6). Defaults to ``None``.
cmap (string or :class:`matplotlib.colors.Colormap` instance, optional):
Colormap used for plotting the projection. View Matplotlib Colormap
documentation for available options.
https://matplotlib.org/users/colormaps.html
title_fontsize (string or int, optional): Matplotlib-style fontsizes.
Use e.g. "small", "medium", "large" or integer-values. Defaults to
"large".
text_fontsize (string or int, optional): Matplotlib-style fontsizes.
Use e.g. "small", "medium", "large" or integer-values. Defaults to
"medium".
Returns:
ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
drawn.
Example:
>>> import scikitplot as skplt
>>> pca = PCA(random_state=1)
>>> pca.fit(X)
>>> skplt.decomposition.plot_pca_2d_projection(pca, X, y)
<matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
>>> plt.show()
.. image:: _static/examples/plot_pca_2d_projection.png
:align: center
:alt: PCA 2D Projection
"""
transformed_X = clf.transform(X)
if ax is None:
fig, ax = plt.subplots(1, 1, figsize=figsize)
ax.set_title(title, fontsize=title_fontsize)
classes = np.unique(np.array(y))
colors = plt.cm.get_cmap(cmap)(np.linspace(0, 1, len(classes)))
for label, color in zip(classes, colors):
ax.scatter(transformed_X[y == label, 0], transformed_X[y == label, 1],
alpha=0.8, lw=2, label=label, color=color)
if biplot:
xs = transformed_X[:, 0]
ys = transformed_X[:, 1]
vectors = np.transpose(clf.components_[:2, :])
vectors_scaled = vectors * [xs.max(), ys.max()]
for i in range(vectors.shape[0]):
ax.annotate("", xy=(vectors_scaled[i, 0], vectors_scaled[i, 1]),
xycoords='data', xytext=(0, 0), textcoords='data',
arrowprops={'arrowstyle': '-|>', 'ec': 'r'})
ax.text(vectors_scaled[i, 0] * 1.05, vectors_scaled[i, 1] * 1.05,
feature_labels[i] if feature_labels else "Variable" + str(i),
color='b', fontsize=text_fontsize)
ax.legend(loc='best', shadow=False, scatterpoints=1,
fontsize=text_fontsize)
ax.set_xlabel('First Principal Component', fontsize=text_fontsize)
ax.set_ylabel('Second Principal Component', fontsize=text_fontsize)
ax.tick_params(labelsize=text_fontsize)
return ax