main
Spagnolo Gasper 2022-12-27 11:34:53 +01:00
parent bd33f92903
commit cd45a6bcb8
2 changed files with 273 additions and 80 deletions

View File

@ -7,23 +7,44 @@ import uz_framework.text as uz_text
import os
def one_a() -> None:
print('Hello')
"""
Solve the following assignment by hand for practice: You are given four points
A(3, 4), B(3, 6), C(7, 6) and D(6, 4). Calculate the eigenvectors and eigenvalues for
the given set of points.
"""
print('Solved in notes')
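# A minimal sketch to verify the hand calculation numerically (assumes only
# numpy, imported above as np; not a substitute for the hand derivation).
pts = np.array([[3, 4], [3, 6], [7, 6], [6, 4]], dtype=np.float64)
cov = np.cov(pts.T) # 2x2 covariance matrix of the four points
eigvals, eigvecs = np.linalg.eigh(cov) # eigen-decomposition of a symmetric matrix
print('eigenvalues:', eigvals)
print('eigenvectors (columns):', eigvecs)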
def one_b() -> None:
"""
Write a script to calculate and visualize PCA from 2D data from the file points.txt
(the first column contains the x axis and the second column the y axis). Plot the
points and draw the representation of the Gaussian distribution using drawEllipse
from the supplementary material. Follow Algorithm 1 to compute the eigenvectors and eigenvalues of the PCA subspace.
"""
points = np.loadtxt('./data/points.txt')
uz_image.compute_PCA(points)
uz_image.compute_PCA(points, plot=True)
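# For reference, a rough sketch of the Algorithm 1 steps that compute_PCA is
# assumed to perform (mirroring uz_image.compute_PCA in the second changed file):
# X = points.T - np.mean(points.T, axis=1, keepdims=True) # center the data
# C = (1 / (X.shape[1] - 1)) * (X @ X.T) # covariance matrix
# U, S, VT = np.linalg.svd(C) # eigenvectors/eigenvalues of the PCA subspace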
def one_c() -> None:
"""
The matrix U contains the eigenvectors that represent the basis of our PCA subspace. Draw the eigenvectors on the plot from the previous task. Their origin should
lie at the mean of the data µ. Since both vectors have length 1, a better way
of visualizing them is to multiply each vector by the corresponding eigenvalue.
Draw the first eigenvector with red and the second with green.
"""
points = np.loadtxt('./data/points.txt')
uz_image.compute_PCA(points, plot=True)
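# The eigenvector drawing for this task happens inside uz_image.compute_PCA when
# plot=True; roughly (with U, S, mean as returned by compute_PCA), each direction
# is drawn from the mean with length equal to the square root of its eigenvalue:
# plt.plot([mean[0], mean[0] + U[0,0] * np.sqrt(S[0])], [mean[1], mean[1] + U[0,1] * np.sqrt(S[0])], color='red')
# plt.plot([mean[0], mean[0] + U[1,0] * np.sqrt(S[1])], [mean[1], mean[1] + U[1,1] * np.sqrt(S[1])], color='green')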
def one_d() -> None:
points = np.loadtxt('./data/points.txt')
U, S, VT = uz_image.compute_PCA(points)
U, S, VT, _ = uz_image.compute_PCA(points)
uz_image.plot_histogram_pca(U, S, VT)
# U is the matrix of eigenvectors
# S is the vector of eigenvalues
# VT is the transpose of U (for the symmetric covariance matrix the left and right singular vectors coincide)
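# A small sketch of the quantity plot_histogram_pca visualizes (assuming S is
# sorted in descending order, as np.linalg.svd returns it): the fraction of the
# total variance carried by each eigenvalue.
explained = S / np.sum(S)
print('variance explained by the first eigenvector:', explained[0])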
def one_e() -> None:
"""
@ -37,7 +58,7 @@ def one_e() -> None:
"""
points = np.loadtxt('./data/points.txt')
U, S, VT, mean = uz_image.compute_PCA(points)
U, _, _, mean = uz_image.compute_PCA(points)
# Drop all but first eigenvector
U = U[:, 0:1]
@ -50,53 +71,128 @@ def one_e() -> None:
# Plot the original points and the projected points
plt.scatter(points[:, 0], points[:, 1], c='b', label='Original')
plt.scatter(x_i[:, 0], x_i[:, 1], c='r', label='Projected')
plt.scatter(x_i[:, 0], x_i[:, 1], c='r', label='Projected onto the subspace of the first eigenvector')
plt.legend()
plt.show()
def two_a():
def one_f() -> None:
"""
For the point qpoint = [6, 6]^T calculate the closest point from the input data (using
Euclidean distance). Which point is the closest? Then, project all the points (including
qpoint) to the PCA subspace (calculated without qpoint) and remove the variation
in the direction of the second vector. Calculate the distances again. Which point is
the closest to qpoint now? Visualize the reconstruction.
"""
points = np.loadtxt('./data/points.txt')
point = np.array([6,6])
# Find the closest point using Euclidean distance
d = np.linalg.norm(points - point, axis=1)
closest_point = points[np.argmin(d)]
# Plot the original points
plt.scatter(points[:,0], points[:,1], c='b', label='Original')
plt.scatter(point[0], point[1], c='g', label=f'Point: {point}')
plt.scatter(closest_point[0], closest_point[1], c='r', label=f'Closest point: {closest_point}')
U, _, _, mean = uz_image.compute_PCA(points)
# Drop all but first eigenvector
U = U[:, 0:1]
# Reproject points
y_i = np.matmul(points - mean, U)
# Reproject back
x_i = np.matmul(y_i, U.T) + mean
# Reproject point
point_repr = np.matmul(point - mean, U)
point_repr = np.matmul(point_repr, U.T) + mean
# Find the closest point using Euclidean distance
d = np.linalg.norm(x_i - point_repr, axis=1)
closest_point = x_i[np.argmin(d)]
# Plot the reprojected points
plt.scatter(x_i[:,0], x_i[:,1], c='purple', label='Reprojected')
plt.scatter(point_repr[0], point_repr[1], c='y', label=f'repr Point: {point_repr}')
plt.scatter(closest_point[0], closest_point[1], c='orange', label=f'Closest point repr: {closest_point}')
plt.legend()
plt.show()
def two_a() -> None:
"""
For our requirements it is necessary only to correctly calculate eigenvectors and
eigenvalues up to a scale factor. Therefore implement the dual method according
to Algorithm 2 and test it using the data from points.txt. The first two
eigenvectors should be the same as with Algorithm 1. Algorithm 2 gives
you a larger matrix U, however, all eigenvectors but the first two are equal to zero.
"""
_ = np.loadtxt('./data/points.txt')
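# A hedged sketch of the check described above: compare the first two eigenvectors
# of both algorithms (signs may differ, since eigenvectors are only defined up to scale).
points = np.loadtxt('./data/points.txt')
U_direct, _, _, _ = uz_image.compute_PCA(points)
U_dual, _, _, _ = uz_image.dual_PCA(points)
print('Algorithm 1:', U_direct[:, :2])
print('Algorithm 2:', U_dual[:, :2])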
def two_b():
"""
Project the data from the previous assignment to the PCA space using matrix U,
and then project the data back again in the original space. If you have implemented
the method correctly, you will get the original data (up to the numerical error).
"""
points = np.loadtxt('./data/points.txt')
U, S, VT, mean = dual_PCA(points)
U, _, _, mean = uz_image.dual_PCA(points)
print(U)
y_i = np.matmul(points - mean, U)
# Project points back into original subspace
x_i = np.matmul(y_i, U.T) + mean
# Plot the original points and the projected points
plt.scatter(points[:, 0], points[:, 1], c='b', label='Original')
plt.scatter(x_i[:, 0], x_i[:, 1], c='r', label='Projected')
fig, axs = plt.subplots(1,2)
fig.suptitle('DUAL PCA')
axs[0].scatter(points[:, 0], points[:, 1], c='b', label='Original')
axs[1].scatter(x_i[:, 0], x_i[:, 1], c='r', label='Projected')
fig.legend()
plt.show()
def dual_PCA(points):
X = points.T.copy()
mean = np.mean(X, axis=1)
X = X - mean[:, np.newaxis]
# Compute the covariance matrix
C = (1/(X.shape[1]-1)) * X.T @ X
# Compute the eigenvalues and eigenvectors
U, S, VT = np.linalg.svd(C)
S += 1e-15
U = X @ U @ np.sqrt(np.diag(1/(S * (X.shape[1]-1))))
return U, S, VT, mean
def three_a():
imgs = read_images('./data/faces/1')
"""
Data preparation: Firstly, we have to formulate the problem in a way that is
compatible with the PCA method. Since PCA operates on points in space, we can
represent a grayscale image of size m × n as a point in mn-dimensional space if we
reshape it into a vector. Write a function that reads all the images from one of the
series, transforms them into grayscale, reshapes them using np.reshape and stacks
the resulting column vectors into a matrix of size mn × 64.
"""
_ = uz_image.read_images('./data/faces/1')
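# Sanity-check sketch for the shape described above (assuming the 96x84 faces
# used elsewhere in this script): read_images returns the transposed stack,
# i.e. one image per row.
imgs = uz_image.read_images('./data/faces/1')
print(imgs.shape) # expected (64, 96 * 84)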
def three_b():
imgs = read_images('./data/faces/1')
U, S, VT, mean = dual_PCA(imgs)
"""
Using dual PCA: Use dual PCA on the vectors of images. Write a function that
takes the matrix of image vectors as input and returns the eigenvectors of the PCA
subspace and the mean of the input data.
Note: In step 5 of Algorithm 2 be careful when computing the inverse of S, as
some of the eigenvalues can be very close to 0. Division by zero can cause numerical
errors when computing a matrix inverse. You have to take into account that the
matrix S is a diagonal matrix and must therefore have non-zero diagonal elements.
One way of solving this numerical problem is to add a very small constant
value to the diagonal elements, e.g. 1e-15.
Transform the first five eigenvectors using the np.reshape function back into a
matrix and display them as images. What do the resulting images represent (both
numerically and in the context of faces)?
Project the first image from the series to the PCA space and then back again. Is
the result the same? What do you notice when you change one dimension of the
vector in the image space (e.g. component with index 4074) to 0 and display the
image? Repeat a similar procedure for a vector in the PCA space (project image
in the PCA space, change one of the first five components to zero and project the
image back in the image space and display it as an image). What is the difference?
How many pixels are changed by the first operation and how many by the second?
"""
imgs = uz_image.read_images('./data/faces/1')
U, _, _, mean = uz_image.dual_PCA(imgs)
# Plot those eigenvectors
fig, axs = plt.subplots(1, 5)
fig.suptitle('5 strongest eigenvectors')
for i in range(5):
axs[i].imshow(U[:, i].reshape((96, 84)), cmap='gray')
plt.show()
@ -107,10 +203,13 @@ def three_b():
# Project images back into original subspace
x_i = np.matmul(y_i, U.T) + mean
# Plot the original images and the projected images
fig, axs = plt.subplots(1, 5)
fig, axs = plt.subplots(2, 5)
fig.suptitle('Original images (top) vs reprojected images (bottom)')
for i in range(5):
axs[i].imshow(imgs[i].reshape((96, 84)), cmap='gray')
axs[0,i].imshow(imgs[i].reshape((96, 84)), cmap='gray')
for i in range(5):
axs[1,i].imshow(x_i[i].reshape((96, 84)), cmap='gray')
plt.show()
img = imgs[0].copy()
@ -124,15 +223,15 @@ def three_b():
# Project image back into original subspace
x_i = np.matmul(y_i, U.T) + mean
# Count the difference in pixels
diff = (np.sum(np.abs(img - x_i)))
# Plot the original image and the projected image
fig, axs = plt.subplots(1, 2)
fig.suptitle(f'Add noise in cell 4074 before projection into PCA subspace, diff: {np.round(diff, 2)}')
axs[0].imshow(img.reshape((96, 84)), cmap='gray')
axs[1].imshow(x_i.reshape((96, 84)), cmap='gray')
plt.show()
# Count the difference in pixels
print(np.sum(np.abs(img - x_i)))
img2 = imgs[0].copy()
# Project image into PCA subspace
@ -143,36 +242,33 @@ def three_b():
x_i = np.matmul(y_i, U.T) + mean
# Plot the original image and the projected images
diff = (np.sum(np.abs(img - x_i)))
fig, axs = plt.subplots(1, 2)
fig.suptitle(f'Add noise in PCA space, diff: {np.round(diff, 2)}')
axs[0].imshow(img2.reshape((96, 84)), cmap='gray')
axs[1].imshow(x_i.reshape((96, 84)), cmap='gray')
plt.show()
# Count the number of pixels that are different
print(np.sum(np.abs(img - x_i)))
def read_images(data_path):
imgs = np.array([])
for filename in os.listdir(data_path):
img = uz_image.imread_gray(os.path.join(data_path, filename), uz_image.ImageType.float64)
# Reshape image into a vector
img = img.reshape((img.shape[0] * img.shape[1], 1))
if imgs.size == 0:
imgs = img
else:
imgs = np.hstack((imgs, img))
return imgs.T
def three_c():
imgs = read_images('./data/faces/1')
U, S, VT, mean = dual_PCA(imgs)
"""
Effect of the number of components on the reconstruction: Take a random
image and project it into the PCA space. Then change the vector in the PCA space
by retaining only the first 32 components and setting the remaining components to
0. Project the resulting vector back to the image space and display it as an image.
Repeat the procedure for the first 16, 8, 4, 2, and one eigenvector. Display the
resulting vectors together on one figure. What do you notice?
"""
imgs = uz_image.read_images('./data/faces/1')
U, _, _, mean = uz_image.dual_PCA(imgs)
img = imgs[0].copy()
values = [32, 16, 8, 4, 2, 1]
for value in values:
fig, axs = plt.subplots( 2, len(values))
fig.suptitle('Reprojection for different number of eigenvectors')
for ix, value in enumerate(values):
# Project image into PCA subspace
y_i = np.matmul(img - mean, U)
y_i[value:] = 0
@ -180,18 +276,30 @@ def three_c():
# Project image back into original subspace
x_i = np.matmul(y_i, U.T) + mean
# Plot the original image and the projected images
fig, axs = plt.subplots(1, 2)
axs[0].imshow(img.reshape((96, 84)), cmap='gray')
axs[1].imshow(x_i.reshape((96, 84)), cmap='gray')
plt.show()
# Count the number of pixels that are different
print(np.sum(np.abs(img - x_i)))
diff = (np.sum(np.abs(img - x_i)))
# Plot the original image and the projected images
axs[0, ix].imshow(img.reshape((96, 84)), cmap='gray')
axs[0, ix].set(title='Original image')
axs[1, ix].imshow(x_i.reshape((96, 84)), cmap='gray')
axs[1, ix].set(title=f'#eigenvectors: {value}, diff: {np.round(diff, 2)}')
plt.show()
def three_e():
imgs = read_images('./data/faces/1')
U, S, VT, mean = dual_PCA(imgs)
"""
Reconstruction of a foreign image: The PCA space is built upon
an array of data. In this task we will check the effect that the transformation to PCA
space has on an image that is not similar to the images that were used to build the
PCA space. Write a script that computes the PCA space for the first face dataset.
Then load the image elephant.jpg, reshape it into a vector and transform it into
the precalculated PCA space, then transform it back to image space. Reshape the
resulting vector back to an image and display both the original and the reconstructed
image. Comment on the results.
"""
imgs = uz_image.read_images('./data/faces/1')
U, _, _, mean = uz_image.dual_PCA(imgs)
elephant = uz_image.imread_gray('./data/elephant.jpg', uz_image.ImageType.float64)
elephant = elephant.reshape((elephant.shape[0] * elephant.shape[1], 1)).T
@ -202,30 +310,60 @@ def three_e():
# Project image back into original subspace
x_i = np.matmul(y_i, U.T) + mean
# Count the number of pixels that are different
diff = (np.sum(np.abs(elephant - x_i)))
# Plot the original image and the projected images
fig, axs = plt.subplots(1, 2)
fig.suptitle(f'Elephant reprojected into FACES subspace, diff: {np.round(diff, 2)}')
axs[0].imshow(elephant.reshape((96, 84)), cmap='gray')
axs[1].imshow(x_i.reshape((96, 84)), cmap='gray')
plt.show()
def three_g():
imgs_1 = uz_image.read_images('./data/faces/1')
imgs_2 = uz_image.read_images('./data/faces/2')
imgs_3 = uz_image.read_images('./data/faces/3')
imgs = np.vstack((imgs_1, imgs_2, imgs_3))
U, S, VT, mean = uz_image.dual_PCA(imgs)
# Reduce dimension of U to 2D
U_2d = U[:, :2].copy()
# Project images into PCA subspace
y_i = np.matmul(imgs - mean, U_2d)
y_i = -y_i
# Plot the projected images
fig, axs = plt.subplots(1, 2)
axs[0].scatter(y_i[:imgs_1.shape[0], 0], y_i[:imgs_1.shape[0], 1], c='r')
axs[0].scatter(y_i[imgs_1.shape[0]:imgs_1.shape[0] + imgs_2.shape[0], 0], y_i[imgs_1.shape[0]:imgs_1.shape[0] + imgs_2.shape[0], 1], c='g')
axs[0].scatter(y_i[imgs_1.shape[0] + imgs_2.shape[0]:, 0], y_i[imgs_1.shape[0] + imgs_2.shape[0]:, 1], c='b')
# ######## #
# SOLUTION #
# ######## #
pts = LDA(U, 3, 64)
# Plot the projected images
axs[1].scatter(pts[:imgs_1.shape[0], 0], pts[:imgs_1.shape[0], 1], c='r')
axs[1].scatter(pts[imgs_1.shape[0]:imgs_1.shape[0] + imgs_2.shape[0], 0], pts[imgs_1.shape[0]:imgs_1.shape[0] + imgs_2.shape[0], 1], c='g')
axs[1].scatter(pts[imgs_1.shape[0] + imgs_2.shape[0]:, 0], pts[imgs_1.shape[0] + imgs_2.shape[0]:, 1], c='b')
plt.show()
def main():
#one_b()
#one_d()
#one_e()
#one_f()
#two_b()
#three_a()
#three_b()
#three_c()
three_e()
#two_c()
#ex2()
#ex3()
#three_g()
if __name__ == '__main__':
main()

View File

@ -8,6 +8,7 @@ import enum
from tqdm import tqdm
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
import os
class ImageType(enum.Enum):
uint8 = 0
@ -1724,13 +1725,14 @@ def drawEllipse(mu, cov, n_std=1):
plt.gca().add_patch(ellipse)
def compute_PCA(points: npt.NDArray[np.float64]):
def compute_PCA(points: npt.NDArray[np.float64], plot: bool = False):
# 1. b)
# Build matrix X
X = points.T.copy()
mean = np.mean(X, axis=1)
plt.scatter(X[0, :], X[1, :])
if (plot):
plt.scatter(X[0, :], X[1, :])
# Center the data
X = X - mean[:, np.newaxis]
@ -1738,21 +1740,22 @@ def compute_PCA(points: npt.NDArray[np.float64]):
# Compute the covariance matrix
cov_matrix = (1/ (X.shape[1] - 1)) * (X @ X.T) # TODO: fix this
drawEllipse(mean, cov_matrix, n_std=1)
if (plot):
drawEllipse(mean, cov_matrix, n_std=1)
U, S, VT = np.linalg.svd(cov_matrix)
# 1. c)
if (plot):
# Plot the eigenvectors
plt.plot([mean[0], mean[0] + U[0,0] *np.sqrt(S[0])], [mean[1], mean[1] + U[0,1] *np.sqrt(S[0])], color='red')
plt.plot([mean[0], mean[0] + U[1,0] *np.sqrt(S[1])], [mean[1], mean[1] + U[1,1] *np.sqrt(S[1])], color='green')
# Plot the eigenvectors
plt.plot([mean[0], mean[0] + U[0,0] *np.sqrt(S[0])], [mean[1], mean[1] + U[0,1] *np.sqrt(S[0])], color='red')
plt.plot([mean[0], mean[0] + U[1,0] *np.sqrt(S[1])], [mean[1], mean[1] + U[1,1] *np.sqrt(S[1])], color='green')
plt.show()
plt.show()
return U, S, VT, mean
def plot_histogram_pca(U, S, VT):
def plot_histogram_pca(U: npt.NDArray[np.float64], S: npt.NDArray[np.float64], VT: npt.NDArray[np.float64]):
eigvals = S.copy()
# Normalize eigenvalues
eigvals = eigvals / np.sum(eigvals)
@ -1760,3 +1763,55 @@ def plot_histogram_pca(U, S, VT):
# Plot the histogram
plt.bar(range(1, len(eigvals) + 1), eigvals, alpha=0.5, align='center')
plt.show()
def dual_PCA(points: npt.NDArray[np.float64]):
X = points.T.copy()
mean = np.mean(X, axis=1)
X = X - mean[:, np.newaxis]
# Compute the covariance matrix
C = (1/(X.shape[1]-1)) * X.T @ X
# Compute the eigenvalues and eigenvectors
U, S, VT = np.linalg.svd(C)
S += 1e-15 # avoid division by zero for eigenvalues that are numerically zero
# Map the dual-space eigenvectors back to the original space and normalise them
U = X @ U @ np.sqrt(np.diag(1/(S * (X.shape[1]-1))))
return U, S, VT, mean
def read_images(data_path: str):
imgs = np.array([])
for filename in os.listdir(data_path):
img = imread_gray(os.path.join(data_path, filename), ImageType.float64)
# Reshape image into a vector
img = img.reshape((img.shape[0] * img.shape[1], 1))
if imgs.size == 0:
imgs = img
else:
imgs = np.hstack((imgs, img))
return imgs.T # one image per row, shape (number of images, m*n)
def LDA(points, c, n):
# Linear discriminant analysis for c classes with n samples per class;
# consecutive blocks of n rows in points are assumed to belong to the same class.
MM = np.mean(points, axis=0) # overall mean (per feature)
Ms = []
Sb = np.zeros((points.shape[1], points.shape[1])) # between-class scatter
Sw = np.zeros((points.shape[1], points.shape[1])) # within-class scatter
for i in range(c):
Ms.append(np.mean(points[i * n:(i + 1) * n], axis=0)) # class mean
Sb += n * np.outer(Ms[i] - MM, Ms[i] - MM)
Sw += np.cov(points[i * n:(i + 1) * n].T)
# Solve the eigenproblem of Sw^-1 @ Sb
Sw_inv = np.linalg.inv(Sw)
SWB = np.matmul(Sw_inv, Sb)
eig_vals, eig_vecs = np.linalg.eig(SWB)
print(eig_vecs)
Ms = np.array(Ms)
# Express the class means in the eigenvector basis and project the data onto these transformed means
Ms = eig_vecs.T.dot(Ms.T).T
return np.matmul(points, Ms.T)