import numpy as np import numpy.typing as npt from matplotlib import pyplot as plt import cv2 import uz_framework.image as uz_image import uz_framework.text as uz_text import os def one_a() -> None: """ Solve the following assignment by hand for practice: You are given four points A(3, 4), B(3, 6), C(7, 6) and D(6, 4). Calculate the eigenvectors and eigenvalues for the given set of points. """ print('Solved in notes') def one_b() -> None: """ Write a script to calculate and visualize PCA from 2D data from the file points.txt (the first column contains the x axis and the second column the y axis). Plot the points and draw the representation of the Gaussian distribution using drawEllipse from the supplementary material. Follow the Algorithm 1 to compute the eigenvectors and eigenvalues of the PCA subspace. """ points = np.loadtxt('./data/points.txt') uz_image.compute_PCA(points, plot=True) def one_c() -> None: """ The matrix U contains the eigenvectors that represent the basis of our PCA subspace. Draw the eigenvectors on the plot from the previous task. Their origin should lie at the mean of the data µ. Since both vectors have the length 1, a better way for visualizing them is to multiply each vector with the corresponding eigenvalue. Draw the first eigenvector with red and the second with green. """ points = np.loadtxt('./data/points.txt') uz_image.compute_PCA(points, plot=True) def one_d() -> None: points = np.loadtxt('./data/points.txt') U, S, VT, _ = uz_image.compute_PCA(points) uz_image.plot_histogram_pca(U, S, VT) # U is the matrix of eigenvectors # S is the vector of eigenvalues def one_e() -> None: """ Now remove the direction of the lowest variance from the input data. This means we will project the data into the subspace of the first eigenvector. We can do this by transforming the data into the PCA space then setting to 0 the components corresponding to the eigenvectors we want to remove. The resulting points can then be transformed back to the original space. Project each of the input points to PCA space, then project them back to Cartesian space by multiplying them by the diminished matrix U. """ points = np.loadtxt('./data/points.txt') U, _, _, mean = uz_image.compute_PCA(points) # Drop all but first eigenvector U = U[:, 0:1] # Project points into PCA subspace y_i = np.matmul(points - mean, U) # Project points back into original subspace x_i = np.matmul(y_i, U.T) + mean # Plot the original points and the projected points plt.scatter(points[:, 0], points[:, 1], c='b', label='Original') plt.scatter(x_i[:, 0], x_i[:, 1], c='r', label='Projected into a space of first eigenvector') plt.legend() plt.show() def one_f() -> None: """ For the point qpoint = [6, 6]T calculate the closest point from the input data (using Euclidean distance). Which point is the closest? Then, project all the points (including qpoint) to PCA subspace (calculated without qpoint) and remove the variation in the direction of the second vector. Calculate the distances again. Which point is the closest to qpoint now? Visualize the reconstruction. """ points = np.loadtxt('./data/points.txt') point = np.array([6,6]) # Find the closest point using euclidian distance d = np.linalg.norm(points - point, axis=1) closest_point = points[np.argmin(d)] # Plot the original points plt.scatter(points[:,0], points[:,1], c='b', label='Original') plt.scatter(point[0], point[1], c='g', label=f'Point: {point}') plt.scatter(closest_point[0], closest_point[1], c='r', label=f'Closest point: {closest_point}') U, _, _, mean = uz_image.compute_PCA(points) # Drop all but first eigenvector U = U[:, 0:1] # Reproject points y_i = np.matmul(points - mean, U) # Reproject back x_i = np.matmul(y_i, U.T) + mean # Reproject point point_repr = np.matmul(point - mean, U) point_repr = np.matmul(point_repr, U.T) + mean # Find the closest point using euclidian distance d = np.linalg.norm(x_i - point_repr, axis=1) closest_point = x_i[np.argmin(d)] # Plot tht reprojected points plt.scatter(x_i[:,0], x_i[:,1], c='purple', label='Reprojected') plt.scatter(point_repr[0], point_repr[1], c='y', label=f'repr Point: {point_repr}') plt.scatter(closest_point[0], closest_point[1], c='orange', label=f'Closest point repr: {closest_point}') plt.legend() plt.show() def two_a() -> None: """ For our requirements it is necessary only to correctly calculate eigenvectors and eigenvalues up to the scale factor. Therefore implement the dual method according to the Algorithm 2 and test it using the data from points.txt. The first two eigenvectors should be the same as with the Algorithm 1. The Algorithm 2 gives you a larger matrix U, however, all eigenvectors but the first two equal to zero. """ _ = np.loadtxt('./data/points.txt') def two_b(): """ Project the data from the previous assignment to the PCA space using matrix U, and then project the data back again in the original space. If you have implemented the method correctly, you will get the original data (up to the numerical error). """ points = np.loadtxt('./data/points.txt') U, _, _, mean = uz_image.dual_PCA(points) y_i = np.matmul(points - mean, U) # Project points back into original subspace x_i = np.matmul(y_i, U.T) + mean # Plot the original points and the projected points fig, axs = plt.subplots(1,2) fig.suptitle('DUAL PCA') axs[0].scatter(points[:, 0], points[:, 1], c='b', label='Original') axs[1].scatter(x_i[:, 0], x_i[:, 1], c='r', label='Projected') fig.legend() plt.show() def three_a(): """ Data preparation: Firstly, we have to formulate the problem in a way that is compatible with the PCA method. Since PCA operates on points in space, we can represent a grayscale image of size m × n as a point in mn-dimensional space if we reshape it into a vector. Write a function that reads all the images from one of the series transforms them into grayscale reshapes them using np.reshape and stacks the resulting column vectors into a matrix of size mn × 64 """ _ = uz_image.read_images('./data/faces/1') def three_b(): """ Using dual PCA: Use dual PCA on the vectors of images. Write a function that takes the matrix of image vectors as input and returns the eigenvectors of the PCA subspace and the mean of the input data. Note: In step 5 of Algorithm 2 be careful when computing the inverse of the S as some of the eigenvalues can be very close to 0. Division by zero can cause numerical errors when computing a matrix inverse. You have to take into account that the matrix S is a diagonal matrix and must therefore have non-zero diagonal elements. One way of solving this numerical problem is that we add a very small constant value to the diagonal elements, e.g. 10−15. Transform the first five eigenvectors using the np.reshape function back into a matrix and display them as images. What do the resulting images represent (both numerically and in the context of faces)? Project the first image from the series to the PCA space and then back again. Is the result the same? What do you notice when you change one dimension of the vector in the image space (e.g. component with index 4074) to 0 and display the image? Repeat a similar procedure for a vector in the PCA space (project image in the PCA space, change one of the first five components to zero and project the image back in the image space and display it as an image). What is the difference? How many pixels are changed by the first operation and how many by the second """ imgs = uz_image.read_images('./data/faces/1') U, _, _, mean = uz_image.dual_PCA(imgs) # Plot those eigenvectors fig, axs = plt.subplots(1, 5) fig.suptitle('5 strongest eigenvectors') for i in range(5): axs[i].imshow(U[:, i].reshape((96, 84)), cmap='gray') plt.show() # Project images into PCA subspace y_i = np.matmul(imgs - mean, U) # Project images back into original subspace x_i = np.matmul(y_i, U.T) + mean fig, axs = plt.subplots(2, 5) fig.suptitle('Original images(top) vs reprojected images(bottom)') for i in range(5): axs[0,i].imshow(imgs[i].reshape((96, 84)), cmap='gray') for i in range(5): axs[1,i].imshow(x_i[i].reshape((96, 84)), cmap='gray') plt.show() img = imgs[0].copy() # Add noise to index 4074 img[4074] = 0 # Project image into PCA subspace y_i = np.matmul(img - mean, U) # Project image back into original subspace x_i = np.matmul(y_i, U.T) + mean # Count the difference in pixels diff = (np.sum(np.abs(img - x_i))) # Plot the original image and the projected image fig, axs = plt.subplots(1, 2) fig.suptitle(f'Add noise in cell 4074 before projection into PCA subspace, diff: {np.round(diff, 2)}') axs[0].imshow(img.reshape((96, 84)), cmap='gray') axs[1].imshow(x_i.reshape((96, 84)), cmap='gray') plt.show() img2 = imgs[0].copy() # Project image into PCA subspace y_i = np.matmul(img2 - mean, U) y_i[0] = 0 # Project image back into original subspace x_i = np.matmul(y_i, U.T) + mean # Plot the original image and the projected images diff = (np.sum(np.abs(img - x_i))) fig, axs = plt.subplots(1, 2) fig.suptitle(f'Add noise in PCA space, diff: {np.round(diff, 2)}') axs[0].imshow(img2.reshape((96, 84)), cmap='gray') axs[1].imshow(x_i.reshape((96, 84)), cmap='gray') plt.show() def three_c(): """ Effect of the number of components on the reconstruction: Take a random image and project it into the PCA space. Then change the vector in the PCA space by retaining only the first 32 components and setting the remaining components to 0. Project the resulting vector back to the image space and display it as an image. Repeat the procedure for the first 16, 8, 4, 2, and one eigenvector. Display the resulting vectors together on one figure. What do you notice? """ imgs = uz_image.read_images('./data/faces/1') U, _, _, mean = uz_image.dual_PCA(imgs) img = imgs[0].copy() values = [32, 16, 8, 4, 2, 1] fig, axs = plt.subplots( 2, len(values)) fig.suptitle('Reprojection for different number of eigenvectors') for ix, value in enumerate(values): # Projcet image into PCA subspace y_i = np.matmul(img - mean, U) y_i[value:] = 0 # Project image back into original subspace x_i = np.matmul(y_i, U.T) + mean # Count the number of pixels that are different diff = (np.sum(np.abs(img - x_i))) # Plot the original image and the projected images axs[0, ix].imshow(img.reshape((96, 84)), cmap='gray') axs[0, ix].set(title='Original image') axs[1, ix].imshow(x_i.reshape((96, 84)), cmap='gray') axs[1, ix].set(title=f'#eigenvectors: {value}, diff: {np.round(diff, 2)}') plt.show() def three_e(): """ Reconstruction of a foreign image: The PCA space is build upon an array of data. In this task we will check the effect that the transformation to PCA space has on an image that is not similar to the images that were used to build the PCA space. Write a script that computes the PCA space for the first face dataset. Then load the image elephant.jpg, reshape it into a vector and transform it into the precalculated PCA space, then transform it back to image space. Reshape the resulting vector back to an image and display both the original and the reconstructed image. Comment on the results """ imgs = uz_image.read_images('./data/faces/1') U, _, _, mean = uz_image.dual_PCA(imgs) elephant = uz_image.imread_gray('./data/elephant.jpg', uz_image.ImageType.float64) elephant = elephant.reshape((elephant.shape[0] * elephant.shape[1], 1)).T # Project image into PCA subspace y_i = np.matmul(elephant - mean, U) # Project image back into original subspace x_i = np.matmul(y_i, U.T) + mean # Count the number of pixels that are different diff = (np.sum(np.abs(elephant - x_i))) # Plot the original image and the projected images fig, axs = plt.subplots(1, 2) fig.suptitle(f'Elephant reprojected into FACES subspace, diff:{np.round(diff, 2)}') axs[0].imshow(elephant.reshape((96, 84)), cmap='gray') axs[1].imshow(x_i.reshape((96, 84)), cmap='gray') plt.show() def three_g(): imgs_1 = uz_image.read_images('./data/faces/1') imgs_2 = uz_image.read_images('./data/faces/2') imgs_3 = uz_image.read_images('./data/faces/3') imgs = np.vstack((imgs_1, imgs_2, imgs_3)) U, S, VT, mean = uz_image.dual_PCA(imgs) # Reduce dimension of U to 2D U_2d = U[:, :2].copy() # Project images into PCA subspace y_i = np.matmul(imgs - mean, U_2d) y_i = -y_i # Plot the projected images fig, axs = plt.subplots(1, 2) axs[0].scatter(y_i[:imgs_1.shape[0], 0], y_i[:imgs_1.shape[0], 1], c='r') axs[0].scatter(y_i[imgs_1.shape[0]:imgs_1.shape[0] + imgs_2.shape[0], 0], y_i[imgs_1.shape[0]:imgs_1.shape[0] + imgs_2.shape[0], 1], c='g') axs[0].scatter(y_i[imgs_1.shape[0] + imgs_2.shape[0]:, 0], y_i[imgs_1.shape[0] + imgs_2.shape[0]:, 1], c='b') pts = LDA(U, 3, 64) # Plot the projected images axs[1].scatter(pts[:imgs_1.shape[0], 0], pts[:imgs_1.shape[0], 1], c='r') axs[1].scatter(pts[imgs_1.shape[0]:imgs_1.shape[0] + imgs_2.shape[0], 0], pts[imgs_1.shape[0]:imgs_1.shape[0] + imgs_2.shape[0], 1], c='g') axs[1].scatter(pts[imgs_1.shape[0] + imgs_2.shape[0]:, 0], pts[imgs_1.shape[0] + imgs_2.shape[0]:, 1], c='b') plt.show() def main(): #one_b() #one_d() #one_e() #one_f() #two_b() #three_a() #three_b() #three_c() three_e() #three_g() if __name__ == '__main__': main()