# uz_assignments/assignment6/solution.py

import numpy as np
import numpy.typing as npt
from matplotlib import pyplot as plt
import cv2
import uz_framework.image as uz_image
import uz_framework.text as uz_text
import os

def one_a() -> None:
    """
    Pen-and-paper exercise: eigenvectors and eigenvalues of the four points
    A(3, 4), B(3, 6), C(7, 6) and D(6, 4).

    Nothing is computed here; the function only prints a note saying the
    task was solved by hand.
    """
    message = 'Solved in notes'
    print(message)

def one_b() -> None:
    """
    Task 1b: compute and visualize PCA of the 2D data in points.txt.

    Loads the points from ./data/points.txt (per the assignment text the
    first column is x and the second is y) and passes them to
    uz_image.compute_PCA with plotting switched on. All computation and
    drawing happens inside that helper.
    """
    data_path = './data/points.txt'
    uz_image.compute_PCA(np.loadtxt(data_path), plot=True)

def one_c() -> None:
    """
    Task 1c: show the PCA eigenvectors on top of the point plot.

    The assignment asks for the eigenvectors to be drawn from the data mean,
    scaled by their eigenvalues, the first in red and the second in green.
    This function only loads ./data/points.txt and calls
    uz_image.compute_PCA with plot=True; the drawing itself is presumably
    done inside that helper (not visible here).
    """
    data = np.loadtxt('./data/points.txt')
    show_plot = True

    uz_image.compute_PCA(data, plot=show_plot)

def one_d() -> None:
    """
    Task 1d: run PCA on points.txt and plot the result as a histogram.

    The first three values returned by uz_image.compute_PCA (U, S, VT) are
    forwarded unchanged to uz_image.plot_histogram_pca; the fourth return
    value is ignored.
    """
    data = np.loadtxt('./data/points.txt')

    # Only the decomposition is needed here, so the last return value is dropped.
    U, S, VT, _ = uz_image.compute_PCA(data)
    uz_image.plot_histogram_pca(U, S, VT)

# U is the matrix of eigenvectors
# S is the vector of eigenvalues

def one_e() -> None:
    """
    Task 1e: remove the direction of lowest variance from the 2D data.

    The points are centred with the mean returned by compute_PCA, projected
    onto the first column of U only, and then mapped back to the original
    coordinates. Original and reconstructed points are shown in one scatter
    plot (blue and red respectively).
    """
    data = np.loadtxt('./data/points.txt')
    U, _, _, mean = uz_image.compute_PCA(data)

    # Keep a single column; the 0:1 slice preserves the 2-D shape of the basis.
    first_axis = U[:, 0:1]

    # Coordinates in the reduced subspace, then back in the input space.
    reduced = (data - mean) @ first_axis
    restored = reduced @ first_axis.T + mean

    plt.scatter(data[:, 0], data[:, 1], c='b', label='Original')
    plt.scatter(
        restored[:, 0],
        restored[:, 1],
        c='r',
        label='Projected into a space of first eigenvector',
    )
    plt.legend()

    plt.show()

def one_f() -> None:
    """
    Task 1f: nearest neighbour of the query point [6, 6] before and after
    dropping the second PCA direction.

    First the closest input point is found with Euclidean distance in the
    original space. Then the PCA basis is computed from the input points
    only (the query point is not included), all points and the query are
    projected onto the first column of U and mapped back, and the nearest
    neighbour is searched again among the reconstructed points. Everything
    is drawn into a single scatter plot.
    """
    data = np.loadtxt('./data/points.txt')
    point = np.array([6, 6])

    # Nearest neighbour in the original space
    distances = np.linalg.norm(data - point, axis=1)
    closest_point = data[np.argmin(distances)]

    plt.scatter(data[:, 0], data[:, 1], c='b', label='Original')
    plt.scatter(point[0], point[1], c='g', label=f'Point: {point}')
    plt.scatter(closest_point[0], closest_point[1], c='r', label=f'Closest point: {closest_point}')

    U, _, _, mean = uz_image.compute_PCA(data)

    # Retain only the first column of U (slice keeps it two-dimensional)
    first_axis = U[:, 0:1]

    # Project onto the retained axis and straight back, for the data and the query
    x_i = np.matmul(np.matmul(data - mean, first_axis), first_axis.T) + mean
    point_repr = np.matmul(np.matmul(point - mean, first_axis), first_axis.T) + mean

    # Nearest neighbour among the reconstructed points
    distances = np.linalg.norm(x_i - point_repr, axis=1)
    closest_point = x_i[np.argmin(distances)]

    plt.scatter(x_i[:, 0], x_i[:, 1], c='purple', label='Reprojected')
    plt.scatter(point_repr[0], point_repr[1], c='y', label=f'repr Point: {point_repr}')
    plt.scatter(closest_point[0], closest_point[1], c='orange', label=f'Closest point repr: {closest_point}')

    plt.legend()
    plt.show()

def two_a() -> None:
    """
    Task 2a: dual PCA (Algorithm 2) tested on points.txt.

    The assignment asks for the dual method to be implemented and compared
    with Algorithm 1. This function itself only loads ./data/points.txt and
    discards the result; no PCA routine is called here.
    """
    # The loaded array is intentionally not used.
    np.loadtxt('./data/points.txt')

def two_b():
    """
    Task 2b: round trip through the dual-PCA space.

    Projects the points from points.txt into the space spanned by the U
    returned by uz_image.dual_PCA and back again, then shows the original
    points (left, blue) next to the reconstructed ones (right, red). With a
    correct implementation the two should match up to numerical error.
    """
    data = np.loadtxt('./data/points.txt')
    U, _, _, mean = uz_image.dual_PCA(data)

    # Forward projection followed immediately by the inverse mapping
    coords = (data - mean) @ U
    recovered = coords @ U.T + mean

    fig, (ax_original, ax_recovered) = plt.subplots(1, 2)
    fig.suptitle('DUAL PCA')
    ax_original.scatter(data[:, 0], data[:, 1], c='b', label='Original')
    ax_recovered.scatter(recovered[:, 0], recovered[:, 1], c='r', label='Projected')
    fig.legend()
    plt.show()

def three_a():
    """
    Task 3a: data preparation for PCA on face images.

    The assignment asks for every image of a series to be converted to
    grayscale, flattened into a vector and stacked into one matrix. Here
    that work is delegated to uz_image.read_images for the series in
    ./data/faces/1; the returned value is discarded.
    """
    # Called only to exercise the reader; the result is not kept.
    uz_image.read_images('./data/faces/1')

def three_b():
    """
    Task 3b: dual PCA on the first face series and the effect of perturbing
    an image in image space versus in PCA space.

    Shows four figures in sequence:

    1. The first five columns of U, each reshaped to a 96 x 84 image.
    2. The first five input images (top row) against their reconstructions
       after projecting to PCA space and back (bottom row).
    3. The first image with component 4074 set to 0 in image space, next to
       its reconstruction.
    4. The untouched first image next to a reconstruction for which the
       first PCA coefficient was set to 0.

    The "diff" shown in the titles of figures 3 and 4 is the sum of absolute
    differences between the displayed left and right image (it is not a
    count of changed pixels).
    """
    # Every flattened vector is displayed as an image of this size.
    image_shape = (96, 84)

    imgs = uz_image.read_images('./data/faces/1')
    U, _, _, mean = uz_image.dual_PCA(imgs)

    # Figure 1: the first five basis vectors displayed as images
    fig, axs = plt.subplots(1, 5)
    fig.suptitle('5 strongest eigenvectors')
    for i in range(5):
        axs[i].imshow(U[:, i].reshape(image_shape), cmap='gray')
    plt.show()

    # Project all images into the PCA subspace and back again
    y_i = np.matmul(imgs - mean, U)
    x_i = np.matmul(y_i, U.T) + mean

    # Figure 2: originals on top, reconstructions below
    fig, axs = plt.subplots(2, 5)
    fig.suptitle('Original images(top) vs reprojected images(bottom)')
    for i in range(5):
        axs[0, i].imshow(imgs[i].reshape(image_shape), cmap='gray')
        axs[1, i].imshow(x_i[i].reshape(image_shape), cmap='gray')
    plt.show()

    # --- Perturbation in image space: zero out a single component ---
    img = imgs[0].copy()
    img[4074] = 0

    y_i = np.matmul(img - mean, U)
    x_i = np.matmul(y_i, U.T) + mean

    # Sum of absolute differences between the perturbed image and its reconstruction
    diff = np.sum(np.abs(img - x_i))

    fig, axs = plt.subplots(1, 2)
    fig.suptitle(f'Add noise in cell 4074 before projection into PCA subspace, diff: {np.round(diff, 2)}')
    axs[0].imshow(img.reshape(image_shape), cmap='gray')
    axs[1].imshow(x_i.reshape(image_shape), cmap='gray')
    plt.show()

    # --- Perturbation in PCA space: zero out the first coefficient ---
    img2 = imgs[0].copy()

    y_i = np.matmul(img2 - mean, U)
    # NOTE(review): this zeroes a single coefficient only if y_i is 1-D,
    # i.e. if `mean` broadcasts against a flat image vector - confirm
    # against dual_PCA.
    y_i[0] = 0

    x_i = np.matmul(y_i, U.T) + mean

    # Compare against img2, the image that was actually projected and is
    # shown on the left; comparing against `img` would mix in the unrelated
    # pixel-4074 edit from the previous experiment.
    diff = np.sum(np.abs(img2 - x_i))

    fig, axs = plt.subplots(1, 2)
    fig.suptitle(f'Add noise in PCA space, diff: {np.round(diff, 2)}')
    axs[0].imshow(img2.reshape(image_shape), cmap='gray')
    axs[1].imshow(x_i.reshape(image_shape), cmap='gray')
    plt.show()

def three_c():
    """
    Task 3c: effect of the number of retained components on reconstruction.

    The first image of the ./data/faces/1 series is projected into the
    dual-PCA space, all coefficients from index `value` onward are set to 0
    for value in 32, 16, 8, 4, 2, 1, and the truncated vector is projected
    back. One figure shows the original image (top row) above each
    reconstruction (bottom row); the bottom titles report the sum of
    absolute differences between the two.
    """
    imgs = uz_image.read_images('./data/faces/1')
    U, _, _, mean = uz_image.dual_PCA(imgs)

    img = imgs[0].copy()
    component_counts = [32, 16, 8, 4, 2, 1]

    fig, axs = plt.subplots(2, len(component_counts))
    fig.suptitle('Reprojection for different number of eigenvectors')

    # Transposing the axes grid yields one (top, bottom) pair per column.
    for value, (ax_top, ax_bottom) in zip(component_counts, axs.T):
        # Project, then drop every coefficient past the first `value`
        y_i = (img - mean) @ U
        y_i[value:] = 0

        # Back to image space
        x_i = y_i @ U.T + mean

        # Sum of absolute differences to the original image
        diff = np.sum(np.abs(img - x_i))

        ax_top.imshow(img.reshape((96, 84)), cmap='gray')
        ax_top.set(title='Original image')
        ax_bottom.imshow(x_i.reshape((96, 84)), cmap='gray')
        ax_bottom.set(title=f'#eigenvectors: {value}, diff: {np.round(diff, 2)}')

    plt.show()

def three_e():
    """
    Task 3e: reconstruct an image that does not belong to the face data.

    Builds the dual-PCA space from the ./data/faces/1 series, loads
    ./data/elephant.jpg as a grayscale image, flattens it to a single row
    vector, projects it into the face subspace and back, and shows the
    original next to the reconstruction. The figure title reports the sum
    of absolute differences between the two.
    """
    face_imgs = uz_image.read_images('./data/faces/1')
    U, _, _, mean = uz_image.dual_PCA(face_imgs)

    elephant = uz_image.imread_gray('./data/elephant.jpg', uz_image.ImageType.float64)

    # Flatten to a column vector, then transpose to get one row of pixels
    height, width = elephant.shape[0], elephant.shape[1]
    elephant = elephant.reshape((height * width, 1)).T

    # Into the face subspace and back to image space
    y_i = (elephant - mean) @ U
    x_i = y_i @ U.T + mean

    # Sum of absolute differences between the input and its reconstruction
    diff = np.sum(np.abs(elephant - x_i))

    fig, (ax_input, ax_output) = plt.subplots(1, 2)
    fig.suptitle(f'Elephant reprojected into FACES subspace, diff:{np.round(diff, 2)}')
    # The reshape below requires the elephant image to hold 96 * 84 pixels.
    ax_input.imshow(elephant.reshape((96, 84)), cmap='gray')
    ax_output.imshow(x_i.reshape((96, 84)), cmap='gray')
    plt.show()


def three_g():
    """
    Task 3g: compare a 2D PCA projection of three face series with an LDA
    projection.

    The images from ./data/faces/1, /2 and /3 are stacked row-wise, a dual
    PCA is computed on the stack, and every image is projected onto the
    first two columns of U (with the sign flipped). The left subplot shows
    these 2D points coloured per series (red, green, blue in loading
    order). The right subplot shows the first two columns of the result of
    LDA(U, 3, 64) with the same colouring.
    """
    imgs_1 = uz_image.read_images('./data/faces/1')
    imgs_2 = uz_image.read_images('./data/faces/2')
    imgs_3 = uz_image.read_images('./data/faces/3')

    imgs = np.vstack((imgs_1, imgs_2, imgs_3))

    U, _, _, mean = uz_image.dual_PCA(imgs)

    # Two-column basis; the projection is negated, which only mirrors the plot
    U_2d = U[:, :2].copy()
    y_i = -np.matmul(imgs - mean, U_2d)

    fig, axs = plt.subplots(1, 2)

    # Row ranges of the three series inside the stacked matrix, with colours
    first_end = imgs_1.shape[0]
    second_end = first_end + imgs_2.shape[0]
    series = (
        (slice(None, first_end), 'r'),
        (slice(first_end, second_end), 'g'),
        (slice(second_end, None), 'b'),
    )

    for rows, colour in series:
        axs[0].scatter(y_i[rows, 0], y_i[rows, 1], c=colour)

    # NOTE(review): `LDA` is neither defined nor imported in the visible part
    # of this file, so this call raises NameError unless it is provided
    # elsewhere. The meaning of the arguments (presumably 3 classes with 64
    # samples each) also needs to be confirmed against its implementation.
    pts = LDA(U, 3, 64)

    for rows, colour in series:
        axs[1].scatter(pts[rows, 0], pts[rows, 1], c=colour)

    plt.show()

def main():
    """
    Script entry point: run the currently selected task.

    Only three_e() is active. The remaining calls are kept commented out so
    that a different task can be selected by uncommenting its line.
    """
    # one_b()
    # one_d()
    # one_e()
    # one_f()
    # two_b()
    # three_a()
    # three_b()
    # three_c()
    three_e()
    # three_g()

if __name__ == "__main__":
    # Run the selected task only when executed as a script, not on import.
    main()