main
Spagnolo Gasper 2022-12-27 14:41:02 +01:00
parent 60239a74dd
commit 2cfc29de10
24 changed files with 35906 additions and 4 deletions

119
assignment6/cam.py Normal file
View File

@@ -0,0 +1,119 @@
import cv2
import numpy as np
import os
import uz_framework.image as uz
IMAGES_FOLDER = './datam/me/'


def read_images(path):
    # Collect the paths to all images in the directory
    image_paths = [os.path.join(path, f) for f in os.listdir(path)]
    images = []
    for image_path in image_paths:
        # Read the image directly as grayscale and keep it as a uint8 numpy array
        image = cv2.imread(image_path, 0)
        images.append(np.array(image, 'uint8'))

    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    faces = []
    for image in images:
        # Detect faces in the image
        detections = face_cascade.detectMultiScale(image, 1.3, 5)
        for (x, y, w, h) in detections:
            # Crop a fixed-size region around the detected face
            H = 300
            W = 280
            y_offset = 20
            x_offset = 30
            face = image[y-y_offset:(y-y_offset)+H, x-x_offset:(x-x_offset)+W].copy()
            # Skip crops that run off the image border so the stacked array stays rectangular
            if face.shape[0] != H or face.shape[1] != W:
                continue
            faces.append(face)
    return np.array(faces)


def train(faces):
    # Construct the PCA subspace of the faces:
    # downsample each face and stack the flattened images as columns of one matrix
    fcc = np.array([])
    for face in faces:
        face = cv2.pyrDown(face)
        face = face.reshape((face.shape[0] * face.shape[1]), 1)
        if fcc.size == 0:
            fcc = face
        else:
            fcc = np.hstack((fcc, face))
    print(fcc.shape)
    U, _, _, mean = uz.dual_PCA(fcc.T)
    return U, mean


def recognize(U, mean):
    # Open the webcam stream
    cam = cv2.VideoCapture(0)
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    while True:
        # Read a frame and convert it to grayscale
        _, frame = cam.read()
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Detect faces
        detections = face_cascade.detectMultiScale(gray, 1.3, 5)
        for (x, y, w, h) in detections:
            # Crop the face with the same fixed window used during training
            H = 300
            W = 280
            y_offset = 20
            x_offset = 30
            face = gray[y-y_offset:(y-y_offset)+H, x-x_offset:(x-x_offset)+W].copy()
            if face.shape[0] != H or face.shape[1] != W:
                continue
            # Downsample the crop and reshape it into a vector
            face = cv2.pyrDown(face)
            face = face.reshape(-1)
            # Project the face into the PCA subspace
            y_i = np.matmul(face - mean, U)
            # Project it back into the original image space
            x_i = np.matmul(y_i, U.T) + mean
            # Compute the L2 norm between the original crop and its reconstruction
            norm = np.linalg.norm(face - x_i)
            # If the reconstruction error is below the threshold (8000), the face is recognized
            if norm < 8000:
                cv2.putText(frame, 'Recognized Gasper', (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
            # Draw a rectangle around the detected face
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        # Display the frame
        cv2.imshow('frame', frame)
        # Exit if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cam.release()
    cv2.destroyAllWindows()


def main():
    faces = read_images(IMAGES_FOLDER)
    U, mean = train(faces)
    recognize(U, mean)


if __name__ == '__main__':
    main()
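
cam.py relies on uz.dual_PCA from the uz_framework package; only a one-line fragment of that routine appears in the uz_framework/image.py diff further down. For orientation, a minimal dual-PCA sketch that is compatible with the way cam.py calls it might look as follows (the function name, the SVD of the small dual covariance matrix and the exact normalization are assumptions made for illustration, not the framework's actual implementation):

import numpy as np

def dual_pca_sketch(points):
    # points: (N, M) matrix with one flattened image per row, N << M
    mean = np.mean(points, axis=0)
    X = (points - mean).T                       # (M, N), columns are centered samples
    N = X.shape[1]
    C = (1 / (N - 1)) * (X.T @ X)               # small (N, N) dual covariance matrix
    U_dual, S, VT = np.linalg.svd(C)
    S = S + 1e-15                               # guard against division by zero
    # map the dual eigenvectors back into the original M-dimensional image space
    U = X @ U_dual @ np.sqrt(np.diag(1 / (S * (N - 1))))
    return U, S, VT, mean

With a basis built this way, np.matmul(x - mean, U) projects an image into the subspace and np.matmul(y, U.T) + mean reconstructs it, which is exactly how train() and recognize() above use U and mean.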

20 binary image files added (not shown); sizes range from 64 KiB to 257 KiB.

File diff suppressed because it is too large.

View File

@@ -287,6 +287,57 @@ def three_c():
    plt.show()


def three_d():
    """
    Informativeness of each component: Each eigenvector holds information that
    defines some aspect of the PCA space. By changing the values of a vector in a
    periodic way, we can observe how different weights for an eigenvector affect
    the reconstructed image.

    Use the second series of images for this task. Take the average photo computed
    from all images in the series. Project the average image to PCA space. Then,
    select one of the more important eigenvectors and manually set its corresponding
    weight in the projected vector to some value of your choice. Project the
    modified vector back to image space and observe the change.

    To see the changes more easily, write a script that goes over a range of values
    for your selected eigenvector. To smoothly change the values, use np.sin and
    np.linspace as well as some scaling factor x to show the differences more
    strongly. Also, use plt.draw in combination with plt.pause to display the
    results as an animated sequence of images.

    Hint: We recommend a value range of about [-10, 10] and a scaling factor of
    around 3000.

    Modify the script to change two parameters at the same time, in a circular way
    (i.e. using both np.sin and np.cos). Experiment with different eigenvector
    pairs and report your observations.
    """
    imgs = uz_image.read_images('./data/faces/2')
    # Perform dual PCA
    U, _, _, mean = uz_image.dual_PCA(imgs)
    # Compute the average image
    avg_img = np.mean(imgs, axis=0)

    def pplot(U, avg_img, mean):
        # Create a linspace of sine and cosine values
        x = np.linspace(-10, 10, 100)
        sinx = np.sin(x)
        cosx = np.cos(x)
        for ix, value in enumerate(sinx):
            # Project the average image into the PCA subspace
            y_i = np.matmul(avg_img - mean, U)
            # Modify the weights of the first two eigenvectors in a circular way
            y_i[0] = value * 3000
            y_i[1] = cosx[ix] * 3000
            # Project the image back into the original space
            x_i = np.matmul(y_i, U.T) + mean
            # Show the reconstruction as an animated sequence
            plt.imshow(x_i.reshape((96, 84)), cmap='gray')
            plt.draw()
            plt.pause(0.001)

    pplot(U, avg_img, mean)


def three_e():
    """
    Reconstruction of a foreign image: The PCA space is built upon
@@ -320,6 +371,25 @@ def three_e():
    axs[1].imshow(x_i.reshape((96, 84)), cmap='gray')
    plt.show()


def three_f():
    """
    Recognition with a subspace: PCA subspaces can be used in a simple object
    recognition scenario. Generally, if an image is similar to the images used to
    build the PCA subspace, the reconstruction error incurred when projecting to
    said PCA space should be small. In this task, you will try to implement a
    simple facial recognition system based on this property.

    Use a camera to capture images (10 at the very least) of your face with varying
    illumination and facial expressions. Resize them to a common resolution and try
    to align them by eye location. Construct a PCA space from these images. Then,
    write a script that captures images from your webcam, detects faces in them
    (you can use something from OpenCV like Haar cascades) and extracts the regions
    that contain faces. Reshape these regions to the correct size, transform them
    to the precalculated PCA space and back to image space. Then, based on some
    similarity measure (can be a simple L2 norm) and some threshold, decide whether
    a region contains your face or not. Display the information on the image stream.

    Note: You can prepare a video that demonstrates the performance of your system.
    """
    # implemented in cam.py file
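
# For quick reference, the recognition rule described in the docstring above and
# implemented in cam.py reduces to: project the face crop into the PCA subspace,
# project it back, and threshold the L2 reconstruction error. The helper below is
# only an illustrative restatement (the function name is made up here; the default
# threshold is the value used in cam.py), not part of the assignment framework.
def is_known_face(face_vec, U, mean, threshold=8000):
    y_i = np.matmul(face_vec - mean, U)       # project into the PCA subspace
    x_i = np.matmul(y_i, U.T) + mean          # reconstruct in image space
    return np.linalg.norm(face_vec - x_i) < threshold
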
def three_g():
    imgs_1 = uz_image.read_images('./data/faces/1')
@@ -344,7 +414,7 @@ def three_g():
    axs[0].scatter(y_i[imgs_1.shape[0] + imgs_2.shape[0]:, 0], y_i[imgs_1.shape[0] + imgs_2.shape[0]:, 1], c='b')
    pts = LDA(U, 3, 64)
    pts = uz_image.LDA(U, 3, 64)
    # Plot the projected images
    axs[1].scatter(pts[:imgs_1.shape[0], 0], pts[:imgs_1.shape[0], 1], c='r')
@@ -362,7 +432,8 @@ def main():
    #three_a()
    #three_b()
    #three_c()
    three_e()
    #three_d()
    #three_e()
    #three_g()
if __name__ == '__main__':

View File

@@ -1811,7 +1811,7 @@ def dual_PCA(points: npt.NDArray[np.float64]):
    U = X @ U @ np.sqrt(np.diag(1/(S * (X.shape[1]-1))))
    return U, S, VT, mean

def read_images(data_path: str):
def read_images(data_path: str) -> npt.NDArray[np.float64]:
    """
    Read images from directory
    Accepts a path to a directory
@@ -1828,7 +1828,7 @@ def read_images(data_path: str):
        imgs = np.hstack((imgs, img))
    return imgs.T

def LDA(points, c, n):
def LDA(points, c, n) -> npt.NDArray[np.float64]:
    """
    LDA algorithm
    Accepts points, the number of participants in every class, and the number of classes