3f done
|
@ -0,0 +1,119 @@
|
|||
import cv2
|
||||
import numpy as np
|
||||
import os
|
||||
import uz_framework.image as uz
|
||||
|
||||
# Directory that main() passes to read_images() to collect the reference face photos.
# NOTE(review): './datam/me/' may be a typo for './data/me/' — confirm against the repo layout.
IMAGES_FOLDER = './datam/me/'
|
||||
|
||||
|
||||
|
||||
def read_images(path):
    """Load every image in ``path`` and return the face crops found in them.

    Each directory entry is read as 8-bit grayscale, faces are located with
    the frontal-face Haar cascade, and a fixed 300x280 window anchored 20 px
    above and 30 px to the left of each detection is cut out.

    Entries that OpenCV cannot decode are skipped, and so are detections
    whose window would not fit inside the image. Every returned crop
    therefore has exactly the same shape, which the later flattening and
    stacking in ``train`` relies on.

    Args:
        path: Directory whose entries are all attempted as images.

    Returns:
        ``np.ndarray`` with one 300x280 crop per accepted detection
        (shape ``(n, 300, 280)``), or an empty array when nothing usable
        was found.
    """
    crop_h, crop_w = 300, 280
    y_offset, x_offset = 20, 30

    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

    faces = []
    # Sorted so the order of the crops does not depend on the arbitrary order
    # in which the operating system lists the directory.
    for name in sorted(os.listdir(path)):
        # Flag 0 asks OpenCV for a single-channel grayscale image.
        image = cv2.imread(os.path.join(path, name), 0)
        if image is None:
            # cv2.imread signals "could not decode" by returning None.
            continue

        for (x, y, w, h) in face_cascade.detectMultiScale(image, 1.3, 5):
            top = y - y_offset
            left = x - x_offset
            if top < 0 or left < 0:
                # A negative slice start would wrap around to the far edge.
                continue

            crop = image[top:top + crop_h, left:left + crop_w]
            if crop.shape != (crop_h, crop_w):
                # The window runs past the bottom/right border of the image.
                continue

            faces.append(crop.copy())

    return np.array(faces)
|
||||
|
||||
def train(faces):
    """Build the PCA model from a collection of equally sized face crops.

    Every crop is downsampled with ``cv2.pyrDown`` and flattened into a
    vector. The vectors are stacked into one matrix (one face per column),
    its shape is printed, and the transposed matrix (one face per row) is
    handed to ``uz.dual_PCA``.

    Args:
        faces: Iterable of 2-D grayscale face images that all share one shape.

    Returns:
        Tuple ``(U, mean)``: the first and the last of the four values
        returned by ``uz.dual_PCA``.

    Raises:
        ValueError: If ``faces`` contains no images.
    """
    columns = [cv2.pyrDown(face).reshape(-1) for face in faces]
    if not columns:
        raise ValueError('train() needs at least one face image')

    # Stack once at the end; growing the matrix with np.hstack inside the
    # loop re-copies everything gathered so far on every iteration.
    fcc = np.column_stack(columns)

    print(fcc.shape)
    U, _, _, mean = uz.dual_PCA(fcc.T)

    return U, mean
|
||||
|
||||
def recognize(U, mean, threshold=8000):
    """Run live face recognition on the default webcam until 'q' is pressed.

    For every frame, faces are detected with the frontal-face Haar cascade.
    A fixed 300x280 window around each detection is downsampled, flattened,
    projected with ``U`` after subtracting ``mean``, and projected back.
    When the L2 distance between the flattened crop and its reconstruction
    is below ``threshold``, the face is labelled on the displayed frame.

    Args:
        U: Projection matrix returned by ``train``.
        mean: Mean vector returned by ``train``.
        threshold: Largest reconstruction error that still counts as a
            match. Defaults to the value that was previously hard-coded.

    Returns:
        None. The function shows frames in a window named ``'frame'`` and
        returns when 'q' is pressed or the camera stops delivering frames.
    """
    crop_h, crop_w = 300, 280
    y_offset, x_offset = 20, 30

    # Open webcam stream
    cam = cv2.VideoCapture(0)
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

    try:
        while True:
            ok, frame = cam.read()
            if not ok:
                # No frame was delivered (camera missing, unplugged or the
                # stream ended); converting a None frame would crash.
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            for (x, y, w, h) in face_cascade.detectMultiScale(gray, 1.3, 5):
                top = y - y_offset
                left = x - x_offset
                if top < 0 or left < 0:
                    # A negative slice start would wrap around the frame.
                    continue

                crop = gray[top:top + crop_h, left:left + crop_w].copy()
                if crop.shape[0] != crop_h or crop.shape[1] != crop_w:
                    # Window does not fit in the frame; the model only
                    # accepts vectors of the training size.
                    continue

                # Same preprocessing as in training: downsample, flatten.
                vec = cv2.pyrDown(crop).reshape(-1)

                # Project into the PCA subspace and back to image space.
                y_i = np.matmul(vec - mean, U)
                x_i = np.matmul(y_i, U.T) + mean

                # Reconstruction error between the crop and its projection.
                error = np.linalg.norm(vec - x_i)

                if error < threshold:
                    cv2.putText(frame, 'Recognized Gasper', (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

                # NOTE(review): the rectangle is drawn for every face that
                # reaches this point, matched or not — confirm that this is
                # the intended nesting.
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)

            # Display the frame
            cv2.imshow('frame', frame)

            # Exit if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always hand the camera back and close the preview window, even
        # when the loop ends through an exception.
        cam.release()
        cv2.destroyAllWindows()
|
||||
|
||||
def main():
    """Collect the reference faces, fit the PCA model and start the webcam loop."""
    basis, center = train(read_images(IMAGES_FOLDER))
    recognize(basis, center)


if __name__ == '__main__':
    main()
|
||||
|
After Width: | Height: | Size: 66 KiB |
After Width: | Height: | Size: 84 KiB |
After Width: | Height: | Size: 65 KiB |
After Width: | Height: | Size: 80 KiB |
After Width: | Height: | Size: 82 KiB |
After Width: | Height: | Size: 83 KiB |
After Width: | Height: | Size: 70 KiB |
After Width: | Height: | Size: 108 KiB |
After Width: | Height: | Size: 82 KiB |
After Width: | Height: | Size: 257 KiB |
After Width: | Height: | Size: 96 KiB |
After Width: | Height: | Size: 252 KiB |
After Width: | Height: | Size: 105 KiB |
After Width: | Height: | Size: 96 KiB |
After Width: | Height: | Size: 90 KiB |
After Width: | Height: | Size: 243 KiB |
After Width: | Height: | Size: 89 KiB |
After Width: | Height: | Size: 64 KiB |
After Width: | Height: | Size: 77 KiB |
After Width: | Height: | Size: 77 KiB |
|
@ -287,6 +287,57 @@ def three_c():
|
|||
|
||||
plt.show()
|
||||
|
||||
def three_d():
    """
    Informativeness of each component: Each eigenvector holds information
    that defines some aspect of the PCA space. By changing the values of a
    vector in a periodic way, we can observe how different weights for an
    eigenvector affect the reconstructed image.

    Use the second series of images for this task. Take the average photo
    that you compute based on all images in series 2. Project the average
    image to PCA space. Then, select one of the more important eigenvectors
    and manually set its corresponding weight in the projected vector to
    some value of your choice. Project the modified vector back to image
    space and observe the change.

    In order to see the changes easier, write a script that goes over a
    range of values for your selected eigenvector. To smoothly change the
    values, use np.sin and np.linspace as well as some scaling factor x to
    show the differences more strongly. Also, use plt.draw in combination
    with plt.pause to display the results as an animated sequence of images.

    Hint: We recommend the value range of about [-10, 10] and the scaling
    factor of around 3000.

    Modify the script to change two parameters at the same time, in a
    circular way (i.e. using both np.sin and np.cos). Experiment with
    different eigenvector pairs and report your observations.
    """
    imgs = uz_image.read_images('./data/faces/2')

    # Perform dual PCA
    U, _, _, mean = uz_image.dual_PCA(imgs)

    # Compute the average image
    avg_img = np.mean(imgs, axis=0)

    # The projection of the average image is the same for every frame, so
    # it is computed once and copied before its weights are overwritten.
    base_weights = np.matmul(avg_img - mean, U)

    scale = 3000
    angles = np.linspace(-10, 10, 100)

    for sin_value, cos_value in zip(np.sin(angles), np.cos(angles)):
        # Drive the first two weights around a circle.
        y_i = base_weights.copy()
        y_i[0] = sin_value * scale
        y_i[1] = cos_value * scale

        # Project the modified vector back into image space.
        x_i = np.matmul(y_i, U.T) + mean

        # Clear the axes first; otherwise every frame stacks one more image
        # artist on top of the previous ones.
        plt.cla()
        plt.imshow(x_i.reshape((96, 84)), cmap='gray')
        plt.draw()
        plt.pause(0.001)
|
||||
|
||||
def three_e():
|
||||
"""
|
||||
Reconstruction of a foreign image: The PCA space is built upon
|
||||
|
@ -320,6 +371,25 @@ def three_e():
|
|||
axs[1].imshow(x_i.reshape((96, 84)), cmap='gray')
|
||||
plt.show()
|
||||
|
||||
def three_f():
    """
    Recognition with a subspace: PCA subspaces can be used in a simple
    object recognition scenario. Generally, if an image is similar to the
    images used to build the PCA subspace, the reconstruction error incurred
    when projecting to said PCA space should be small. In this task, you
    will try to implement a simple facial recognition system based on this
    property.

    Use a camera to capture images (10 at the very least) of your face with
    varying illumination and facial expressions. Resize them to a common
    resolution and try to align them by eye location. Construct a PCA space
    from these images. Then, write a script that captures images from your
    webcam, detects faces in them (you can use something from OpenCV like
    Haar cascades) and extracts the regions that contain faces. Reshape
    these regions to the correct size, transform them to the precalculated
    PCA space and back to image space. Then, based on some similarity
    measure (can be simple L2 norm) and some threshold, decide whether a
    region contains your face or not. Display the information on the image
    stream.

    Note: You can prepare a video that demonstrates the performance of your
    system.
    """
    # This task has no code here: it is implemented in the cam.py file.
|
||||
|
||||
def three_g():
|
||||
imgs_1 = uz_image.read_images('./data/faces/1')
|
||||
|
@ -344,7 +414,7 @@ def three_g():
|
|||
axs[0].scatter(y_i[imgs_1.shape[0] + imgs_2.shape[0]:, 0], y_i[imgs_1.shape[0] + imgs_2.shape[0]:, 1], c='b')
|
||||
|
||||
|
||||
pts = LDA(U, 3, 64)
|
||||
pts = uz_image.LDA(U, 3, 64)
|
||||
|
||||
# Plot the projected images
|
||||
axs[1].scatter(pts[:imgs_1.shape[0], 0], pts[:imgs_1.shape[0], 1], c='r')
|
||||
|
@ -362,7 +432,8 @@ def main():
|
|||
#three_a()
|
||||
#three_b()
|
||||
#three_c()
|
||||
three_e()
|
||||
#three_d()
|
||||
#three_e()
|
||||
#three_g()
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -1811,7 +1811,7 @@ def dual_PCA(points: npt.NDArray[np.float64]):
|
|||
U = X @ U @ np.sqrt(np.diag(1/(S * (X.shape[1]-1))))
|
||||
return U, S, VT, mean
|
||||
|
||||
def read_images(data_path: str):
|
||||
def read_images(data_path: str) -> npt.NDArray[np.float64]:
|
||||
"""
|
||||
Read images from directory
|
||||
Accepts path to directory
|
||||
|
@ -1828,7 +1828,7 @@ def read_images(data_path: str):
|
|||
imgs = np.hstack((imgs, img))
|
||||
return imgs.T
|
||||
|
||||
def LDA(points, c, n):
|
||||
def LDA(points, c, n) -> npt.NDArray[np.float64]:
|
||||
"""
|
||||
LDA algorithm
|
||||
Accepts points, number of participants in every class, number of classes
|
||||
|
|