Keypoints are now detected

2022-11-29 19:30:39 +01:00 · 2022-11-29 19:30:39 +01:00 · 555da1ce1a
parent 55048a5eb3
commit 555da1ce1a
3 changed files with 164 additions and 11 deletions
--- a/assignment4/cam.py
+++ b/assignment4/cam.py
@ -1,5 +1,8 @@
 import numpy as np
 import numpy.typing as npt
 import cv2
-
+import uz_framework.image as uz_image
 from matplotlib import pyplot as plt
 def start_realtime_keypoint_detection():
    cap = cv2.VideoCapture(0)
@ -8,12 +11,22 @@ def start_realtime_keypoint_detection():
    while True:
        ret, frame = cap.read()
        frame = cv2.resize(frame, None, fx=scaling_factor, fy=scaling_factor, interpolation=cv2.INTER_AREA)
        imagegray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        features = cv2.SIFT_create()
        keypoints = features.detect(imagegray, None)
        output_image = cv2.drawKeypoints(frame, keypoints, 0, (0, 255, 0),flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
-        cv2.imshow('Webcam', output_image)
+        grayscale_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        _, harris_keypoints  = uz_image.hessian_points(grayscale_frame, 12, treshold=1e-6)
        harris_keypoints = np.uint32(harris_keypoints)
        print(harris_keypoints)
        for kp in harris_keypoints:
            x, y = kp.ravel()
            cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)
        cv2.imshow('Harris Corner Detector', frame)
        #output_image = cv2.drawKeypoints(frame, harris_keypoints, 0, (0, 255, 0),flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
        #cv2.imshow('Webcam', output_image)
        c = cv2.waitKey(1)
        if c == 27:
            break
--- a/assignment4/solution.py
+++ b/assignment4/solution.py
@ -11,7 +11,7 @@ import os
 ##############################################
 def ex1():
-	#one_a()
+	one_a()
 	one_b()
 def one_a() -> None:
@ -105,7 +105,8 @@ def two_b() -> None:
 def ex3():
 	#three_a()
-	three_b()
+	#three_b()
 	three_lol()
 def three_a() -> None:
 	"""
@ -184,6 +185,24 @@ def three_b() -> None:
 	uz_image.display_matches(image_a, a, image_b, b)
 def three_lol():
 	"""
 	Run the custom SIFT detector on a single image and plot its keypoints.

 	Loads ``datam/img1.jpg`` as a float64 grayscale image, runs
 	``uz_image.sift`` on it with plotting enabled and scatters the returned
 	keypoints over the image in the right-hand panel of a 1x2 figure.
 	The left-hand panel is left empty.
 	"""
 	image = uz_image.imread_gray("datam/img1.jpg", uz_image.ImageType.float64)
 	# Passing True makes sift() display its own intermediate figures as well.
 	keypoints = uz_image.sift(image, True)
 	_, axs = plt.subplots(1, 2)
 	axs[1].imshow(image, cmap="gray")
 	# Keypoints are (row, column) pairs, whereas scatter expects (x, y).
 	axs[1].scatter(keypoints[:, 1], keypoints[:, 0], s=20)
 	plt.show()
 # ######## #
 # SOLUTION #
--- a/assignment4/uz_framework/image.py
+++ b/assignment4/uz_framework/image.py
@ -1065,8 +1065,10 @@ def find_matches(image_a: npt.NDArray[np.float64],
    """
    # Get the keypoints
-    _, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold)
+    #_, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold)
-    _, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold)
+    #_, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold)
    image_a_keypoints = sift(image_a)
    image_b_keypoints = sift(image_b)
    print("[+] Keypoints detected")
@ -1234,3 +1236,122 @@ def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.f
    best_keypoints = np.concatenate((correspondences_a[best_inliers], correspondences_b[best_inliers]), axis=1)
    return best_homography.astype(np.float64), best_keypoints
 def sift(grayscale_image: npt.NDArray[np.float64], plot=False, *,
          sigma: float = 1.6, contrast_threshold: float = 0.01) -> np.ndarray:
    """
    Detect scale-space extrema of a grayscale image (the detector stage of SIFT).

    The image is repeatedly halved with ``cv2.pyrDown`` to build one octave per
    pyramid level. Every octave is blurred incrementally with ``gaussfilter2D``,
    adjacent blur levels are subtracted to obtain difference-of-Gaussian (DoG)
    layers, and every interior pixel that is the maximum or the minimum of its
    3x3x3 neighbourhood (same layer plus the layers above and below) and whose
    magnitude reaches ``contrast_threshold`` is reported as a keypoint.

    Only keypoint locations are produced; no descriptors are computed.

    Args:
        grayscale_image: 2-D grayscale image. It is copied, never modified.
        plot: When true, show the blurred pyramid and the DoG layers.
        sigma: Base blur scale used to derive the incremental blur sigmas.
        contrast_threshold: Minimum absolute DoG response of a keypoint.

    Returns:
        Integer array of shape (N, 2) holding (row, column) coordinates mapped
        back to the resolution of the input image. Keypoints are ordered by
        octave, then DoG layer, then row, then column. The array has shape
        (0, 2) when nothing is detected.
    """
    # An image with a side shorter than 2 pixels has no octave to build (and
    # would make the log2 / interval computations below fail).
    if grayscale_image.size == 0 or min(grayscale_image.shape) < 2:
        return np.empty((0, 2), dtype=np.intp)

    base_image = grayscale_image.copy()
    # The number of downsampling steps is also used as the number of blur
    # intervals per octave further down.
    num_octaves = int(np.log2(min(base_image.shape)))

    # Image pyramid: the original image followed by successive half-size copies.
    pyramid = [base_image]
    for _ in range(num_octaves):
        pyramid.append(cv2.pyrDown(pyramid[-1]))

    def incremental_blur_sigmas(base_sigma, num_intervals):
        """
        Return the blur needed to go from one scale to the next within an octave.
        """
        num_images_per_octave = num_intervals + 3
        k = 2 ** (1. / num_intervals)
        sigmas = np.zeros(num_images_per_octave)
        sigmas[0] = base_sigma
        for image_index in range(1, num_images_per_octave):
            sigma_previous = (k ** (image_index - 1)) * base_sigma
            sigma_total = k * sigma_previous
            # Gaussian blurs compose in quadrature, so only the missing part
            # between the previous and the target scale is applied.
            sigmas[image_index] = np.sqrt(sigma_total ** 2 - sigma_previous ** 2)
        return sigmas

    def show_grid(image_rows, title):
        """
        Display a list of equally long image rows as a grid of subplots.
        """
        # squeeze=False keeps axs two-dimensional even for a single row.
        fig, axs = plt.subplots(len(image_rows), len(image_rows[0]),
                                figsize=(20, 20), squeeze=False)
        fig.suptitle(title)
        for row_index, image_row in enumerate(image_rows):
            for column_index, image in enumerate(image_row):
                axs[row_index, column_index].imshow(image, cmap='gray')
        plt.show()

    def local_extrema_mask(previous, current, following):
        """
        Mark interior pixels of `current` that are 3x3x3 extrema.

        The returned boolean mask covers only the interior, so index (0, 0)
        of the mask corresponds to pixel (1, 1) of the layer.
        """
        rows, cols = current.shape[0] - 2, current.shape[1] - 2
        center = current[1:-1, 1:-1]
        neighbourhood_max = center.copy()
        neighbourhood_min = center.copy()
        # Running max/min over all 27 shifted views (the centre included, which
        # is harmless because the comparisons below are non-strict).
        for layer in (previous, current, following):
            for dy in range(3):
                for dx in range(3):
                    window = layer[dy:dy + rows, dx:dx + cols]
                    np.maximum(neighbourhood_max, window, out=neighbourhood_max)
                    np.minimum(neighbourhood_min, window, out=neighbourhood_min)
        strong = np.abs(center) >= contrast_threshold
        is_maximum = (center > 0) & (center >= neighbourhood_max)
        is_minimum = (center <= 0) & (center <= neighbourhood_min)
        return strong & (is_maximum | is_minimum)

    # Blur every pyramid level step by step; the first image of each octave
    # is kept unblurred.
    blur_sigmas = incremental_blur_sigmas(sigma, num_octaves)
    octaves = []
    for level in pyramid:
        blurred = [level]
        for step_sigma in blur_sigmas[1:]:
            blurred.append(gaussfilter2D(blurred[-1], step_sigma))
        octaves.append(np.array(blurred))

    if plot:
        show_grid(octaves, 'Downsampled and blurred images')

    # Difference of Gaussians: each layer minus the less blurred one before it.
    dog_octaves = [np.diff(stack, axis=0) for stack in octaves]

    if plot:
        show_grid(dog_octaves, 'Difference of gaussian')

    # Find the keypoints
    keypoint_groups = []
    for octave_index, dog in enumerate(dog_octaves):
        height, width = dog.shape[1:]
        if height < 3 or width < 3:
            # No pixel has a complete 3x3 neighbourhood at this resolution.
            continue
        # Octave i was halved i times, so its coordinates scale back by 2**i.
        scale = 2 ** octave_index
        for layer_index in range(1, len(dog) - 1):
            mask = local_extrema_mask(dog[layer_index - 1], dog[layer_index], dog[layer_index + 1])
            ys, xs = np.nonzero(mask)
            if ys.size:
                # +1 converts interior-mask indices back to layer coordinates.
                keypoint_groups.append(np.column_stack((ys + 1, xs + 1)) * scale)

    if not keypoint_groups:
        return np.empty((0, 2), dtype=np.intp)
    return np.concatenate(keypoint_groups, axis=0)