main
Spagnolo Gasper 2022-11-30 16:15:07 +01:00
parent 9bd3ba14ab
commit d400fb0a69
4 changed files with 83 additions and 61 deletions

View File

@ -4,35 +4,35 @@ import cv2
import uz_framework.image as uz_image
from matplotlib import pyplot as plt
VIDEO_PATH = "./datam/raw_recording.webm"
MY_SIFT = True


def start_realtime_keypoint_detection():
    """Run two keypoint-detection demos back to back.

    Phase 1: grab frames from the default webcam, detect Hessian points
    with the project's ``uz_image`` helpers and draw them live until ESC
    is pressed.

    Phase 2: read ``VIDEO_PATH`` frame by frame, detect SIFT keypoints
    (the project's implementation when ``MY_SIFT`` is true, otherwise
    OpenCV's) and write each annotated frame to ``./datam/frames/``.
    """
    # --- Phase 1: live webcam Hessian-point demo ------------------------
    cap = cv2.VideoCapture(0)
    scaling_factor = 1
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera unavailable / read failure: the original passed a
                # None frame to cv2.resize and crashed here.
                break
            frame = cv2.resize(frame, None, fx=scaling_factor,
                               fy=scaling_factor,
                               interpolation=cv2.INTER_AREA)
            # OpenCV delivers BGR frames, so COLOR_BGR2GRAY is the correct
            # conversion code (the original used COLOR_RGB2GRAY, which
            # swaps the R/B channel weights in the grayscale result).
            grayscale_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            _, hessian_keypoints = uz_image.hessian_points(
                grayscale_frame, 12, treshold=1e-6)
            hessian_keypoints = np.uint32(hessian_keypoints)
            for kp in hessian_keypoints:
                # NOTE(review): the SIFT path below unpacks uz_image
                # keypoints as (y, x); the (x, y) order assumed here may
                # be transposed — confirm against uz_image.hessian_points.
                x, y = kp.ravel()
                cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
            cv2.imshow('Harris Corner Detector', frame)
            if cv2.waitKey(1) == 27:  # ESC ends the live demo
                break
    finally:
        cap.release()  # release the webcam before opening the video file

    # --- Phase 2: offline SIFT over the recorded video ------------------
    cap = cv2.VideoCapture(VIDEO_PATH)
    count = 0
    sift = cv2.SIFT_create()
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of video: the original never checked ret and crashed
                # in cvtColor once the stream was exhausted.
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if MY_SIFT:
                sift_keypoints, _ = uz_image.sift(frame)
                sift_keypoints = np.float64(sift_keypoints)
                # uz_image.sift yields (row, col) pairs; cv2.KeyPoint
                # expects x=col, y=row.
                kps = [cv2.KeyPoint(x=x, y=y, size=1)
                       for y, x in sift_keypoints]
            else:
                kps, _ = sift.detectAndCompute(frame, None)
            frame = cv2.drawKeypoints(
                frame, kps, None,
                flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
            cv2.imwrite("./datam/frames/%d.jpg" % count, frame)
            count += 1
    finally:
        cap.release()
        cv2.destroyAllWindows()  # destroy all opened windows


if __name__ == '__main__':
    start_realtime_keypoint_detection()

Binary file not shown.

View File

@ -104,7 +104,7 @@ def two_b() -> None:
def ex3():
three_a()
#three_a()
three_b()
#three_lol()
@ -182,7 +182,6 @@ def three_b() -> None:
return np.array(a_points), np.array(b_points)
a,b = map_keypoints(best_inliers)
uz_image.display_matches(image_a, a, image_b, b)
def three_lol():

View File

@ -911,7 +911,8 @@ def get_line_to_plot(rho, theta, h, w):
def hessian_points(image: Union[npt.NDArray[np.float64],
npt.NDArray[np.uint8]], sigma: float,
treshold: float) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
treshold: float) \
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
"""
Accepts: image, sigma, treshold
Returns: image with hessian points
@ -931,7 +932,8 @@ def hessian_points(image: Union[npt.NDArray[np.float64],
def harris_detector(image: Union[npt.NDArray[np.float64],
npt.NDArray[np.uint8]], sigma: float, treshold: float,
alpha = 0.06) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
alpha = 0.06) \
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
"""
Accepts: image, sigma, treshold
"""
@ -964,7 +966,8 @@ def harris_detector(image: Union[npt.NDArray[np.float64],
return features.astype(np.float64), points.astype(np.float64)
def simple_descriptors(I, Y, X, n_bins = 16, radius = 40, sigma = 2):
def simple_descriptors(I, Y, X, n_bins = 16, radius = 40, sigma = 2)\
-> npt.NDArray[np.float64]:
"""
Computes descriptors for locations given in X and Y.
@ -1019,8 +1022,12 @@ def simple_descriptors(I, Y, X, n_bins = 16, radius = 40, sigma = 2):
return np.array(desc)
def find_correspondences(img_a_descriptors: npt.NDArray[np.float64],
img_b_descriptors: npt.NDArray[np.float64]):
img_b_descriptors: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
correspondances = []
"""
Accepts: img_a_descriptors, img_b_descriptors
Returns: correspondances indices
"""
# Find img_a correspondences
for idx, descriptor_a in enumerate(img_a_descriptors):
@ -1030,7 +1037,8 @@ def find_correspondences(img_a_descriptors: npt.NDArray[np.float64],
return np.array(correspondances)
def display_matches(I1, pts1, I2, pts2):
def display_matches(I1, pts1, I2, pts2) \
-> None:
"""
Displays matches between images.
@ -1057,24 +1065,25 @@ def display_matches(I1, pts1, I2, pts2):
def find_matches(image_a: npt.NDArray[np.float64],
image_b: npt.NDArray[np.float64],
sigma=3, treshold=1e-6):
sigma=3, treshold=1e-6) \
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
"""
Finds matches between two images.
image_a, image_b: Image in grayscale.
Returns: tuple of two arrays of shape (N, 2) where N is the number of matches.
"""
# Get the keypoints
#_, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold)
#_, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold)
image_a_keypoints, image_a_descriptors = sift(image_a)
image_b_keypoints, image_b_descriptors = sift(image_b)
_, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold)
_, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold)
print("[+] Keypoints detected")
# Get the descriptors
#image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1])
#image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1])
image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1])
image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1])
print("[+] Descriptors computed")
@ -1148,14 +1157,14 @@ def find_matches(image_a: npt.NDArray[np.float64],
return np.array(image_a_points),np.array(image_b_points)
def estimate_homography(image_a: npt.NDArray[np.float64],
image_b: npt.NDArray[np.float64],
keypoints: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
def estimate_homography(keypoints: npt.NDArray[np.float64]) \
-> npt.NDArray[np.float64]:
"""
[x_r1 yr_1 1 0 0 0 -x_t1*x_r1 -x_t1*yr_1 -x_t1]
[0 0 0 x_r1 yr_1 1 -y_t1*x_r1 -y_t1*yr_1 -y_t1]
....
Accepts a set of keypoints and returns the homography matrix.
"""
# Construct the A matrix
@ -1177,10 +1186,13 @@ def estimate_homography(image_a: npt.NDArray[np.float64],
def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.float64],
image_b: npt.NDArray[np.float64], correspondences_b: npt.NDArray[np.float64],
iterations: int = 5000,
threshold: float = 3):
iterations: int = 1000,
threshold: float = 3) \
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
"""
RANSAC algorithm for estimating homography.
Accepts two images and their corresponding keypoints.
Returns the best homography matrix and the inliers.
"""
# Find the best homography
best_inliers = []
@ -1212,9 +1224,11 @@ def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.f
if distance < threshold:
inlier_indices.append(i)
inliers_proportion = len(inlier_indices)/len(correspondences_a)
# Check if we have a new best homography
if len(inlier_indices) > 4:
homography_2 = estimate_homography(image_a, image_b, np.concatenate((correspondences_a[inlier_indices], correspondences_b[inlier_indices]), axis=1))
if inliers_proportion > 0.2:
homography_2 = estimate_homography(np.concatenate((correspondences_a[inlier_indices], correspondences_b[inlier_indices]), axis=1))
inlier_indices_2 = []
for i, correspondence in enumerate(zip(correspondences_a, correspondences_b)):
(x_r, y_r), (x_t, y_t) = correspondence
@ -1228,17 +1242,22 @@ def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.f
if distance < threshold:
inlier_indices_2.append(i)
if len(inlier_indices_2) > len(best_inliers):
if len(inlier_indices_2) / len(correspondences_a) > len(best_inliers) / len(correspondences_a):
best_inliers = inlier_indices_2
best_homography = homography_2
best_keypoints = np.concatenate((correspondences_a[best_inliers], correspondences_b[best_inliers]), axis=1)
if best_homography == []:
raise Exception("Ransac did not converge")
return best_homography.astype(np.float64), best_keypoints
def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
def sift(grayscale_image: npt.NDArray[np.float64],
plot=False, get_descriptors=False) \
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
"""
SIFT algorithm for finding keypoints and descriptors.
"""
@ -1278,7 +1297,6 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
# Blur different scale images with gaussian blur num_of_octaves times
downsampled_and_blurred_images = []
gauss_kernels = generateGaussianKernels(1.6, downsample_size)
print(gauss_kernels)
for i in range(len(different_scale_images)):
image = different_scale_images[i]
images = [image] # Do not blur the first image
@ -1326,16 +1344,18 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
return True # Local maximum
else:
if np.all(compare_pixel <= image_prev_part) and \
np.all(compare_pixel <= image_part[0,:]) and \
np.all(compare_pixel <= image_part[2,:]) and \
np.all(compare_pixel <= image_part) and \
np.all(compare_pixel <= image_part) and \
np.all(compare_pixel <= image_next_part):
return True # Local minimum
return False
# Find the keypoints
##############################
# Find keypoints
##############################
keypoints = []
# Go throgh all image scales per octave
for i in tqdm(range(len(DOG)), desc='Finding keypoints'):
for i in tqdm(range(len(DOG)), desc='Finding SIFT keypoints'):
per_octave_images = DOG[i] # Retrive all images per octave
for j in range(1, len(per_octave_images)-1):
# Go through all images per octave by 3
@ -1346,6 +1366,8 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
if is_extremum(image_prev[y-1:y+2, x-1:x+2], image[y-1:y+2, x-1:x+2], image_next[y-1:y+2, x-1:x+2]):
# If keypoint is a good local extremum, add it to the list
keypoints.append((y, x, i, j)) # (y, x, octave, image scale)
##############################
# Compute descriptors
def compute_descriptors(keypoints):
"""
@ -1415,7 +1437,7 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
# Concatenate the histograms
histograms = np.concatenate(histograms)
# Smooth the histogram
all_histograms.append(histograms)
all_histograms.append(histograms/np.sum(histograms))
# Concatenate the histograms
all_histograms = np.concatenate(all_histograms)
# Normalize the histogram
@ -1423,8 +1445,10 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
descriptors.append(all_histograms)
return descriptors
if get_descriptors:
descriptors = compute_descriptors(keypoints)
else:
descriptors = []
# Rescale the keypoints, as the images were downsampled
keypoints = np.array(keypoints)
for keypoint in keypoints:
@ -1436,4 +1460,3 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
keypoints = keypoints[:, :-2]
return np.array(keypoints), np.array(descriptors)