SIFT DONE

main
Spagnolo Gasper 2022-11-29 22:36:45 +01:00
parent 555da1ce1a
commit 9bd3ba14ab
2 changed files with 114 additions and 32 deletions

View File

@ -53,8 +53,8 @@ def one_b() -> None:
plt.show()
def ex2():
#two_a()
two_b()
two_a()
#two_b()
def two_a() -> None:
"""
@ -104,9 +104,9 @@ def two_b() -> None:
def ex3():
#three_a()
#three_b()
three_lol()
three_a()
three_b()
#three_lol()
def three_a() -> None:
"""
@ -157,10 +157,10 @@ def three_b() -> None:
"""
Hi
"""
#image_a = uz_image.imread_gray("data/graf/graf_a.jpg", uz_image.ImageType.float64)
#image_b = uz_image.imread_gray("data/graf/graf_b.jpg", uz_image.ImageType.float64)
image_a = uz_image.imread_gray("datam/img1.jpg", uz_image.ImageType.float64)
image_b = uz_image.imread_gray("datam/img2.jpg", uz_image.ImageType.float64)
image_a = uz_image.imread_gray("data/graf/graf_a.jpg", uz_image.ImageType.float64)
image_b = uz_image.imread_gray("data/graf/graf_b.jpg", uz_image.ImageType.float64)
#image_a = uz_image.imread_gray("datam/img1.jpg", uz_image.ImageType.float64)
#image_b = uz_image.imread_gray("datam/img2.jpg", uz_image.ImageType.float64)
#image_a = uz_image.imread_gray("data/newyork/newyork_a.jpg", uz_image.ImageType.float64)
#image_b = uz_image.imread_gray("data/newyork/newyork_b.jpg", uz_image.ImageType.float64)
# Does not work for newyork dataset, because the keypoints are not reciprocal

View File

@ -1067,14 +1067,14 @@ def find_matches(image_a: npt.NDArray[np.float64],
# Get the keypoints
#_, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold)
#_, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold)
image_a_keypoints = sift(image_a)
image_b_keypoints = sift(image_b)
image_a_keypoints, image_a_descriptors = sift(image_a)
image_b_keypoints, image_b_descriptors = sift(image_b)
print("[+] Keypoints detected")
# Get the descriptors
image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1])
image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1])
#image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1])
#image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1])
print("[+] Descriptors computed")
@ -1177,8 +1177,8 @@ def estimate_homography(image_a: npt.NDArray[np.float64],
def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.float64],
image_b: npt.NDArray[np.float64], correspondences_b: npt.NDArray[np.float64],
iterations: int = 1000,
threshold: float = 1.5):
iterations: int = 5000,
threshold: float = 3):
"""
RANSAC algorithm for estimating homography.
"""
@ -1258,7 +1258,10 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
different_scale_images.append(cv2.pyrDown(different_scale_images[-1]))
def generateGaussianKernels(sigma, num_intervals):
"""Generate list of gaussian kernels at which to blur the input image. Default values of sigma, intervals, and octaves follow section 3 of Lowe's paper.
"""
Generate list of gaussian kernels at which to blur the input image.
Default values of sigma, intervals, and octaves follow section 3 of Lowe's paper.
(I stole this from the internet)
"""
num_images_per_octave = num_intervals + 3
k = 2 ** (1. / num_intervals)
@ -1312,7 +1315,6 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
"""
Check if the given image part is an extremum.
"""
# Check if the value is the biggest or the smallest in the 3x3x3 neighbourhood
compare_pixel = image_part[1, 1]
if np.abs(compare_pixel) < 0.01:
return False
@ -1321,37 +1323,117 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False):
np.all(compare_pixel >= image_part[0,:]) and \
np.all(compare_pixel >= image_part[2,:]) and \
np.all(compare_pixel >= image_next_part):
return True
return True # Local maximum
else:
if np.all(compare_pixel <= image_prev_part) and \
np.all(compare_pixel <= image_part[0,:]) and \
np.all(compare_pixel <= image_part[2,:]) and \
np.all(compare_pixel <= image_next_part):
return True
return True # Local minimum
return False
# Find the keypoints
keypoints = []
for i in range(len(DOG)):
per_octave_images = DOG[i]
# Go through all image scales per octave
for i in tqdm(range(len(DOG)), desc='Finding keypoints'):
per_octave_images = DOG[i] # Retrieve all images per octave
for j in range(1, len(per_octave_images)-1):
print(len(per_octave_images))
# Go through all images per octave by 3
image_prev, image, image_next = per_octave_images[j-1], per_octave_images[j], per_octave_images[j+1]
for y in range(1, image.shape[0]-1):
for x in range(1, image.shape[1]-1):
# Check if the pixel is a local maximum
for y in range(8, image.shape[0]-8):
for x in range(8, image.shape[1]-8):
# Check if the pixel is a local extremum
if is_extremum(image_prev[y-1:y+2, x-1:x+2], image[y-1:y+2, x-1:x+2], image_next[y-1:y+2, x-1:x+2]):
keypoints.append((y, x, i))
print('Keypoint found')
# If keypoint is a good local extremum, add it to the list
keypoints.append((y, x, i, j)) # (y, x, octave, image scale)
# Compute descriptors
def compute_descriptors(keypoints):
"""
Compute the descriptors for the given keypoints.
"""
descriptors = []
# Rescale the keypoints
for keypoint in keypoints:
y, x = keypoint[:2]
octave, img_ix = keypoint[2:]
# Get the image
image = downsampled_and_blurred_images[octave][img_ix]
def compute_mag_and_angle(image, y, x):
# Compute the gradient magnitude and orientation at the point
dx = image[y+1, x] - image[y-1, x]
dy = image[y, x+1] - image[y, x-1]
magnitude = np.sqrt(dx**2 + dy**2)
orientation = np.arctan2(dy, dx)
return magnitude, orientation
_, orientation = compute_mag_and_angle(image, y, x)
if orientation < 0:
orientation %= np.pi
def create_indexes(y, x, x_offset, y_offset):
# Create indexes for the given offsets
indexes = []
for i in range(x_offset):
for j in range(y_offset):
indexes.append((y+j, x+i))
return np.array(indexes)
# Map orientation to which direction to look at
if orientation < np.pi/8: # Right
# Get indexes
indexes = create_indexes(y, x, 8, 8)
elif orientation < 3*np.pi/8: # Right up
indexes = create_indexes(y-8, x, 8, 8)
elif orientation < 5*np.pi/8: # Up
indexes = create_indexes(y-4, x-8, 8, 8)
elif orientation < 7*np.pi/8: # Left up
indexes = create_indexes(y-8, x-8, 8, 8)
else: # Left
indexes = create_indexes(y-4, x-8, 8, 8)
# Split indexes into 4 blocks
blocks = np.array_split(indexes, 4)
histogram_space = np.linspace(0, np.pi, 8)
all_histograms = []
for block in blocks:
# Split each block in 4x4 cells and compute histogram for each cell
cells = np.array_split(block, 4)
histograms = []
for cell in cells:
histogram = np.zeros(8)
for y, x in cell:
magnitude, orientation = compute_mag_and_angle(image,y, x)
# Compute the histogram
if orientation < 0:
orientation %= np.pi
# Find the bin
bin = np.digitize(orientation, histogram_space)
histogram[bin - 1] += magnitude
histograms.append(histogram)
# Concatenate the histograms
histograms = np.concatenate(histograms)
# Smooth the histogram
all_histograms.append(histograms)
# Concatenate the histograms
all_histograms = np.concatenate(all_histograms)
# Normalize the histogram
all_histograms = all_histograms / np.sum(all_histograms)
descriptors.append(all_histograms)
return descriptors
descriptors = compute_descriptors(keypoints)
# Rescale the keypoints, as the images were downsampled
keypoints = np.array(keypoints)
for keypoint in keypoints:
if keypoint[2] > 0:
keypoint[0] *= 2 * keypoint[2]
keypoint[1] *= 2 * keypoint[2]
# Remove last column from keypoints
keypoints = keypoints[:, :-1]
# Remove two last column from keypoints
keypoints = keypoints[:, :-2]
return np.array(keypoints), np.array(descriptors)
return np.array(keypoints)