diff --git a/assignment4/solution.py b/assignment4/solution.py index 34b2098..22a3826 100644 --- a/assignment4/solution.py +++ b/assignment4/solution.py @@ -53,8 +53,8 @@ def one_b() -> None: plt.show() def ex2(): - #two_a() - two_b() + two_a() + #two_b() def two_a() -> None: """ @@ -104,9 +104,9 @@ def two_b() -> None: def ex3(): - #three_a() - #three_b() - three_lol() + three_a() + three_b() + #three_lol() def three_a() -> None: """ @@ -157,10 +157,10 @@ def three_b() -> None: """ Hi """ - #image_a = uz_image.imread_gray("data/graf/graf_a.jpg", uz_image.ImageType.float64) - #image_b = uz_image.imread_gray("data/graf/graf_b.jpg", uz_image.ImageType.float64) - image_a = uz_image.imread_gray("datam/img1.jpg", uz_image.ImageType.float64) - image_b = uz_image.imread_gray("datam/img2.jpg", uz_image.ImageType.float64) + image_a = uz_image.imread_gray("data/graf/graf_a.jpg", uz_image.ImageType.float64) + image_b = uz_image.imread_gray("data/graf/graf_b.jpg", uz_image.ImageType.float64) + #image_a = uz_image.imread_gray("datam/img1.jpg", uz_image.ImageType.float64) + #image_b = uz_image.imread_gray("datam/img2.jpg", uz_image.ImageType.float64) #image_a = uz_image.imread_gray("data/newyork/newyork_a.jpg", uz_image.ImageType.float64) #image_b = uz_image.imread_gray("data/newyork/newyork_b.jpg", uz_image.ImageType.float64) # Does not work for newyork dataset, becouse the keypoints are not reciprocal diff --git a/assignment4/uz_framework/image.py b/assignment4/uz_framework/image.py index f923a1b..8535c8d 100644 --- a/assignment4/uz_framework/image.py +++ b/assignment4/uz_framework/image.py @@ -1067,14 +1067,14 @@ def find_matches(image_a: npt.NDArray[np.float64], # Get the keypoints #_, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold) #_, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold) - image_a_keypoints = sift(image_a) - image_b_keypoints = sift(image_b) + image_a_keypoints, image_a_descriptors = sift(image_a) + image_b_keypoints, image_b_descriptors = sift(image_b) print("[+] Keypoints detected") # Get the descriptors - image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1]) - image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1]) + #image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1]) + #image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1]) print("[+] Descriptors computed") @@ -1177,8 +1177,8 @@ def estimate_homography(image_a: npt.NDArray[np.float64], def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.float64], image_b: npt.NDArray[np.float64], correspondences_b: npt.NDArray[np.float64], - iterations: int = 1000, - threshold: float = 1.5): + iterations: int = 5000, + threshold: float = 3): """ RANSAC algorithm for estimating homography. """ @@ -1258,7 +1258,10 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False): different_scale_images.append(cv2.pyrDown(different_scale_images[-1])) def generateGaussianKernels(sigma, num_intervals): - """Generate list of gaussian kernels at which to blur the input image. Default values of sigma, intervals, and octaves follow section 3 of Lowe's paper. + """ + Generate list of gaussian kernels at which to blur the input image. + Default values of sigma, intervals, and octaves follow section 3 of Lowe's paper. + (I stole this from the internet) """ num_images_per_octave = num_intervals + 3 k = 2 ** (1. / num_intervals) @@ -1312,7 +1315,6 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False): """ Check if the given image part is an extremum. """ - # Check if the value is the biggest or the smallest in the 3x3x3 neighbourhood compare_pixel = image_part[1, 1] if np.abs(compare_pixel) < 0.01: return False @@ -1321,37 +1323,117 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False): np.all(compare_pixel >= image_part[0,:]) and \ np.all(compare_pixel >= image_part[2,:]) and \ np.all(compare_pixel >= image_next_part): - return True + return True # Local maximum else: if np.all(compare_pixel <= image_prev_part) and \ np.all(compare_pixel <= image_part[0,:]) and \ np.all(compare_pixel <= image_part[2,:]) and \ np.all(compare_pixel <= image_next_part): - return True + return True # Local minimum return False # Find the keypoints keypoints = [] - for i in range(len(DOG)): - per_octave_images = DOG[i] - for j in range(1, len(per_octave_images)-1): - print(len(per_octave_images)) + # Go throgh all image scales per octave + for i in tqdm(range(len(DOG)), desc='Finding keypoints'): + per_octave_images = DOG[i] # Retrive all images per octave + for j in range(1, len(per_octave_images)-1): + # Go through all images per octave by 3 image_prev, image, image_next = per_octave_images[j-1], per_octave_images[j], per_octave_images[j+1] - for y in range(1, image.shape[0]-1): - for x in range(1, image.shape[1]-1): - # Check if the pixel is a local maximum + for y in range(8, image.shape[0]-8): + for x in range(8, image.shape[1]-8): + # Check if the pixel is a local extremum if is_extremum(image_prev[y-1:y+2, x-1:x+2], image[y-1:y+2, x-1:x+2], image_next[y-1:y+2, x-1:x+2]): - keypoints.append((y, x, i)) - print('Keypoint found') + # If keypoint is a good local extremum, add it to the list + keypoints.append((y, x, i, j)) # (y, x, octave, image scale) + # Compute descriptors + def compute_descriptors(keypoints): + """ + Compute the descriptors for the given keypoints. + """ + descriptors = [] - # Rescale the keypoints + for keypoint in keypoints: + y, x = keypoint[:2] + octave, img_ix = keypoint[2:] + # Get the image + image = downsampled_and_blurred_images[octave][img_ix] + + def compute_mag_and_angle(image, y, x): + # Compute the gradient magnitude and orientation at the point + dx = image[y+1, x] - image[y-1, x] + dy = image[y, x+1] - image[y, x-1] + magnitude = np.sqrt(dx**2 + dy**2) + orientation = np.arctan2(dy, dx) + return magnitude, orientation + _, orientation = compute_mag_and_angle(image, y, x) + + if orientation < 0: + orientation %= np.pi + + def create_indexes(y, x, x_offset, y_offset): + # Create indexes for the given offsets + indexes = [] + for i in range(x_offset): + for j in range(y_offset): + indexes.append((y+j, x+i)) + return np.array(indexes) + + # Map orientation to which direction to look at + if orientation < np.pi/8: # Right + # Get indexes + indexes = create_indexes(y, x, 8, 8) + elif orientation < 3*np.pi/8: # Right up + indexes = create_indexes(y-8, x, 8, 8) + elif orientation < 5*np.pi/8: # Up + indexes = create_indexes(y-4, x-8, 8, 8) + elif orientation < 7*np.pi/8: # Left up + indexes = create_indexes(y-8, x-8, 8, 8) + else: # Left + indexes = create_indexes(y-4, x-8, 8, 8) + + # Split indexes into 4 blocks + blocks = np.array_split(indexes, 4) + + histogram_space = np.linspace(0, np.pi, 8) + all_histograms = [] + for block in blocks: + # Split each block in 4x4 cells and compute histogram for each cell + cells = np.array_split(block, 4) + histograms = [] + for cell in cells: + histogram = np.zeros(8) + for y, x in cell: + magnitude, orientation = compute_mag_and_angle(image,y, x) + # Compute the histogram + if orientation < 0: + orientation %= np.pi + # Find the bin + bin = np.digitize(orientation, histogram_space) + histogram[bin - 1] += magnitude + histograms.append(histogram) + # Concatenate the histograms + histograms = np.concatenate(histograms) + # Smooth the histogram + all_histograms.append(histograms) + # Concatenate the histograms + all_histograms = np.concatenate(all_histograms) + # Normalize the histogram + all_histograms = all_histograms / np.sum(all_histograms) + descriptors.append(all_histograms) + return descriptors + + descriptors = compute_descriptors(keypoints) + + # Rescale the keypoints, as the images were downsampled keypoints = np.array(keypoints) for keypoint in keypoints: if keypoint[2] > 0: keypoint[0] *= 2 * keypoint[2] keypoint[1] *= 2 * keypoint[2] - # Remove last column from keypoints - keypoints = keypoints[:, :-1] + # Remove two last column from keypoints + keypoints = keypoints[:, :-2] + + return np.array(keypoints), np.array(descriptors) - return np.array(keypoints)