diff --git a/assignment4/cam.py b/assignment4/cam.py
index af61e28..afee2f8 100644
--- a/assignment4/cam.py
+++ b/assignment4/cam.py
@@ -4,35 +4,35 @@
 import cv2
 import uz_framework.image as uz_image
 from matplotlib import pyplot as plt
 
+VIDEO_PATH = "./datam/raw_recording.webm"
+MY_SIFT = True
+
 def start_realtime_keypoint_detection():
-    cap = cv2.VideoCapture(0)
-    scaling_factor = 1
-
-    while True:
-        ret, frame = cap.read()
-        frame = cv2.resize(frame, None, fx=scaling_factor, fy=scaling_factor, interpolation=cv2.INTER_AREA)
-
-        grayscale_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
-        _, harris_keypoints = uz_image.hessian_points(grayscale_frame, 12, treshold=1e-6)
-        harris_keypoints = np.uint32(harris_keypoints)
-        print(harris_keypoints)
-
-        for kp in harris_keypoints:
-            x, y = kp.ravel()
-            cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)
-
-        cv2.imshow('Harris Corner Detector', frame)
-
-        #output_image = cv2.drawKeypoints(frame, harris_keypoints, 0, (0, 255, 0), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
-
-        #cv2.imshow('Webcam', output_image)
-
-        c = cv2.waitKey(1)
-        if c == 27:
-            break
-
+    # Read the recorded video instead of the live webcam feed
+    cap = cv2.VideoCapture(VIDEO_PATH)
+    count = 0
+    sift = cv2.SIFT_create()
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:  # End of stream
+            break
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Convert to grayscale (OpenCV frames are BGR)
+        if MY_SIFT:
+            sift_keypoints, _ = uz_image.sift(frame)
+            sift_keypoints = np.float64(sift_keypoints)
+            kps = []
+            for keypoint in sift_keypoints:
+                y, x = keypoint  # uz_image.sift returns (row, column) pairs
+                kps.append(cv2.KeyPoint(x=x, y=y, size=1))
+        else:
+            kps, _ = sift.detectAndCompute(frame, None)
+        frame = cv2.drawKeypoints(frame, kps, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
+        #cv2.imshow('hello', frame)
+        cv2.imwrite("./datam/frames/%d.jpg" % count, frame)
+        count = count + 1
+        #if cv2.waitKey(10) & 0xFF == ord('q'):
+        #    break
+
     cap.release()
-    cv2.destroyAllWindows()
+    cv2.destroyAllWindows()  # Destroy all opened windows
 
 if __name__ == '__main__':
     start_realtime_keypoint_detection()
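# Since the rewritten loop now dumps numbered JPEG frames instead of showing a
# window, the frames still have to be stitched back into a clip to inspect the
# result. A minimal sketch of that step, assuming the ./datam/frames/ layout
# produced above; the output path, codec, and frame rate are assumptions, not
# part of this diff:
import glob
import cv2

# Sort numerically so frame 10 follows frame 9, not frame 1
frames = sorted(glob.glob("./datam/frames/*.jpg"),
                key=lambda p: int(p.split("/")[-1].split(".")[0]))
height, width = cv2.imread(frames[0]).shape[:2]
writer = cv2.VideoWriter("./datam/keypoints.avi",
                         cv2.VideoWriter_fourcc(*"MJPG"), 30, (width, height))
for path in frames:
    writer.write(cv2.imread(path))
writer.release()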
diff --git a/assignment4/datam/raw_recording.webm b/assignment4/datam/raw_recording.webm
new file mode 100644
index 0000000..1a7e2cc
Binary files /dev/null and b/assignment4/datam/raw_recording.webm differ
diff --git a/assignment4/solution.py b/assignment4/solution.py
index 22a3826..2267d3c 100644
--- a/assignment4/solution.py
+++ b/assignment4/solution.py
@@ -104,7 +104,7 @@ def two_b() -> None:
 
 def ex3():
-    three_a()
+    #three_a()
     three_b()
     #three_lol()
 
@@ -182,7 +182,6 @@ def three_b() -> None:
         return np.array(a_points), np.array(b_points)
 
     a, b = map_keypoints(best_inliers)
-    uz_image.display_matches(image_a, a, image_b, b)
 
 def three_lol():
diff --git a/assignment4/uz_framework/image.py b/assignment4/uz_framework/image.py
index 8535c8d..6c5c8f9 100644
--- a/assignment4/uz_framework/image.py
+++ b/assignment4/uz_framework/image.py
@@ -911,7 +911,8 @@ def get_line_to_plot(rho, theta, h, w):
 def hessian_points(image: Union[npt.NDArray[np.float64],
                                 npt.NDArray[np.uint8]],
                    sigma: float,
-                   treshold: float) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+                   treshold: float) \
+        -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
     """
     Accepts: image, sigma, treshold
     Returns: image with hessian points
@@ -931,7 +932,8 @@ def hessian_points(image: Union[npt.NDArray[np.float64],
 def harris_detector(image: Union[npt.NDArray[np.float64],
                                  npt.NDArray[np.uint8]],
                     sigma: float, treshold: float,
-                    alpha = 0.06) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+                    alpha = 0.06) \
+        -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
     """
     Accepts: image, sigma, treshold
     """
@@ -964,7 +966,8 @@ def harris_detector(image: Union[npt.NDArray[np.float64],
     return features.astype(np.float64), points.astype(np.float64)
 
-def simple_descriptors(I, Y, X, n_bins = 16, radius = 40, sigma = 2):
+def simple_descriptors(I, Y, X, n_bins = 16, radius = 40, sigma = 2) \
+        -> npt.NDArray[np.float64]:
     """
     Computes descriptors for locations given in X and Y.
 
@@ -1019,8 +1022,12 @@ def simple_descriptors(I, Y, X, n_bins = 16, radius = 40, sigma = 2):
     return np.array(desc)
 
 def find_correspondences(img_a_descriptors: npt.NDArray[np.float64],
-                         img_b_descriptors: npt.NDArray[np.float64]):
+                         img_b_descriptors: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """
+    Accepts: img_a_descriptors, img_b_descriptors
+    Returns: indices of corresponding descriptor pairs
+    """
     correspondances = []
 
     # Find img_a correspondences
     for idx, descriptor_a in enumerate(img_a_descriptors):
@@ -1030,7 +1037,8 @@ def find_correspondences(img_a_descriptors: npt.NDArray[np.float64],
     return np.array(correspondances)
 
-def display_matches(I1, pts1, I2, pts2):
+def display_matches(I1, pts1, I2, pts2) \
+        -> None:
     """
     Displays matches between images.
 
@@ -1057,24 +1065,25 @@
 def find_matches(image_a: npt.NDArray[np.float64],
                  image_b: npt.NDArray[np.float64],
-                 sigma=3, treshold=1e-6):
+                 sigma=3, treshold=1e-6) \
+        -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
     """
     Finds matches between two images.
 
     image_a, image_b: Image in grayscale.
+
+    Returns: tuple of two arrays of shape (N, 2), where N is the number of matches.
     """
 
     # Get the keypoints
-    #_, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold)
-    #_, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold)
-    image_a_keypoints, image_a_descriptors = sift(image_a)
-    image_b_keypoints, image_b_descriptors = sift(image_b)
+    _, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold)
+    _, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold)
 
     print("[+] Keypoints detected")
 
     # Get the descriptors
-    #image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1])
-    #image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1])
+    image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1])
+    image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1])
 
     print("[+] Descriptors computed")
 
@@ -1148,14 +1157,14 @@ def find_matches(image_a: npt.NDArray[np.float64],
     return np.array(image_a_points), np.array(image_b_points)
 
-def estimate_homography(image_a: npt.NDArray[np.float64],
-                        image_b: npt.NDArray[np.float64],
-                        keypoints: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+def estimate_homography(keypoints: npt.NDArray[np.float64]) \
+        -> npt.NDArray[np.float64]:
     """
     [x_r1 y_r1 1 0    0    0 -x_t1*x_r1 -x_t1*y_r1 -x_t1]
     [0    0    0 x_r1 y_r1 1 -y_t1*x_r1 -y_t1*y_r1 -y_t1]
     ....
+    Accepts a set of keypoints and returns the homography matrix.
     """
     # Construct the A matrix
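# The estimate_homography docstring above spells out the standard DLT system:
# each correspondence contributes the two rows shown, and H is the null vector
# of the stacked matrix. A minimal standalone sketch of that construction
# (function and variable names here are illustrative, not the repo's API):
import numpy as np

def dlt_homography(src, dst):
    # Estimate H from N >= 4 (x, y) point correspondences via SVD
    rows = []
    for (x_r, y_r), (x_t, y_t) in zip(src, dst):
        rows.append([x_r, y_r, 1, 0, 0, 0, -x_t * x_r, -x_t * y_r, -x_t])
        rows.append([0, 0, 0, x_r, y_r, 1, -y_t * x_r, -y_t * y_r, -y_t])
    _, _, vt = np.linalg.svd(np.array(rows, dtype=np.float64))
    h = vt[-1]  # Right singular vector of the smallest singular value
    return (h / h[-1]).reshape(3, 3)  # Normalize so H[2, 2] == 1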
""" # Construct the A matrix @@ -1177,10 +1186,13 @@ def estimate_homography(image_a: npt.NDArray[np.float64], def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.float64], image_b: npt.NDArray[np.float64], correspondences_b: npt.NDArray[np.float64], - iterations: int = 5000, - threshold: float = 3): + iterations: int = 1000, + threshold: float = 3) \ + -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: """ RANSAC algorithm for estimating homography. + Accepts two images and their corresponding keypoints. + Returns the best homography matrix and the inliers. """ # Find the best homography best_inliers = [] @@ -1212,9 +1224,11 @@ def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.f if distance < threshold: inlier_indices.append(i) + inliers_proportion = len(inlier_indices)/len(correspondences_a) + # Check if we have a new best homography - if len(inlier_indices) > 4: - homography_2 = estimate_homography(image_a, image_b, np.concatenate((correspondences_a[inlier_indices], correspondences_b[inlier_indices]), axis=1)) + if inliers_proportion > 0.2: + homography_2 = estimate_homography(np.concatenate((correspondences_a[inlier_indices], correspondences_b[inlier_indices]), axis=1)) inlier_indices_2 = [] for i, correspondence in enumerate(zip(correspondences_a, correspondences_b)): (x_r, y_r), (x_t, y_t) = correspondence @@ -1228,17 +1242,22 @@ def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.f if distance < threshold: inlier_indices_2.append(i) - if len(inlier_indices_2) > len(best_inliers): + if len(inlier_indices_2) / len(correspondences_a) > len(best_inliers) / len(correspondences_a): best_inliers = inlier_indices_2 best_homography = homography_2 best_keypoints = np.concatenate((correspondences_a[best_inliers], correspondences_b[best_inliers]), axis=1) - + + if best_homography == []: + raise Exception("Ransac did not converge") + return best_homography.astype(np.float64), best_keypoints -def sift(grayscale_image: npt.NDArray[np.float64], plot=False): +def sift(grayscale_image: npt.NDArray[np.float64], + plot=False, get_descriptors=False) \ + -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: """ SIFT algorithm for finding keypoints and descriptors. 
""" @@ -1278,7 +1297,6 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False): # Blur different scale images with gaussian blur num_of_octaves times downsampled_and_blurred_images = [] gauss_kernels = generateGaussianKernels(1.6, downsample_size) - print(gauss_kernels) for i in range(len(different_scale_images)): image = different_scale_images[i] images = [image] # Do not blur the first image @@ -1326,16 +1344,18 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False): return True # Local maximum else: if np.all(compare_pixel <= image_prev_part) and \ - np.all(compare_pixel <= image_part[0,:]) and \ - np.all(compare_pixel <= image_part[2,:]) and \ + np.all(compare_pixel <= image_part) and \ + np.all(compare_pixel <= image_part) and \ np.all(compare_pixel <= image_next_part): return True # Local minimum return False - # Find the keypoints + ############################## + # Find keypoints + ############################## keypoints = [] # Go throgh all image scales per octave - for i in tqdm(range(len(DOG)), desc='Finding keypoints'): + for i in tqdm(range(len(DOG)), desc='Finding SIFT keypoints'): per_octave_images = DOG[i] # Retrive all images per octave for j in range(1, len(per_octave_images)-1): # Go through all images per octave by 3 @@ -1346,6 +1366,8 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False): if is_extremum(image_prev[y-1:y+2, x-1:x+2], image[y-1:y+2, x-1:x+2], image_next[y-1:y+2, x-1:x+2]): # If keypoint is a good local extremum, add it to the list keypoints.append((y, x, i, j)) # (y, x, octave, image scale) + ############################## + # Compute descriptors def compute_descriptors(keypoints): """ @@ -1415,16 +1437,18 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False): # Concatenate the histograms histograms = np.concatenate(histograms) # Smooth the histogram - all_histograms.append(histograms) + all_histograms.append(histograms/np.sum(histograms)) # Concatenate the histograms all_histograms = np.concatenate(all_histograms) # Normalize the histogram all_histograms = all_histograms / np.sum(all_histograms) descriptors.append(all_histograms) return descriptors - - descriptors = compute_descriptors(keypoints) - + + if get_descriptors: + descriptors = compute_descriptors(keypoints) + else: + descriptors = [] # Rescale the keypoints, as the images were downsampled keypoints = np.array(keypoints) for keypoint in keypoints: @@ -1436,4 +1460,3 @@ def sift(grayscale_image: npt.NDArray[np.float64], plot=False): keypoints = keypoints[:, :-2] return np.array(keypoints), np.array(descriptors) -