1701 lines
61 KiB
Python
1701 lines
61 KiB
Python
import numpy as np
|
|
import cv2 as cv2
|
|
from matplotlib import pyplot as plt
|
|
from PIL import Image
|
|
from typing import Union
|
|
import numpy.typing as npt
|
|
import enum
|
|
from tqdm import tqdm
|
|
|
|
class ImageType(enum.Enum):
|
|
uint8 = 0
|
|
float64 = 1
|
|
|
|
class DistanceMeasure(enum.Enum):
|
|
euclidian_distance = 0
|
|
chi_square_distance = 1
|
|
intersection_distance = 2
|
|
hellinger_distance = 3
|
|
|
|
def imread(path: str, type: ImageType) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Reads an image in RGB order. Image type is transformed from uint8 to float, and
|
|
range of values is reduced from [0, 255] to [0, 1].
|
|
"""
|
|
I = Image.open(path).convert('RGB') # PIL image.
|
|
I = np.asarray(I) # Converting to Numpy array.
|
|
if type == ImageType.float64:
|
|
I = I.astype(np.float64) / 255
|
|
return I
|
|
elif type == ImageType.uint8:
|
|
return I
|
|
|
|
raise Exception(f"Unrecognized image format! {type}")
|
|
|
|
def imread_gray(path: str, type: ImageType) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Reads an image in gray. Image type is transformed from uint8 to float, and
|
|
range of values is reduced from [0, 255] to [0, 1].
|
|
"""
|
|
|
|
I = Image.open(path).convert('L') # PIL image opening and converting to gray.
|
|
I = np.asarray(I) # Converting to Numpy array.
|
|
|
|
if type == ImageType.float64:
|
|
I = I.astype(np.float64) / 255
|
|
return I
|
|
elif type == ImageType.uint8:
|
|
return I
|
|
|
|
raise Exception("Unrecognized image format!")
|
|
|
|
def signal_show(*signals):
|
|
"""
|
|
Plots all given 1D signals in the same plot.
|
|
Signals can be Python lists or 1D numpy array.
|
|
"""
|
|
for s in signals:
|
|
if type(s) == np.ndarray:
|
|
s = s.squeeze()
|
|
plt.plot(s)
|
|
plt.show()
|
|
|
|
|
|
def convolve(I: np.ndarray, *ks):
|
|
"""
|
|
Convolves input image I with all given kernels.
|
|
|
|
:param I: Image, should be of type float64 and scaled from 0 to 1.
|
|
:param ks: 2D Kernels
|
|
:return: Image convolved with all kernels.
|
|
"""
|
|
for k in ks:
|
|
k = np.flip(k) # filter2D performs correlation, so flipping is necessary
|
|
I = cv2.filter2D(I, cv2.CV_64F, k)
|
|
return I
|
|
|
|
def transform_coloured_image_to_grayscale(image: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts float64 picture with three colour channels and returns float64 grayscale image
|
|
with one channel.
|
|
"""
|
|
grayscale_image = np.zeros(image.shape[:2])
|
|
|
|
for i in range(image.shape[0]):
|
|
for j in range(image.shape[1]):
|
|
grayscale_image[i, j] = (image[i, j, 0] + image[i,j, 1] + image[i, j, 2]) / 3
|
|
|
|
|
|
return grayscale_image
|
|
|
|
def invert_coloured_image_part(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
startx: int, endx: int, starty: int, endy: int) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts image, starting position end end position for axes x & y. Returns whole image with inverted part.
|
|
"""
|
|
inverted_image = image.copy()
|
|
|
|
if image.dtype.type == np.float64:
|
|
for i in range(startx, endx):
|
|
for j in range(starty, endy):
|
|
inverted_image[i, j, 0] = 1 - image[i, j, 0]
|
|
inverted_image[i, j, 1] = 1 - image[i, j, 1]
|
|
inverted_image[i, j, 2] = 1 - image[i, j, 2]
|
|
return inverted_image
|
|
elif image.dtype.type == np.uint8:
|
|
for i in range(startx, endx):
|
|
for j in range(starty, endy):
|
|
inverted_image[i, j, 0] = 255 - image[i, j, 0]
|
|
inverted_image[i, j, 1] = 255 - image[i, j, 1]
|
|
inverted_image[i, j, 2] = 255 - image[i, j, 2]
|
|
return inverted_image
|
|
|
|
raise Exception("Unrecognized image format!")
|
|
|
|
def invert_coloured_image(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts image and inverts it
|
|
"""
|
|
return invert_coloured_image_part(image, 0, image.shape[0], 0, image.shape[1])
|
|
|
|
def calculate_best_treshold_using_otsu_method(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]) -> int:
|
|
"""
|
|
Accepts image and returns best treshold using otsu method
|
|
"""
|
|
|
|
if image.dtype.type == np.float64:
|
|
im = image.copy()
|
|
im = im * (255.0/im.max())
|
|
elif image.dtype.type == np.uint8:
|
|
im = image.copy()
|
|
else:
|
|
raise Exception("Unrecognized image format!")
|
|
|
|
treshold_range = np.arange(np.max(im) + 1)
|
|
criterias = []
|
|
|
|
for treshold in treshold_range:
|
|
# create the thresholded image
|
|
thresholded_im = np.zeros(im.shape)
|
|
|
|
thresholded_im[im >= treshold] = 1
|
|
|
|
# compute weights
|
|
nb_pixels = im.size
|
|
nb_pixels1 = np.count_nonzero(thresholded_im)
|
|
weight1 = nb_pixels1 / nb_pixels
|
|
weight0 = 1 - weight1
|
|
|
|
# if one the classes is empty, eg all pixels are below or above the threshold, that threshold will not be considered
|
|
# in the search for the best threshold
|
|
if weight1 == 0 or weight0 == 0:
|
|
continue
|
|
|
|
# find all pixels belonging to each class
|
|
val_pixels1 = im[thresholded_im == 1]
|
|
val_pixels0 = im[thresholded_im == 0]
|
|
|
|
# compute variance of these classes
|
|
var0 = np.var(val_pixels0) if len(val_pixels0) > 0 else 0
|
|
var1 = np.var(val_pixels1) if len(val_pixels1) > 0 else 0
|
|
|
|
criterias.append( weight0 * var0 + weight1 * var1)
|
|
|
|
best_threshold = treshold_range[np.argmin(criterias)]
|
|
return best_threshold
|
|
|
|
|
|
def get_image_bins_for_loop(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
number_of_bins: int) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts image in the float64 format or uint8, returns normailzed
|
|
image bins, histogram
|
|
"""
|
|
if image.dtype.type == np.float64 or image.dtype.type == np.uint8:
|
|
bin_restrictions = np.linspace(np.min(image), np.max(image), num=number_of_bins)
|
|
else:
|
|
raise Exception("Unrecognized image format!")
|
|
|
|
bins = np.zeros(number_of_bins).astype(np.float64)
|
|
|
|
for pixel in image.reshape(-1):
|
|
# https://stackoverflow.com/a/16244044
|
|
bins[np.argmax(bin_restrictions > pixel)] += 1
|
|
|
|
return bins / np.sum(bins)
|
|
|
|
|
|
# Much faster implementation than for loop
|
|
def get_image_bins(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
number_of_bins: int) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts image in the float64 format or uint8, returns normailzed
|
|
image bins, histogram
|
|
"""
|
|
if image.dtype.type == np.float64 or image.dtype.type == np.uint8:
|
|
bins = np.linspace(np.min(image), np.max(image), num=number_of_bins)
|
|
else:
|
|
raise Exception("Unrecognized image format!")
|
|
|
|
# Put pixels into classes
|
|
# ex. binsize = 10 then 0.4 would map into 4
|
|
binarray = np.digitize(image.reshape(-1), bins).astype(np.uint8)
|
|
|
|
# Now count those values
|
|
binarray = np.unique(binarray, return_counts=True)
|
|
|
|
counts = binarray[1].astype(np.float64) # Get the counts out of tuple
|
|
|
|
# Check if there is any empty bin
|
|
empty_bins = []
|
|
bins = binarray[0]
|
|
for i in range(1, number_of_bins + 1):
|
|
if i not in bins:
|
|
empty_bins.append(i)
|
|
|
|
# Add empty bins with zeros
|
|
if empty_bins != []:
|
|
for i in empty_bins:
|
|
counts = np.insert(counts, i - 1, 0)
|
|
|
|
return counts
|
|
|
|
def get_image_bins_ND(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
number_of_bins: int) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts image in the float64 format or uint8 and number of bins
|
|
Returns normailzed image histogram bins
|
|
"""
|
|
bs = []
|
|
hist = np.zeros((number_of_bins, number_of_bins, number_of_bins))
|
|
bins = np.linspace(np.min(image), np.max(image), num=number_of_bins)
|
|
|
|
for i in range(image.shape[2]):
|
|
v = image[:, :, i].reshape(-1)
|
|
bs.append(np.digitize(v, bins).astype(np.uint32))
|
|
|
|
for i in range(len(bs[0])):
|
|
hist[bs[2][i] -1, bs[1][i] -1, bs[0][i] - 1] += 1
|
|
|
|
return hist / np.sum(hist)
|
|
|
|
def get_image_bins_gradient_magnitude_and_angles(image: Union[npt.NDArray[np.float64],
|
|
npt.NDArray[np.uint8]]) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: image,
|
|
Returns: 1D histogram of image using gradient magnitude and gradient angles
|
|
Works OK on many dimensions
|
|
"""
|
|
WIDTH = image.shape[0]
|
|
HEIGHT = image.shape[1]
|
|
WIDTH_8 = WIDTH // 8
|
|
HEIGHT_8 = HEIGHT // 8
|
|
|
|
def split_into_blocks(image):
|
|
blocks = []
|
|
for y in range(0, 7):
|
|
for x in range(0, 8):
|
|
if x == 7:
|
|
blocks.append(image[y*WIDTH_8 : (y +1)* WIDTH_8, 7*HEIGHT_8 : HEIGHT].copy()) # handle the last column
|
|
else:
|
|
blocks.append(image[y*WIDTH_8 : (y +1)* WIDTH_8, x*HEIGHT_8 : (x +1)* HEIGHT_8].copy())
|
|
|
|
for x in range(0, 7):
|
|
blocks.append(image[7*WIDTH_8 : WIDTH, x*HEIGHT_8 : (x +1)* HEIGHT_8].copy()) # handle the last row
|
|
blocks.append(image[7*WIDTH_8 : WIDTH, 7*HEIGHT_8 : HEIGHT].copy())
|
|
return blocks
|
|
|
|
angle_linspace = np.linspace(-np.pi, np.pi, num=8)
|
|
gm, da = gradient_magnitude(image, 1 )
|
|
gm_blocks = split_into_blocks(gm)
|
|
da_blocks = split_into_blocks(da)
|
|
|
|
histogram = []
|
|
for ix in range(64):
|
|
# Gradient magnitude and angles
|
|
gm_l = gm_blocks[ix].reshape(-1)
|
|
da_l = da_blocks[ix].reshape(-1)
|
|
|
|
accumulator = np.zeros(8)
|
|
binned = np.digitize(da_l, angle_linspace)
|
|
for i in range(binned.size):
|
|
accumulator[binned[i] - 1] += gm_l[i]
|
|
|
|
# Normalize accumulator
|
|
histogram.extend(accumulator / np.sum(accumulator))
|
|
|
|
histogram = np.array(histogram)
|
|
return histogram / np.sum(histogram)
|
|
|
|
def compare_two_histograms(h1: npt.NDArray[np.float64], h2: npt.NDArray[np.float64],
|
|
method: DistanceMeasure) -> float:
|
|
"""
|
|
Accepts two histograms and method of comparison
|
|
Returns distance between them
|
|
"""
|
|
|
|
if method == DistanceMeasure.euclidian_distance:
|
|
d = np.sqrt(np.sum(np.square(h1 - h2)))
|
|
elif method == DistanceMeasure.chi_square_distance:
|
|
d = 0.5 * np.sum(np.square(h1 - h2) / (h1 + h2 + np.finfo(float).eps))
|
|
elif method == DistanceMeasure.intersection_distance:
|
|
d = 1 - np.sum(np.minimum(h1, h2))
|
|
elif method == DistanceMeasure.hellinger_distance:
|
|
d = np.sqrt(0.5 * np.sum(np.square(np.sqrt(h1) - np.sqrt(h2))))
|
|
else:
|
|
raise Exception('Unsuported method chosen!')
|
|
|
|
return d.astype(float)
|
|
|
|
|
|
def apply_mask_on_image(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
mask: npt.NDArray[np.uint8]) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts image and applys mask to image
|
|
"""
|
|
image = image.copy()
|
|
|
|
mask = np.expand_dims(mask, axis=2)
|
|
image = mask * image
|
|
|
|
return image
|
|
|
|
def simple_convolution(signal: npt.NDArray[np.float64], kernel: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: signal & kernel
|
|
|
|
Returns: convolved signal with a kernel
|
|
"""
|
|
N = int(np.ceil(len(kernel) / 2 - 1))
|
|
n_conv = signal.size - kernel.size + 1
|
|
print(N, signal.size -N, n_conv)
|
|
|
|
convolved_signal = np.zeros(len(signal))
|
|
rev_kernel = kernel[::-1].copy()
|
|
|
|
for i in range(n_conv):
|
|
convolved_signal[i] = np.dot(signal[i: i+kernel.size], rev_kernel) # Well if you would add i+N then you wuold shift this
|
|
|
|
return convolved_signal
|
|
|
|
def simple_convolution_improved(signal: npt.NDArray[np.float64], kernel: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: signal & kernel
|
|
|
|
Returns: convolved signal with a kernel
|
|
Improved method replicates edges of an signal
|
|
"""
|
|
signal_len = signal.size
|
|
kernel_len = kernel.size
|
|
signal = signal.copy()
|
|
|
|
# Calculate which values to fill in and range
|
|
EDGE_FRONT = signal[0]
|
|
EDGE_BACK = signal[-1]
|
|
EXTEND_RANGE = int(np.floor(kernel.size / 2))
|
|
|
|
# Append end insert edges
|
|
front = [ EDGE_FRONT for _ in range(EXTEND_RANGE)]
|
|
back = [ EDGE_BACK for _ in range(EXTEND_RANGE)]
|
|
signal = np.insert(signal, 1, front, axis=0)
|
|
signal = np.append(signal, back, axis=0)
|
|
|
|
convolved_signal = np.zeros(signal_len)
|
|
rev_kernel = kernel[::-1].copy()
|
|
n_conv = signal_len - kernel_len + 1
|
|
|
|
for i in range(n_conv):
|
|
convolved_signal[i] = np.dot(signal[i: i+kernel.size], rev_kernel)
|
|
|
|
return convolved_signal
|
|
|
|
def get_gaussian_kernel(sigma: float) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts sigma
|
|
Returns gaussian kernel
|
|
"""
|
|
# https://github.com/mikepound/convolve/blob/master/run.gaussian.py
|
|
kernel_size = int(2 * np.ceil(3*sigma) + 1)
|
|
k_min_max = np.ceil(3*sigma)
|
|
k_interval = np.arange(-k_min_max, k_min_max + 1.)
|
|
|
|
result = (1. / (np.sqrt(2. * np.pi )* sigma)) * np.exp(- (np.square(k_interval)) / (2.*np.square(sigma)))
|
|
|
|
assert(kernel_size == len(result))
|
|
|
|
return result / np.sum(result)
|
|
|
|
def sharpen_image(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sharpen_factor=1.0) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts: image & sharpen factor
|
|
|
|
Returns: sharpened image
|
|
https://blog.demofox.org/2022/02/26/image-sharpening-convolution-kernels/ <-- sharpening kernel, but also on slides
|
|
good explanation
|
|
"""
|
|
sharpened_image = image.copy()
|
|
|
|
KERNEL = np.array([[-1, -1, -1],
|
|
[-1, 17, -1],
|
|
[-1, -1,-1]]) * 1./9. * sharpen_factor
|
|
|
|
if image.dtype.type == np.float64:
|
|
sharpened_image = cv2.filter2D(sharpened_image, cv2.CV_64F, KERNEL)
|
|
elif image.dtype.type == np.uint8:
|
|
sharpened_image = cv2.filter2D(sharpened_image, cv2.CV_8U, KERNEL)
|
|
|
|
return sharpened_image
|
|
|
|
def gaussfilter2D(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sigma: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts: image, sigma
|
|
Applies gaussian noise on image
|
|
returns: filtered_image
|
|
"""
|
|
filtered_image = image.copy()
|
|
kernel = np.array([get_gaussian_kernel(sigma)])
|
|
filtered_image = cv2.filter2D(filtered_image, cv2.CV_64F, kernel)
|
|
filtered_image = cv2.filter2D(filtered_image, cv2.CV_64F, kernel.T)
|
|
|
|
return filtered_image
|
|
|
|
def gaussdx(sigma: float) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts sigma
|
|
Returns gaussian kernel
|
|
"""
|
|
kernel_size = int(2 * np.ceil(3*sigma) + 1)
|
|
k_min_max = np.ceil(3*sigma)
|
|
k_interval = np.arange(-k_min_max, k_min_max + 1.)
|
|
|
|
result = - (1. / (np.sqrt(2. * np.pi )* np.power(sigma, 3))) * k_interval* np.exp(- (np.square(k_interval)) / (2.*np.square(sigma)))
|
|
|
|
assert(kernel_size == len(result))
|
|
|
|
return result / np.sum(np.abs(result))
|
|
|
|
|
|
def simple_median(signal: npt.NDArray[np.float64], width: int) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: signal & width
|
|
returns signal improved using median filter
|
|
"""
|
|
if width % 2 == 0:
|
|
raise Exception('No u won\'t do that')
|
|
|
|
signal = signal.copy()
|
|
for i in range(len(signal) - int(np.ceil(width/2))):
|
|
middle_element = int(i + np.floor(width/2))
|
|
signal[middle_element] = np.median(signal[i:i+width])
|
|
return signal
|
|
|
|
def apply_median_method_2D(image:Union[npt.NDArray[np.float64],
|
|
npt.NDArray[np.uint8]], width: int) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts: image & filter width
|
|
returns: image with median filter applied
|
|
"""
|
|
if width % 2 == 0:
|
|
raise Exception('No u won\'t do that')
|
|
|
|
image = image.copy()
|
|
W_HALF = int(np.floor(width/2))
|
|
padded_image = np.pad(image, W_HALF, mode='edge')
|
|
IMAGE_HEIGHT = image.shape[0] # y
|
|
IMAGE_WIDTH = image.shape[1] # x
|
|
|
|
for x in range(W_HALF, IMAGE_WIDTH): # I think we can start from 0, cuz we padded an image
|
|
for y in range(W_HALF, IMAGE_HEIGHT):
|
|
median_filter = np.zeros(0)
|
|
STARTX = x - W_HALF
|
|
STARTY = y - W_HALF
|
|
for m in range(width):
|
|
median_filter = np.append(median_filter, padded_image[STARTY + m][STARTX: STARTX + width], axis=0)
|
|
if image.dtype.type == np.uint8:
|
|
image[y][x] = int(np.mean(median_filter))
|
|
else:
|
|
image[y][x] = np.mean(median_filter)
|
|
return image
|
|
|
|
def filter_laplace(image:Union[npt.NDArray[np.float64],
|
|
npt.NDArray[np.uint8]], sigma: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts: image & sigma
|
|
returns: image with laplace filter applied
|
|
"""
|
|
# Prepare unit impulse and gauss kernel
|
|
unit_impulse = np.zeros((1, 2 * int(np.ceil(3*sigma)) + 1))
|
|
unit_impulse[0][int(np.ceil(unit_impulse.size /2)) - 1]= 1
|
|
gauss_kernel = np.array([get_gaussian_kernel(sigma)])
|
|
assert(len(gauss_kernel[0]) == len(unit_impulse[0]))
|
|
|
|
laplacian_filter = unit_impulse - gauss_kernel[0]
|
|
|
|
# Now apply laplacian filter
|
|
applied_by_x = cv2.filter2D(image, -1, laplacian_filter)
|
|
applied_by_y = cv2.filter2D(applied_by_x, -1, laplacian_filter.T)
|
|
|
|
return applied_by_y
|
|
|
|
def gauss_noise(I, magnitude=.1) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: image & magnitude
|
|
Returns: image with gaussian noise applied
|
|
"""
|
|
# input: image, magnitude of noise
|
|
# output: modified image
|
|
I = I.copy()
|
|
|
|
return I + np.random.normal(size=I.shape) * magnitude
|
|
|
|
|
|
def sp_noise(I, percent=.1) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: image & percent
|
|
Returns: image with salt and pepper noise applied
|
|
"""
|
|
# input: image, percent of corrupted pixels
|
|
# output: modified image
|
|
|
|
res = I.copy()
|
|
res[np.random.rand(I.shape[0], I.shape[1]) < percent / 2] = 1
|
|
res[np.random.rand(I.shape[0], I.shape[1]) < percent / 2] = 0
|
|
|
|
return res
|
|
|
|
def sp_noise1D(signal, percent=.1) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: signal & percent
|
|
Returns: signal with salt and pepper noise applied
|
|
"""
|
|
signal = signal.copy()
|
|
signal[np.random.rand(signal.shape[0]) < percent / 2] = 2
|
|
signal[np.random.rand(signal.shape[0]) < percent / 2] = 1
|
|
signal[np.random.rand(signal.shape[0]) < percent / 2] = 4
|
|
signal[np.random.rand(signal.shape[0]) < percent / 2] = 0.4
|
|
return signal
|
|
|
|
def sum_two_grayscale_images(image_a: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
image_b :Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts: image_a, image_b
|
|
Returns: image_a + image_b
|
|
"""
|
|
# Merge image_a and image_b
|
|
return (image_a + image_b)/ 2
|
|
|
|
def generate_dirac_impulse(size: int) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: size
|
|
Returns: dirac impulse of size
|
|
"""
|
|
|
|
dirac_impulse = np.zeros((size, size))
|
|
dirac_impulse[int(size/2), int(size/2)] = 1
|
|
|
|
return dirac_impulse
|
|
|
|
def derive_image_by_x(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sigma: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts: image
|
|
Returns: image derived by x
|
|
"""
|
|
image = image.copy()
|
|
gaussd = np.array([gaussdx(sigma)])
|
|
gauss = np.array([get_gaussian_kernel(sigma)])
|
|
gaussd = np.flip(gaussd, axis=1)
|
|
|
|
applied_by_y = cv2.filter2D(image, cv2.CV_64F, gauss.T)
|
|
applied_by_x = cv2.filter2D(applied_by_y, cv2.CV_64F, gaussd)
|
|
|
|
return applied_by_x
|
|
|
|
def derive_image_by_y(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sigma: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts: image
|
|
Returns: image derived by y
|
|
"""
|
|
image = image.copy()
|
|
gaussd = np.array([gaussdx(sigma)])
|
|
gauss = np.array([get_gaussian_kernel(sigma)])
|
|
gaussd = np.flip(gaussd, axis=1)
|
|
|
|
applied_by_x = cv2.filter2D(image, cv2.CV_64F, gauss)
|
|
applied_by_y = cv2.filter2D(applied_by_x, cv2.CV_64F, gaussd.T)
|
|
|
|
return applied_by_y
|
|
|
|
def derive_image_first_order(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sigma: float) -> tuple[Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]]:
|
|
"""
|
|
Accepts: image
|
|
returns: image derived by x, image derived by y
|
|
"""
|
|
return derive_image_by_x(image, sigma), derive_image_by_y(image, sigma)
|
|
|
|
def derive_image_second_order(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sigma: float) -> tuple[tuple[Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]], tuple[Union[npt.NDArray[np.float64],
|
|
npt.NDArray[np.uint8]], Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]]]:
|
|
"""
|
|
Accepts: image
|
|
Returns: Ixx, Ixy, Iyx, Iyy
|
|
"""
|
|
derived_by_x = derive_image_by_x(image, sigma)
|
|
derived_by_y = derive_image_by_y(image, sigma)
|
|
|
|
return derive_image_first_order(derived_by_x, sigma), derive_image_first_order(derived_by_y, sigma)
|
|
|
|
def gradient_magnitude(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sigma: float) -> tuple[Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]]:
|
|
"""
|
|
Accepts: image
|
|
Returns: gradient magnitude of image and derivative angles
|
|
"""
|
|
Ix, Iy = derive_image_first_order(image, sigma)
|
|
return np.sqrt(Ix**2 + Iy**2), np.arctan2(Iy, Ix)
|
|
|
|
|
|
def find_edges_primitive(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sigma: float, theta: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Aceppts: image, sigma & theta
|
|
Returns: image with edges
|
|
"""
|
|
derivative_magnitude, _ = gradient_magnitude(image, sigma)
|
|
|
|
binary_mask = np.zeros_like(derivative_magnitude)
|
|
binary_mask[(derivative_magnitude >= theta)] = 1
|
|
|
|
return binary_mask
|
|
|
|
|
|
def find_edges_nms(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sigma: float, theta: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Aceppts: image, sigma & theta
|
|
Returns: image with edges
|
|
"""
|
|
step_size = np.pi/8
|
|
|
|
|
|
def get_gradient_orientation(angle: float) -> tuple[tuple[int, int], tuple[int, int]]:
|
|
"""
|
|
Accepts: angle
|
|
Returns: indexes of gradient orientation (x, y), (x, y)
|
|
Basically walks around the unit circle and returns the indexes of the closest angle
|
|
"""
|
|
angle = angle % np.pi
|
|
|
|
for i in range(0, 8):
|
|
if angle >= i * step_size and angle <= (i+1) * step_size:
|
|
if i == 0 or i == 7:
|
|
return (0, 1), (0, -1)
|
|
elif i == 1 or i == 2:
|
|
return (-1, -1), (1, 1)
|
|
elif i == 3 or i == 4:
|
|
return (-1, 0), (1, 0)
|
|
elif i == 5 or i == 6:
|
|
return (-1, 1), (1, -1)
|
|
raise ValueError(f"Angle {angle} is not in range")
|
|
|
|
|
|
derivative_magnitude, derivative_angles = gradient_magnitude(image, sigma)
|
|
reduced_magnitude = derivative_magnitude.copy()
|
|
# Set low elements to 0
|
|
reduced_magnitude[(derivative_magnitude <= theta)] = 0
|
|
|
|
|
|
for y in range(1, reduced_magnitude.shape[0]-1):
|
|
for x in range(1, reduced_magnitude.shape[1]-1):
|
|
gp1, gp2 = get_gradient_orientation(derivative_angles[y, x])
|
|
|
|
if reduced_magnitude[y, x] < reduced_magnitude[y+gp1[0], x+gp1[1]] or reduced_magnitude[y, x] < reduced_magnitude[y+gp2[0], x+gp2[1]]:
|
|
reduced_magnitude[y, x] = 0
|
|
|
|
return reduced_magnitude
|
|
|
|
|
|
def find_edges_canny(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
sigma: float, theta: float, t_low: float, t_high: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Aceppts: image, sigma, theta, t_low, t_high
|
|
Returns: image with edges
|
|
"""
|
|
|
|
image_nms = find_edges_nms(image, sigma, theta)
|
|
image_nms[image_nms < t_low] = 0
|
|
image_nms[image_nms >= t_low] = 1
|
|
|
|
image_nms = image_nms.astype(np.uint8)
|
|
new_image = np.zeros_like(image_nms)
|
|
|
|
num_labels, labels, _, _ = cv2.connectedComponentsWithStats(image_nms, connectivity=4, ltype=cv2.CV_32S)
|
|
|
|
for i in range (1, num_labels):
|
|
idxs = np.where(image_nms[labels == i] > t_high)
|
|
|
|
if idxs != []:
|
|
new_image[labels == i] = 1
|
|
|
|
return new_image
|
|
|
|
|
|
def hough_transform_a_point(x: int, y: int, n_bins: int) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: x, y, n_bins
|
|
Returns: x, y transformed into hough space
|
|
"""
|
|
theta_values = np.linspace(-np.pi/2, np.pi, n_bins)
|
|
|
|
cos_theta = np.cos(theta_values)
|
|
sin_theta = np.sin(theta_values)
|
|
accumlator = np.zeros((n_bins, n_bins))
|
|
|
|
for i in range(n_bins):
|
|
r = np.round(x * cos_theta[i] + y * sin_theta[i]) + n_bins/2
|
|
accumlator[int(r), i] += 1
|
|
|
|
return accumlator
|
|
|
|
def hough_transform_a_circle(image: Union[npt.NDArray[np.float64] , npt.NDArray[np.uint8]],
|
|
edged_image: Union[npt.NDArray[np.float64] , npt.NDArray[np.uint8]],
|
|
r_start: int, r_end: int, treshold: float) -> npt.NDArray[np.float64]:
|
|
"""
|
|
Accepts: image, r_start, r_end
|
|
Returns: hough space
|
|
"""
|
|
|
|
edged_image = edged_image.copy()
|
|
edged_image[edged_image < treshold] = 0
|
|
|
|
accumlator = np.zeros((edged_image.shape[0], edged_image.shape[1], r_end - r_start))
|
|
indices = np.argwhere(edged_image)
|
|
|
|
gm, ga = gradient_magnitude(image, 1)
|
|
|
|
# Loop through all nonzero pixels above treshold
|
|
for i in tqdm(range(len(indices)), desc='Hough transform'):
|
|
for r in range(0, r_end - r_start):
|
|
y, x = indices[i]
|
|
a = x - r * np.cos(ga[y, x])
|
|
b = y - r * np.sin(ga[y, x])
|
|
accumlator[int(a), int(b), r ] += 1
|
|
return accumlator
|
|
|
|
|
|
def hough_find_lines(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
|
|
n_bins_theta: int, n_bins_rho: int, treshold: float) -> npt.NDArray[np.uint64]:
|
|
""""
|
|
Accepts: bw image with lines, n_bins_theta, n_bins_rho, treshold
|
|
Returns: image points above treshold transformed into hough space
|
|
"""
|
|
|
|
image = image.copy()
|
|
image[image < treshold] = 0
|
|
theta_values = np.linspace(-np.pi/2, np.pi/2, n_bins_theta)
|
|
|
|
D = np.sqrt(image.shape[0]**2 + image.shape[1]**2)
|
|
rho_values = np.linspace(-D, D, n_bins_rho)
|
|
accumulator = np.zeros((n_bins_rho, n_bins_theta), dtype=np.uint64)
|
|
|
|
cos_precalculated = np.cos(theta_values)
|
|
sin_precalculated = np.sin(theta_values)
|
|
|
|
y_s, x_s = np.nonzero(image)
|
|
|
|
# Loop through all nonzero pixels above treshold
|
|
for i in tqdm(range(len(y_s)), desc='Hough transform'):
|
|
x = x_s[i]
|
|
y = y_s[i]
|
|
|
|
# Precalculate rhos
|
|
rhos = np.round(x* cos_precalculated + y* sin_precalculated).astype(np.int64)
|
|
|
|
# Bin the rhos
|
|
binned = np.digitize(rhos, rho_values) -1
|
|
|
|
# Add to accumulator
|
|
for theta in range(n_bins_theta):
|
|
accumulator[binned[theta], theta] += 1
|
|
|
|
return accumulator
|
|
|
|
def hough_find_lines_i(image_with_lines: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]], gradient_angles: npt.NDArray[np.float64],
|
|
gradient_magnitude: npt.NDArray[np.float64],
|
|
n_bins_theta: int, n_bins_rho: int, treshold: float) -> Union[npt.NDArray[np.uint64], npt.NDArray[np.float64]]:
|
|
""""
|
|
Accepts: bw image with lines, n_bins_theta, n_bins_rho, treshold
|
|
Returns: image points above treshold transformed into hough space
|
|
"""
|
|
|
|
image = image_with_lines.copy()
|
|
image[image < treshold] = 0
|
|
theta_values = np.linspace(-np.pi/2, np.pi/2, n_bins_theta)
|
|
D = np.sqrt(image.shape[0]**2 + image.shape[1]**2)
|
|
rho_values = np.linspace(-D, D, n_bins_rho)
|
|
accumulator = np.zeros((n_bins_rho, n_bins_theta), dtype=np.float64)
|
|
|
|
cos_precalculated = np.cos(theta_values)
|
|
sin_precalculated = np.sin(theta_values)
|
|
|
|
indices = np.argwhere(image)
|
|
|
|
# Loop through all nonzero pixels above treshold
|
|
for i in tqdm(range(len(indices)), desc='Hough transform'):
|
|
y, x = indices[i]
|
|
angle = (np.mod(gradient_angles[y, x] + np.pi/2 , np.pi)) - np.pi/2
|
|
|
|
theta = np.digitize(angle, theta_values) -1
|
|
rho = np.round(x* cos_precalculated[theta] + y* sin_precalculated[theta]).astype(np.float64)
|
|
binned_rho = np.digitize(rho, rho_values) - 1 # cuz digitize is returning bin number + 1
|
|
|
|
# Add to accumulator
|
|
accumulator[binned_rho, theta] += gradient_magnitude[y, x]
|
|
|
|
return accumulator
|
|
|
|
def nonmaxima_suppression_box(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
|
|
"""
|
|
Accepts: image with sinusoids in hough space
|
|
Returns: image with sinusoids
|
|
"""
|
|
nms_image = np.zeros_like(image)
|
|
|
|
for y in range(1, image.shape[0]-1):
|
|
for x in range(1, image.shape[1]-1):
|
|
neighbours = image[y - 1 : y + 2, x - 1 : x + 2]
|
|
if image[y, x] >= np.max(neighbours):
|
|
nms_image[y, x] = image[y, x]
|
|
|
|
return nms_image
|
|
|
|
def retrieve_hough_pairs(original_image: npt.NDArray[np.float64], hough_image: npt.NDArray[np.uint64],
|
|
treshold: int, n_bins_theta: int, n_bins_rho: int) -> list[tuple[int, int]]:
|
|
"""
|
|
Accepts: original image, image with sinusoids in hough space, treshold, n_bins theta in h space, n_bins rho in h space
|
|
Returns: list of pairs of sinusoids
|
|
"""
|
|
theta_values = np.linspace(-np.pi/2, np.pi/2, n_bins_theta)
|
|
D = np.sqrt(original_image.shape[0]**2 + original_image.shape[1]**2)
|
|
rho_values = np.linspace(-D, D, n_bins_rho)
|
|
|
|
hough_image = hough_image.copy()
|
|
hough_image[hough_image < treshold] = 0
|
|
|
|
y_s, x_s = np.nonzero(hough_image)
|
|
|
|
pairs = []
|
|
for i in range(len(x_s)):
|
|
x = x_s[i] # theta values
|
|
y = y_s[i] # rho values
|
|
|
|
theta = theta_values[x]
|
|
rho = rho_values[y]
|
|
|
|
pairs.append((rho, theta))
|
|
return pairs
|
|
|
|
def select_best_pairs(image_line_params, n =10):
|
|
"""
|
|
Accepts: list of pairs of sinusoids
|
|
Returns: reduced and prioritized list of pairs of sinusoids
|
|
"""
|
|
image_line_params = np.array(image_line_params)
|
|
# Sorts just kth element so every eleement before kth element is lower than kth element
|
|
# and every element after kth element is higher than kth element
|
|
partition = np.argpartition(image_line_params, kth=len(image_line_params) - n - 1, axis=0)[-n:]
|
|
image_line_params = image_line_params[partition.T[0]]
|
|
return image_line_params
|
|
|
|
def get_line_to_plot(rho, theta, h, w):
|
|
"""
|
|
Accepts: rho, theta, image height h, image width w
|
|
Returns: line to plot
|
|
usage example: plt.plot(uz_image.get_line_to_plot(rho, theta, h, w), 'r', linewidth=.7)
|
|
"""
|
|
|
|
c = np.cos(theta)
|
|
s = np.sin(theta)
|
|
|
|
xs = []
|
|
ys = []
|
|
if s != 0:
|
|
y = int(rho / s)
|
|
if 0 <= y < h:
|
|
xs.append(0)
|
|
ys.append(y)
|
|
|
|
y = int((rho - w * c) / s)
|
|
if 0 <= y < h:
|
|
xs.append(w - 1)
|
|
ys.append(y)
|
|
if c != 0:
|
|
x = int(rho / c)
|
|
if 0 <= x < w:
|
|
xs.append(x)
|
|
ys.append(0)
|
|
|
|
x = int((rho - h * s) / c)
|
|
if 0 <= x < w:
|
|
xs.append(x)
|
|
ys.append(h - 1)
|
|
|
|
return xs[:2], ys[:2]
|
|
|
|
def hessian_points(image: Union[npt.NDArray[np.float64],
|
|
npt.NDArray[np.uint8]], sigma: float,
|
|
treshold: float) \
|
|
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
|
|
"""
|
|
Accepts: image, sigma, treshold
|
|
Returns: image with hessian points
|
|
"""
|
|
image = image.copy()
|
|
(ixx, ixy), (iyx, iyy) = derive_image_second_order(image, sigma)
|
|
|
|
# Calculate determinant
|
|
determinant = ixx * iyy - ixy * iyx
|
|
|
|
# Apply nonmaxima suppression
|
|
points = nonmaxima_suppression_box(determinant)
|
|
|
|
points = np.argwhere(points > treshold)
|
|
|
|
return determinant.astype(np.float64), points.astype(np.float64)
|
|
|
|
def harris_detector(image: Union[npt.NDArray[np.float64],
|
|
npt.NDArray[np.uint8]], sigma: float, treshold: float,
|
|
alpha = 0.06) \
|
|
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
|
|
"""
|
|
Accepts: image, sigma, treshold
|
|
"""
|
|
image = image.copy()
|
|
|
|
(ix, iy) = derive_image_first_order(image, sigma)
|
|
|
|
ix2 = np.square(ix)
|
|
iy2 = np.square(iy)
|
|
ixy = np.multiply(ix, iy)
|
|
|
|
# Apply subsequent smoothing with gaussian kernel
|
|
gauss = np.array([get_gaussian_kernel(sigma * 1.6)]) # 1.6 is empirically chosen
|
|
ix2 = cv2.filter2D(ix2, cv2.CV_64F, gauss)
|
|
iy2 = cv2.filter2D(iy2, cv2.CV_64F, gauss)
|
|
ixy = cv2.filter2D(ixy, cv2.CV_64F, gauss)
|
|
ix2 = cv2.filter2D(ix2, cv2.CV_64F, gauss.T)
|
|
iy2 = cv2.filter2D(iy2, cv2.CV_64F, gauss.T)
|
|
ixy = cv2.filter2D(ixy, cv2.CV_64F, gauss.T)
|
|
|
|
|
|
determinant = ix2 * iy2 - np.square(ixy)
|
|
trace = ix2 + iy2
|
|
|
|
# Calculate harris response
|
|
features = determinant - alpha * np.square(trace)
|
|
|
|
nms_features = nonmaxima_suppression_box(features)
|
|
points = np.argwhere(nms_features > treshold)
|
|
|
|
return features.astype(np.float64), points.astype(np.float64)
|
|
|
|
def simple_descriptors(I, Y, X, n_bins = 16, radius = 40, sigma = 2)\
|
|
-> npt.NDArray[np.float64]:
|
|
"""
|
|
Computes descriptors for locations given in X and Y.
|
|
|
|
I: Image in grayscale.
|
|
Y: list of Y coordinates of locations. (Y: index of row from top to bottom)
|
|
X: list of X coordinates of locations. (X: index of column from left to right)
|
|
|
|
Returns: tensor of shape (len(X), n_bins^2), so for each point a feature of length n_bins^2.
|
|
"""
|
|
|
|
assert np.max(I) <= 1, "Image needs to be in range [0, 1]"
|
|
assert I.dtype == np.float64, "Image needs to be in np.float64"
|
|
# Additional assertions for dumb programmers as me
|
|
assert len(X) == len(Y), "X and Y need to have same length"
|
|
assert len(X) > 0, "X and Y need to have at least one element"
|
|
|
|
g = get_gaussian_kernel(sigma)
|
|
d = gaussdx(sigma)
|
|
|
|
Ix = convolve(I, g.T, d)
|
|
Iy = convolve(I, g, d.T)
|
|
Ixx = convolve(Ix, g.T, d)
|
|
Iyy = convolve(Iy, g, d.T)
|
|
|
|
mag = np.sqrt(Ix ** 2 + Iy ** 2)
|
|
mag = np.floor(mag * ((n_bins - 1) / np.max(mag)))
|
|
|
|
feat = Ixx + Iyy
|
|
feat += abs(np.min(feat))
|
|
feat = np.floor(feat * ((n_bins - 1) / np.max(feat)))
|
|
|
|
desc = []
|
|
|
|
for y, x in zip(Y, X):
|
|
miny = max(y - radius, 0)
|
|
maxy = min(y + radius, I.shape[0])
|
|
minx = max(x - radius, 0)
|
|
maxx = min(x + radius, I.shape[1])
|
|
miny, maxy, minx, maxx = int(miny), int(maxy), int(minx), int(maxx) # Convert to int for indexing
|
|
r1 = mag[miny:maxy, minx:maxx].reshape(-1)
|
|
r2 = feat[miny:maxy, minx:maxx].reshape(-1)
|
|
|
|
a = np.zeros((n_bins, n_bins))
|
|
for m, l in zip(r1, r2):
|
|
a[int(m), int(l)] += 1
|
|
|
|
a = a.reshape(-1)
|
|
a /= np.sum(a)
|
|
|
|
desc.append(a)
|
|
|
|
return np.array(desc)
|
|
|
|
def find_correspondences(img_a_descriptors: npt.NDArray[np.float64],
|
|
img_b_descriptors: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
|
|
correspondances = []
|
|
"""
|
|
Accepts: img_a_descriptors, img_b_descriptors
|
|
Returns: correspondances indices
|
|
"""
|
|
|
|
# Find img_a correspondences
|
|
for idx, descriptor_a in enumerate(img_a_descriptors):
|
|
dists = np.sqrt(0.5 * np.sum(np.square(np.sqrt(descriptor_a) - np.sqrt(img_b_descriptors)), axis=1))
|
|
min_dist_idx = np.argmin(dists)
|
|
correspondances.append((idx, min_dist_idx))
|
|
|
|
return np.array(correspondances)
|
|
|
|
def display_matches(I1, pts1, I2, pts2) \
|
|
-> None:
|
|
"""
|
|
Displays matches between images.
|
|
|
|
I1, I2: Image in grayscale.
|
|
pts1, pts2: Nx2 arrays of coordinates of feature points for each image (first column is x, second is y coordinates)
|
|
"""
|
|
|
|
assert I1.shape[0] == I2.shape[0] and I1.shape[1] == I2.shape[1], "Images need to be of the same size."
|
|
|
|
I = np.hstack((I1, I2))
|
|
w = I1.shape[1]
|
|
plt.imshow(I, cmap='gray')
|
|
|
|
for p1, p2 in zip(pts1, pts2):
|
|
x1 = p1[0]
|
|
y1 = p1[1]
|
|
x2 = p2[0]
|
|
y2 = p2[1]
|
|
plt.plot(x1, y1, 'bo', markersize=3)
|
|
plt.plot(x2 + w, y2, 'bo', markersize=3)
|
|
plt.plot([x1, x2 + w], [y1, y2], 'r', linewidth=.8)
|
|
|
|
plt.show()
|
|
|
|
def find_matches(image_a: npt.NDArray[np.float64],
|
|
image_b: npt.NDArray[np.float64],
|
|
sigma=3, treshold=1e-6) \
|
|
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
|
|
"""
|
|
Finds matches between two images.
|
|
|
|
image_a, image_b: Image in grayscale.
|
|
|
|
Returns: tuple of two arrays of shape (N, 2) where N is the number of matches.
|
|
"""
|
|
|
|
# Get the keypoints
|
|
_, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold)
|
|
_, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold)
|
|
|
|
print("[+] Keypoints detected")
|
|
|
|
# Get the descriptors
|
|
image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1])
|
|
image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1])
|
|
|
|
print("[+] Descriptors computed")
|
|
|
|
# Find correspondences
|
|
correspondences_a = find_correspondences(image_a_descriptors, image_b_descriptors)
|
|
correspondences_b = find_correspondences(image_b_descriptors, image_a_descriptors)
|
|
|
|
print("[+] Correspondences found")
|
|
|
|
def select_best_correspondences(c_a, d_a, c_b, d_b):
|
|
#Find correspondances that map into same index a->b (b)
|
|
unique, counts = np.unique(c_a[:, 1], return_counts=True)
|
|
# Find all b's
|
|
duplicates = unique[counts > 1]
|
|
for duplicate in duplicates:
|
|
# Find the indices of the duplicates find all a's
|
|
indices = np.where(c_a[:, 1] == duplicate)[0]
|
|
# Extract those from the descriptors
|
|
candidates = d_a[indices]
|
|
# Extract comparing distance from b's descriptors
|
|
comparing_distance = d_b[duplicate]
|
|
# Find the closest one using hellingers distance
|
|
dists = np.sqrt(0.5 * np.sum(np.square(np.sqrt(comparing_distance) - np.sqrt(candidates)), axis=1))
|
|
# Select the best one
|
|
min_dist_idx = np.argmin(dists)
|
|
# and map that candidate back to the indces index
|
|
candidate = indices[min_dist_idx]
|
|
candidate = c_a[candidate]
|
|
#if np.flip(candidate) in c_b:
|
|
# # Remove it from the indices so it does not get removed in the next step
|
|
# indices = np.delete(indices, min_dist_idx)
|
|
#else:
|
|
# print(f'[i] select_best_correspondences -> Not reciprocal {np.flip(candidate)}')
|
|
#Remove remainig from the correspondences
|
|
c_a = np.delete(c_a, indices, axis=0)
|
|
|
|
return c_a
|
|
|
|
correspondences_a = select_best_correspondences(correspondences_a, image_a_descriptors, correspondences_b, image_b_descriptors)
|
|
correspondences_b = select_best_correspondences(correspondences_b, image_b_descriptors, correspondences_a, image_a_descriptors)
|
|
|
|
print("[+] Correspondences filtered")
|
|
def check_repciporacvbillity(c_a, c_b):
|
|
for _, match in enumerate(c_a):
|
|
if np.flip(match) not in c_b:
|
|
print(f'[i] check_repciporacvbillity -> Not reciprocal {match}')
|
|
index = np.where((c_a == match).all(axis=1))[0][0]
|
|
c_a = np.delete(c_a, index, axis=0)
|
|
return c_a
|
|
|
|
correspondences_a = check_repciporacvbillity(correspondences_a, correspondences_b)
|
|
correspondences_b = check_repciporacvbillity(correspondences_b, correspondences_a)
|
|
|
|
print("[+] Correspondences reciprocated")
|
|
|
|
# Now map correspondences to the keypoints
|
|
def map_indexes_to_points(a_points, b_points, corrs_a, corrs_b, img_a_k, img_b_k):
|
|
for correspondence in corrs_a:
|
|
ix = np.argwhere(correspondence[0] == corrs_b[:, 1])
|
|
if ix.size > 0:
|
|
a_points.append(np.flip(img_a_k[correspondence[0]]))
|
|
b_points.append(np.flip(img_b_k[correspondence[1]]))
|
|
|
|
image_a_points = []
|
|
image_b_points = []
|
|
|
|
map_indexes_to_points(image_a_points, image_b_points, correspondences_a, correspondences_b, image_a_keypoints, image_b_keypoints)
|
|
map_indexes_to_points(image_b_points, image_a_points, correspondences_b, correspondences_a, image_b_keypoints, image_a_keypoints)
|
|
print("[+] Correspondences mapped to points")
|
|
|
|
return np.array(image_a_points),np.array(image_b_points)
|
|
|
|
|
|
def estimate_homography(keypoints: npt.NDArray[np.float64]) \
|
|
-> npt.NDArray[np.float64]:
|
|
|
|
"""
|
|
[x_r1 yr_1 1 0 0 0 -x_t1*x_r1 -x_t1*yr_1 -x_t1]
|
|
[0 0 0 x_r1 yr_1 1 -y_t1*x_r1 -y_t1*yr_1 -y_t1]
|
|
....
|
|
Accepts a set of keypoints and returns the homography matrix.
|
|
"""
|
|
|
|
# Construct the A matrix
|
|
A = np.zeros((2 * keypoints.shape[0], 9))
|
|
|
|
for ix, pair in enumerate(keypoints):
|
|
x_r, y_r, x_t, y_t = pair
|
|
A[ix*2] = [x_r, y_r, 1, 0, 0, 0, -x_t*x_r, -x_t*y_r, -x_t]
|
|
A[ix*2+1] = [0, 0, 0, x_r, y_r, 1, -y_t*x_r, -y_t*y_r, -y_t]
|
|
|
|
# Perform SVD
|
|
_, _, V = np.linalg.svd(A)
|
|
# Compute vector h
|
|
h = V[-1] / V[-1][-1]
|
|
# Reshape to 3x3
|
|
H = h.reshape(3, 3)
|
|
|
|
return H
|
|
|
|
def ransac(image_a: npt.NDArray[np.float64], correspondences_a: npt.NDArray[np.float64],
|
|
image_b: npt.NDArray[np.float64], correspondences_b: npt.NDArray[np.float64],
|
|
iterations: int = 5000,
|
|
threshold: float = 3) \
|
|
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
|
|
"""
|
|
RANSAC algorithm for estimating homography.
|
|
Accepts two images and their corresponding keypoints.
|
|
Returns the best homography matrix and the inliers.
|
|
"""
|
|
# Find the best homography
|
|
best_inliers = []
|
|
best_homography = []
|
|
|
|
for i in tqdm(range(iterations), desc='RANSAC'):
|
|
# Randomly sample 4 correspondences
|
|
sample = np.random.choice(correspondences_a.shape[0], 4, replace=False)
|
|
# correspondance_a[0] (x,y) -> correspondance_b[0] (x,y)
|
|
sample_a = correspondences_a[sample]
|
|
sample_b = correspondences_b[sample]
|
|
# Make it (x,y) -> (x,y) = x,y,x,y matrix
|
|
keypoints = np.concatenate((sample_a, sample_b), axis=1)
|
|
# Estimate homography
|
|
homography = estimate_homography(keypoints)
|
|
# Compute the inliers
|
|
inlier_indices = []
|
|
# Calculate the distance between the transformed points and the actual points
|
|
# and count the number of inliers
|
|
for i, correspondence in enumerate(zip(correspondences_a, correspondences_b)):
|
|
(x_r, y_r), (x_t, y_t) = correspondence
|
|
res = np.dot(homography, [x_r, y_r, 1])
|
|
# Make sure that res has last element 1
|
|
res = res / res[-1]
|
|
x_t_, y_t_ = res[:2]
|
|
# Compute the distance
|
|
distance = np.sqrt((x_t - x_t_)**2 + (y_t - y_t_)**2)
|
|
# Check if it is an inlier
|
|
if distance < threshold:
|
|
inlier_indices.append(i)
|
|
|
|
inliers_proportion = len(inlier_indices)/len(correspondences_a)
|
|
|
|
# Check if we have a new best homography
|
|
if inliers_proportion > 0.2:
|
|
homography_2 = estimate_homography(np.concatenate((correspondences_a[inlier_indices], correspondences_b[inlier_indices]), axis=1))
|
|
inlier_indices_2 = []
|
|
for i, correspondence in enumerate(zip(correspondences_a, correspondences_b)):
|
|
(x_r, y_r), (x_t, y_t) = correspondence
|
|
res = np.dot(homography_2, [x_r, y_r, 1])
|
|
# Make sure that res has last element 1
|
|
res = res / res[-1]
|
|
x_t_, y_t_ = res[:2]
|
|
# Compute the distance
|
|
distance = np.sqrt((x_t - x_t_)**2 + (y_t - y_t_)**2)
|
|
# Check if it is an inlier
|
|
if distance < threshold:
|
|
inlier_indices_2.append(i)
|
|
|
|
if len(inlier_indices_2) / len(correspondences_a) > len(best_inliers) / len(correspondences_a):
|
|
best_inliers = inlier_indices_2
|
|
best_homography = homography_2
|
|
|
|
|
|
best_keypoints = np.concatenate((correspondences_a[best_inliers], correspondences_b[best_inliers]), axis=1)
|
|
|
|
if best_homography == []:
|
|
raise Exception("Ransac did not converge")
|
|
|
|
return best_homography.astype(np.float64), best_keypoints
|
|
|
|
|
|
def sift(grayscale_image: npt.NDArray[np.float64],
|
|
plot=False, get_descriptors=False) \
|
|
-> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
|
|
"""
|
|
SIFT algorithm for finding keypoints and descriptors.
|
|
"""
|
|
grayscale_image = grayscale_image.copy()
|
|
|
|
def number_of_ocatves(image):
|
|
"""
|
|
Calculate the number of octaves.
|
|
"""
|
|
return int(np.log2(min(image.shape)))
|
|
|
|
# Firstly downcale the image three times
|
|
different_scale_images = []
|
|
different_scale_images.append(grayscale_image)
|
|
downsample_size = number_of_ocatves(grayscale_image)
|
|
for i in range(downsample_size):
|
|
different_scale_images.append(cv2.pyrDown(different_scale_images[-1]))
|
|
|
|
def generateGaussianKernels(sigma, num_intervals):
|
|
"""
|
|
Generate list of gaussian kernels at which to blur the input image.
|
|
Default values of sigma, intervals, and octaves follow section 3 of Lowe's paper.
|
|
(I stole this from the internet)
|
|
"""
|
|
num_images_per_octave = num_intervals + 3
|
|
k = 2 ** (1. / num_intervals)
|
|
gaussian_kernels = np.zeros(num_images_per_octave) # scale of gaussian blur necessary to go from one blur scale to the next within an octave
|
|
gaussian_kernels[0] = sigma
|
|
|
|
for image_index in range(1, num_images_per_octave):
|
|
sigma_previous = (k ** (image_index - 1)) * sigma
|
|
sigma_total = k * sigma_previous
|
|
gaussian_kernels[image_index] = np.sqrt(sigma_total ** 2 - sigma_previous ** 2)
|
|
return gaussian_kernels
|
|
|
|
|
|
# Blur different scale images with gaussian blur num_of_octaves times
|
|
downsampled_and_blurred_images = []
|
|
gauss_kernels = generateGaussianKernels(1.6, downsample_size)
|
|
for i in range(len(different_scale_images)):
|
|
image = different_scale_images[i]
|
|
images = [image] # Do not blur the first image
|
|
for kernel in gauss_kernels[1:]:
|
|
images.append(gaussfilter2D(images[-1], kernel))
|
|
downsampled_and_blurred_images.append(np.array(images))
|
|
|
|
# Plot all downsampled and blurred images
|
|
if plot:
|
|
fig,axs=plt.subplots(len(downsampled_and_blurred_images), len(downsampled_and_blurred_images[0]), figsize=(20, 20))
|
|
fig.suptitle('Downsampled and blurred images')
|
|
for i in range(len(downsampled_and_blurred_images)):
|
|
for j in range(len(downsampled_and_blurred_images[i])):
|
|
axs[i, j].imshow(downsampled_and_blurred_images[i][j], cmap='gray')
|
|
|
|
plt.show()
|
|
|
|
# Compute the difference of gaussian
|
|
DOG = [[] for _ in range(len(downsampled_and_blurred_images))]
|
|
for i in range(len(downsampled_and_blurred_images)):
|
|
for j in range(len(downsampled_and_blurred_images[i])-1):
|
|
DOG[i].append(np.array(downsampled_and_blurred_images[i][j+1] - downsampled_and_blurred_images[i][j]))
|
|
|
|
# Plot the difference of gaussian
|
|
if plot:
|
|
fig,axs=plt.subplots(len(DOG), len(DOG[0]), figsize=(20, 20))
|
|
fig.suptitle('Difference of gaussian')
|
|
for i in range(len(DOG)):
|
|
for j in range(len(DOG[i])):
|
|
axs[i, j].imshow(DOG[i][j], cmap='gray')
|
|
plt.show()
|
|
|
|
def is_extremum(image_prev_part, image_part, image_next_part):
|
|
"""
|
|
Check if the given image part is an extremum.
|
|
"""
|
|
compare_pixel = image_part[1, 1]
|
|
if np.abs(compare_pixel) < 0.01:
|
|
return False
|
|
if compare_pixel > 0:
|
|
if np.all(compare_pixel >= image_prev_part) and \
|
|
np.all(compare_pixel >= image_part[0,:]) and \
|
|
np.all(compare_pixel >= image_part[2,:]) and \
|
|
np.all(compare_pixel >= image_next_part):
|
|
return True # Local maximum
|
|
else:
|
|
if np.all(compare_pixel <= image_prev_part) and \
|
|
np.all(compare_pixel <= image_part) and \
|
|
np.all(compare_pixel <= image_part) and \
|
|
np.all(compare_pixel <= image_next_part):
|
|
return True # Local minimum
|
|
return False
|
|
|
|
##############################
|
|
# Find keypoints
|
|
##############################
|
|
keypoints = []
|
|
# Go throgh all image scales per octave
|
|
for i in tqdm(range(len(DOG)), desc='Finding SIFT keypoints'):
|
|
per_octave_images = DOG[i] # Retrive all images per octave
|
|
for j in range(1, len(per_octave_images)-1):
|
|
# Go through all images per octave by 3
|
|
image_prev, image, image_next = per_octave_images[j-1], per_octave_images[j], per_octave_images[j+1]
|
|
for y in range(8, image.shape[0]-8):
|
|
for x in range(8, image.shape[1]-8):
|
|
# Check if the pixel is a local extremum
|
|
if is_extremum(image_prev[y-1:y+2, x-1:x+2], image[y-1:y+2, x-1:x+2], image_next[y-1:y+2, x-1:x+2]):
|
|
# If keypoint is a good local extremum, add it to the list
|
|
keypoints.append((y, x, i, j)) # (y, x, octave, image scale)
|
|
##############################
|
|
|
|
# Compute descriptors
|
|
def compute_descriptors(keypoints):
|
|
"""
|
|
Compute the descriptors for the given keypoints.
|
|
"""
|
|
descriptors = []
|
|
|
|
for keypoint in keypoints:
|
|
y, x = keypoint[:2]
|
|
octave, img_ix = keypoint[2:]
|
|
# Get the image
|
|
image = downsampled_and_blurred_images[octave][img_ix]
|
|
|
|
def compute_mag_and_angle(image, y, x):
|
|
# Compute the gradient magnitude and orientation at the point
|
|
dx = image[y+1, x] - image[y-1, x]
|
|
dy = image[y, x+1] - image[y, x-1]
|
|
magnitude = np.sqrt(dx**2 + dy**2)
|
|
orientation = np.arctan2(dy, dx)
|
|
return magnitude, orientation
|
|
_, orientation = compute_mag_and_angle(image, y, x)
|
|
|
|
if orientation < 0:
|
|
orientation %= np.pi
|
|
|
|
def create_indexes(y, x, x_offset, y_offset):
|
|
# Create indexes for the given offsets
|
|
indexes = []
|
|
for i in range(x_offset):
|
|
for j in range(y_offset):
|
|
indexes.append((y+j, x+i))
|
|
return np.array(indexes)
|
|
|
|
# Map orientation to which direction to look at
|
|
if orientation < np.pi/8: # Right
|
|
# Get indexes
|
|
indexes = create_indexes(y, x, 8, 8)
|
|
elif orientation < 3*np.pi/8: # Right up
|
|
indexes = create_indexes(y-8, x, 8, 8)
|
|
elif orientation < 5*np.pi/8: # Up
|
|
indexes = create_indexes(y-4, x-8, 8, 8)
|
|
elif orientation < 7*np.pi/8: # Left up
|
|
indexes = create_indexes(y-8, x-8, 8, 8)
|
|
else: # Left
|
|
indexes = create_indexes(y-4, x-8, 8, 8)
|
|
|
|
# Split indexes into 4 blocks
|
|
blocks = np.array_split(indexes, 4)
|
|
|
|
histogram_space = np.linspace(0, np.pi, 8)
|
|
all_histograms = []
|
|
for block in blocks:
|
|
# Split each block in 4x4 cells and compute histogram for each cell
|
|
cells = np.array_split(block, 4)
|
|
histograms = []
|
|
for cell in cells:
|
|
histogram = np.zeros(8)
|
|
for y, x in cell:
|
|
magnitude, orientation = compute_mag_and_angle(image,y, x)
|
|
# Compute the histogram
|
|
if orientation < 0:
|
|
orientation %= np.pi
|
|
# Find the bin
|
|
bin = np.digitize(orientation, histogram_space)
|
|
histogram[bin - 1] += magnitude
|
|
histograms.append(histogram)
|
|
# Concatenate the histograms
|
|
histograms = np.concatenate(histograms)
|
|
# Smooth the histogram
|
|
all_histograms.append(histograms/np.sum(histograms))
|
|
# Concatenate the histograms
|
|
all_histograms = np.concatenate(all_histograms)
|
|
# Normalize the histogram
|
|
all_histograms = all_histograms / np.sum(all_histograms)
|
|
descriptors.append(all_histograms)
|
|
return descriptors
|
|
|
|
if get_descriptors:
|
|
descriptors = compute_descriptors(keypoints)
|
|
else:
|
|
descriptors = []
|
|
# Rescale the keypoints, as the images were downsampled
|
|
keypoints = np.array(keypoints)
|
|
for keypoint in keypoints:
|
|
if keypoint[2] > 0:
|
|
keypoint[0] *= 2 * keypoint[2]
|
|
keypoint[1] *= 2 * keypoint[2]
|
|
|
|
# Remove two last column from keypoints
|
|
keypoints = keypoints[:, :-2]
|
|
|
|
return np.array(keypoints), np.array(descriptors)
|
|
|
|
|
|
def get_disparity(left_image: npt.NDArray[np.float64], right_image: npt.NDArray[np.float64],\
|
|
window_size: int = 35, patch_size: int = 11, reduce_size: float = 0.3) -> npt.NDArray[np.float64]:
|
|
|
|
if(patch_size % 2 == 0 or window_size % 2 == 0):
|
|
raise ValueError("Patch/window size must be odd")
|
|
|
|
# Reduce the size of the images
|
|
left_image = cv2.resize(left_image, None, fx=reduce_size, fy=reduce_size)
|
|
right_image = cv2.resize(right_image, None, fx=reduce_size, fy=reduce_size)
|
|
|
|
disparity = np.zeros(left_image.shape)
|
|
|
|
def get_patches_per_row(y, image, patch_size):
|
|
# Get the patches for the given y value
|
|
dd = patch_size//2
|
|
patches = []
|
|
for x in range(dd, image.shape[1] - dd, 1):
|
|
patches.append(image[y-dd:y+dd, x-dd:x+dd+1])
|
|
return np.array(patches)
|
|
|
|
def compute_ncc(a, bs):
|
|
nccs = []
|
|
a = a - np.mean(a)
|
|
for b in bs:
|
|
b = b - np.mean(b)
|
|
nccs.append(np.sum(a*b) / (np.sqrt(np.sum(a**2)) * np.sqrt(np.sum(b**2))))
|
|
return np.array(nccs)
|
|
|
|
# Get the patches
|
|
dd = patch_size//2
|
|
dw = window_size//2
|
|
|
|
for y in tqdm(range(dd, left_image.shape[0] - dd, 1), desc="Computing disparity"):
|
|
# Get the patches for the right image
|
|
right_patches = get_patches_per_row(y, right_image, patch_size)
|
|
left_patches = get_patches_per_row(y, left_image, patch_size)
|
|
dd = patch_size//2
|
|
dw = window_size//2
|
|
xlen = len(left_patches)
|
|
|
|
for ix in range(xlen):
|
|
# Compute the NCC
|
|
x = ix + dd
|
|
patch = left_patches[ix]
|
|
|
|
window = right_patches[max(0, ix-dw):min(xlen, ix+dw+1)]
|
|
ncc = compute_ncc(patch, window)
|
|
|
|
# Get the index of the maximum value
|
|
index = np.argmax(ncc) + 1
|
|
|
|
# Compute the disparity
|
|
disparity[y, x] = np.abs(dw - index)
|
|
|
|
return disparity
|
|
|
|
def normalize_points(P):
|
|
# P must be a Nx2 vector of points
|
|
# first coordinate is x, second is y
|
|
|
|
# returns: normalized points in homogeneous coordinates and 3x3 transformation matrix
|
|
|
|
mu = np.mean(P, axis=0) # mean
|
|
scale = np.sqrt(2) / np.mean(np.sqrt(np.sum((P-mu)**2,axis=1))) # scale
|
|
T = np.array([[scale, 0, -mu[0]*scale],[0, scale, -mu[1]*scale],[0,0,1]]) # transformation matrix
|
|
P = np.hstack((P,np.ones((P.shape[0],1)))) # homogeneous coordinates
|
|
res = np.dot(T,P.T).T
|
|
return res, T
|
|
|
|
def draw_epiline(l,h,w):
|
|
# l: line equation (vector of size 3)
|
|
# h: image height
|
|
# w: image width
|
|
|
|
x0, y0 = map(int, [0, -l[2]/l[1]])
|
|
x1, y1 = map(int, [w-1, -(l[2]+l[0]*w)/l[1]])
|
|
|
|
plt.plot([x0,x1],[y0,y1],'r')
|
|
|
|
plt.ylim([0,h])
|
|
plt.gca().invert_yaxis()
|
|
|
|
def fundamential_matrix(P1, P2):
|
|
# P1 and P2 are Nx2 vectors of points
|
|
|
|
# returns: fundamental matrix
|
|
N = P1.shape[0]
|
|
# normalize points
|
|
P1n, T1 = normalize_points(P1)
|
|
P2n, T2 = normalize_points(P2)
|
|
|
|
# compute fundamental matrix
|
|
A = np.zeros((N,9))
|
|
for i in range(N):
|
|
u = P1n[i,0]
|
|
v = P1n[i,1]
|
|
u_ = P2n[i,0]
|
|
v_ = P2n[i,1]
|
|
A[i,:] = [u*u_, u_*v, u_, u*v_, v*v_, v_, u, v, 1]
|
|
|
|
U,S,V = np.linalg.svd(A)
|
|
# F is the last column of V
|
|
F = V[-1,:].reshape((3,3))
|
|
|
|
# enforce rank 2
|
|
U,S,V = np.linalg.svd(F)
|
|
S[-1] = 0
|
|
F = np.dot(U,np.dot(np.diag(S),V))
|
|
|
|
# denormalize
|
|
F = np.dot(T2.T,np.dot(F,T1))
|
|
|
|
return F
|
|
|
|
|
|
def get_epipolar_lines(p1, p2):
|
|
F = fundamential_matrix(p1, p2)
|
|
|
|
p1 = np.hstack((p1, np.ones((p1.shape[0], 1))))
|
|
p2 = np.hstack((p2, np.ones((p2.shape[0], 1))))
|
|
|
|
lines_p2 = np.dot(F, p1.T).T
|
|
lines_p1 = np.dot(F.T, p2.T).T
|
|
|
|
return lines_p1, lines_p2
|
|
|
|
|
|
def reprojection_error(F, p1, p2):
|
|
|
|
# Add a column of ones to the points, even if there is single point
|
|
p1 = np.hstack((p1, np.ones((p1.shape[0], 1))))
|
|
p2 = np.hstack((p2, np.ones((p2.shape[0], 1))))
|
|
|
|
lines_right = []
|
|
lines_left = []
|
|
for p_left, p_right in zip(p1, p2):
|
|
# Left to right
|
|
lines_right.append(np.dot(F, p_left).T)
|
|
# Right to left
|
|
lines_left.append(np.dot(F.T, p_right).T)
|
|
|
|
lines_right = np.array(lines_right)
|
|
lines_left = np.array(lines_left)
|
|
|
|
# Normalize
|
|
lines_right = lines_right / lines_right[:, 2][:, np.newaxis]
|
|
lines_left = lines_left / lines_left[:, 2][:, np.newaxis]
|
|
|
|
# Compute the distances
|
|
d_left = np.abs(np.sum(lines_right * p2, axis=1)) / np.sqrt(lines_right[:, 0] ** 2 + lines_right[:, 1] ** 2)
|
|
d_right = np.abs(np.sum(lines_left * p1, axis=1)) / np.sqrt(lines_left[:, 0] ** 2 + lines_left[:, 1] ** 2)
|
|
|
|
repr_err = np.sum((d_left + d_right) ) / (2 * p1.shape[0])
|
|
|
|
return repr_err
|
|
|
|
|
|
def triangulate(p1, p2, P1, P2):
|
|
# p1, p2: Nx2 vectors of points
|
|
# P1, P2: 3x4 projection matrices
|
|
|
|
# returns: 3D points
|
|
|
|
# Add a column of ones to the points, even if there is single point
|
|
p1 = np.hstack((p1, np.ones((p1.shape[0], 1))))
|
|
p2 = np.hstack((p2, np.ones((p2.shape[0], 1))))
|
|
|
|
D_points = []
|
|
|
|
def get_matrix_from_vec(a):
|
|
return np.array([[0, -a[2], a[1]], [a[2], 0, -a[0]], [-a[1], a[0], 0]])
|
|
|
|
for p_left, p_right in zip(p1, p2):
|
|
# Build A1 and A2
|
|
X1 = get_matrix_from_vec(p_left)
|
|
X2 = get_matrix_from_vec(p_right)
|
|
A1 = np.dot(X1, P1)
|
|
A2 = np.dot(X2, P2)
|
|
# Construct A
|
|
A = np.vstack((A1, A2)) # Compute the SVD
|
|
U, S, V = np.linalg.svd(A)
|
|
# Get the last column of V
|
|
X = V[-1, :]
|
|
# Normalize
|
|
X = X / X[-1]
|
|
|
|
D_points.append(X[:-1])
|
|
|
|
return np.array(D_points)
|
|
|
|
def ransac_fundamental(correspondences_a: npt.NDArray[np.float64], correspondences_b: npt.NDArray[np.float64],
|
|
iterations: int = 5000,
|
|
threshold: float = 3):
|
|
"""
|
|
RANSAC algorithm for estimating homography.
|
|
Accepts two images and their corresponding keypoints.
|
|
Returns the best homography matrix and the inliers.
|
|
"""
|
|
# Find the best homography
|
|
best_inliers = []
|
|
best_fund_matrix = None
|
|
|
|
for i in tqdm(range(iterations), desc='RANSAC'):
|
|
# Randomly sample 4 correspondences
|
|
sample = np.random.choice(correspondences_a.shape[0], 8, replace=False)
|
|
# correspondance_a[0] (x,y) -> correspondance_b[0] (x,y)
|
|
sample_a = correspondences_a[sample]
|
|
sample_b = correspondences_b[sample]
|
|
# Make it (x,y) -> (x,y) = x,y,x,y matrix
|
|
# Estimate homography
|
|
fundamential_matrix_x = fundamential_matrix(sample_a, sample_b)
|
|
# Compute the inliers
|
|
inlier_indices = []
|
|
# Calculate the distance between the transformed points and the actual points
|
|
# and count the number of inliers
|
|
for i, correspondence in enumerate(zip(correspondences_a, correspondences_b)):
|
|
p1, p2 = correspondence
|
|
if reprojection_error(fundamential_matrix_x, np.array([p1]), np.array([p2])) < threshold:
|
|
inlier_indices.append(i)
|
|
|
|
inliers_proportion = len(inlier_indices)/len(correspondences_a)
|
|
print(inliers_proportion)
|
|
# Check if we have a new best homography
|
|
if inliers_proportion > 0.15:
|
|
fundamential_matrix_x_2 = fundamential_matrix(correspondences_a[inlier_indices], correspondences_b[inlier_indices])
|
|
inlier_indices_2 = []
|
|
for i, correspondence in enumerate(zip(correspondences_a, correspondences_b)):
|
|
p1, p2 = correspondence
|
|
if reprojection_error(fundamential_matrix_x_2, np.array([p1]), np.array([p2])) < threshold:
|
|
inlier_indices_2.append(i)
|
|
|
|
if len(inlier_indices_2) / len(correspondences_a) > len(best_inliers) / len(correspondences_a):
|
|
best_inliers = inlier_indices_2
|
|
best_fund_matrix = fundamential_matrix_x_2
|
|
|
|
return best_fund_matrix, correspondences_a[best_inliers], correspondences_b[best_inliers]
|