464 lines
19 KiB
Python
464 lines
19 KiB
Python
import numpy as np
|
||
import numpy.typing as npt
|
||
from matplotlib import pyplot as plt
|
||
import cv2
|
||
import uz_framework.image as uz_image
|
||
import uz_framework.text as uz_text
|
||
import os
|
||
|
||
####################################################
# EXERCISE 1: Global approach to image description #
####################################################
|
||
|
||
def ex1():
    """Run all parts of exercise 1 (histogram-based image description) in order."""
    for part in (one_a, one_b, one_c):
        part()

    # one_d yields the distances to every image plus those of the best matches;
    # both are handed on to one_e for plotting.
    all_distances, best_distances = one_d('./data/dataset', 8)
    one_e(all_distances, best_distances)
|
||
|
||
def one_a() -> npt.NDArray[np.float64]:
    """
    Task 1a: build N-D colour histograms for two test images.

    The assignment asks for a `myhist3`-style routine that bins a three-channel
    image (RGB here, but any colour space should work) into a normalized
    n_bins x n_bins x n_bins histogram, one bin index per channel. That routine
    is provided by the framework as `uz_image.get_image_bins_ND`; this function
    only exercises it.

    Loads lena.png and lincoln.jpg, computes a histogram with 128 bins for each,
    prints the distance between the two obtained with the framework's
    `euclidian_distance` measure, and returns the histogram of lena.png.
    """
    image_paths = ('./data/images/lena.png', './data/images/lincoln.jpg')
    lena_img, lincoln_img = (
        uz_image.imread(path, uz_image.ImageType.float64) for path in image_paths
    )
    lena_hist, lincoln_hist = (
        uz_image.get_image_bins_ND(img, 128) for img in (lena_img, lincoln_img)
    )

    distance = uz_image.compare_two_histograms(
        lena_hist, lincoln_hist, uz_image.DistanceMeasure.euclidian_distance
    )
    print(distance)
    return lena_hist
|
||
|
||
def one_b() -> None:
    """
    Task 1b: histogram distance measures (placeholder, does nothing).

    The assignment asks for a `compare_histograms` function that takes two
    histograms and an identifier of the distance measure and returns a single
    scalar describing their similarity (or distance). The required measures are
    the L2 metric, chi-square distance, intersection and Hellinger distance.

    The implementation lives in uz_framework, so there is nothing to run here.
    """
|
||
|
||
def one_c() -> None:
    """
    Task 1c: compare three dataset images through their colour histograms.

    Computes an 8-bin-per-channel histogram for object_01_1.png,
    object_02_1.png and object_03_1.png, flattens each to 1-D and shows the
    images (top row) above their histograms (bottom row). Each histogram title
    carries the `euclidian_distance` between that histogram and the one of the
    first image, rounded to two decimals.

    Question: Which image (object_02_1.png or object_03_1.png) is more similar
    to object_01_1.png under the L2 distance, and under the other three
    distances? All three histograms have one bin that is much higher than the
    others; which colour does that bin represent?
    Answer:
    """
    n_bins = 8
    euclid = uz_image.DistanceMeasure.euclidian_distance
    image_paths = (
        './data/dataset/object_01_1.png',
        './data/dataset/object_02_1.png',
        './data/dataset/object_03_1.png',
    )

    images = [uz_image.imread(path, uz_image.ImageType.float64) for path in image_paths]
    histograms = [
        uz_image.get_image_bins_ND(image, n_bins).reshape(-1) for image in images
    ]
    reference_hist = histograms[0]

    fig, axs = plt.subplots(2, 3)
    fig.suptitle('Euclidian distance between three images')

    # Top row: the images themselves.
    for col, (image, title) in enumerate(zip(images, ('Image1', 'Image2', 'Image3'))):
        axs[0, col].imshow(image)
        axs[0, col].set(title=title)

    # Bottom row: flattened histograms, titled with the distance to image 1.
    for col, (hist, label) in enumerate(zip(histograms, ('h1', 'h2', 'h3'))):
        axs[1, col].bar(np.arange(n_bins**3), hist, width=3)
        distance = uz_image.compare_two_histograms(reference_hist, hist, euclid)
        axs[1, col].set(title=f'L_2(h1, {label}) = {np.round(distance, 2)}')

    plt.show()
|
||
|
||
def find_position(a, ix):
    """
    Return the first index i for which a[i] == ix.

    Indices 0 .. len(a) - 1 are probed in order; None is returned when no
    element matches (including when `a` is empty).
    """
    return next((pos for pos in range(len(a)) if a[pos] == ix), None)
|
||
|
||
def one_d(directory: str, n_bins: int):
    """
    Task 1d: simple histogram-based image retrieval.

    Reads every file in `directory` as an image, computes its flattened colour
    histogram with `n_bins` bins per channel, and measures the distance between
    each histogram and the histogram of a fixed reference image using four
    measures (euclidian, chi-square, intersection, Hellinger). For every
    measure one figure is shown with the closest images (top row) and their
    histograms (bottom row), ordered by ascending distance.

    Args:
        directory: Path of the folder holding the images. Every entry returned
            by os.listdir is read as an image.
        n_bins: Number of histogram bins per colour channel.

    Returns:
        A tuple (all_dists, selected_distances). Both are lists with one inner
        list per measure, in the order listed above. all_dists[m] holds the
        distance of every image, in os.listdir order. selected_distances[m]
        holds the distances of the images that were displayed, ascending.

    Question: Which distance is in your opinion best suited for image
    retrieval? How does the retrieved sequence change if you use a different
    number of bins? Is the execution time affected by the number of bins?
    """
    methods = [
        uz_image.DistanceMeasure.euclidian_distance,
        uz_image.DistanceMeasure.chi_square_distance,
        uz_image.DistanceMeasure.intersection_distance,
        uz_image.DistanceMeasure.hellinger_distance,
    ]
    # Six columns per figure: presumably the reference image itself (it is part
    # of the dataset folder) plus its five nearest neighbours - confirm.
    n_shown = 6

    img_names = os.listdir(directory)
    all_dists = [[] for _ in methods]
    selected_distances = [[] for _ in methods]

    # NOTE(review): the reference image path is fixed and does not follow
    # `directory`; kept as is so existing callers see the same reference.
    compare_image = uz_image.imread('./data/dataset/object_05_4.png', uz_image.ImageType.float64)
    h_compare_image = uz_image.get_image_bins_ND(compare_image, n_bins).reshape(-1)

    imgs = []
    hists = []
    for img_name in img_names:
        current_image = uz_image.imread(f'{directory}/{img_name}', uz_image.ImageType.float64)
        imgs.append(current_image)
        current_image_histogram = uz_image.get_image_bins_ND(current_image, n_bins).reshape(-1)
        hists.append(current_image_histogram)
        # Distance from the reference to this image under every measure.
        for method_dists, method in zip(all_dists, methods):
            method_dists.append(
                uz_image.compare_two_histograms(h_compare_image, current_image_histogram, method)
            )

    for method, method_dists, method_selected in zip(methods, all_dists, selected_distances):
        fig, axs = plt.subplots(2, n_shown)
        fig.suptitle(f'Comparrison between different measures, using:{method.name}')

        # A stable argsort yields exactly one index per rank. The previous
        # value-matching search appended every index sharing a distance, so
        # ties displayed the wrong image and duplicated selected distances.
        # Slicing also copes with folders holding fewer than n_shown images.
        # Assumes each distance is a scalar - TODO confirm.
        closest = np.argsort(method_dists, kind='stable')[:n_shown]

        for col, ix in enumerate(closest):
            method_selected.append(method_dists[ix])
            axs[0, col].imshow(imgs[ix])
            axs[0, col].set(title=f'{img_names[ix]}')
            axs[1, col].bar(np.arange(n_bins**3), hists[ix], width=3)
            axs[1, col].set(title=f'd={np.round(method_dists[ix], 2)}')

        # Hide the columns that have no image to show.
        for col in range(len(closest), n_shown):
            axs[0, col].set_visible(False)
            axs[1, col].set_visible(False)

        plt.show()

    return all_dists, selected_distances
|
||
|
||
def one_e(distances: list, selected_dists: list):
    """
    Task 1e: plot all distances to the reference image, per distance measure.

    For every measure one figure with two line plots is shown: on the left the
    distances in their original (image index) order, on the right the same
    distances sorted ascending. The most similar images are marked with an
    orange circle in both plots.

    Args:
        distances: One list of distances per measure, in the order euclidian,
            chi-square, intersection, Hellinger (as returned by one_d).
        selected_dists: One list per measure with the distances that count as
            "most similar"; a point is marked when its distance is in the list.
    """
    methods = [
        uz_image.DistanceMeasure.euclidian_distance,
        uz_image.DistanceMeasure.chi_square_distance,
        uz_image.DistanceMeasure.intersection_distance,
        uz_image.DistanceMeasure.hellinger_distance,
    ]
    marker_style = dict(markerfacecolor="none", marker="o", markeredgecolor="orange")

    for i, method_dists in enumerate(distances):
        fig, axs = plt.subplots(1, 2)
        fig.suptitle(f'Using {methods[i].name}')
        indexes = np.arange(len(method_dists))

        # Positions (in the unsorted sequence) of the most similar images.
        marked = [j for j, dist in enumerate(method_dists) if dist in selected_dists[i]]

        axs[0].plot(indexes, method_dists, markevery=marked, **marker_style)
        # In the sorted plot the selected (smallest) distances come first. Mark
        # as many leading points as were marked on the left instead of a fixed
        # six, which is out of range when fewer than six distances exist.
        axs[1].plot(indexes, np.sort(method_dists), markevery=list(range(len(marked))), **marker_style)
        plt.show()
|
||
|
||
###########################
# EXERCISE 2: Convolution #
###########################
|
||
|
||
def ex2():
    """Run the implemented parts of exercise 2 (convolution) in order."""
    for part in (two_b, two_d, two_e):
        part()
|
||
|
||
def two_b():
    """
    Task 2b: 1-D convolution of a signal with a kernel.

    The assignment asks for `simple_convolution`, which convolves a 1-D signal
    I with a kernel k of size 2N + 1 on elements i = N .. |I| - N only; the
    first and last N elements are skipped, unlike in practice where the edges
    must be handled.

    Here the signal and kernel are read from ./data/signal.txt and
    ./data/kernel.txt with `uz_text.read_data`, convolved with
    `uz_image.simple_convolution`, and plotted in one figure together with the
    result of `cv2.filter2D` for comparison. The shapes should generally agree,
    while edge values and offset may differ because borders are not handled.

    Question: Can you recognize the shape of the kernel? What is the sum of the
    elements in the kernel? How does the kernel affect the signal?
    """
    signal = uz_text.read_data('./data/signal.txt')
    kernel = uz_text.read_data('./data/kernel.txt')

    own_result = uz_image.simple_convolution(signal, kernel)
    cv2_result = cv2.filter2D(signal, cv2.CV_64F, kernel)

    # (data, colour, legend label), drawn in this order.
    curves = (
        (own_result, 'tab:green', 'Result'),
        (cv2_result, 'tab:red', 'cv2'),
        (signal, 'tab:blue', 'Original'),
        (kernel, 'tab:orange', 'Kernel'),
    )
    for values, colour, legend_label in curves:
        plt.plot(values, color=colour, label=legend_label)

    plt.legend()
    plt.show()
|
||
|
||
def two_d():
    """
    Task 2d: plot Gaussian kernels for several values of sigma.

    The assignment asks for a function that builds a normalized Gaussian
    kernel from the parameter sigma. Because values beyond 3*sigma are very
    small, the kernel size is limited to 2 * ceil(3*sigma) + 1.

    Kernels for sigma = 0.5, 1, 2, 3 and 4 are obtained from
    `uz_image.get_gaussian_kernel` and drawn in one figure, each over the
    x range -ceil(3*sigma) .. ceil(3*sigma) so that they are aligned.
    """
    for sigma in (0.5, 1, 2, 3, 4):
        gauss = uz_image.get_gaussian_kernel(sigma)
        half_width = np.ceil(3 * sigma)
        # Symmetric support around zero; assumes the kernel has
        # 2 * half_width + 1 samples - TODO confirm against the framework.
        support = np.arange(-half_width, half_width + 1.)
        plt.plot(support, gauss, label=f'σ= {sigma}')

    plt.legend()
    plt.show()
|
||
|
||
def two_e():
    """
    Task 2e: check the associativity of convolution.

    Loads the signal from ./data/signal.txt and filters it in three ways with
    `cv2.filter2D`: first with a Gaussian kernel k1 (sigma = 2) and then with
    k2 = [0.1, 0.6, 0.4]; in the opposite order; and once with a combined
    kernel k3 obtained by filtering k1 with k2. The original signal and the
    three results are plotted side by side.
    """
    signal = uz_text.read_data('./data/signal.txt')
    k1 = uz_image.get_gaussian_kernel(2)
    # k2 is reversed before use. NOTE(review): presumably because filter2D
    # computes correlation rather than convolution - confirm.
    k2 = np.flip(np.array([0.1, 0.6, 0.4]))

    unfiltered = signal.copy()

    k1_then_k2 = cv2.filter2D(cv2.filter2D(signal, cv2.CV_64F, k1), cv2.CV_64F, k2)
    k2_then_k1 = cv2.filter2D(cv2.filter2D(signal, cv2.CV_64F, k2), cv2.CV_64F, k1)

    # Combined kernel: filter k1 with k2, then reverse the result.
    k3 = np.flip(cv2.filter2D(k1, cv2.CV_64F, k2))
    combined = cv2.filter2D(signal, cv2.CV_64F, k3)

    fig, axs = plt.subplots(1, 4)
    fig.suptitle('Convolution')

    panels = (
        (unfiltered, 's'),
        (k1_then_k2, '(s*k1)*k2'),
        (k2_then_k1, '(s*k2)*k1'),
        (combined, 's*(k1*k2)'),
    )
    for ax, (values, title) in zip(axs, panels):
        ax.plot(values)
        ax.set(title=title)

    plt.show()
|
||
|
||
###############################
# EXERCISE 3: Image Filtering #
###############################
|
||
|
||
def ex3():
    """Run all parts of exercise 3 (image filtering) in order."""
    for part in (three_a, three_b, three_c, three_d, three_e):
        part()
|
||
|
||
def three_a():
    """
    Task 3a: remove noise from an image with a separable Gaussian filter.

    The assignment asks for a `gaussfilter` that filters a 2-D image with a
    1-D Gaussian kernel along one dimension and then with the transposed
    kernel along the other, using `cv2.filter2D`.

    Here lena.png is loaded and converted to grayscale, then corrupted
    separately with Gaussian noise and with salt-and-pepper noise (framework
    helpers `gauss_noise` and `sp_noise`). Both corrupted images are filtered
    with a sigma = 2 Gaussian kernel and its transpose, and the original, the
    noisy and the filtered images are displayed.
    """
    lena = uz_image.imread('./data/images/lena.png', uz_image.ImageType.float64)
    gray = uz_image.transform_coloured_image_to_grayscale(lena.astype(np.float64))

    noisy_gauss = uz_image.gauss_noise(gray)
    noisy_sp = uz_image.sp_noise(gray)

    # The kernel must be 2-D, otherwise .T would not change its orientation.
    # NOTE(review): relies on get_gaussian_kernel returning a 2-D array - confirm.
    kernel = np.array(uz_image.get_gaussian_kernel(2))

    # Two passes per image: the kernel as is, then transposed.
    filtered_gauss = cv2.filter2D(noisy_gauss, cv2.CV_64F, kernel)
    filtered_gauss = cv2.filter2D(filtered_gauss, cv2.CV_64F, kernel.T)
    filtered_sp = cv2.filter2D(noisy_sp, cv2.CV_64F, kernel)
    filtered_sp = cv2.filter2D(filtered_sp, cv2.CV_64F, kernel.T)

    fig, axs = plt.subplots(2, 3)

    # ((row, column), image, title), drawn in this order.
    panels = (
        ((0, 0), gray, 'Orginal image'),
        ((0, 1), noisy_gauss, 'Gaussian noise applied'),
        ((1, 1), filtered_gauss, 'Denoised Lena'),
        ((0, 2), noisy_sp, 'Salt and Pepper applied'),
        ((1, 2), filtered_sp, 'Desalted Lena'),
    )
    for cell, picture, title in panels:
        axs[cell].imshow(picture, cmap='gray')
        axs[cell].set(title=title)

    # Nothing to show below the original image.
    axs[1, 0].set_visible(False)

    plt.show()
|
||
|
||
def three_b():
    """
    Task 3b: image sharpening.

    Loads museum.jpg as a grayscale uint8 image, sharpens it with
    `uz_image.sharpen_image` (second argument 1.2) and shows the original next
    to the sharpened result.
    """
    original = uz_image.imread_gray('./data/images/museum.jpg', uz_image.ImageType.uint8)
    sharpened = uz_image.sharpen_image(original, 1.2)

    fig, axs = plt.subplots(1, 2)
    fig.suptitle('Sharpening operation')

    for ax, picture, title in zip(axs, (original, sharpened), ('Original', 'Sharpened')):
        ax.imshow(picture, cmap='gray')
        ax.set(title=title)

    plt.show()
|
||
|
||
def three_c():
    """
    Task 3c: compare Gaussian and median filtering on a corrupted 1-D signal.

    Builds a 40-sample step signal (ones at indices 15..19, zeros elsewhere),
    corrupts it with `uz_image.sp_noise1D` (second argument 0.05) and filters
    the corrupted signal once with a sigma = 1.4 Gaussian kernel via
    `cv2.filter2D` and once with `uz_image.simple_median` (second argument 3).
    The four signals are plotted side by side.
    """
    def draw(panel, values, title):
        # Plot one signal into one subplot and label it.
        panel.plot(values)
        panel.set(title=title)

    clean_signal = np.zeros(40)
    clean_signal[15:20] = 1.0

    fig, axs = plt.subplots(1, 4)
    fig.suptitle('Signal manipulation')

    # Each signal is drawn right after it is produced, in this order.
    draw(axs[0], clean_signal, 'Original')

    noisy_signal = uz_image.sp_noise1D(clean_signal, 0.05)
    draw(axs[1], noisy_signal, 'Corrupted')

    gauss_kernel = uz_image.get_gaussian_kernel(1.4)
    gauss_filtered = cv2.filter2D(noisy_signal, cv2.CV_64F, gauss_kernel)
    draw(axs[2], gauss_filtered, 'Gauss')

    median_filtered = uz_image.simple_median(noisy_signal, 3)
    draw(axs[3], median_filtered, 'Median')

    plt.show()
|
||
|
||
|
||
def three_d():
    """
    Task 3d: 2-D median filter.

    The assignment asks for a 2-D median filter, tested on images corrupted by
    Gaussian and by salt-and-pepper noise and compared with the Gaussian
    filter for several noise intensities and filter sizes.

    This function covers the salt-and-pepper case: lena.png is converted to
    grayscale, corrupted with `uz_image.sp_noise`, filtered with
    `uz_image.apply_median_method_2D` (second argument 7) and the result is
    then sharpened with `uz_image.sharpen_image` (second argument 1). All four
    stages are displayed side by side.
    """
    lena = uz_image.imread('./data/images/lena.png', uz_image.ImageType.float64)
    gray = uz_image.transform_coloured_image_to_grayscale(lena.astype(np.float64))

    peppered = uz_image.sp_noise(gray)
    median_filtered = uz_image.apply_median_method_2D(peppered, 7)
    sharpened = uz_image.sharpen_image(median_filtered, 1)

    fig, axs = plt.subplots(1, 4)
    fig.suptitle('Common methods applied over Lena image')

    panels = (
        (gray, 'Orginal image'),
        (peppered, 'Salt and Pepper applied'),
        (median_filtered, 'Deppepeerd lena'),
        (sharpened, 'Sharpened lena'),
    )
    for ax, (picture, title) in zip(axs, panels):
        ax.imshow(picture, cmap='gray')
        ax.set(title=title)

    plt.show()
|
||
|
||
def three_e():
    """
    Task 3e: hybrid image from two portraits.

    The assignment asks to filter one image with a Gaussian and the other with
    a Laplacian filter and to merge them (regular or weighted average),
    experimenting with kernel sizes and weights.

    Here obama.jpg is filtered with `uz_image.filter_laplace` (second argument
    35) and lincoln.jpg with `uz_image.gaussfilter2D` (second argument 5); the
    two results are combined with `uz_image.sum_two_grayscale_images`. The
    inputs, both filtered images and the merged image are displayed.
    """
    obama = uz_image.imread_gray('./data/images/obama.jpg', uz_image.ImageType.float64)
    lincoln = uz_image.imread_gray('./data/images/lincoln.jpg', uz_image.ImageType.float64)

    obama_laplace = uz_image.filter_laplace(obama, 35)
    lincoln_gauss = uz_image.gaussfilter2D(lincoln, 5)
    hybrid = uz_image.sum_two_grayscale_images(obama_laplace, lincoln_gauss)

    fig, axs = plt.subplots(2, 3)
    fig.suptitle('Linoln and Obama')

    # ((row, column), image, title), drawn in this order.
    panels = (
        ((0, 0), lincoln, 'Lincoln'),
        ((1, 0), lincoln_gauss, 'Lincoln gauss'),
        ((0, 1), obama, 'Obama'),
        ((1, 1), obama_laplace, 'Obama laplace'),
        ((0, 2), hybrid, 'Merged'),
    )
    for cell, picture, title in panels:
        axs[cell].imshow(picture, cmap='gray')
        axs[cell].set(title=title)

    # No image belongs below the merged result.
    axs[1, 2].set_visible(False)

    plt.show()
|
||
|
||
|
||
# ######## #
|
||
# SOLUTION #
|
||
# ######## #
|
||
|
||
def main():
    """Run the selected exercises; only exercise 1 is currently enabled."""
    ex1()
    # Exercises 2 and 3 are switched off; uncomment to run them as well.
    # ex2()
    # ex3()
|
||
|
||
# Run the exercises only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|