import numpy as np import numpy.typing as npt from matplotlib import pyplot as plt import random import cv2 import uz_framework.image as uz_image ####################################### # EXCERCISE 1: Basic image processing # ####################################### def excercise_one() -> None: image = one_a() one_b(image) one_c(image) one_d(100, 200, 200, 400, image) one_e() def one_a() -> npt.NDArray[np.float64]: """ Read the image from the file umbrellas.jpg and display it """ umbrellas = uz_image.imread('./images/umbrellas.jpg', uz_image.ImageType.float64) plt.imshow(umbrellas) plt.suptitle("Umbrellas Picture") plt.show() return umbrellas def one_b(image: npt.NDArray[np.float64]) -> None: """ Convert the loaded image to grayscale. A very simple way of doing this is summing up the color channels and dividing the result by 3, effectively averaging the values. The issue, however, is that the sum easily reaches beyond the np.uint8 range. We can avoid that by casting the data to a floating point type. You can access a specific image channel using the indexing syntax like red = I[:,:,0]. """ grayscale_image = uz_image.transform_coloured_image_to_grayscale(image) plt.imshow(grayscale_image, cmap='gray') plt.suptitle("Umbrellas Grayscale") plt.show() def one_c(image: npt.NDArray[np.float64]) -> None: """ Cut and display a specific part of the loaded image. Extract only one of the channels so you get a grayscale image. You can do this by indexing along the first two axes, for instance: cutout=I[130:260, 240:450, 1]. You can display multiple images in a single window using plt.subplot(). Grayscale images can be displayed using different mappings (on a RGB monitor, every value needs to be mapped to a RGB triplet). Pyplot defaults to a color map named viridis, but often it is preferable to use a grayscale color map. This can be set with an additional argument to plt.imshow, like plt.imshow(I, cmap=’gray’). Question: Why would you use different color maps? Answer: """ cutout_0 = image[50:200, 100:400, 0] cutout_1 = image[50:200, 100:400, 1] cutout_2 = image[50:200, 100:400, 2] fig, axs = plt.subplots(1, 3) fig.suptitle('Displaying cutouts for different image channels') axs[0].imshow(cutout_0, cmap='gray') axs[0].set(title='Channel 0') axs[1].imshow(cutout_1, cmap='gray') axs[1].set(title='Channel 1') axs[2].imshow(cutout_2, cmap='gray') axs[2].set(title='Channel 2') plt.show() def one_d(startx: int, endx: int, starty: int, endy: int, image:npt.NDArray[np.float64]) -> None: """ You can also replace only a part of the image using indexing. Write a script that inverts a rectangular part of the image. This can be done pixel by pixel in a loop or by using indexing. (height, width, color) (x , y , color) y -> ################# x # # | # # v # # # # # # ################# Question: How is inverting a grayscale value defined for uint8 ? Answer: """ inverted_image = uz_image.invert_coloured_image_part(image, startx, endx, starty, endy) fig, axs = plt.subplots(1, 2) fig.suptitle("Lomberlini") axs[0].imshow(image) axs[0].set(title="Original Image") axs[1].imshow(inverted_image, vmax=255) axs[1].set(title="Inverted part of Image") plt.show() def one_e() -> None: """ Perform a reduction of grayscale levels in the image. First read the image from umbrellas.jpg and convert it to grayscale. You can write your own function for grayscale conversion or use the function in UZ_utils.py. Convert the grayscale image to floating point type. Then, rescale the image values so that the largest possible value is 63. Convert the image back to uint8 and display both the original and the modified image. Notice that both look the same. Pyplot tries to maximize the contrast in displayed images by checking their values and scaling them to cover the entire uint8 interval. If you want to avoid this, you need to set the maximum expected value when using plt.imshow(), like plt.imshow(I, vmax=255. Use this to display the resulting image so the change is visible. """ grayscale_image = uz_image.imread_gray("./images/umbrellas.jpg", uz_image.ImageType.float64) upscaled_grayscale_image = (grayscale_image.copy() * 63).astype(np.uint8) fig, axs = plt.subplots(1, 2) fig.suptitle("Lomberlini") axs[0].imshow(grayscale_image, cmap="gray") axs[0].set(title="Original grayscale image") axs[1].imshow(upscaled_grayscale_image, vmax=255, cmap="gray") axs[1].set(title="Upscaled grayscale image") plt.show() ############################################ # EXCERCISE 2: Thresholding and histograms # ############################################ def excercise_two() -> None: """ Thresholding an image is an operation that produces a binary image (mask) of the same size where the value of pixels is determined by whether the value of the corresponding pixels in the source image is greater or lower than the given threshold. """ two_a() two_b('./images/bird.jpg', 100, 20) two_c('./images/bird.jpg', 20, 100) two_d() two_e(uz_image.imread_gray('./images/bird.jpg', uz_image.ImageType.uint8).astype(np.uint8)) def two_a() -> tuple[npt.NDArray[np.float64], npt.NDArray[np.uint8]]: """ Create a binary mask from a grayscale image. The binary mask is a matrix the same size as the image which contains 1 where some condition holds and 0 everywhere else. In this case the condition is simply the original image intensity. Use the image bird.jpg. Display both the image and the mask. """ random_number = random.random() image = uz_image.imread_gray("./images/bird.jpg", uz_image.ImageType.float64) TRESHOLD = uz_image.calculate_best_treshold_using_otsu_method(image) / 255 binary_mask = image.copy() if random_number < 0.5: binary_mask[binary_mask < TRESHOLD] = 0 binary_mask[binary_mask >= TRESHOLD] = 1 else: binary_mask = np.where(binary_mask < TRESHOLD, 0, 1) binary_mask = binary_mask.astype(np.uint8) fig, axs = plt.subplots(1, 2) fig.suptitle("Birdie and its mask") axs[0].imshow(image, cmap="gray") axs[0].set(title="Original image") axs[1].imshow(binary_mask, cmap="gray") axs[1].set(title="Mask of birdie") plt.show() return (image, binary_mask) def two_b(image_path: str, number_of_bins_first: int, number_of_bins_second: int) -> None: """ Write a function myhist that accepts a grayscale image and the number of bins that will be used in building a histogram. The function should return a 1D array that represents the image histogram (the size should be equal to the number of bins, of course). The histogram is simply a count of pixels with same (or similar) intensity for all bins. You can assume the values of the image are within the interval [0,255]. If you use fewer than 255 bins, intensities will have to be grouped together, e.g. if using 10 bins, all values on the interval [0,25] will fall into bin 0. Write a script that calculates and displays histograms for different numbers of bins using bird.jpg Question: The histograms are usually normalized by dividing the result by the sum of all cells. Why is that? Answer: """ image = uz_image.imread_gray(image_path, uz_image.ImageType.uint8) H1 = uz_image.get_image_bins(image, number_of_bins_first) H2 = uz_image.get_image_bins(image, number_of_bins_second) fig, axs = plt.subplots(1, 3) fig.suptitle("Birdie and histograms") axs[0].imshow(image, cmap="gray") axs[0].set(title="Birdie image") axs[1].bar(np.arange(number_of_bins_first), H1) axs[1].set(title="100 bins") axs[2].bar(np.arange(number_of_bins_second), H2) axs[2].set(title="20 bins") plt.show() def two_c(image_path: str, number_of_bins_first: int, number_of_bins_second: int) -> None: """ Modify your function myhist to no longer assume the uint8 range for values. Instead, it should find the maximum and minimum values in the image and calculate the bin ranges based on these values. Write a script that shows the difference between both versions of the function. """ image_uint8 = uz_image.imread_gray(image_path, uz_image.ImageType.uint8) image_float64 = uz_image.imread_gray(image_path, uz_image.ImageType.float64) H01 = uz_image.get_image_bins(image_uint8, number_of_bins_first) H02 = uz_image.get_image_bins(image_uint8, number_of_bins_second) H11 = uz_image.get_image_bins(image_float64, number_of_bins_first) H12 = uz_image.get_image_bins(image_float64, number_of_bins_second) fig, axs = plt.subplots(2, 3) fig.suptitle("Comparison between two histograms") axs[0, 0].imshow(image_float64, cmap="gray") axs[0, 0].set(title="Grayscale image in float64 representation") axs[0, 1].bar(np.arange(number_of_bins_first), H01) axs[0, 1].set(title=f'{number_of_bins_first} bins used') axs[0, 2].bar(np.arange(number_of_bins_second), H02) axs[0, 2].set(title=f'{number_of_bins_second} bins used') axs[1, 0].imshow(image_uint8, cmap="gray") axs[1, 0].set(title="Grayscale image in uint8 representation") axs[1, 1].bar(np.arange(number_of_bins_first), H11) axs[1, 1].set(title=f'{number_of_bins_first} bins used') axs[1, 2].bar(np.arange(number_of_bins_second), H12) axs[1, 2].set(title=f'{number_of_bins_second} bins used') plt.show() def two_d() -> None: """ Test myhist function on images (three or more) of the same scene in different lighting conditions. One way to do this is to capture several images using your web camera and change the lighting of the room. Visualize the histograms for all images for different number of bins and interpret the results. """ imgs = [] bins = [20, 60, 100] imgs.append(uz_image.imread_gray("./images/ROOM_LIGHTS_ON.jpg", uz_image.ImageType.float64)) # light imgs.append(uz_image.imread_gray("./images/ONE_ROOM_LIGH_ON.jpg", uz_image.ImageType.float64)) # darker imgs.append(uz_image.imread_gray("./images/DARK.jpg", uz_image.ImageType.float64)) # dark fig, axs = plt.subplots(3, 4) fig.suptitle("Me and my histograms") for i in range(3): for j in range(3): axs[i, j+1].bar(np.arange(bins[j]), uz_image.get_image_bins(imgs[i], bins[j])) axs[i, j+1].set(title=f"Using {bins[j]} bins") axs[0, 0].imshow(imgs[0], cmap="gray") axs[0, 0].set(title="Image in light conditions") axs[1, 0].imshow(imgs[1], cmap="gray") axs[1, 0].set(title="Image in darker conditions") axs[2, 0].imshow(imgs[2], cmap="gray") axs[2, 0].set(title="Image in dark conditions") plt.show() def two_e(image: npt.NDArray[np.uint8]): """ Implement Otsu’s method for automatic threshold calculation. It should accept a grayscale image and return the optimal threshold. Using normalized histograms, the probabilities of both classes are easy to calculate. Write a script that shows the algorithm’s results on different images. References: https://en.wikipedia.org/wiki/Otsu%27s_method """ return uz_image.calculate_best_treshold_using_otsu_method(image) ###################################################### # EXCERCISE 3: Morphological operations and regions # ###################################################### def excercise_three() -> None: #three_a() #mask1, _ = three_b() #three_c(uz_image.imread('./images/bird.jpg', uz_image.ImageType.float64), mask1) #three_d() three_e() def three_a() -> None: """ We will perform two basic morphological operations on the image mask.png, erosion and dilation. We will also experiment with combinations of both operations, named opening and closing. Also combine both operations sequentially and display the results. Question: Based on the results, which order of erosion and dilation operations produces opening and which closing? Answer: Opening = Erosion then dialation Closing = Dialation then erosion """ img_orig = uz_image.imread_gray('./images/mask.png', uz_image.ImageType.float64) img_er_dil = img_orig.copy() img_dil_er = img_orig.copy() img_comb = img_orig.copy() SE_size = 2 SE = np.ones((SE_size, SE_size), np.float64) fig, axs = plt.subplots(3, 4) fig.suptitle("Image after N iterations of Opening, Closing and combination of opening and closing") for im_ix in range(4): SE = np.ones((SE_size, SE_size), np.float64) img_er_dil = cv2.erode(img_er_dil, SE) img_er_dil = cv2.dilate(img_er_dil, SE) axs[0, im_ix].imshow(img_er_dil.copy(), cmap='gray') axs[0, im_ix].set(title=f"after {im_ix + 1} iterations of Opening, SE: {SE_size} x {SE_size}") SE_size +=1 SE_size = 2 for im_ix in range(4): SE = np.ones((SE_size, SE_size), np.float64) img_dil_er = cv2.dilate(img_dil_er, SE) img_dil_er = cv2.erode(img_dil_er, SE) axs[1, im_ix].imshow(img_dil_er.copy(), cmap='gray') axs[1, im_ix].set(title=f"after {im_ix + 1} iterations of Closing, SE: {SE_size} x {SE_size}") SE_size +=1 SE_size = 2 for im_ix in range(4): img_comb = cv2.erode(img_comb, SE) img_comb = cv2.dilate(img_comb, SE) img_comb = cv2.dilate(img_comb, SE) img_comb = cv2.erode(img_comb, SE) axs[2, im_ix].imshow(img_comb.copy(), cmap='gray') axs[2, im_ix].set(title=f"after {im_ix + 1} iterations of O+C, SE: {SE_size} x {SE_size}") SE_size +=1 plt.show() def three_b(): """ Try to clean up the mask of the image bird.jpg using morphological operations as shown in the image. Experiment with different sizes of the structuring element. You can also try different shapes, like cv2.getStructuringElement(cv2.MORPH_- ELLIPSE,(n,n)). """ _, original_mask = two_a() mask1 = original_mask.copy() mask2 = original_mask.copy() SE_ELIPSE = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4)) SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (4, 4)) # Perform operation of opening and then closing (almost) # Values found through trial and error mask1 = cv2.dilate(mask1, SE_ELIPSE, iterations = 4) mask1 = cv2.erode(mask1, SE_ELIPSE) mask1 = cv2.dilate(mask1, SE_ELIPSE) mask1 = cv2.erode(mask1, SE_ELIPSE, iterations=3) mask2 = cv2.dilate(mask2, SE_CROSS, iterations = 5) mask2 = cv2.erode(mask2, SE_CROSS) mask2 = cv2.dilate(mask2, SE_CROSS) mask2 = cv2.erode(mask2, SE_CROSS, iterations=4) fig, axs = plt.subplots(1, 3) fig.suptitle("Operation of Opening and closing using elipse and cross") axs[0].imshow(original_mask, cmap='gray') axs[0].set(title="Original mask") axs[1].imshow(mask1, cmap='gray') axs[1].set(title="Using elipse") axs[2].imshow(mask2, cmap='gray') axs[2].set(title="Using cross") plt.show() return (mask1, mask2) def three_c(image: npt.NDArray[np.float64], mask: npt.NDArray[np.uint8]): """ Write a function immask that accepts a three channel image and a binary mask and returns an image where pixel values are set to black if the corresponding pixel in the mask is equal to 0. Otherwise, the pixel value should be equal to the corresponding image pixel. Do not use for loops, as they are slow """ img = uz_image.apply_mask_on_image(image, mask) plt.imshow(img) plt.show() def three_d(): """ Create a mask from the image in file eagle.jpg and visualize the result with immask (if available, otherwise simply display the mask). Use Otsu’s method if available, else use a manually set threshold. Question: Why is the background included in the mask and not the object? How would you fix that in general? (just inverting the mask if necessary doesn’t count) Answer: """ eagle_img_gray = uz_image.imread_gray('./images/eagle.jpg', uz_image.ImageType.uint8).astype(np.uint8) eagle_img_color = uz_image.imread('./images/eagle.jpg', uz_image.ImageType.float64) TRESHOLD = two_e(eagle_img_gray) binary_mask = eagle_img_gray.copy() binary_mask = np.where(binary_mask < TRESHOLD, 0, 1) binary_mask = binary_mask.astype(np.uint8) # If I would invert image here, then we would get crap # So workaround: SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (2, 2)) binary_mask = cv2.erode(binary_mask, SE_CROSS, iterations=2) binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=3) SE_CROSS = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4)) binary_mask = cv2.erode(binary_mask, SE_CROSS, iterations=4) SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (4, 4)) binary_mask = cv2.erode(binary_mask, SE_CROSS) SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3)) binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=8) # Now invert binary mask binary_mask = np.where(binary_mask == 1, 0 , 1) removed_background_img = uz_image.apply_mask_on_image(eagle_img_color, binary_mask) fig, axs = plt.subplots(1, 3) fig.suptitle('Removing background of Eagle') axs[0].imshow(eagle_img_color) axs[0].set(title='Original image') axs[1].imshow(binary_mask, cmap='gray') axs[1].set(title='Computed Mask') axs[2].imshow(removed_background_img, cmap='gray') axs[2].set(title='Image with removed background') plt.show() def three_e(): """ Another way to process a mask is to extract connected components. To do this, you can use the function cv2.connectedComponentsWithStats that accepts a binary image and returns information about the connected components present in it. Write a script that loads the image coins.jpg, calculates a mask and cleans it up using morphological operations. Your goal is to get the coins as precisely as possible. Then, using connected components, remove the coins whose area is larger than 700 pixels from the original image (replace them with white background). Display the results """ # https://stackoverflow.com/a/42812226 coin_img_gray = uz_image.imread_gray('./images/coins.jpg', uz_image.ImageType.uint8).astype(np.uint8) coin_img_color = uz_image.imread('./images/coins.jpg', uz_image.ImageType.float64) TRESHOLD = two_e(coin_img_gray) COIN_SIZE = 700 binary_mask = coin_img_gray.copy() binary_mask = np.where(binary_mask < TRESHOLD, 0, 1) binary_mask = binary_mask.astype(np.uint8) SE_CROSS = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) binary_mask = cv2.erode(binary_mask, SE_CROSS, iterations=3) binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=2) SE_CROSS = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 3)) binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=2) # Now invert binary mask binary_mask = np.where(binary_mask == 1, 0 , 1) binary_mask = binary_mask.astype(np.uint8) output = cv2.connectedComponentsWithStats(binary_mask, 4, cv2.CV_32S) n_blobs, im_with_separated_blobs, stats, _ = output sizes = stats[:, -1] for blob in range(n_blobs): if sizes[blob] > COIN_SIZE: binary_mask[im_with_separated_blobs == blob] = 0 removed_coins = uz_image.apply_mask_on_image(coin_img_color, binary_mask) removed_coins[removed_coins == 0] = 1 fig, axs = plt.subplots(1, 3) fig.suptitle('Removal of >700px coins') axs[0].imshow(coin_img_color) axs[0].set(title='Original image') axs[1].imshow(binary_mask, cmap='gray') axs[1].set(title='Computed Mask') axs[2].imshow(removed_coins) axs[2].set(title='Image with coins removed') plt.show() def main() -> None: excercise_one() excercise_two() excercise_three() if __name__ == "__main__": main()