uz_assignments/assignment1/solution.py

import numpy as np
import numpy.typing as npt
from matplotlib import pyplot as plt
import random
import cv2
import uz_framework.image as uz_image
import UZ_utils as uz

#######################################
# EXCERCISE 1: Basic image processing #
#######################################

def excercise_one() -> None:
    image = one_a()
    #one_b(image)
    #one_c(image)
    #one_d(100, 200, 200, 400, image)
    one_e()

def one_a() -> npt.NDArray[np.float64]:
    """
    Read the image from the file umbrellas.jpg and display it
    """
    umbrellas = uz_image.imread('./images/umbrellas.jpg', uz_image.ImageType.float64)

    plt.imshow(umbrellas)
    plt.suptitle("Umbrellas Picture")
    plt.show()

    return umbrellas

def one_b(image: npt.NDArray[np.float64]) -> None:
    """
    Convert the loaded image to grayscale. A very simple way of doing this is summing
    up the color channels and dividing the result by 3, effectively averaging the values.
    The issue, however, is that the sum easily reaches beyond the np.uint8 range. We
    can avoid that by casting the data to a floating point type. You can access a specific
    image channel using the indexing syntax like red = I[:,:,0].
    """
    grayscale_image = uz_image.transform_coloured_image_to_grayscale(image)

    plt.imshow(grayscale_image, cmap='gray')
    plt.suptitle("Umbrellas Grayscale")
    plt.show()


def one_c(image: npt.NDArray[np.float64]) -> None:
    """
    Cut and display a specific part of the loaded image. Extract only one of the channels
    so you get a grayscale image. You can do this by indexing along the first two axes,
    for instance: cutout=I[130:260, 240:450, 1]. You can display multiple images in
    a single window using plt.subplot().
    Grayscale images can be displayed using different mappings (on a RGB monitor,
    every value needs to be mapped to a RGB triplet). Pyplot defaults to a color map
    named viridis, but often it is preferable to use a grayscale color map. This can be set
    with an additional argument to plt.imshow, like plt.imshow(I, cmap=’gray’).
    Question: Why would you use different color maps?
    Answer:
    """
    cutout_0 = image[50:200, 100:400, 0]
    cutout_1 = image[50:200, 100:400, 1]
    cutout_2 = image[50:200, 100:400, 2]

    fig, axs = plt.subplots(1, 3)
    fig.suptitle('Displaying cutouts for different image channels')

    axs[0].imshow(cutout_0, cmap='gray')
    axs[0].set(title='Channel 0')

    axs[1].imshow(cutout_1, cmap='gray')
    axs[1].set(title='Channel 1')

    axs[2].imshow(cutout_2, cmap='gray')
    axs[2].set(title='Channel 2')

    plt.show

def one_d(startx: int, endx: int, starty: int, endy: int, image:npt.NDArray[np.float64]) -> None:
    """
    You can also replace only a part of the image using indexing. Write a script that
    inverts a rectangular part of the image. This can be done pixel by pixel in a loop or
    by using indexing.

    (height, width, color)
    (x     , y    , color)
    y ->
    ################# x
    #               # |
    #               # v
    #               #
    #               #
    #               #
    #################
    Question: How is inverting a grayscale value defined for uint8 ?
    Answer:
    """

    inverted_image = uz_image.invert_coloured_image_part(image, startx, endx, starty, endy)
    fig, axs = plt.subplots(1, 2)
    fig.suptitle("Lomberlini")

    axs[0].imshow(image)
    axs[0].set(title="Original Image")
    axs[1].imshow(inverted_image, vmax=255)
    axs[1].set(title="Inverted part of Image")

    plt.show()

def one_e() -> None:
    """
    Perform a reduction of grayscale levels in the image. First read the image from
    umbrellas.jpg and convert it to grayscale. You can write your own function for
    grayscale conversion or use the function in UZ_utils.py.
    Convert the grayscale image to floating point type. Then, rescale the image values
    so that the largest possible value is 63. Convert the image back to uint8 and display
    both the original and the modified image. Notice that both look the same. Pyplot
    tries to maximize the contrast in displayed images by checking their values and
    scaling them to cover the entire uint8 interval. If you want to avoid this, you need
    to set the maximum expected value when using plt.imshow(), like plt.imshow(I,
    vmax=255. Use this to display the resulting image so the change is visible.
    """
    grayscale_image = uz_image.imread_gray("./images/umbrellas.jpg", uz_image.ImageType.float64)
    upscaled_grayscale_image = (grayscale_image.copy() * 63).astype(np.uint8)

    fig, axs = plt.subplots(1, 2)
    fig.suptitle("Lomberlini")


    axs[0].imshow(grayscale_image, cmap="gray")
    axs[0].set(title="Original grayscale image")
    axs[1].imshow(upscaled_grayscale_image, vmax=255, cmap="gray")
    axs[1].set(title="Upscaled grayscale image")

    plt.show()


############################################
# EXCERCISE 2: Thresholding and histograms #
############################################

def excercise_two() -> None:
    """
    Thresholding an image is an operation that produces a binary image (mask) of the same
    size where the value of pixels is determined by whether the value of the corresponding
    pixels in the source image is greater or lower than the given threshold.
    """
    #two_a()
    #two_b('./images/bird.jpg', 100, 20)
    #two_c('./images/bird.jpg', 20, 100)
    #two_d()
    two_e(uz.imread_gray('./images/bird.jpg', uz.ImageType.uint8).astype(np.uint8))


def two_a() -> tuple[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
    """
    Create a binary mask from a grayscale image. The binary mask is a matrix the same
    size as the image which contains 1 where some condition holds and 0 everywhere
    else. In this case the condition is simply the original image intensity. Use the image
    bird.jpg. Display both the image and the mask.
    """
    random_number = random.random()
    TRESHOLD = 75/255 # Found using otsu method

    image = uz.imread_gray("./images/bird.jpg", uz.ImageType.float64)
    binary_mask = image.copy()

    if random_number < 0.5:
        binary_mask[binary_mask < TRESHOLD] = 0
        binary_mask[binary_mask >= TRESHOLD] = 1
    else:
        binary_mask = np.where(binary_mask < TRESHOLD, 0, 1)

    binary_mask = uz.convert_float64_array_to_uint8_array(binary_mask)

    fig, (ax0, ax1) = plt.subplots(1, 2)
    fig.suptitle("Birdie and its mask")


    ax0.imshow(image, cmap="gray")
    ax1.imshow(binary_mask, cmap="gray")

    ax0.set(title="Original image")
    ax1.set(title="Mask of birdie")

    plt.show()
    return (image, binary_mask)


def my_hist_for_loop(image: npt.NDArray[np.float64], number_of_bins: int) -> npt.NDArray[np.float64]:
    bin_restrictions = np.arange(0, 1, 1 / number_of_bins)
    bins = np.zeros(number_of_bins).astype(np.float64)

    for pixel in image.reshape(-1):
        # https://stackoverflow.com/a/16244044
        bins[np.argmax(bin_restrictions > pixel)] += 1

    return bins / np.sum(bins)

# Much faster implementation than for loop
def my_hist(image: npt.NDArray[np.float64], number_of_bins: int, img_typ: uz.ImageType) -> npt.NDArray[np.float64]:
    if img_typ == uz.ImageType.float64:
        bins = np.arange(0, 1, 1 / number_of_bins)
    elif img_typ == uz.ImageType.uint8:
        bins = np.arange(0, 255, 255/number_of_bins)

    # Put pixels into classes
    # ex. binsize = 10 then 0.4 would map into 4
    binarray = np.digitize(image.reshape(-1), bins).astype(np.uint8)

    # Now count those values
    binarray = np.unique(binarray, return_counts=True)

    counts = binarray[1].astype(np.float64) # Get the counts out of tuple

    # Check if there is any empty bin
    empty_bins = []
    bins = binarray[0]
    for i in range(1, number_of_bins + 1):
        if i not in bins:
            empty_bins.append(i)

    # Add empty bins with zeros
    if empty_bins != []:
        for i in empty_bins:
            counts = np.insert(counts, i - 1, 0)

    return counts / np.sum(counts)


def two_b(image_path: str, number_of_bins_first: int, number_of_bins_second: int) -> None:
    """
    Write a function myhist that accepts a grayscale image and the number of bins that
    will be used in building a histogram. The function should return a 1D array that
    represents the image histogram (the size should be equal to the number of bins, of
    course).
    The histogram is simply a count of pixels with same (or similar) intensity for all
    bins. You can assume the values of the image are within the interval [0,255]. If you
    use fewer than 255 bins, intensities will have to be grouped together, e.g. if using 10
    bins, all values on the interval [0,25] will fall into bin 0.

    Write a script that calculates and displays histograms for different numbers of bins
    using bird.jpg
    Question: The histograms are usually normalized by dividing the result by the
    sum of all cells. Why is that?
    Answer:
    """
    image = uz.imread_gray(image_path, uz.ImageType.uint8)

    H1 = my_hist(image, number_of_bins_first, uz.ImageType.uint8)
    H2 = my_hist(image, number_of_bins_second, uz.ImageType.uint8)

    fig, (ax0, ax1, ax2) = plt.subplots(1, 3)
    fig.suptitle("Birdie and histgrams")

    ax0.imshow(image, cmap="gray")
    ax0.set(title="Birdie image")
    ax1.bar(np.arange(number_of_bins_first), H1)
    ax1.set(title="100 bins")
    ax2.bar(np.arange(number_of_bins_second), H2)
    ax2.set(title="20 bins")

    plt.show()

def two_c(image_path: str, number_of_bins_first: int, number_of_bins_second: int) -> None:
    """
    Modify your function myhist to no longer assume the uint8 range
    for values. Instead, it should find the maximum and minimum values in the image
    and calculate the bin ranges based on these values. Write a script that shows the
    difference between both versions of the function.
    """

    image_uint8 = uz.imread_gray(image_path, uz.ImageType.uint8)
    image_float64 = uz.imread_gray(image_path, uz.ImageType.float64)

    H01 = my_hist(image_uint8, number_of_bins_first, uz.ImageType.uint8)
    H02 = my_hist(image_uint8, number_of_bins_second, uz.ImageType.uint8)

    H11 = my_hist(image_float64, number_of_bins_first, uz.ImageType.float64)
    H12 = my_hist(image_float64, number_of_bins_second, uz.ImageType.float64)

    fig, axs = plt.subplots(2, 3)
    fig.suptitle("Comparison between two histograms")

    axs[0, 0].imshow(image_float64, cmap="gray")
    axs[0, 0].set(title="Grayscale image in float64 representation")
    axs[0, 1].bar(np.arange(number_of_bins_first), H01)
    axs[0, 1].set(title=f'{number_of_bins_first} bins used')
    axs[0, 2].bar(np.arange(number_of_bins_second), H02)
    axs[0, 2].set(title=f'{number_of_bins_second} bins used')

    axs[1, 0].imshow(image_float64, cmap="gray")
    axs[1, 0].set(title="Grayscale image in uint8 representation")
    axs[1, 1].bar(np.arange(number_of_bins_first), H11)
    axs[1, 1].set(title=f'{number_of_bins_first} bins used')
    axs[1, 2].bar(np.arange(number_of_bins_second), H12)
    axs[1, 2].set(title=f'{number_of_bins_second} bins used')
    plt.show()

def two_d() -> None:
    """
    Test myhist function on images (three or more) of the same scene in
    different lighting conditions. One way to do this is to capture several images using
    your web camera and change the lighting of the room. Visualize the histograms for
    all images for different number of bins and interpret the results.
    """
    light = uz.imread_gray("./images/ROOM_LIGHTS_ON.jpg", uz.ImageType.float64)
    darker = uz.imread_gray("./images/ONE_ROOM_LIGH_ON.jpg", uz.ImageType.float64)
    dark = uz.imread_gray("./images/DARK.jpg", uz.ImageType.float64)

    H10 = my_hist(light, 20, uz.ImageType.float64)
    H11 = my_hist(light, 60, uz.ImageType.float64)
    H12 = my_hist(light, 100, uz.ImageType.float64)
    H20 = my_hist(darker, 20, uz.ImageType.float64)
    H21 = my_hist(darker, 60, uz.ImageType.float64)
    H22 = my_hist(darker, 100, uz.ImageType.float64)
    H30 = my_hist(dark, 20, uz.ImageType.float64)
    H31 = my_hist(dark, 60, uz.ImageType.float64)
    H32 = my_hist(dark, 100, uz.ImageType.float64)

    fig, axs = plt.subplots(3, 4)
    fig.suptitle("spanskiduh and histgrams")

    axs[0, 0].imshow(light, cmap="gray")
    axs[0, 0].set(title="Image in light conditions")
    axs[0, 1].bar(np.arange(20), H10)
    axs[0, 1].set(title="Using 20 bins")
    axs[0, 2].bar(np.arange(60), H11)
    axs[0, 2].set(title="Using 60 bins")
    axs[0, 3].bar(np.arange(100), H12)
    axs[0, 3].set(title="Using 100 bins")

    axs[1, 0].imshow(darker, cmap="gray")
    axs[1, 0].set(title="Image in darker conditions")
    axs[1, 1].bar(np.arange(20), H20)
    axs[1, 1].set(title="Using 20 bins")
    axs[1, 2].bar(np.arange(60), H21)
    axs[1, 2].set(title="Using 60 bins")
    axs[1, 3].bar(np.arange(100), H22)
    axs[1, 3].set(title="Using 100 bins")

    axs[2, 0].imshow(dark, cmap="gray")
    axs[2, 0].set(title="Image in dark conditions")
    axs[2, 1].bar(np.arange(20), H30)
    axs[2, 1].set(title="Using 20 bins")
    axs[2, 2].bar(np.arange(60), H31)
    axs[2, 2].set(title="Using 60 bins")
    axs[2, 3].bar(np.arange(100), H32)
    axs[2, 3].set(title="Using 100 bins")

    plt.show()

def two_e(image: npt.NDArray[np.uint8]):
    """
    Implement Otsu’s method for automatic threshold calculation. It
    should accept a grayscale image and return the optimal threshold. Using normalized
    histograms, the probabilities of both classes are easy to calculate. Write a script that
    shows the algorithm’s results on different images.
    References: https://en.wikipedia.org/wiki/Otsu%27s_method
    """
    treshold_range = np.arange(np.max(image) + 1)
    criterias = []

    for treshold in treshold_range:
        # create the thresholded image
        thresholded_im = np.zeros(image.shape)

        thresholded_im[image >= treshold] = 1

        # compute weights
        nb_pixels = image.size
        nb_pixels1 = np.count_nonzero(thresholded_im)
        weight1 = nb_pixels1 / nb_pixels
        weight0 = 1 - weight1

        # if one the classes is empty, eg all pixels are below or above the threshold, that threshold will not be considered
        # in the search for the best threshold
        if weight1 == 0 or weight0 == 0:
            continue

        # find all pixels belonging to each class
        val_pixels1 = image[thresholded_im == 1]
        val_pixels0 = image[thresholded_im == 0]

        # compute variance of these classes
        var0 = np.var(val_pixels0) if len(val_pixels0) > 0 else 0
        var1 = np.var(val_pixels1) if len(val_pixels1) > 0 else 0

        criterias.append( weight0 * var0 + weight1 * var1)

    best_threshold = treshold_range[np.argmin(criterias)]
    print(f'best treshold is: {best_threshold}')
    return best_threshold


######################################################
# EXCERCISE 3:  Morphological operations and regions #
######################################################

def excercise_three() -> None:
    #three_a()
    #mask1, _ = three_b()
    #three_c(uz.imread('./images/bird.jpg', uz.ImageType.float64), mask1)
    #three_d()
    three_e()


def three_a() -> None:
    """
    We will perform two basic morphological operations on the image mask.png, erosion
    and dilation. We will also experiment with combinations of both operations, named
    opening and closing. Also combine both operations sequentially and display the results.
    Question: Based on the results, which order of erosion and dilation operations
    produces opening and which closing?
    Answer: Opening = Erosion then dialation
            Closing = Dialation then erosion
    """
    img_orig = uz.imread_gray('./images/mask.png', uz.ImageType.float64)

    img_er_dil = img_orig.copy()
    img_dil_er = img_orig.copy()
    img_comb = img_orig.copy()
    SE_size = 2
    SE = np.ones((SE_size, SE_size), np.float64)

    fig, axs = plt.subplots(3, 4)
    fig.suptitle("Image after N iterations of Opening, Closing and combination of opening and closing")

    for im_ix in range(4):
        SE = np.ones((SE_size, SE_size), np.float64)
        img_er_dil = cv2.erode(img_er_dil, SE)
        img_er_dil = cv2.dilate(img_er_dil, SE)
        axs[0, im_ix].imshow(img_er_dil.copy(), cmap='gray')
        axs[0, im_ix].set(title=f"after {im_ix + 1} iterations of Opening, SE: {SE_size} x {SE_size}")
        SE_size +=1


    SE_size = 2
    for im_ix in range(4):
        SE = np.ones((SE_size, SE_size), np.float64)
        img_dil_er = cv2.dilate(img_dil_er, SE)
        img_dil_er = cv2.erode(img_dil_er, SE)
        axs[1, im_ix].imshow(img_dil_er.copy(), cmap='gray')
        axs[1, im_ix].set(title=f"after {im_ix + 1} iterations of Closing, SE: {SE_size} x {SE_size}")
        SE_size +=1

    SE_size = 2
    for im_ix in range(4):
        img_comb = cv2.erode(img_comb, SE)
        img_comb = cv2.dilate(img_comb, SE)
        img_comb = cv2.dilate(img_comb, SE)
        img_comb = cv2.erode(img_comb, SE)
        axs[2, im_ix].imshow(img_comb.copy(), cmap='gray')
        axs[2, im_ix].set(title=f"after {im_ix + 1} iterations of O+C, SE: {SE_size} x {SE_size}")
        SE_size +=1


    plt.show()


def three_b():
    """
    Try to clean up the mask of the image bird.jpg using morphological operations
    as shown in the image. Experiment with different sizes of the structuring element.
    You can also try different shapes, like cv2.getStructuringElement(cv2.MORPH_-
    ELLIPSE,(n,n)).
    """
    _, original_mask = two_a()
    mask1 = original_mask.copy()
    mask2 = original_mask.copy()

    SE_ELIPSE = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))
    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (4, 4))

    # Perform operation of opening and then closing (almost)
    # Values found through trial and error
    mask1 = cv2.dilate(mask1, SE_ELIPSE, iterations = 4)
    mask1 = cv2.erode(mask1, SE_ELIPSE)
    mask1 = cv2.dilate(mask1, SE_ELIPSE)
    mask1 = cv2.erode(mask1, SE_ELIPSE, iterations=3)

    mask2 = cv2.dilate(mask2, SE_CROSS, iterations = 5)
    mask2 = cv2.erode(mask2, SE_CROSS)
    mask2 = cv2.dilate(mask2, SE_CROSS)
    mask2 = cv2.erode(mask2, SE_CROSS, iterations=4)

    fig, axs = plt.subplots(1, 3)
    fig.suptitle("Operation of Opening and closing using elipse and cross")

    axs[0].imshow(original_mask, cmap='gray')
    axs[0].set(title="Original mask")

    axs[1].imshow(mask1, cmap='gray')
    axs[1].set(title="Using elipse")

    axs[2].imshow(mask2, cmap='gray')
    axs[2].set(title="Using cross")


    plt.show()

    return (mask1, mask2)

# Ez lmao
def three_c(image: npt.NDArray[np.float64], mask: npt.NDArray[np.uint8]):
    """
    Write a function immask that accepts a three channel image and
    a binary mask and returns an image where pixel values are set to black if the
    corresponding pixel in the mask is equal to 0. Otherwise, the pixel value should be
    equal to the corresponding image pixel. Do not use for loops, as they are slow
    """
    mask = np.expand_dims(mask, axis=2)

    image = mask * image

    plt.imshow(image)
    plt.show()

def three_d():
    """
    Create a mask from the image in file eagle.jpg and visualize the result with immask
    (if available, otherwise simply display the mask). Use Otsu’s method if available,
    else use a manually set threshold.
    Question: Why is the background included in the mask and not the object? How
    would you fix that in general? (just inverting the mask if necessary doesn’t count)
    Answer:
    """
    eagle_img_gray = uz.imread_gray('./images/eagle.jpg', uz.ImageType.uint8).astype(np.uint8)
    eagle_img_color = uz.imread('./images/eagle.jpg', uz.ImageType.float64)
    TRESHOLD = two_e(eagle_img_gray)
    binary_mask = eagle_img_gray.copy()

    binary_mask = np.where(binary_mask < TRESHOLD, 0, 1)
    binary_mask = uz.convert_float64_array_to_uint8_array(binary_mask)

    # If I would invert image here, then we would get crap
    # So workaround:
    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (2, 2))
    binary_mask = cv2.erode(binary_mask, SE_CROSS, iterations=2)
    binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=3)
    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))
    binary_mask = cv2.erode(binary_mask, SE_CROSS, iterations=4)
    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (4, 4))
    binary_mask = cv2.erode(binary_mask, SE_CROSS)
    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=8)

    # Now invert binary mask
    binary_mask = np.where(binary_mask == 1, 0 , 1)

    three_c(eagle_img_color, binary_mask)

def three_e():
    """
    Another way to process a mask is to extract connected components. To do this, you
    can use the function cv2.connectedComponentsWithStats that accepts a binary image
    and returns information about the connected components present in it. Write a script
    that loads the image coins.jpg, calculates a mask and cleans it up using morphological
    operations. Your goal is to get the coins as precisely as possible. Then,
    using connected components, remove the coins whose area is larger than 700 pixels
    from the original image (replace them with white background). Display the results
    """
    # https://stackoverflow.com/a/42812226
    coin_img_gray = uz.imread_gray('./images/coins.jpg', uz.ImageType.uint8).astype(np.uint8)
    coin_img_color = uz.imread('./images/coins.jpg', uz.ImageType.float64)
    TRESHOLD = two_e(coin_img_gray)
    COIN_SIZE = 700

    binary_mask = coin_img_gray.copy()
    binary_mask = np.where(binary_mask < TRESHOLD, 0, 1)
    binary_mask = uz.convert_float64_array_to_uint8_array(binary_mask)

    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    binary_mask = cv2.erode(binary_mask, SE_CROSS, iterations=3)
    binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=2)
    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 3))
    binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=2)
    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 2))
    binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=2)

    # Now invert binary mask
    binary_mask = np.where(binary_mask == 1, 0 , 1)
    binary_mask = uz.convert_float64_array_to_uint8_array(binary_mask)

    output = cv2.connectedComponentsWithStats(binary_mask, 4, cv2.CV_32S)
    n_blobs, im_with_separated_blobs, stats, _ = output

    sizes = stats[:, -1]

    for blob in range(n_blobs):
        if sizes[blob] > 700:
            binary_mask[im_with_separated_blobs == blob] = 0

    three_c(coin_img_color, binary_mask)


def main() -> None:
    excercise_one()
    #excercise_two()
    #excercise_three()

if __name__ == "__main__":
    main()