Close enough, I guess

2022-10-21 18:08:04 +02:00 · 2022-10-21 18:08:04 +02:00 · 761817f2d4
parent 30dbaf7bc6
commit 761817f2d4
2 changed files with 49 additions and 9 deletions
--- a/assignment1/UZ_utils.py
+++ b/assignment1/UZ_utils.py
@ -19,15 +19,20 @@ class ImageType(enum.Enum):
    float64 = 1


-def imread(path: str) -> npt.NDArray[np.float64]:
+# NOTE(review): `A or B` in the return annotation evaluates to its first operand only;
+# a union of the two array types is presumably what was intended - confirm.
+# NOTE(review): the parameter name `type` shadows the builtin of the same name.
+def imread(path: str, type: ImageType) -> npt.NDArray[np.float64] or npt.NDArray[np.uint8]:
    """
    Reads an image in RGB order. Image type is transformed from uint8 to float, and
    range of values is reduced from [0, 255] to [0, 1].
    """
+    # NOTE(review): the docstring above describes only the ImageType.float64 path;
+    # for ImageType.uint8 the array is returned as loaded, without the division by 255.
    I = Image.open(path).convert('RGB')  # PIL image.
    I = np.asarray(I)  # Converting to Numpy array.
-    I = I.astype(np.float64) / 255
-    return I
+    if type == ImageType.float64:
+        # Cast to float64 and divide by 255.
+        I = I.astype(np.float64) / 255
+        return I
+    elif type == ImageType.uint8:
+        # Return the array exactly as np.asarray produced it.
+        return I
+
+    # Reached only when `type` matches neither member checked above.
+    raise Exception("Wrong image format picked!")


 def imread_gray(path: str, type: ImageType) -> npt.NDArray[np.float64] or npt.NDArray[np.uint8]:
--- a/assignment1/solution.py
+++ b/assignment1/solution.py
@ -3,7 +3,6 @@ import numpy as np
 import numpy.typing as npt
 from matplotlib import pyplot as plt
 import random
-from PIL import Image
 import cv2 as cv2

 #######################################
@ -21,7 +20,7 @@ def one_a() -> npt.NDArray[np.float64]:
    """
    Read the image from the file umbrellas.jpg and display it
    """
-    image = uz.imread('./images/umbrellas.jpg')
+    image = uz.imread('./images/umbrellas.jpg', uz.ImageType.float64)
    uz.imshow(image, 'Umbrellas')
    return image

@ -338,7 +337,7 @@ def two_d() -> None:
    
    plt.show()

-def two_e(image: npt.NDArray[np.uint8]) -> None:
+def two_e(image: npt.NDArray[np.uint8]):
    """
    Implement Otsu’s method for automatic threshold calculation. It
    should accept a grayscale image and return the optimal threshold. Using normalized
@ -378,6 +377,7 @@ def two_e(image: npt.NDArray[np.uint8]) -> None:

    best_threshold = treshold_range[np.argmin(criterias)]
    print(f'best treshold is: {best_threshold}')
+    return best_threshold


 ######################################################
@ -386,8 +386,9 @@ def two_e(image: npt.NDArray[np.uint8]) -> None:

 def excercise_three() -> None:
+    # Only part (d) is executed; the calls for parts (a)-(c) are left commented out.
    #three_a()
-    mask1, mask2 = three_b()
-    three_c(uz.imread('./images/bird.jpg'), mask1)
+    #mask1, _ = three_b()
+    #three_c(uz.imread('./images/bird.jpg', uz.ImageType.float64), mask1)
+    three_d()


 def three_a() -> None:
@ -497,10 +498,44 @@ def three_c(image: npt.NDArray[np.float64], mask: npt.NDArray[np.uint8]):
    mask = np.expand_dims(mask, axis=2)
    
    image = mask * image
-
+    
    plt.imshow(image)
    plt.show()

+def three_d():
+    """
+    Create a mask from the image in file eagle.jpg and visualize the result with immask
+    (if available, otherwise simply display the mask). Use Otsu’s method if available,
+    else use a manually set threshold.
+    Question: Why is the background included in the mask and not the object? How
+    would you fix that in general? (just inverting the mask if necessary doesn’t count)
+    Answer:
+    """
+    # The grayscale copy (uint8) is used for thresholding; the colour copy (float64)
+    # is the image the final mask is applied to.
+    eagle_img_gray = uz.imread_gray('./images/eagle.jpg', uz.ImageType.uint8).astype(np.uint8)
+    eagle_img_color = uz.imread('./images/eagle.jpg', uz.ImageType.float64)
+    # two_e returns the threshold it selects for the grayscale image.
+    TRESHOLD = two_e(eagle_img_gray) 
+    binary_mask = eagle_img_gray.copy()
+ 
+    # Pixels below the threshold become 0, all others become 1.
+    binary_mask = np.where(binary_mask < TRESHOLD, 0, 1) 
+    # NOTE(review): this helper lives in UZ_utils and is not visible here; presumably it
+    # casts the 0/1 mask to uint8 for the OpenCV calls below. Confirm it does not rescale
+    # to 0..255, because the inversion at the end tests for `== 1`.
+    binary_mask = uz.convert_float64_array_to_uint8_array(binary_mask)
+
+    # Inverting the mask at this point gives a poor result, so as a workaround it is
+    # first cleaned up with a fixed sequence of erosions and dilations. The order and
+    # iteration counts matter; they appear to be hand-tuned for this image.
+    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (2, 2))
+    binary_mask = cv2.erode(binary_mask, SE_CROSS, iterations=2)
+    binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=3)
+    # NOTE(review): despite the name, SE_CROSS holds an elliptical element for this step.
+    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))
+    binary_mask = cv2.erode(binary_mask, SE_CROSS, iterations=4)
+    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (4, 4))
+    binary_mask = cv2.erode(binary_mask, SE_CROSS)
+    SE_CROSS = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
+    binary_mask = cv2.dilate(binary_mask, SE_CROSS, iterations=8)
+
+    # Finally invert the cleaned mask: 1 becomes 0, every other value becomes 1.
+    binary_mask = np.where(binary_mask == 1, 0 , 1)
+
+    # three_c multiplies the colour image by the mask and displays the result.
+    three_c(eagle_img_color, binary_mask)
+

 def main() -> None:
    #excercise_one()