From ec603b85fd13035042e394cef721fb0d2c3c99a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C5=A1per=20Spagnolo?= <gasper.spagnolo@outlook.com>
Date: Wed, 8 Nov 2023 15:30:37 +0100
Subject: [PATCH] Test Viola-Jones (vj) detector: IoU sweep over scale_factor/min_neighbors

---
 a1/dataloader.py |  53 +++++++++++--------
 a1/pipeline.py   | 130 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 137 insertions(+), 46 deletions(-)

diff --git a/a1/dataloader.py b/a1/dataloader.py
index 305d23b..6768f56 100644
--- a/a1/dataloader.py
+++ b/a1/dataloader.py
@@ -1,64 +1,77 @@
-
 import os
 import cv2
 
-class EarDataClass():
 
-    def __init__(self, root_dir:str ,  annot_file: str, mode: str):
+class EarDataClass:  # indexable dataset: dat[i] -> (image, label, bbox)
+    def __init__(self, root_dir: str, annot_file: str, mode: str):
         if not os.path.isdir(root_dir):
-            raise ValueError('root_dir must be a valid directory')
+            raise ValueError("root_dir must be a valid directory")
         if os.path.isfile(os.path.join(root_dir, annot_file)):
-            raise ValueError('annot_file must be a valid file')
-        if mode not in ['train', 'test']:
-            raise ValueError('mode must be either train or test')
+            raise ValueError("annot_file must be a valid file")  # NOTE(review): raised when the file exists - inverted?
+        if mode not in ["train", "test"]:
+            raise ValueError("mode must be either train or test")
 
         self.root_dir = root_dir
         self.annot_file = annot_file
         self.mode = mode
         self._set_paths()
+        self._set_bboxes()  # reads self.paths, so it must run after _set_paths()
 
     def _set_paths(self):
         paths = []
         labels = []
 
         def _convert_path_to_number(path):
-            return int(path.split('/')[-1].split('.')[0])
+            return int(path.split("/")[-1].split(".")[0])  # ".../0012.png" -> 12
 
-        with open(self.annot_file, 'r') as f:
+        with open(self.annot_file, "r") as f:  # opened as given, not joined with root_dir
             lines = f.readlines()
             for line in lines:
-                line = line.split(' ')
+                line = line.split(" ")  # line[0]: path under root_dir, line[1]: label
                 path = os.path.join(self.root_dir, line[0])
                 p_int = _convert_path_to_number(path)
-                if self.mode == 'train':
+                if self.mode == "train":  # train: file numbers that are not multiples of 5
                     if p_int % 5 != 0:
                         paths.append(path)
-                        labels.append(line[1])
-                elif self.mode == 'test':
+                        labels.append(int(line[1].strip()))  # strip whitespace, store an int
+                elif self.mode == "test":  # test: file numbers that are multiples of 5
                     if p_int % 5 == 0:
                         paths.append(path)
-                        labels.append(line[1])
+                        labels.append(int(line[1].strip()))
 
         self.paths = paths
         self.labels = labels
 
+    def _set_bboxes(self):
+        # The annotation path is the image path with ".png" replaced by ".txt";
+        # every space-separated field after the first is parsed as a float.
+        boxes = []
+        for image_path in self.paths:
+            with open(image_path.replace(".png", ".txt"), "r") as handle:
+                fields = handle.read().split(sep=" ")
+            boxes.append([float(field) for field in fields[1:]])
+
+        self.bboxes = boxes
+
     def __getitem__(self, idx):
         image_path = self.paths[idx]
         label = self.labels[idx]
+        bbox = self.bboxes[idx]  # floats parsed by _set_bboxes for this image
         image = cv2.imread(image_path)
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        return image, label
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # single-channel image
+        return image, label, bbox  # callers must unpack three values
 
     def __len__(self):
         return len(self.paths)
 
 
 def main():
-    dat = EarDataClass(root_dir='./ears', annot_file='identites.txt', mode='train')
+    dat = EarDataClass(root_dir="./ears", annot_file="identites.txt", mode="test")  # "test": numbers divisible by 5
     for i in range(len(dat)):
-        image, label = dat[i]
-        print(image.shape, label)
+        image, label, bbox = dat[i]
+        print(image.shape, label, bbox)  # smoke check of every sample
     pass
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/a1/pipeline.py b/a1/pipeline.py
index f57f009..e61f41a 100644
--- a/a1/pipeline.py
+++ b/a1/pipeline.py
@@ -18,6 +18,14 @@ class ViolaJones:
         self.scale_factor = scale_factor
         self.min_neighbors = min_neighbors
 
+    def _convert_to_yolo_format(self, bbox, img_width, img_height):
+        # (x, y, w, h) anchored at its minimum corner -> normalized
+        # [x_center, y_center, width, height], each divided by the image size.
+        left, top, box_w, box_h = bbox
+        center = [(left + box_w / 2) / img_width, (top + box_h / 2) / img_height]
+        size = [box_w / img_width, box_h / img_height]
+        return center + size
+
     def _extract_ear(self, image, ear):
         x, y, w, h = ear
         return image[y : y + h, x : x + w]
@@ -30,20 +38,43 @@ class ViolaJones:
             image, self.scale_factor, self.min_neighbors
         )
 
+        img_height, img_width = image.shape[:2]
+
         if len(left_ears) > 0:
-            # Crop out the region of interest
-            return left_ears[0], self._extract_ear(image, left_ears[0])
+            return self._convert_to_yolo_format(
+                left_ears[0], img_width, img_height
+            ), self._extract_ear(image, left_ears[0])
         elif len(right_ears) > 0:
-            return right_ears[0], self._extract_ear(image, right_ears[0])
+            return self._convert_to_yolo_format(
+                right_ears[0], img_width, img_height
+            ), self._extract_ear(image, right_ears[0])
         else:
             return None, None
 
+    def compute_iou(self, bbox1, bbox2):
+        """Return the IoU of two (x_min, y_min, x_max, y_max) boxes.
+
+        Both corners count as part of a box, hence the ``+ 1`` terms.
+        """
+        left1, top1, right1, bottom1 = bbox1
+        left2, top2, right2, bottom2 = bbox2
+
+        # Extent of the overlap along each axis, clamped at zero.
+        overlap_w = max(0, min(right1, right2) - max(left1, left2) + 1)
+        overlap_h = max(0, min(bottom1, bottom2) - max(top1, top2) + 1)
+        overlap = overlap_w * overlap_h
+
+        area1 = (right1 - left1 + 1) * (bottom1 - top1 + 1)
+        area2 = (right2 - left2 + 1) * (bottom2 - top2 + 1)
+        union = area1 + area2 - overlap
+
+        return overlap / union if union > 0 else 0
+
 
 class LBP:
     def __init__(self, P=8, R=1):
         self.P = P
         self.R = R
-        pass
 
     def _bilinear_interpolation(self, image, x, y):
         x1, y1 = int(x), int(y)
@@ -61,41 +92,88 @@ class LBP:
             + f22 * (x - x1) * (y - y1)
         )
 
+    def _get_lbp_histogram(self, lbp_image):
+        """Return the normalized histogram of LBP codes, one bin per code."""
+        # An integer bin count plus an explicit range gives 2**P unit-width
+        # bins; an edge array of length 2**P would give only 2**P - 1 bins.
+        codes = 2**self.P
+        counts, _ = np.histogram(lbp_image.ravel(), bins=codes, range=(0, codes))
+        histogram = counts.astype("float")
+        total = histogram.sum()
+        # An empty image has no codes to count: skip the division by zero.
+        return histogram / total if total > 0 else histogram
+
     def forward(self, img):
-        gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        lbp = np.zeros_like(gray_image)
+        """Return the normalized histogram of the LBP codes of ``img``."""
+        try:  # 2-D input is already grayscale; anything else is read as BGR
+            gray_image = img
+            if img.ndim != 2:
+                gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        except (cv2.error, AttributeError, TypeError) as err:
+            raise ValueError("The input image must be a valid BGR image.") from err
+        lbp_image = np.zeros_like(gray_image)
         for i in range(self.R, gray_image.shape[0] - self.R):
             for j in range(self.R, gray_image.shape[1] - self.R):
                 center = gray_image[i, j]
                 lbp_code = 0
                 for p in range(self.P):
-                    # Angle in radians
                     theta = 2 * np.pi * p / self.P
                     x = j + self.R * np.cos(theta)
-                    y = i - self.R * np.sin(
-                        theta
-                    )  # y coordinates go from top to bottom
+                    y = i - self.R * np.sin(theta)  # image rows grow downward
                     pixel_value = self._bilinear_interpolation(gray_image, x, y)
-                    lbp_code |= (1 << p) if pixel_value >= center else 0
-                lbp[i, j] = lbp_code
-        return lbp
+                    lbp_code |= (pixel_value >= center) << p  # set bit p
+                lbp_image[i, j] = lbp_code
+        return self._get_lbp_histogram(lbp_image)
+
+
+def test_vj(dat: EarDataClass):
+    """Grid-search Viola-Jones parameters and record the mean IoU per setting.
+
+    For every (scale_factor, min_neighbors) pair the detector is run on each
+    sample of ``dat`` and the mean IoU against the annotated box is written to
+    ``./pred/vj/sf=<scale_factor>_mn=<min_neighbors>/iou.txt``. A sample with
+    no detection contributes an IoU of 0.
+    """
+
+    def _to_corners(box, img_width, img_height):
+        # [x_center, y_center, w, h] normalized -> inclusive pixel corners,
+        # which is the layout ViolaJones.compute_iou unpacks.
+        x_center, y_center, w, h = box
+        x_min = (x_center - w / 2) * img_width
+        y_min = (y_center - h / 2) * img_height
+        return [x_min, y_min, x_min + w * img_width - 1, y_min + h * img_height - 1]
+
+    scale_factors = [1.01, 1.02, 1.03, 1.04]
+    min_neighbors = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+    for scale_factor in scale_factors:
+        for min_neigh in min_neighbors:
+            vj = ViolaJones(scale_factor=scale_factor, min_neighbors=min_neigh)
+            out_dir = f"./pred/vj/sf={scale_factor}_mn={min_neigh}"
+            os.makedirs(out_dir, exist_ok=True)
+            ious = []
+            for i in tqdm(range(len(dat))):
+                image, _, bbox = dat[i]
+                bbox_vj, _ = vj.forward(image)
+                if bbox_vj is None:
+                    ious.append(0)
+                    continue
+                # NOTE(review): assumes the annotation boxes use the same
+                # normalized center format as the detector output - confirm.
+                img_height, img_width = image.shape[:2]
+                iou = vj.compute_iou(
+                    _to_corners(bbox, img_width, img_height),
+                    _to_corners(bbox_vj, img_width, img_height),
+                )
+                ious.append(iou)
+
+            with open(f"{out_dir}/iou.txt", "w") as f:
+                f.write(str(np.mean(ious)))
 
 
 def main():
     dat = EarDataClass(root_dir="./ears", annot_file="identites.txt", mode="train")
-    vj = ViolaJones()
-    lbp = LBP()
-    for i in tqdm(range(len(dat))):
-        img, label = dat[i]
-        ear_lab, ear_img = vj.forward(img)
-        if ear_lab is not None:
-            # Save the image
-            print(lbp.forward(ear_img).shape)
-            os.makedirs("./pred", exist_ok=True)
-            # Save the image
-            cv2.imwrite(f"./pred/{i}.jpg", ear_img)
-
-    pass
+    test_vj(dat)  # writes one mean-IoU file per parameter pair under ./pred/vj
 
 
 if __name__ == "__main__":