Test vj

2023-11-08 15:30:37 +01:00 · 2023-11-08 15:30:37 +01:00 · ec603b85fd
parent 27c77a5928
commit ec603b85fd
2 changed files with 137 additions and 46 deletions
--- a/a1/dataloader.py
+++ b/a1/dataloader.py
@ -1,64 +1,77 @@
-
 import os
 import cv2

-class EarDataClass():

+class EarDataClass:
    def __init__(self, root_dir: str, annot_file: str, mode: str):
+        """Validate the arguments, then index image paths, labels and boxes.
+
+        Args:
+            root_dir: Directory that image paths read from the annotation
+                file are joined onto.
+            annot_file: Name of the annotation file.
+            mode: Either "train" or "test"; selects which entries are kept.
+
+        Raises:
+            ValueError: If one of the three argument checks below fails.
+        """
        if not os.path.isdir(root_dir):
-            raise ValueError('root_dir must be a valid directory')
+            raise ValueError("root_dir must be a valid directory")
+        # NOTE(review): this raises when root_dir/annot_file EXISTS, which
+        # contradicts the error message, and _set_paths opens annot_file
+        # without joining root_dir. The condition looks inverted - confirm
+        # the intended file layout before changing either place.
        if os.path.isfile(os.path.join(root_dir, annot_file)):
-            raise ValueError('annot_file must be a valid file')
-        if mode not in ['train', 'test']:
-            raise ValueError('mode must be either train or test')
+            raise ValueError("annot_file must be a valid file")
+        if mode not in ["train", "test"]:
+            raise ValueError("mode must be either train or test")

        self.root_dir = root_dir
        self.annot_file = annot_file
        self.mode = mode
        self._set_paths()
+        # Must run after _set_paths: it iterates over self.paths.
+        self._set_bboxes()

    def _set_paths(self):
+        """Read the annotation file and fill ``self.paths`` / ``self.labels``.
+
+        Each line is split on single spaces: the first field is joined onto
+        ``self.root_dir`` to form the image path, the second field is parsed
+        as the integer label. Images whose numeric file name is divisible by
+        5 form the "test" split, all others form the "train" split.
+        """
        paths = []
        labels = []

+        # File name without directory and extension, as an int. Expects
+        # "/"-separated paths and purely numeric file stems.
        def _convert_path_to_number(path):
-            return int(path.split('/')[-1].split('.')[0])
+            return int(path.split("/")[-1].split(".")[0])

-        with open(self.annot_file, 'r') as f:
+        with open(self.annot_file, "r") as f:
            lines = f.readlines()
            for line in lines:
-                line = line.split(' ')
+                line = line.split(" ")
                path = os.path.join(self.root_dir, line[0])
                p_int = _convert_path_to_number(path)
-                if self.mode == 'train':
+                if self.mode == "train":
                    if p_int % 5 != 0:
                        paths.append(path)
-                        labels.append(line[1])
-                elif self.mode == 'test':
+                        # strip() removes the trailing newline before int().
+                        labels.append(int(line[1].strip()))
+                elif self.mode == "test":
                    if p_int % 5 == 0:
                        paths.append(path)
-                        labels.append(line[1])
+                        labels.append(int(line[1].strip()))

        self.paths = paths
        self.labels = labels

+    def _set_bboxes(self):
+        """Load one bounding box per image from its sibling ``.txt`` file.
+
+        For every entry in ``self.paths`` the file extension is swapped for
+        ``.txt`` and the first line of that file is parsed as
+        whitespace-separated fields. The first field is skipped (presumably
+        a class id - confirm against the dataset) and the remaining fields
+        are stored as floats. ``self.bboxes`` is index-aligned with
+        ``self.paths``.
+
+        Raises:
+            OSError: If an annotation file cannot be opened.
+            ValueError: If a field after the first one is not a number.
+        """
+        bboxes = []
+        for path in self.paths:
+            # splitext swaps only the real extension; str.replace would also
+            # rewrite a ".png" inside a directory name and would leave
+            # non-PNG image paths untouched.
+            annot_path = os.path.splitext(path)[0] + ".txt"
+            with open(annot_path, "r") as f:
+                # split() without a separator tolerates repeated spaces,
+                # tabs and trailing whitespace, none of which float() or
+                # split(" ") handle reliably.
+                fields = f.readline().split()
+            bboxes.append([float(x) for x in fields[1:]])
+
+        self.bboxes = bboxes
+
    def __getitem__(self, idx):
+        """Return ``(image, label, bbox)`` for the sample at ``idx``.
+
+        The image is read from disk with ``cv2.imread`` on every call and
+        converted with ``cv2.COLOR_BGR2GRAY``; label and bbox come from the
+        lists built in the constructor.
+        """
        image_path = self.paths[idx]
        label = self.labels[idx]
+        bbox = self.bboxes[idx]
        image = cv2.imread(image_path)
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        return image, label
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        return image, label, bbox

    def __len__(self):
+        """Return the number of samples kept for the selected mode."""
        return len(self.paths)


 def main():
+    """Load the "test" split from ./ears and print every sample's summary."""
-    dat = EarDataClass(root_dir='./ears', annot_file='identites.txt', mode='train')
+    dat = EarDataClass(root_dir="./ears", annot_file="identites.txt", mode="test")
    for i in range(len(dat)):
-        image, label = dat[i]
-        print(image.shape, label)
+        image, label, bbox = dat[i]
+        # Image shape, integer label and bounding-box values of sample i.
+        print(image.shape, label, bbox)
    pass

-if __name__ == '__main__':
+
+# Run main() only when this file is executed as a script, not on import.
+if __name__ == "__main__":
    main()
--- a/a1/pipeline.py
+++ b/a1/pipeline.py
@ -18,6 +18,14 @@ class ViolaJones:
        self.scale_factor = scale_factor
        self.min_neighbors = min_neighbors

+    def _convert_to_yolo_format(self, bbox, img_width, img_height):
+        """Convert an ``(x, y, w, h)`` box into normalized centre form.
+
+        ``x``/``y`` are treated as the box's left/top edge. The result is
+        ``[x_center, y_center, width, height]`` with horizontal values
+        divided by ``img_width`` and vertical values by ``img_height``.
+        """
+        left, top, box_w, box_h = bbox
+        centre_x = left + box_w / 2
+        centre_y = top + box_h / 2
+        return [
+            centre_x / img_width,
+            centre_y / img_height,
+            box_w / img_width,
+            box_h / img_height,
+        ]
+
    def _extract_ear(self, image, ear):
+        """Crop the ``(x, y, w, h)`` region ``ear`` out of ``image``.
+
+        Rows ``y`` to ``y + h`` and columns ``x`` to ``x + w`` are returned.
+        """
        x, y, w, h = ear
        return image[y : y + h, x : x + w]
@ -30,20 +38,43 @@ class ViolaJones:
            image, self.scale_factor, self.min_neighbors
        )

+        img_height, img_width = image.shape[:2]
+
        if len(left_ears) > 0:
-            # Crop out the region of interest
-            return left_ears[0], self._extract_ear(image, left_ears[0])
+            return self._convert_to_yolo_format(
+                left_ears[0], img_width, img_height
+            ), self._extract_ear(image, left_ears[0])
        elif len(right_ears) > 0:
-            return right_ears[0], self._extract_ear(image, right_ears[0])
+            return self._convert_to_yolo_format(
+                right_ears[0], img_width, img_height
+            ), self._extract_ear(image, right_ears[0])
        else:
            return None, None

+    def compute_iou(self, bbox1, bbox2):
+        """Return the intersection-over-union of two corner-format boxes.
+
+        Both boxes are ``(x_min, y_min, x_max, y_max)``. Every extent has 1
+        added to it, i.e. the corners are treated as inclusive pixel
+        indices; boxes in another layout or in normalized units have to be
+        converted by the caller first. Returns 0 when the union is not
+        positive.
+        """
+        ax0, ay0, ax1, ay1 = bbox1
+        bx0, by0, bx1, by1 = bbox2
+
+        # Overlap along each axis, clamped at zero for disjoint boxes.
+        overlap_w = max(0, min(ax1, bx1) - max(ax0, bx0) + 1)
+        overlap_h = max(0, min(ay1, by1) - max(ay0, by0) + 1)
+        intersection = overlap_w * overlap_h
+
+        area_a = (ax1 - ax0 + 1) * (ay1 - ay0 + 1)
+        area_b = (bx1 - bx0 + 1) * (by1 - by0 + 1)
+        union = area_a + area_b - intersection
+
+        if union > 0:
+            return intersection / union
+        return 0
+

 class LBP:
    def __init__(self, P=8, R=1):
+        """Store the LBP sampling parameters.
+
+        Args:
+            P: Number of neighbours sampled around each pixel; forward()
+                builds one bit of the code per neighbour.
+            R: Distance of the neighbours from the centre pixel; forward()
+                also skips a border of this many pixels.
+        """
        self.P = P
        self.R = R
-        pass

    def _bilinear_interpolation(self, image, x, y):
        x1, y1 = int(x), int(y)
@ -61,41 +92,88 @@ class LBP:
            + f22 * (x - x1) * (y - y1)
        )

+    def _get_lbp_histogram(self, lbp_image):
+        """
+        Compute the normalized histogram of the LBP image.
+
+        One bin is created per possible code, i.e. ``2**self.P`` bins
+        covering ``[0, 2**self.P)``. The counts are divided by their sum so
+        the result adds up to 1; when there is nothing to count (empty
+        image) the all-zero histogram is returned instead of dividing by
+        zero.
+        """
+        n_codes = 2**self.P
+        # An integer bin count together with `range` yields exactly one
+        # unit-wide bin per code. Passing np.arange(0, n_codes) as edges
+        # gives only n_codes - 1 bins and merges the two highest codes.
+        histogram, _ = np.histogram(
+            lbp_image.ravel(), bins=n_codes, range=(0, n_codes)
+        )
+        histogram = histogram.astype("float")
+        total = histogram.sum()
+        if total > 0:
+            histogram /= total
+        return histogram
+
    def forward(self, img):
+        """Return the normalized LBP-code histogram of ``img``.
+
+        The image is converted with ``cv2.COLOR_BGR2GRAY``. For every pixel
+        at least ``R`` away from the border, ``P`` neighbours at distance
+        ``R`` are sampled with bilinear interpolation; bit ``p`` of the code
+        is set when neighbour ``p`` is >= the centre value. Border pixels
+        keep code 0. The code image is then passed to _get_lbp_histogram.
+
+        Raises:
+            ValueError: If the colour conversion fails for any reason.
+        """
+        # Convert the BGR input to a single-channel image.
+        # NOTE(review): the bare except hides the original error and also
+        # traps KeyboardInterrupt/SystemExit. The dataloader in this commit
+        # already returns COLOR_BGR2GRAY images, which this conversion
+        # presumably rejects - confirm what callers pass in.
+        try:
            gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        lbp = np.zeros_like(gray_image)
+        except:
+            raise ValueError("The input image must be a valid BGR image.")
+
+        # Same shape and dtype as the gray image; assumes the codes fit that
+        # dtype (true for 8-bit images with P <= 8) - TODO confirm for P > 8.
+        lbp_image = np.zeros_like(gray_image)
        for i in range(self.R, gray_image.shape[0] - self.R):
            for j in range(self.R, gray_image.shape[1] - self.R):
                center = gray_image[i, j]
                lbp_code = 0
                for p in range(self.P):
-                    # Angle in radians
                    theta = 2 * np.pi * p / self.P
                    x = j + self.R * np.cos(theta)
-                    y = i - self.R * np.sin(
-                        theta
-                    )  # y coordinates go from top to bottom
+                    # Minus sign: row indices grow downwards in the image.
+                    y = i - self.R * np.sin(theta)
                    pixel_value = self._bilinear_interpolation(gray_image, x, y)
-                    lbp_code |= (1 << p) if pixel_value >= center else 0
-                lbp[i, j] = lbp_code
-        return lbp
+                    lbp_code |= (pixel_value >= center) << p
+                lbp_image[i, j] = lbp_code
+
+        return self._get_lbp_histogram(lbp_image)
+
+
+def test_vj(dat: EarDataClass):
+    """Sweep Viola-Jones parameters and write the mean IoU of each setting.
+
+    For every (scale_factor, min_neighbors) pair a detector is run on all
+    samples of ``dat`` and the mean IoU against the annotated box is written
+    to ``./pred/vj/sf=<scale_factor>_mn=<min_neighbors>/iou.txt``. A sample
+    without a detection counts as IoU 0.
+
+    ViolaJones.forward returns normalized ``[x_center, y_center, w, h]``
+    boxes, while compute_iou expects inclusive pixel corners, so both boxes
+    are converted before the comparison.
+    NOTE(review): the annotated boxes are assumed to use the same normalized
+    centre format as the detector output - confirm against the label files.
+
+    Args:
+        dat: Dataset yielding ``(image, label, bbox)`` per index.
+    """
+
+    def _to_pixel_corners(box, img_width, img_height):
+        # Normalized centre box -> (x_min, y_min, x_max, y_max) in pixels.
+        x_center, y_center, w, h = box
+        half_w = w * img_width / 2
+        half_h = h * img_height / 2
+        cx = x_center * img_width
+        cy = y_center * img_height
+        # compute_iou adds 1 to every extent (inclusive indices), so the far
+        # corner is pulled in by one pixel to keep widths/heights unchanged.
+        return [cx - half_w, cy - half_h, cx + half_w - 1, cy + half_h - 1]
+
+    os.makedirs("./pred/vj", exist_ok=True)
+    scale_factors = [1.01, 1.02, 1.03, 1.04]
+    min_neighbors = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+    for scale_factor in scale_factors:
+        for min_neigh in min_neighbors:
+            vj = ViolaJones(scale_factor=scale_factor, min_neighbors=min_neigh)
+            out_dir = f"./pred/vj/sf={scale_factor}_mn={min_neigh}"
+            os.makedirs(out_dir, exist_ok=True)
+            ious = []
+            for i in tqdm(range(len(dat))):
+                image, _, bbox = dat[i]
+                bbox_vj, _ = vj.forward(image)
+                if bbox_vj is None:
+                    # A missed detection counts as zero overlap.
+                    ious.append(0)
+                    continue
+                img_height, img_width = image.shape[:2]
+                iou = vj.compute_iou(
+                    _to_pixel_corners(bbox, img_width, img_height),
+                    _to_pixel_corners(bbox_vj, img_width, img_height),
+                )
+                ious.append(iou)
+
+            with open(f"{out_dir}/iou.txt", "w") as f:
+                f.write(str(np.mean(ious)))


 def main():
+    """Load the "train" split from ./ears and run the test_vj sweep on it."""
    dat = EarDataClass(root_dir="./ears", annot_file="identites.txt", mode="train")
-    vj = ViolaJones()
-    lbp = LBP()
-    for i in tqdm(range(len(dat))):
-        img, label = dat[i]
-        ear_lab, ear_img = vj.forward(img)
-        if ear_lab is not None:
-            # Save the image
-            print(lbp.forward(ear_img).shape)
-            os.makedirs("./pred", exist_ok=True)
-            # Save the image
-            cv2.imwrite(f"./pred/{i}.jpg", ear_img)
-
-    pass
+    test_vj(dat)


 if __name__ == "__main__":