From ec603b85fd13035042e394cef721fb0d2c3c99a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Spagnolo?= Date: Wed, 8 Nov 2023 15:30:37 +0100 Subject: [PATCH] Test vj --- a1/dataloader.py | 53 +++++++++++-------- a1/pipeline.py | 130 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 137 insertions(+), 46 deletions(-) diff --git a/a1/dataloader.py b/a1/dataloader.py index 305d23b..6768f56 100644 --- a/a1/dataloader.py +++ b/a1/dataloader.py @@ -1,64 +1,77 @@ - import os import cv2 -class EarDataClass(): - def __init__(self, root_dir:str , annot_file: str, mode: str): +class EarDataClass: + def __init__(self, root_dir: str, annot_file: str, mode: str): if not os.path.isdir(root_dir): - raise ValueError('root_dir must be a valid directory') + raise ValueError("root_dir must be a valid directory") if os.path.isfile(os.path.join(root_dir, annot_file)): - raise ValueError('annot_file must be a valid file') - if mode not in ['train', 'test']: - raise ValueError('mode must be either train or test') + raise ValueError("annot_file must be a valid file") + if mode not in ["train", "test"]: + raise ValueError("mode must be either train or test") self.root_dir = root_dir self.annot_file = annot_file self.mode = mode self._set_paths() + self._set_bboxes() def _set_paths(self): paths = [] labels = [] def _convert_path_to_number(path): - return int(path.split('/')[-1].split('.')[0]) + return int(path.split("/")[-1].split(".")[0]) - with open(self.annot_file, 'r') as f: + with open(self.annot_file, "r") as f: lines = f.readlines() for line in lines: - line = line.split(' ') + line = line.split(" ") path = os.path.join(self.root_dir, line[0]) p_int = _convert_path_to_number(path) - if self.mode == 'train': + if self.mode == "train": if p_int % 5 != 0: paths.append(path) - labels.append(line[1]) - elif self.mode == 'test': + labels.append(int(line[1].strip())) + elif self.mode == "test": if p_int % 5 == 0: paths.append(path) - labels.append(line[1]) + labels.append(int(line[1].strip())) self.paths = paths self.labels = labels + def _set_bboxes(self): + bboxes = [] + for path in self.paths: + path = path.replace(".png", ".txt") + with open(path, "r") as f: + lines = f.read().split(sep=" ") + bbox = [float(x) for x in lines[1:]] + bboxes.append(bbox) + + self.bboxes = bboxes + def __getitem__(self, idx): image_path = self.paths[idx] label = self.labels[idx] + bbox = self.bboxes[idx] image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - return image, label + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + return image, label, bbox def __len__(self): return len(self.paths) def main(): - dat = EarDataClass(root_dir='./ears', annot_file='identites.txt', mode='train') + dat = EarDataClass(root_dir="./ears", annot_file="identites.txt", mode="test") for i in range(len(dat)): - image, label = dat[i] - print(image.shape, label) + image, label, bbox = dat[i] + print(image.shape, label, bbox) pass -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/a1/pipeline.py b/a1/pipeline.py index f57f009..e61f41a 100644 --- a/a1/pipeline.py +++ b/a1/pipeline.py @@ -18,6 +18,14 @@ class ViolaJones: self.scale_factor = scale_factor self.min_neighbors = min_neighbors + def _convert_to_yolo_format(self, bbox, img_width, img_height): + x, y, w, h = bbox + x_center = (x + w / 2) / img_width + y_center = (y + h / 2) / img_height + w_norm = w / img_width + h_norm = h / img_height + return [x_center, y_center, w_norm, h_norm] + def _extract_ear(self, image, ear): x, y, w, h = ear return image[y : y + h, x : x + w] @@ -30,20 +38,43 @@ class ViolaJones: image, self.scale_factor, self.min_neighbors ) + img_height, img_width = image.shape[:2] + if len(left_ears) > 0: - # Crop out the region of interest - return left_ears[0], self._extract_ear(image, left_ears[0]) + return self._convert_to_yolo_format( + left_ears[0], img_width, img_height + ), self._extract_ear(image, left_ears[0]) elif len(right_ears) > 0: - return right_ears[0], self._extract_ear(image, right_ears[0]) + return self._convert_to_yolo_format( + right_ears[0], img_width, img_height + ), self._extract_ear(image, right_ears[0]) else: return None, None + def compute_iou(self, bbox1, bbox2): + x1_min, y1_min, x1_max, y1_max = bbox1 + x2_min, y2_min, x2_max, y2_max = bbox2 + + xA = max(x1_min, x2_min) + yA = max(y1_min, y2_min) + xB = min(x1_max, x2_max) + yB = min(y1_max, y2_max) + + interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1) + + boxAArea = (x1_max - x1_min + 1) * (y1_max - y1_min + 1) + boxBArea = (x2_max - x2_min + 1) * (y2_max - y2_min + 1) + unionArea = boxAArea + boxBArea - interArea + + iou = interArea / unionArea if unionArea > 0 else 0 + + return iou + class LBP: def __init__(self, P=8, R=1): self.P = P self.R = R - pass def _bilinear_interpolation(self, image, x, y): x1, y1 = int(x), int(y) @@ -61,41 +92,88 @@ class LBP: + f22 * (x - x1) * (y - y1) ) + def _get_lbp_histogram(self, lbp_image): + """ + Compute the normalized histogram of the LBP image. + """ + histogram, _ = np.histogram( + lbp_image.ravel(), bins=np.arange(0, 2**self.P, 1), range=(0, 2**self.P) + ) + histogram = histogram.astype("float") + histogram /= histogram.sum() + return histogram + def forward(self, img): - gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - lbp = np.zeros_like(gray_image) + # Already grayscale + try: + gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + except: + raise ValueError("The input image must be a valid BGR image.") + + lbp_image = np.zeros_like(gray_image) for i in range(self.R, gray_image.shape[0] - self.R): for j in range(self.R, gray_image.shape[1] - self.R): center = gray_image[i, j] lbp_code = 0 for p in range(self.P): - # Angle in radians theta = 2 * np.pi * p / self.P x = j + self.R * np.cos(theta) - y = i - self.R * np.sin( - theta - ) # y coordinates go from top to bottom + y = i - self.R * np.sin(theta) pixel_value = self._bilinear_interpolation(gray_image, x, y) - lbp_code |= (1 << p) if pixel_value >= center else 0 - lbp[i, j] = lbp_code - return lbp + lbp_code |= (pixel_value >= center) << p + lbp_image[i, j] = lbp_code + + return self._get_lbp_histogram(lbp_image) + + +def test_vj(dat: EarDataClass): + os.makedirs("./pred/vj", exist_ok=True) + #scale_factors = [ + # 1.05, + # 1.1, + # 1.15, + # 1.2, + # 1.25, + # 1.3, + # 1.35, + # 1.4, + # 1.45, + # 1.5, + # 1.55, + # 1.60, + # 1.65, + # 1.70, + # 1.75, + # 1.80, + # 1.85, + # 1.90, + # 1.95, + # 2.0, + #] + scale_factors = [1.01, 1.02, 1.03, 1.04] + min_neighbors = [1, 2, 3, 4, 5, 6, 7, 8, 9] + + for scale_factor in scale_factors: + for min_neigh in min_neighbors: + vj = ViolaJones(scale_factor=scale_factor, min_neighbors=min_neigh) + ious = [] + os.makedirs(f"./pred/vj/sf={scale_factor}_mn={min_neigh}", exist_ok=True) + for i in tqdm(range(len(dat))): + image, _, bbox = dat[i] + bbox_vj, _ = vj.forward(image) + if bbox_vj is not None: + iou = vj.compute_iou(bbox, bbox_vj) + ious.append(iou) + else: + ious.append(0) + + with open(f"./pred/vj/sf={scale_factor}_mn={min_neigh}/iou.txt", "w") as f: + f.write(str(np.mean(ious))) def main(): dat = EarDataClass(root_dir="./ears", annot_file="identites.txt", mode="train") - vj = ViolaJones() - lbp = LBP() - for i in tqdm(range(len(dat))): - img, label = dat[i] - ear_lab, ear_img = vj.forward(img) - if ear_lab is not None: - # Save the image - print(lbp.forward(ear_img).shape) - os.makedirs("./pred", exist_ok=True) - # Save the image - cv2.imwrite(f"./pred/{i}.jpg", ear_img) - - pass + test_vj(dat) if __name__ == "__main__":