diff --git a/a1/.gitignore b/a1/.gitignore index 920fe40..8b0e62f 100644 --- a/a1/.gitignore +++ b/a1/.gitignore @@ -1,2 +1,3 @@ -flake.lock ears/** +pred/** +__pycache__/** diff --git a/a1/dataloader.py b/a1/dataloader.py index 7eb1623..305d23b 100644 --- a/a1/dataloader.py +++ b/a1/dataloader.py @@ -47,7 +47,6 @@ class EarDataClass(): label = self.labels[idx] image = cv2.imread(image_path) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - image = image.astype('float32') / 255.0 return image, label def __len__(self): diff --git a/a1/flake.lock b/a1/flake.lock new file mode 100644 index 0000000..9f80961 --- /dev/null +++ b/a1/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1694529238, + "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1699169573, + "narHash": "sha256-cvUb1xZkvOp3W2SzylStrTirhVd9zCeo5utJl9nSIhw=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "aeefe2054617cae501809b82b44a8e8f7be7cc4b", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-23.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/a1/flake.nix b/a1/flake.nix index 7c9737d..24cf629 100644 --- a/a1/flake.nix +++ b/a1/flake.nix 
import cv2
from dataloader import EarDataClass
import os
from tqdm import tqdm
import numpy as np


class ViolaJones:
    """Ear detector built on OpenCV's Viola-Jones cascade classifiers.

    Runs a left-ear and a right-ear Haar cascade; left-ear detections
    take precedence when both cascades fire.
    """

    def __init__(
        self,
        left_ear_cascade="haarcascade_mcs_leftear.xml",
        right_ear_cascade="haarcascade_mcs_rightear.xml",
        scale_factor=1.05,  # how much the image is reduced at each image scale
        min_neighbors=2,  # how many overlapping candidates a detection needs
    ):
        self.left_ear_cascade = cv2.CascadeClassifier(left_ear_cascade)
        self.right_ear_cascade = cv2.CascadeClassifier(right_ear_cascade)
        self.scale_factor = scale_factor
        self.min_neighbors = min_neighbors

    def _extract_ear(self, image, ear):
        """Crop the (x, y, w, h) detection rectangle out of `image`."""
        x, y, w, h = ear
        return image[y : y + h, x : x + w]

    def forward(self, image):
        """Detect a single ear in `image`.

        Returns `(rect, crop)` where `rect` is the first (x, y, w, h)
        detection and `crop` the corresponding sub-image, or `(None, None)`
        when neither cascade finds anything.
        """
        left_ears = self.left_ear_cascade.detectMultiScale(
            image, self.scale_factor, self.min_neighbors
        )
        right_ears = self.right_ear_cascade.detectMultiScale(
            image, self.scale_factor, self.min_neighbors
        )

        if len(left_ears) > 0:
            # Crop out the region of interest
            return left_ears[0], self._extract_ear(image, left_ears[0])
        if len(right_ears) > 0:
            return right_ears[0], self._extract_ear(image, right_ears[0])
        return None, None


class LBP:
    """Circular Local Binary Pattern descriptor with P samples on radius R."""

    def __init__(self, P=8, R=1):
        self.P = P  # number of sampling points on the circle
        self.R = R  # circle radius in pixels

    def _bilinear_interpolation(self, image, x, y):
        """Sample `image` at the continuous coordinate (x, y).

        BUGFIX: the original implementation swapped the two off-diagonal
        neighbours — image[y1, x2] (the *right* neighbour) was weighted as
        if it were the bottom neighbour and image[y2, x1] vice versa, so
        every sample off the pixel axes was interpolated incorrectly.
        Weighting by the fractional offsets (dx, dy) instead of (x2 - x)
        also stays correct when x2/y2 are clamped at the image border,
        where the old form collapsed to 0.
        """
        # int() truncates toward zero; coordinates here are non-negative
        # (callers sample inside the image), so this equals floor().
        x1, y1 = int(x), int(y)
        x2 = min(x1 + 1, image.shape[1] - 1)
        y2 = min(y1 + 1, image.shape[0] - 1)

        dx, dy = x - x1, y - y1  # fractional offsets in [0, 1)

        top_left = image[y1, x1]
        top_right = image[y1, x2]
        bottom_left = image[y2, x1]
        bottom_right = image[y2, x2]

        return (
            top_left * (1 - dx) * (1 - dy)
            + top_right * dx * (1 - dy)
            + bottom_left * (1 - dx) * dy
            + bottom_right * dx * dy
        )

    def forward(self, img):
        """Compute the LBP code image of a BGR image.

        Border pixels (within R of the edge) keep the value 0.  The output
        dtype is widened when P > 8 so codes cannot overflow uint8; for the
        default P=8 the result is byte-identical to the previous behaviour.
        """
        gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        dtype = np.uint8 if self.P <= 8 else np.uint32
        lbp = np.zeros(gray_image.shape, dtype=dtype)
        for i in range(self.R, gray_image.shape[0] - self.R):
            for j in range(self.R, gray_image.shape[1] - self.R):
                center = gray_image[i, j]
                lbp_code = 0
                for p in range(self.P):
                    # Angle in radians
                    theta = 2 * np.pi * p / self.P
                    x = j + self.R * np.cos(theta)
                    # y coordinates go from top to bottom, hence the minus
                    y = i - self.R * np.sin(theta)
                    pixel_value = self._bilinear_interpolation(gray_image, x, y)
                    if pixel_value >= center:
                        lbp_code |= 1 << p
                lbp[i, j] = lbp_code
        return lbp


def main():
    """Detect ears in the training set, compute LBP maps and save the crops."""
    dat = EarDataClass(root_dir="./ears", annot_file="identites.txt", mode="train")
    vj = ViolaJones()
    lbp = LBP()
    # Create the output directory once, not on every detection.
    os.makedirs("./pred", exist_ok=True)
    for i in tqdm(range(len(dat))):
        img, label = dat[i]
        ear_rect, ear_img = vj.forward(img)
        if ear_rect is not None:
            print(lbp.forward(ear_img).shape)
            # Save the cropped ear image
            cv2.imwrite(f"./pred/{i}.jpg", ear_img)


if __name__ == "__main__":
    main()