main
Gašper Spagnolo 2023-11-08 15:30:37 +01:00
parent 27c77a5928
commit ec603b85fd
No known key found for this signature in database
GPG Key ID: 46DCDBC936F8414C
2 changed files with 137 additions and 46 deletions

View File

@ -1,64 +1,77 @@
import os
import cv2
class EarDataClass:
    """Index-based dataset of ear images with identity labels and bounding boxes.

    The annotation file holds one sample per line in the form
    ``<image path relative to root_dir> <integer label>``.  Samples are split
    by the number encoded in the image file name: names divisible by five
    belong to the ``"test"`` split, all others to ``"train"``.  For each
    selected image a sibling ``.txt`` file is read for its bounding box.
    """

    def __init__(self, root_dir: str, annot_file: str, mode: str):
        """Validate the arguments and eagerly load paths, labels and boxes.

        Args:
            root_dir: Directory that the image paths in the annotation file
                are relative to.
            annot_file: Annotation file.  It is looked up as given first and,
                failing that, relative to ``root_dir``.
            mode: Either ``"train"`` or ``"test"``.

        Raises:
            ValueError: If ``root_dir`` is not a directory, the annotation
                file cannot be found, or ``mode`` is not a known split.
        """
        if not os.path.isdir(root_dir):
            raise ValueError("root_dir must be a valid directory")
        annot_path = self._resolve_annot_path(root_dir, annot_file)
        if annot_path is None:
            raise ValueError("annot_file must be a valid file")
        if mode not in ("train", "test"):
            raise ValueError("mode must be either train or test")

        self.root_dir = root_dir
        self.annot_file = annot_file
        self.mode = mode
        # Path that is actually opened; kept separate so the public
        # ``annot_file`` attribute still holds the caller's value.
        self._annot_path = annot_path

        self._set_paths()
        self._set_bboxes()

    @staticmethod
    def _resolve_annot_path(root_dir, annot_file):
        """Return the first existing candidate for the annotation file, or None."""
        for candidate in (annot_file, os.path.join(root_dir, annot_file)):
            if os.path.isfile(candidate):
                return candidate
        return None

    @staticmethod
    def _convert_path_to_number(path):
        """Return the integer encoded in the file name before its first dot."""
        return int(os.path.basename(path).split(".")[0])

    def _set_paths(self):
        """Read the annotation file and keep the samples of the active split."""
        want_test = self.mode == "test"
        paths = []
        labels = []

        with open(self._annot_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                if len(fields) < 2:
                    raise ValueError(f"malformed annotation line: {line!r}")

                path = os.path.join(self.root_dir, fields[0])
                is_test = self._convert_path_to_number(path) % 5 == 0
                if is_test == want_test:
                    paths.append(path)
                    labels.append(int(fields[1]))

        self.paths = paths
        self.labels = labels

    def _set_bboxes(self):
        """Load one bounding box per selected image from its sibling .txt file.

        The first whitespace-separated field of the file is skipped and the
        remaining fields of the first line are parsed as floats.
        """
        bboxes = []
        for path in self.paths:
            # Swap only the extension; a plain str.replace would also rewrite
            # any ".png" occurring in a directory name.
            label_path = os.path.splitext(path)[0] + ".txt"
            with open(label_path, "r") as f:
                # NOTE(review): only the first line is used - confirm that
                # label files never hold more than one box.
                fields = f.readline().split()
            bboxes.append([float(value) for value in fields[1:]])
        self.bboxes = bboxes

    def __getitem__(self, idx):
        """Return ``(grayscale image, label, bbox)`` for sample ``idx``.

        Raises:
            OSError: If the image file cannot be read.
        """
        image_path = self.paths[idx]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image, self.labels[idx], self.bboxes[idx]

    def __len__(self):
        """Return the number of samples in the active split."""
        return len(self.paths)
def main():
    """Smoke-test the dataset by printing every sample of the test split.

    For each sample the image shape, the label and the bounding box are
    printed.  ``EarDataClass.__getitem__`` returns three values, so all three
    are unpacked here.
    """
    dat = EarDataClass(root_dir="./ears", annot_file="identites.txt", mode="test")
    for i in range(len(dat)):
        image, label, bbox = dat[i]
        print(image.shape, label, bbox)


if __name__ == "__main__":
    main()

View File

@ -18,6 +18,14 @@ class ViolaJones:
self.scale_factor = scale_factor
self.min_neighbors = min_neighbors
def _convert_to_yolo_format(self, bbox, img_width, img_height):
x, y, w, h = bbox
x_center = (x + w / 2) / img_width
y_center = (y + h / 2) / img_height
w_norm = w / img_width
h_norm = h / img_height
return [x_center, y_center, w_norm, h_norm]
def _extract_ear(self, image, ear):
x, y, w, h = ear
return image[y : y + h, x : x + w]
@ -30,20 +38,43 @@ class ViolaJones:
image, self.scale_factor, self.min_neighbors
)
img_height, img_width = image.shape[:2]
if len(left_ears) > 0:
# Crop out the region of interest
return left_ears[0], self._extract_ear(image, left_ears[0])
return self._convert_to_yolo_format(
left_ears[0], img_width, img_height
), self._extract_ear(image, left_ears[0])
elif len(right_ears) > 0:
return right_ears[0], self._extract_ear(image, right_ears[0])
return self._convert_to_yolo_format(
right_ears[0], img_width, img_height
), self._extract_ear(image, right_ears[0])
else:
return None, None
def compute_iou(self, bbox1, bbox2):
    """Return the intersection-over-union of two corner boxes.

    Each box is unpacked as ``(x_min, y_min, x_max, y_max)`` and every extent
    is measured as ``max - min + 1``.  Returns 0 when the union is not
    positive.

    NOTE(review): the ``+ 1`` terms look like an inclusive pixel-coordinate
    convention - confirm that callers do not pass normalised boxes.
    """
    ax0, ay0, ax1, ay1 = bbox1
    bx0, by0, bx1, by1 = bbox2

    # Overlap rectangle: the innermost of each pair of edges.
    left, top = max(ax0, bx0), max(ay0, by0)
    right, bottom = min(ax1, bx1), min(ay1, by1)
    overlap = max(0, right - left + 1) * max(0, bottom - top + 1)

    area_a = (ax1 - ax0 + 1) * (ay1 - ay0 + 1)
    area_b = (bx1 - bx0 + 1) * (by1 - by0 + 1)
    union = area_a + area_b - overlap

    if not union > 0:
        return 0
    return overlap / union
class LBP:
def __init__(self, P=8, R=1):
self.P = P
self.R = R
pass
def _bilinear_interpolation(self, image, x, y):
x1, y1 = int(x), int(y)
@ -61,41 +92,88 @@ class LBP:
+ f22 * (x - x1) * (y - y1)
)
def _get_lbp_histogram(self, lbp_image):
"""
Compute the normalized histogram of the LBP image.
"""
histogram, _ = np.histogram(
lbp_image.ravel(), bins=np.arange(0, 2**self.P, 1), range=(0, 2**self.P)
)
histogram = histogram.astype("float")
histogram /= histogram.sum()
return histogram
def forward(self, img):
    """Compute the normalized LBP histogram of an image.

    Args:
        img: Either a 2-D array, which is used as the grayscale image as-is,
            or an image that ``cv2.cvtColor`` can convert from BGR to gray.

    Returns:
        The result of ``self._get_lbp_histogram`` for the LBP code image.

    Raises:
        ValueError: If ``img`` is not 2-D and cannot be converted from BGR.
    """
    if getattr(img, "ndim", None) == 2:
        # Already grayscale: converting a single-channel image with
        # COLOR_BGR2GRAY would fail, so use it directly.
        gray_image = img
    else:
        try:
            gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        except (cv2.error, TypeError) as err:
            raise ValueError("The input image must be a valid BGR image.") from err

    radius = self.R
    # The sampling offsets depend only on P and R, so compute them once
    # instead of once per pixel.
    offsets = []
    for p in range(self.P):
        theta = 2 * np.pi * p / self.P  # angle in radians
        offsets.append((radius * np.cos(theta), radius * np.sin(theta)))

    # A wide integer type keeps codes intact when P exceeds 8 bits.
    lbp_image = np.zeros(gray_image.shape, dtype=np.int64)
    n_rows, n_cols = gray_image.shape[:2]

    # Pixels closer than ``radius`` to the border are skipped and keep code 0.
    for i in range(radius, n_rows - radius):
        for j in range(radius, n_cols - radius):
            center = gray_image[i, j]
            lbp_code = 0
            for p, (dx, dy) in enumerate(offsets):
                # Image rows grow downwards, hence the subtraction for y.
                pixel_value = self._bilinear_interpolation(
                    gray_image, j + dx, i - dy
                )
                if pixel_value >= center:
                    lbp_code |= 1 << p
            lbp_image[i, j] = lbp_code

    return self._get_lbp_histogram(lbp_image)
def _yolo_to_pixel_corners(bbox, img_width, img_height):
    """Convert a normalised (x_center, y_center, w, h) box to inclusive pixel corners."""
    x_center, y_center, w, h = bbox
    box_w = w * img_width
    box_h = h * img_height
    x_min = x_center * img_width - box_w / 2
    y_min = y_center * img_height - box_h / 2
    # ViolaJones.compute_iou measures extents as ``max - min + 1``, so the
    # max corner is placed one unit before the far edge to keep areas exact.
    return x_min, y_min, x_min + box_w - 1, y_min + box_h - 1


def test_vj(dat: EarDataClass):
    """Grid-search Viola-Jones parameters and store the mean IoU per setting.

    For every ``(scale_factor, min_neighbors)`` pair the detector is run on
    each sample of ``dat``.  The detection is compared with the sample's box
    through ``ViolaJones.compute_iou``; a missed detection counts as IoU 0.
    The mean is written to ``./pred/vj/sf=<sf>_mn=<mn>/iou.txt``.

    Args:
        dat: Dataset whose items unpack as ``(image, label, bbox)``.
    """
    os.makedirs("./pred/vj", exist_ok=True)

    # Fine-grained grid.  A coarser 1.05-2.0 sweep (step 0.05) used to be
    # listed here as a commented-out alternative.
    scale_factors = [1.01, 1.02, 1.03, 1.04]
    min_neighbors = [1, 2, 3, 4, 5, 6, 7, 8, 9]

    for scale_factor in scale_factors:
        for min_neigh in min_neighbors:
            vj = ViolaJones(scale_factor=scale_factor, min_neighbors=min_neigh)
            out_dir = f"./pred/vj/sf={scale_factor}_mn={min_neigh}"
            os.makedirs(out_dir, exist_ok=True)

            ious = []
            for i in tqdm(range(len(dat))):
                image, _, bbox = dat[i]
                bbox_vj, _ = vj.forward(image)
                if bbox_vj is None:
                    ious.append(0)
                    continue

                # The detector reports (x_center, y_center, w, h) fractions,
                # while compute_iou unpacks (x_min, y_min, x_max, y_max), so
                # both boxes are converted before the comparison.
                # NOTE(review): assumes the dataset boxes use the same
                # normalised centre format - confirm against the label files.
                img_height, img_width = image.shape[:2]
                ious.append(
                    vj.compute_iou(
                        _yolo_to_pixel_corners(bbox, img_width, img_height),
                        _yolo_to_pixel_corners(bbox_vj, img_width, img_height),
                    )
                )

            # np.mean of an empty list is NaN; report 0.0 for an empty dataset.
            mean_iou = np.mean(ious) if ious else 0.0
            with open(f"{out_dir}/iou.txt", "w") as f:
                f.write(str(mean_iou))
def main():
    """Run the ear pipeline on the training split, then the Viola-Jones sweep.

    Every sample is passed through the detector.  When an ear is found, the
    shape of its LBP histogram is printed and the crop is saved to
    ``./pred/<index>.jpg``.  Afterwards ``test_vj`` is run on the same data.
    """
    dat = EarDataClass(root_dir="./ears", annot_file="identites.txt", mode="train")
    vj = ViolaJones()
    lbp = LBP()

    # Create the output directory once rather than on every detection.
    os.makedirs("./pred", exist_ok=True)

    for i in tqdm(range(len(dat))):
        # Dataset items unpack as (image, label, bbox), as in test_vj; only
        # the image is needed here.
        img, _label, _bbox = dat[i]
        ear_lab, ear_img = vj.forward(img)
        if ear_lab is not None:
            print(lbp.forward(ear_img).shape)
            # Save the image
            cv2.imwrite(f"./pred/{i}.jpg", ear_img)

    test_vj(dat)
if __name__ == "__main__":