235 lines
8.8 KiB
Python
235 lines
8.8 KiB
Python
import warnings
|
|
|
|
import cv2
|
|
import numpy as np
|
|
import torch
|
|
from kornia.color import rgb_to_grayscale
|
|
from packaging import version
|
|
|
|
try:
|
|
import pycolmap
|
|
except ImportError:
|
|
pycolmap = None
|
|
|
|
from ..base_model import BaseModel
|
|
from ..utils.misc import pad_to_length
|
|
|
|
|
|
def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None):
    """Select one detection per pixel and optionally apply spatial NMS.

    Args:
        points: array of keypoint coordinates, one row per point. The two
            columns are reversed before indexing, so the second column
            addresses the ``h`` axis and the first the ``w`` axis.
        scales: 1D array with one scale per point; used as the ranking
            value when ``scores`` is None.
        angles: 1D array with one orientation per point.
        image_shape: ``(h, w)`` of the image the points were detected in.
        nms_radius: radius (in pixels) of the non-maximum suppression window;
            values ``<= 0`` skip the NMS step.
        scores: optional 1D array with one score per point; takes precedence
            over ``scales`` as the ranking value.

    Returns:
        1D integer array with the indices of the points that are kept.
    """
    h, w = image_shape
    ij = np.round(points - 0.5).astype(int).T[::-1]

    # Remove duplicate points (identical coordinates).
    # Pick highest scale or score
    s = scales if scores is None else scores
    # Start from -inf rather than 0 so that negative ranking values can still
    # be the per-pixel maximum instead of being discarded.
    buffer = np.full((h, w), -np.inf)
    np.maximum.at(buffer, tuple(ij), s)
    keep = np.where(buffer[tuple(ij)] == s)[0]

    # Pick lowest angle (arbitrary).
    ij = ij[:, keep]
    buffer[:] = np.inf
    o_abs = np.abs(angles[keep])
    np.minimum.at(buffer, tuple(ij), o_abs)
    mask = buffer[tuple(ij)] == o_abs
    ij = ij[:, mask]
    keep = keep[mask]

    if nms_radius > 0:
        # Apply NMS on the remaining points
        # Empty pixels are -inf so they can never beat a real detection.
        buffer[:] = -np.inf
        buffer[tuple(ij)] = s[keep]  # scores or scale

        local_max = torch.nn.functional.max_pool2d(
            torch.from_numpy(buffer).unsqueeze(0),
            kernel_size=nms_radius * 2 + 1,
            stride=1,
            padding=nms_radius,
        ).squeeze(0)
        is_local_max = buffer == local_max.numpy()
        keep = keep[is_local_max[tuple(ij)]]
    return keep
|
|
|
|
|
|
def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor:
    """Convert SIFT descriptors to RootSIFT along the last dimension.

    The descriptors are L1-normalized, clamped from below at ``eps``,
    square-rooted element-wise and finally L2-normalized. The input tensor
    is left untouched.
    """
    l1_normalized = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps)
    # Clamp before the root so that no entry is smaller than eps.
    rooted = torch.sqrt(torch.clamp(l1_normalized, min=eps))
    return torch.nn.functional.normalize(rooted, p=2, dim=-1, eps=eps)
|
|
|
|
|
|
def run_opencv_sift(features: "cv2.Feature2D", image: np.ndarray) -> tuple:
    """
    Detect keypoints and compute descriptors using an OpenCV detector.

    Args:
        features: OpenCV based keypoints detector and descriptor
        image: Grayscale image of uint8 data type
    Returns:
        points: (N, 2) float32 array built from each keypoint's ``pt``
        scores: (N,) float32 array of keypoint responses
        scales: (N,) float32 array of keypoint sizes
        angles: (N,) float32 array of keypoint angles converted from
            degrees to radians
        descriptors: descriptor array returned by the detector, or an
            empty (0, D) float32 array when nothing was detected
    """
    detections, descriptors = features.detectAndCompute(image, None)
    # The reshape keeps a well-formed (0, 2) array when nothing is detected;
    # without it an empty list collapses to shape (0,).
    points = np.array([k.pt for k in detections], dtype=np.float32).reshape(-1, 2)
    scores = np.array([k.response for k in detections], dtype=np.float32)
    scales = np.array([k.size for k in detections], dtype=np.float32)
    angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32))
    if descriptors is None:
        # No descriptors were computed: return an empty array so callers can
        # index and convert it like any other result.
        descriptors = np.zeros((0, features.descriptorSize()), dtype=np.float32)
    return points, scores, scales, angles, descriptors
|
|
|
|
|
|
class SIFT(BaseModel):
    """SIFT keypoint extractor backed by OpenCV or pycolmap.

    Configuration keys (see ``default_conf``):
        rootsift: convert the descriptors with ``sift_to_rootsift``.
        nms_radius: radius passed to ``filter_dog_point``; ``None`` disables
            the filtering entirely.
        max_num_keypoints: upper bound on the number of keypoints kept.
        backend: one of ``opencv``, ``pycolmap``, ``pycolmap_cpu``,
            ``pycolmap_cuda``.
        detection_threshold, edge_threshold, first_octave, num_octaves:
            forwarded to the selected backend.
        force_num_keypoints: pad the per-image outputs with ``pad_to_length``.
    """

    default_conf = {
        "rootsift": True,
        "nms_radius": 0,  # None to disable filtering entirely.
        "max_num_keypoints": 4096,
        "backend": "opencv",  # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda}
        "detection_threshold": 0.0066667,  # from COLMAP
        "edge_threshold": 10,
        "first_octave": -1,  # only used by pycolmap, the default of COLMAP
        "num_octaves": 4,
        "force_num_keypoints": False,
    }

    required_data_keys = ["image"]

    def _init(self, conf):
        """Instantiate the backend extractor selected by ``conf.backend``.

        Raises:
            ImportError: a pycolmap backend is requested but pycolmap is
                not installed.
            ValueError: the backend name is not recognized.
        """
        backend = self.conf.backend
        if backend.startswith("pycolmap"):
            if pycolmap is None:
                raise ImportError(
                    "Cannot find module pycolmap: install it with pip "
                    "or use backend=opencv."
                )
            options = {
                "peak_threshold": self.conf.detection_threshold,
                "edge_threshold": self.conf.edge_threshold,
                "first_octave": self.conf.first_octave,
                "num_octaves": self.conf.num_octaves,
                "normalization": pycolmap.Normalization.L2,  # L1_ROOT is buggy.
            }
            device = (
                "auto" if backend == "pycolmap" else backend.replace("pycolmap_", "")
            )
            runs_on_cpu = backend == "pycolmap_cpu" or not pycolmap.has_cuda
            # Compare parsed versions: a plain string comparison would rank
            # e.g. "0.10.0" below "0.5.0".
            is_old_pycolmap = version.parse(pycolmap.__version__) < version.parse(
                "0.5.0"
            )
            if runs_on_cpu and is_old_pycolmap:
                warnings.warn(
                    "The pycolmap CPU SIFT is buggy in version < 0.5.0, "
                    "consider upgrading pycolmap or use the CUDA version.",
                    stacklevel=1,
                )
            else:
                options["max_num_features"] = self.conf.max_num_keypoints
            self.sift = pycolmap.Sift(options=options, device=device)
        elif backend == "opencv":
            self.sift = cv2.SIFT_create(
                contrastThreshold=self.conf.detection_threshold,
                nfeatures=self.conf.max_num_keypoints,
                edgeThreshold=self.conf.edge_threshold,
                nOctaveLayers=self.conf.num_octaves,
            )
        else:
            backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"}
            raise ValueError(
                f"Unknown backend: {backend} not in " f"{{{','.join(backends)}}}."
            )

    def extract_single_image(self, image: torch.Tensor):
        """Extract SIFT features from one image.

        Args:
            image: image tensor whose leading dimension is squeezed away
                before extraction (presumably ``(1, H, W)`` with values in
                ``[0, 1]`` - confirm against the caller).

        Returns:
            dict of tensors with keys ``keypoints``, ``scales``, ``oris``,
            ``descriptors`` and, when the backend exposes scores,
            ``keypoint_scores``.
        """
        image_np = image.cpu().numpy().squeeze(0)

        if self.conf.backend.startswith("pycolmap"):
            if version.parse(pycolmap.__version__) >= version.parse("0.5.0"):
                detections, descriptors = self.sift.extract(image_np)
                scores = None  # Scores are not exposed by COLMAP anymore.
            else:
                detections, scores, descriptors = self.sift.extract(image_np)
            keypoints = detections[:, :2]  # Keep only (x, y).
            scales, angles = detections[:, -2:].T
            if scores is not None and (
                self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda
            ):
                # Set the scores as a combination of abs. response and scale.
                scores = np.abs(scores) * scales
        elif self.conf.backend == "opencv":
            # TODO: Check if opencv keypoints are already in corner convention
            keypoints, scores, scales, angles, descriptors = run_opencv_sift(
                self.sift, (image_np * 255.0).astype(np.uint8)
            )
        pred = {
            "keypoints": keypoints,
            "scales": scales,
            "oris": angles,
            "descriptors": descriptors,
        }
        if scores is not None:
            pred["keypoint_scores"] = scores

        # sometimes pycolmap returns points outside the image. We remove them
        if self.conf.backend.startswith("pycolmap"):
            is_inside = (
                pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]])
            ).all(-1)
            pred = {k: v[is_inside] for k, v in pred.items()}

        if self.conf.nms_radius is not None:
            keep = filter_dog_point(
                pred["keypoints"],
                pred["scales"],
                pred["oris"],
                image_np.shape,
                self.conf.nms_radius,
                # Scores are absent for backends that do not expose them;
                # filter_dog_point then ranks by scale.
                pred.get("keypoint_scores"),
            )
            pred = {k: v[keep] for k, v in pred.items()}

        pred = {k: torch.from_numpy(v) for k, v in pred.items()}
        if scores is not None:
            # Keep the k keypoints with highest score
            num_points = self.conf.max_num_keypoints
            if num_points is not None and len(pred["keypoints"]) > num_points:
                indices = torch.topk(pred["keypoint_scores"], num_points).indices
                pred = {k: v[indices] for k, v in pred.items()}

        if self.conf.force_num_keypoints:
            # NOTE(review): this target never exceeds the current number of
            # keypoints, so the padding below looks like a no-op; padding to
            # max_num_keypoints may have been intended - confirm against
            # pad_to_length.
            num_points = min(self.conf.max_num_keypoints, len(pred["keypoints"]))
            pred["keypoints"] = pad_to_length(
                pred["keypoints"],
                num_points,
                -2,
                mode="random_c",
                bounds=(0, min(image.shape[1:])),
            )
            pred["scales"] = pad_to_length(pred["scales"], num_points, -1, mode="zeros")
            pred["oris"] = pad_to_length(pred["oris"], num_points, -1, mode="zeros")
            pred["descriptors"] = pad_to_length(
                pred["descriptors"], num_points, -2, mode="zeros"
            )
            if "keypoint_scores" in pred:
                # Store the padded scores back so they stay aligned with the
                # other padded entries.
                pred["keypoint_scores"] = pad_to_length(
                    pred["keypoint_scores"], num_points, -1, mode="zeros"
                )
        return pred

    def _forward(self, data: dict) -> dict:
        """Extract features for every image of the batch in ``data["image"]``.

        Three-channel images are converted to grayscale first. When
        ``data["image_size"]`` is present, each image is cropped to that
        size before extraction. Per-image results are stacked along a new
        leading dimension and moved back to the input device.
        """
        image = data["image"]
        if image.shape[1] == 3:
            image = rgb_to_grayscale(image)
        device = image.device
        image = image.cpu()
        per_image = []
        for idx in range(len(image)):
            img = image[idx]
            if "image_size" in data:
                # avoid extracting points in padded areas
                w, h = data["image_size"][idx]
                img = img[:, :h, :w]
            per_image.append(self.extract_single_image(img))
        pred = {
            key: torch.stack([p[key] for p in per_image], 0).to(device)
            for key in per_image[0]
        }
        if self.conf.rootsift:
            pred["descriptors"] = sift_to_rootsift(pred["descriptors"])
        return pred

    def loss(self, pred, data):
        """Not implemented: always raises ``NotImplementedError``."""
        raise NotImplementedError
|