SIFT+ALIKED updates (#26)
* fix links
* add sift/aliked eval configs
* add SIFT/ALIKED results on megadepth
* update sift config
* add SIFT with 4K keypoints results
* cleanup SIFT (see LightGlue)
* fix compatibility with LightGlue
* tiny visualization fix
* fix sift kornia
* Update sift configs

main
parent
aa7727675e
commit
0c75e76fd6
11
README.md
11
README.md
|
@ -66,8 +66,8 @@ Here are the results as Area Under the Curve (AUC) of the homography error at 1
|
||||||
|
|
||||||
| Methods | DLT | [OpenCV](../gluefactory/robust_estimators/homography/opencv.py) | [PoseLib](../gluefactory/robust_estimators/homography/poselib.py) |
|
| Methods | DLT | [OpenCV](../gluefactory/robust_estimators/homography/opencv.py) | [PoseLib](../gluefactory/robust_estimators/homography/poselib.py) |
|
||||||
| ------------------------------------------------------------ | ------------------ | ------------------ | ------------------ |
|
| ------------------------------------------------------------ | ------------------ | ------------------ | ------------------ |
|
||||||
| [SuperPoint + SuperGlue](../gluefactory/configs/superpoint+superglue.yaml) | 32.1 / 65.0 / 75.7 | 32.9 / 55.7 / 68.0 | 37.0 / 68.2 / 78.7 |
|
| [SuperPoint + SuperGlue](../gluefactory/configs/superpoint+superglue-official.yaml) | 32.1 / 65.0 / 75.7 | 32.9 / 55.7 / 68.0 | 37.0 / 68.2 / 78.7 |
|
||||||
| [SuperPoint + LightGlue](../gluefactory/configs/superpoint+lightglue.yaml) | 35.1 / 67.2 / 77.6 | 34.2 / 57.9 / 69.9 | 37.1 / 67.4 / 77.8 |
|
| [SuperPoint + LightGlue](../gluefactory/configs/superpoint+lightglue-official.yaml) | 35.1 / 67.2 / 77.6 | 34.2 / 57.9 / 69.9 | 37.1 / 67.4 / 77.8 |
|
||||||
|
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
@ -159,8 +159,11 @@ Here are the results as Area Under the Curve (AUC) of the pose error at 5/10/20
|
||||||
|
|
||||||
| Methods | [pycolmap](../gluefactory/robust_estimators/relative_pose/pycolmap.py) | [OpenCV](../gluefactory/robust_estimators/relative_pose/opencv.py) | [PoseLib](../gluefactory/robust_estimators/relative_pose/poselib.py) |
|
| Methods | [pycolmap](../gluefactory/robust_estimators/relative_pose/pycolmap.py) | [OpenCV](../gluefactory/robust_estimators/relative_pose/opencv.py) | [PoseLib](../gluefactory/robust_estimators/relative_pose/poselib.py) |
|
||||||
| ------------------------------------------------------------ | ------------------ | ------------------ | ------------------ |
|
| ------------------------------------------------------------ | ------------------ | ------------------ | ------------------ |
|
||||||
| [SuperPoint + SuperGlue](../gluefactory/configs/superpoint+superglue.yaml) | 54.4 / 70.4 / 82.4 | 48.7 / 65.6 / 79.0 | 64.8 / 77.9 / 87.0 |
|
| [SuperPoint + SuperGlue](../gluefactory/configs/superpoint+superglue-official.yaml) | 54.4 / 70.4 / 82.4 | 48.7 / 65.6 / 79.0 | 64.8 / 77.9 / 87.0 |
|
||||||
| [SuperPoint + LightGlue](../gluefactory/configs/superpoint+lightglue.yaml) | 56.7 / 72.4 / 83.7 | 51.0 / 68.1 / 80.7 | 66.8 / 79.3 / 87.9 |
|
| [SuperPoint + LightGlue](../gluefactory/configs/superpoint+lightglue-official.yaml) | 56.7 / 72.4 / 83.7 | 51.0 / 68.1 / 80.7 | 66.8 / 79.3 / 87.9 |
|
||||||
|
| [SIFT (2K) + LightGlue](../gluefactory/configs/sift+lightglue-official.yaml) | ? / ? / ? | 43.5 / 61.5 / 75.9 | 60.4 / 74.3 / 84.5 |
|
||||||
|
| [SIFT (4K) + LightGlue](../gluefactory/configs/sift+lightglue-official.yaml) | ? / ? / ? | 49.9 / 67.3 / 80.3 | 65.9 / 78.6 / 87.4 |
|
||||||
|
| [ALIKED + LightGlue](../gluefactory/configs/aliked+lightglue-official.yaml) | ? / ? / ? | 51.5 / 68.1 / 80.4 | 66.3 / 78.7 / 87.5 |
|
||||||
| [SuperPoint + GlueStick](../gluefactory/configs/superpoint+lsd+gluestick.yaml) | 53.2 / 69.8 / 81.9 | 46.3 / 64.2 / 78.1 | 64.4 / 77.5 / 86.5 |
|
| [SuperPoint + GlueStick](../gluefactory/configs/superpoint+lsd+gluestick.yaml) | 53.2 / 69.8 / 81.9 | 46.3 / 64.2 / 78.1 | 64.4 / 77.5 / 86.5 |
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
model:
|
||||||
|
name: two_view_pipeline
|
||||||
|
extractor:
|
||||||
|
name: extractors.aliked
|
||||||
|
max_num_keypoints: 2048
|
||||||
|
detection_threshold: 0.0
|
||||||
|
matcher:
|
||||||
|
name: matchers.lightglue_pretrained
|
||||||
|
features: aliked
|
||||||
|
depth_confidence: -1
|
||||||
|
width_confidence: -1
|
||||||
|
filter_threshold: 0.1
|
||||||
|
benchmarks:
|
||||||
|
megadepth1500:
|
||||||
|
data:
|
||||||
|
preprocessing:
|
||||||
|
side: long
|
||||||
|
resize: 1600
|
||||||
|
eval:
|
||||||
|
estimator: opencv
|
||||||
|
ransac_th: 0.5
|
||||||
|
hpatches:
|
||||||
|
eval:
|
||||||
|
estimator: opencv
|
||||||
|
ransac_th: 0.5
|
||||||
|
model:
|
||||||
|
extractor:
|
||||||
|
max_num_keypoints: 1024 # overwrite config above
|
|
@ -0,0 +1,28 @@
|
||||||
|
model:
|
||||||
|
name: two_view_pipeline
|
||||||
|
extractor:
|
||||||
|
name: extractors.sift
|
||||||
|
backend: pycolmap_cuda
|
||||||
|
max_num_keypoints: 4096
|
||||||
|
matcher:
|
||||||
|
name: matchers.lightglue_pretrained
|
||||||
|
features: sift
|
||||||
|
depth_confidence: -1
|
||||||
|
width_confidence: -1
|
||||||
|
filter_threshold: 0.1
|
||||||
|
benchmarks:
|
||||||
|
megadepth1500:
|
||||||
|
data:
|
||||||
|
preprocessing:
|
||||||
|
side: long
|
||||||
|
resize: 1600
|
||||||
|
eval:
|
||||||
|
estimator: opencv
|
||||||
|
ransac_th: 0.5
|
||||||
|
hpatches:
|
||||||
|
eval:
|
||||||
|
estimator: opencv
|
||||||
|
ransac_th: 0.5
|
||||||
|
model:
|
||||||
|
extractor:
|
||||||
|
max_num_keypoints: 1024 # overwrite config above
|
|
@ -14,10 +14,10 @@ model:
|
||||||
name: two_view_pipeline
|
name: two_view_pipeline
|
||||||
extractor:
|
extractor:
|
||||||
name: extractors.sift
|
name: extractors.sift
|
||||||
detector: pycolmap_cuda
|
backend: pycolmap_cuda
|
||||||
max_num_keypoints: 1024
|
max_num_keypoints: 1024
|
||||||
force_num_keypoints: True
|
force_num_keypoints: True
|
||||||
detection_threshold: 0.0001
|
nms_radius: 3
|
||||||
trainable: False
|
trainable: False
|
||||||
ground_truth:
|
ground_truth:
|
||||||
name: matchers.homography_matcher
|
name: matchers.homography_matcher
|
||||||
|
@ -46,3 +46,6 @@ benchmarks:
|
||||||
eval:
|
eval:
|
||||||
estimator: opencv
|
estimator: opencv
|
||||||
ransac_th: 0.5
|
ransac_th: 0.5
|
||||||
|
model:
|
||||||
|
extractor:
|
||||||
|
nms_radius: 0
|
||||||
|
|
|
@ -25,10 +25,10 @@ model:
|
||||||
name: two_view_pipeline
|
name: two_view_pipeline
|
||||||
extractor:
|
extractor:
|
||||||
name: extractors.sift
|
name: extractors.sift
|
||||||
detector: pycolmap_cuda
|
backend: pycolmap_cuda
|
||||||
max_num_keypoints: 2048
|
max_num_keypoints: 2048
|
||||||
force_num_keypoints: True
|
force_num_keypoints: True
|
||||||
detection_threshold: 0.0001
|
nms_radius: 3
|
||||||
trainable: False
|
trainable: False
|
||||||
matcher:
|
matcher:
|
||||||
name: matchers.lightglue
|
name: matchers.lightglue
|
||||||
|
@ -62,6 +62,9 @@ benchmarks:
|
||||||
preprocessing:
|
preprocessing:
|
||||||
side: long
|
side: long
|
||||||
resize: 1600
|
resize: 1600
|
||||||
|
model:
|
||||||
|
extractor:
|
||||||
|
nms_radius: 0
|
||||||
eval:
|
eval:
|
||||||
estimator: opencv
|
estimator: opencv
|
||||||
ransac_th: 0.5
|
ransac_th: 0.5
|
||||||
|
@ -72,3 +75,4 @@ benchmarks:
|
||||||
model:
|
model:
|
||||||
extractor:
|
extractor:
|
||||||
max_num_keypoints: 1024
|
max_num_keypoints: 1024
|
||||||
|
nms_radius: 0
|
||||||
|
|
|
@ -1,238 +1,233 @@
|
||||||
|
import warnings
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pycolmap
|
|
||||||
import torch
|
import torch
|
||||||
from omegaconf import OmegaConf
|
from kornia.color import rgb_to_grayscale
|
||||||
from scipy.spatial import KDTree
|
from packaging import version
|
||||||
|
|
||||||
|
try:
|
||||||
|
import pycolmap
|
||||||
|
except ImportError:
|
||||||
|
pycolmap = None
|
||||||
|
|
||||||
from ..base_model import BaseModel
|
from ..base_model import BaseModel
|
||||||
from ..utils.misc import pad_to_length
|
from ..utils.misc import pad_to_length
|
||||||
|
|
||||||
EPS = 1e-6
|
|
||||||
|
def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None):
|
||||||
|
h, w = image_shape
|
||||||
|
ij = np.round(points - 0.5).astype(int).T[::-1]
|
||||||
|
|
||||||
|
# Remove duplicate points (identical coordinates).
|
||||||
|
# Pick highest scale or score
|
||||||
|
s = scales if scores is None else scores
|
||||||
|
buffer = np.zeros((h, w))
|
||||||
|
np.maximum.at(buffer, tuple(ij), s)
|
||||||
|
keep = np.where(buffer[tuple(ij)] == s)[0]
|
||||||
|
|
||||||
|
# Pick lowest angle (arbitrary).
|
||||||
|
ij = ij[:, keep]
|
||||||
|
buffer[:] = np.inf
|
||||||
|
o_abs = np.abs(angles[keep])
|
||||||
|
np.minimum.at(buffer, tuple(ij), o_abs)
|
||||||
|
mask = buffer[tuple(ij)] == o_abs
|
||||||
|
ij = ij[:, mask]
|
||||||
|
keep = keep[mask]
|
||||||
|
|
||||||
|
if nms_radius > 0:
|
||||||
|
# Apply NMS on the remaining points
|
||||||
|
buffer[:] = 0
|
||||||
|
buffer[tuple(ij)] = s[keep] # scores or scale
|
||||||
|
|
||||||
|
local_max = torch.nn.functional.max_pool2d(
|
||||||
|
torch.from_numpy(buffer).unsqueeze(0),
|
||||||
|
kernel_size=nms_radius * 2 + 1,
|
||||||
|
stride=1,
|
||||||
|
padding=nms_radius,
|
||||||
|
).squeeze(0)
|
||||||
|
is_local_max = buffer == local_max.numpy()
|
||||||
|
keep = keep[is_local_max[tuple(ij)]]
|
||||||
|
return keep
|
||||||
|
|
||||||
|
|
||||||
def sift_to_rootsift(x):
|
def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor:
|
||||||
x = x / (np.linalg.norm(x, ord=1, axis=-1, keepdims=True) + EPS)
|
x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps)
|
||||||
x = np.sqrt(x.clip(min=EPS))
|
x.clip_(min=eps).sqrt_()
|
||||||
x = x / (np.linalg.norm(x, axis=-1, keepdims=True) + EPS)
|
return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps)
|
||||||
return x
|
|
||||||
|
|
||||||
|
|
||||||
# from OpenGlue
|
def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray) -> np.ndarray:
|
||||||
def nms_keypoints(kpts: np.ndarray, responses: np.ndarray, radius: float) -> np.ndarray:
|
|
||||||
# TODO: add approximate tree
|
|
||||||
kd_tree = KDTree(kpts)
|
|
||||||
|
|
||||||
sorted_idx = np.argsort(-responses)
|
|
||||||
kpts_to_keep_idx = []
|
|
||||||
removed_idx = set()
|
|
||||||
|
|
||||||
for idx in sorted_idx:
|
|
||||||
# skip point if it was already removed
|
|
||||||
if idx in removed_idx:
|
|
||||||
continue
|
|
||||||
|
|
||||||
kpts_to_keep_idx.append(idx)
|
|
||||||
point = kpts[idx]
|
|
||||||
neighbors = kd_tree.query_ball_point(point, r=radius)
|
|
||||||
# Variable `neighbors` contains the `point` itself
|
|
||||||
removed_idx.update(neighbors)
|
|
||||||
|
|
||||||
mask = np.zeros((kpts.shape[0],), dtype=bool)
|
|
||||||
mask[kpts_to_keep_idx] = True
|
|
||||||
return mask
|
|
||||||
|
|
||||||
|
|
||||||
def detect_kpts_opencv(
|
|
||||||
features: cv2.Feature2D, image: np.ndarray, describe: bool = True
|
|
||||||
) -> np.ndarray:
|
|
||||||
"""
|
"""
|
||||||
Detect keypoints using OpenCV Detector.
|
Detect keypoints using OpenCV Detector.
|
||||||
Optionally, perform NMS and filter top-response keypoints.
|
|
||||||
Optionally, perform description.
|
Optionally, perform description.
|
||||||
Args:
|
Args:
|
||||||
features: OpenCV based keypoints detector and descriptor
|
features: OpenCV based keypoints detector and descriptor
|
||||||
image: Grayscale image of uint8 data type
|
image: Grayscale image of uint8 data type
|
||||||
describe: flag indicating whether to simultaneously compute descriptors
|
|
||||||
Returns:
|
Returns:
|
||||||
kpts: 1D array of detected cv2.KeyPoint
|
keypoints: 1D array of detected cv2.KeyPoint
|
||||||
|
scores: 1D array of responses
|
||||||
|
descriptors: 1D array of descriptors
|
||||||
"""
|
"""
|
||||||
if describe:
|
detections, descriptors = features.detectAndCompute(image, None)
|
||||||
kpts, descriptors = features.detectAndCompute(image, None)
|
points = np.array([k.pt for k in detections], dtype=np.float32)
|
||||||
else:
|
scores = np.array([k.response for k in detections], dtype=np.float32)
|
||||||
kpts = features.detect(image, None)
|
scales = np.array([k.size for k in detections], dtype=np.float32)
|
||||||
kpts = np.array(kpts)
|
angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32))
|
||||||
|
return points, scores, scales, angles, descriptors
|
||||||
responses = np.array([k.response for k in kpts], dtype=np.float32)
|
|
||||||
|
|
||||||
# select all
|
|
||||||
top_score_idx = ...
|
|
||||||
pts = np.array([k.pt for k in kpts], dtype=np.float32)
|
|
||||||
scales = np.array([k.size for k in kpts], dtype=np.float32)
|
|
||||||
angles = np.array([k.angle for k in kpts], dtype=np.float32)
|
|
||||||
spts = np.concatenate([pts, scales[..., None], angles[..., None]], -1)
|
|
||||||
|
|
||||||
if describe:
|
|
||||||
return spts[top_score_idx], responses[top_score_idx], descriptors[top_score_idx]
|
|
||||||
else:
|
|
||||||
return spts[top_score_idx], responses[top_score_idx]
|
|
||||||
|
|
||||||
|
|
||||||
class SIFT(BaseModel):
|
class SIFT(BaseModel):
|
||||||
default_conf = {
|
default_conf = {
|
||||||
"has_detector": True,
|
|
||||||
"has_descriptor": True,
|
|
||||||
"descriptor_dim": 128,
|
|
||||||
"pycolmap_options": {
|
|
||||||
"first_octave": 0,
|
|
||||||
"peak_threshold": 0.005,
|
|
||||||
"edge_threshold": 10,
|
|
||||||
},
|
|
||||||
"rootsift": True,
|
"rootsift": True,
|
||||||
"nms_radius": None,
|
"nms_radius": 0, # None to disable filtering entirely.
|
||||||
"max_num_keypoints": -1,
|
"max_num_keypoints": 4096,
|
||||||
"max_num_keypoints_val": None,
|
"backend": "opencv", # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda}
|
||||||
|
"detection_threshold": 0.0066667, # from COLMAP
|
||||||
|
"edge_threshold": 10,
|
||||||
|
"first_octave": -1, # only used by pycolmap, the default of COLMAP
|
||||||
|
"num_octaves": 4,
|
||||||
"force_num_keypoints": False,
|
"force_num_keypoints": False,
|
||||||
"randomize_keypoints_training": False,
|
|
||||||
"detector": "pycolmap", # ['pycolmap', 'pycolmap_cpu', 'pycolmap_cuda', 'cv2']
|
|
||||||
"detection_threshold": None,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
required_data_keys = ["image"]
|
required_data_keys = ["image"]
|
||||||
|
|
||||||
def _init(self, conf):
|
def _init(self, conf):
|
||||||
self.sift = None # lazy loading
|
backend = self.conf.backend
|
||||||
|
if backend.startswith("pycolmap"):
|
||||||
@torch.no_grad()
|
if pycolmap is None:
|
||||||
def extract_features(self, image):
|
raise ImportError(
|
||||||
image_np = image.cpu().numpy()[0]
|
"Cannot find module pycolmap: install it with pip"
|
||||||
assert image.shape[0] == 1
|
"or use backend=opencv."
|
||||||
assert image_np.min() >= -EPS and image_np.max() <= 1 + EPS
|
)
|
||||||
|
options = {
|
||||||
detector = str(self.conf.detector)
|
"peak_threshold": self.conf.detection_threshold,
|
||||||
|
"edge_threshold": self.conf.edge_threshold,
|
||||||
if self.sift is None and detector.startswith("pycolmap"):
|
"first_octave": self.conf.first_octave,
|
||||||
options = OmegaConf.to_container(self.conf.pycolmap_options)
|
"num_octaves": self.conf.num_octaves,
|
||||||
|
"normalization": pycolmap.Normalization.L2, # L1_ROOT is buggy.
|
||||||
|
}
|
||||||
device = (
|
device = (
|
||||||
"auto" if detector == "pycolmap" else detector.replace("pycolmap_", "")
|
"auto" if backend == "pycolmap" else backend.replace("pycolmap_", "")
|
||||||
)
|
)
|
||||||
if self.conf.rootsift == "rootsift":
|
if (
|
||||||
options["normalization"] = pycolmap.Normalization.L1_ROOT
|
backend == "pycolmap_cpu" or not pycolmap.has_cuda
|
||||||
|
) and pycolmap.__version__ < "0.5.0":
|
||||||
|
warnings.warn(
|
||||||
|
"The pycolmap CPU SIFT is buggy in version < 0.5.0, "
|
||||||
|
"consider upgrading pycolmap or use the CUDA version.",
|
||||||
|
stacklevel=1,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
options["normalization"] = pycolmap.Normalization.L2
|
options["max_num_features"] = self.conf.max_num_keypoints
|
||||||
if self.conf.detection_threshold is not None:
|
|
||||||
options["peak_threshold"] = self.conf.detection_threshold
|
|
||||||
options["max_num_features"] = self.conf.max_num_keypoints
|
|
||||||
self.sift = pycolmap.Sift(options=options, device=device)
|
self.sift = pycolmap.Sift(options=options, device=device)
|
||||||
elif self.sift is None and self.conf.detector == "cv2":
|
elif backend == "opencv":
|
||||||
self.sift = cv2.SIFT_create(contrastThreshold=self.conf.detection_threshold)
|
self.sift = cv2.SIFT_create(
|
||||||
|
contrastThreshold=self.conf.detection_threshold,
|
||||||
|
nfeatures=self.conf.max_num_keypoints,
|
||||||
|
edgeThreshold=self.conf.edge_threshold,
|
||||||
|
nOctaveLayers=self.conf.num_octaves,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"}
|
||||||
|
raise ValueError(
|
||||||
|
f"Unknown backend: {backend} not in " f"{{{','.join(backends)}}}."
|
||||||
|
)
|
||||||
|
|
||||||
if detector.startswith("pycolmap"):
|
def extract_single_image(self, image: torch.Tensor):
|
||||||
keypoints, scores, descriptors = self.sift.extract(image_np)
|
image_np = image.cpu().numpy().squeeze(0)
|
||||||
elif detector == "cv2":
|
|
||||||
|
if self.conf.backend.startswith("pycolmap"):
|
||||||
|
if version.parse(pycolmap.__version__) >= version.parse("0.5.0"):
|
||||||
|
detections, descriptors = self.sift.extract(image_np)
|
||||||
|
scores = None # Scores are not exposed by COLMAP anymore.
|
||||||
|
else:
|
||||||
|
detections, scores, descriptors = self.sift.extract(image_np)
|
||||||
|
keypoints = detections[:, :2] # Keep only (x, y).
|
||||||
|
scales, angles = detections[:, -2:].T
|
||||||
|
if scores is not None and (
|
||||||
|
self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda
|
||||||
|
):
|
||||||
|
# Set the scores as a combination of abs. response and scale.
|
||||||
|
scores = np.abs(scores) * scales
|
||||||
|
elif self.conf.backend == "opencv":
|
||||||
# TODO: Check if opencv keypoints are already in corner convention
|
# TODO: Check if opencv keypoints are already in corner convention
|
||||||
keypoints, scores, descriptors = detect_kpts_opencv(
|
keypoints, scores, scales, angles, descriptors = run_opencv_sift(
|
||||||
self.sift, (image_np * 255.0).astype(np.uint8)
|
self.sift, (image_np * 255.0).astype(np.uint8)
|
||||||
)
|
)
|
||||||
|
pred = {
|
||||||
|
"keypoints": keypoints,
|
||||||
|
"scales": scales,
|
||||||
|
"oris": angles,
|
||||||
|
"descriptors": descriptors,
|
||||||
|
}
|
||||||
|
if scores is not None:
|
||||||
|
pred["keypoint_scores"] = scores
|
||||||
|
|
||||||
|
# sometimes pycolmap returns points outside the image. We remove them
|
||||||
|
if self.conf.backend.startswith("pycolmap"):
|
||||||
|
is_inside = (
|
||||||
|
pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]])
|
||||||
|
).all(-1)
|
||||||
|
pred = {k: v[is_inside] for k, v in pred.items()}
|
||||||
|
|
||||||
if self.conf.nms_radius is not None:
|
if self.conf.nms_radius is not None:
|
||||||
mask = nms_keypoints(keypoints[:, :2], scores, self.conf.nms_radius)
|
keep = filter_dog_point(
|
||||||
keypoints = keypoints[mask]
|
pred["keypoints"],
|
||||||
scores = scores[mask]
|
pred["scales"],
|
||||||
descriptors = descriptors[mask]
|
pred["oris"],
|
||||||
|
image_np.shape,
|
||||||
|
self.conf.nms_radius,
|
||||||
|
pred["keypoint_scores"],
|
||||||
|
)
|
||||||
|
pred = {k: v[keep] for k, v in pred.items()}
|
||||||
|
|
||||||
scales = keypoints[:, 2]
|
pred = {k: torch.from_numpy(v) for k, v in pred.items()}
|
||||||
oris = np.rad2deg(keypoints[:, 3])
|
if scores is not None:
|
||||||
|
# Keep the k keypoints with highest score
|
||||||
if self.conf.has_descriptor:
|
num_points = self.conf.max_num_keypoints
|
||||||
# We still renormalize because COLMAP does not normalize well,
|
if num_points is not None and len(pred["keypoints"]) > num_points:
|
||||||
# maybe due to numerical errors
|
indices = torch.topk(pred["keypoint_scores"], num_points).indices
|
||||||
if self.conf.rootsift:
|
pred = {k: v[indices] for k, v in pred.items()}
|
||||||
descriptors = sift_to_rootsift(descriptors)
|
|
||||||
descriptors = torch.from_numpy(descriptors)
|
|
||||||
keypoints = torch.from_numpy(keypoints[:, :2]) # keep only x, y
|
|
||||||
scales = torch.from_numpy(scales)
|
|
||||||
oris = torch.from_numpy(oris)
|
|
||||||
scores = torch.from_numpy(scores)
|
|
||||||
|
|
||||||
# Keep the k keypoints with highest score
|
|
||||||
max_kps = self.conf.max_num_keypoints
|
|
||||||
|
|
||||||
# for val we allow different
|
|
||||||
if not self.training and self.conf.max_num_keypoints_val is not None:
|
|
||||||
max_kps = self.conf.max_num_keypoints_val
|
|
||||||
|
|
||||||
if max_kps is not None and max_kps > 0:
|
|
||||||
if self.conf.randomize_keypoints_training and self.training:
|
|
||||||
# instead of selecting top-k, sample k by score weights
|
|
||||||
raise NotImplementedError
|
|
||||||
elif max_kps < scores.shape[0]:
|
|
||||||
# TODO: check that the scores from PyCOLMAP are 100% correct,
|
|
||||||
# follow https://github.com/mihaidusmanu/pycolmap/issues/8
|
|
||||||
indices = torch.topk(scores, max_kps).indices
|
|
||||||
keypoints = keypoints[indices]
|
|
||||||
scales = scales[indices]
|
|
||||||
oris = oris[indices]
|
|
||||||
scores = scores[indices]
|
|
||||||
if self.conf.has_descriptor:
|
|
||||||
descriptors = descriptors[indices]
|
|
||||||
|
|
||||||
if self.conf.force_num_keypoints:
|
if self.conf.force_num_keypoints:
|
||||||
keypoints = pad_to_length(
|
num_points = min(self.conf.max_num_keypoints, len(pred["keypoints"]))
|
||||||
keypoints,
|
pred["keypoints"] = pad_to_length(
|
||||||
max_kps,
|
pred["keypoints"],
|
||||||
|
num_points,
|
||||||
-2,
|
-2,
|
||||||
mode="random_c",
|
mode="random_c",
|
||||||
bounds=(0, min(image.shape[1:])),
|
bounds=(0, min(image.shape[1:])),
|
||||||
)
|
)
|
||||||
scores = pad_to_length(scores, max_kps, -1, mode="zeros")
|
pred["scales"] = pad_to_length(pred["scales"], num_points, -1, mode="zeros")
|
||||||
scales = pad_to_length(scales, max_kps, -1, mode="zeros")
|
pred["oris"] = pad_to_length(pred["oris"], num_points, -1, mode="zeros")
|
||||||
oris = pad_to_length(oris, max_kps, -1, mode="zeros")
|
pred["descriptors"] = pad_to_length(
|
||||||
if self.conf.has_descriptor:
|
pred["descriptors"], num_points, -2, mode="zeros"
|
||||||
descriptors = pad_to_length(descriptors, max_kps, -2, mode="zeros")
|
)
|
||||||
|
if pred["keypoint_scores"] is not None:
|
||||||
pred = {
|
scores = pad_to_length(
|
||||||
"keypoints": keypoints,
|
pred["keypoint_scores"], num_points, -1, mode="zeros"
|
||||||
"scales": scales,
|
)
|
||||||
"oris": oris,
|
|
||||||
"keypoint_scores": scores,
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.conf.has_descriptor:
|
|
||||||
pred["descriptors"] = descriptors
|
|
||||||
return pred
|
return pred
|
||||||
|
|
||||||
@torch.no_grad()
|
def _forward(self, data: dict) -> dict:
|
||||||
def _forward(self, data):
|
|
||||||
pred = {
|
|
||||||
"keypoints": [],
|
|
||||||
"scales": [],
|
|
||||||
"oris": [],
|
|
||||||
"keypoint_scores": [],
|
|
||||||
"descriptors": [],
|
|
||||||
}
|
|
||||||
|
|
||||||
image = data["image"]
|
image = data["image"]
|
||||||
if image.shape[1] == 3: # RGB
|
if image.shape[1] == 3:
|
||||||
scale = image.new_tensor([0.299, 0.587, 0.114]).view(1, 3, 1, 1)
|
image = rgb_to_grayscale(image)
|
||||||
image = (image * scale).sum(1, keepdim=True).cpu()
|
device = image.device
|
||||||
|
image = image.cpu()
|
||||||
for k in range(image.shape[0]):
|
pred = []
|
||||||
|
for k in range(len(image)):
|
||||||
img = image[k]
|
img = image[k]
|
||||||
if "image_size" in data.keys():
|
if "image_size" in data.keys():
|
||||||
# avoid extracting points in padded areas
|
# avoid extracting points in padded areas
|
||||||
w, h = data["image_size"][k]
|
w, h = data["image_size"][k]
|
||||||
img = img[:, :h, :w]
|
img = img[:, :h, :w]
|
||||||
p = self.extract_features(img)
|
p = self.extract_single_image(img)
|
||||||
for k, v in p.items():
|
pred.append(p)
|
||||||
pred[k].append(v)
|
pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
|
||||||
|
if self.conf.rootsift:
|
||||||
if (image.shape[0] == 1) or self.conf.force_num_keypoints:
|
pred["descriptors"] = sift_to_rootsift(pred["descriptors"])
|
||||||
pred = {k: torch.stack(pred[k], 0) for k in pred.keys()}
|
|
||||||
|
|
||||||
pred = {k: pred[k].to(device=data["image"].device) for k in pred.keys()}
|
|
||||||
|
|
||||||
pred["oris"] = torch.deg2rad(pred["oris"])
|
|
||||||
return pred
|
return pred
|
||||||
|
|
||||||
def loss(self, pred, data):
|
def loss(self, pred, data):
|
||||||
|
|
|
@ -24,8 +24,8 @@ class KorniaSIFT(BaseModel):
|
||||||
def _forward(self, data):
|
def _forward(self, data):
|
||||||
lafs, scores, descriptors = self.sift(data["image"])
|
lafs, scores, descriptors = self.sift(data["image"])
|
||||||
keypoints = kornia.feature.get_laf_center(lafs)
|
keypoints = kornia.feature.get_laf_center(lafs)
|
||||||
scales = kornia.feature.get_laf_scale(lafs)
|
scales = kornia.feature.get_laf_scale(lafs).squeeze(-1).squeeze(-1)
|
||||||
oris = kornia.feature.get_laf_orientation(lafs)
|
oris = kornia.feature.get_laf_orientation(lafs).squeeze(-1)
|
||||||
pred = {
|
pred = {
|
||||||
"keypoints": keypoints, # @TODO: confirm keypoints are in corner convention
|
"keypoints": keypoints, # @TODO: confirm keypoints are in corner convention
|
||||||
"scales": scales,
|
"scales": scales,
|
||||||
|
|
|
@ -21,13 +21,14 @@ class LightGlue(BaseModel):
|
||||||
self.set_initialized()
|
self.set_initialized()
|
||||||
|
|
||||||
def _forward(self, data):
|
def _forward(self, data):
|
||||||
|
required_keys = ["keypoints", "descriptors", "scales", "oris"]
|
||||||
view0 = {
|
view0 = {
|
||||||
**{k: data[k + "0"] for k in ["keypoints", "descriptors"]},
|
|
||||||
**data["view0"],
|
**data["view0"],
|
||||||
|
**{k: data[k + "0"] for k in required_keys if (k + "0") in data},
|
||||||
}
|
}
|
||||||
view1 = {
|
view1 = {
|
||||||
**{k: data[k + "1"] for k in ["keypoints", "descriptors"]},
|
|
||||||
**data["view1"],
|
**data["view1"],
|
||||||
|
**{k: data[k + "1"] for k in required_keys if (k + "1") in data},
|
||||||
}
|
}
|
||||||
return self.net({"image0": view0, "image1": view1})
|
return self.net({"image0": view0, "image1": view1})
|
||||||
|
|
||||||
|
|
|
@ -37,14 +37,13 @@ configs = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"cv2-sift": {
|
"cv2-sift": {
|
||||||
"name": f"r{resize}_cv2-SIFT-k{n_kpts}",
|
"name": f"r{resize}_opencv-SIFT-k{n_kpts}",
|
||||||
"keys": ["keypoints", "descriptors", "keypoint_scores", "oris", "scales"],
|
"keys": ["keypoints", "descriptors", "keypoint_scores", "oris", "scales"],
|
||||||
"gray": True,
|
"gray": True,
|
||||||
"conf": {
|
"conf": {
|
||||||
"name": "extractors.sift",
|
"name": "extractors.sift",
|
||||||
"max_num_keypoints": 4096,
|
"max_num_keypoints": 4096,
|
||||||
"detection_threshold": 0.001,
|
"backend": "opencv",
|
||||||
"detector": "cv2",
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"pycolmap-sift": {
|
"pycolmap-sift": {
|
||||||
|
@ -54,11 +53,7 @@ configs = {
|
||||||
"conf": {
|
"conf": {
|
||||||
"name": "extractors.sift",
|
"name": "extractors.sift",
|
||||||
"max_num_keypoints": n_kpts,
|
"max_num_keypoints": n_kpts,
|
||||||
"detection_threshold": 0.0001,
|
"backend": "pycolmap",
|
||||||
"detector": "pycolmap",
|
|
||||||
"pycolmap_options": {
|
|
||||||
"first_octave": -1,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"pycolmap-sift-gpu": {
|
"pycolmap-sift-gpu": {
|
||||||
|
@ -68,11 +63,7 @@ configs = {
|
||||||
"conf": {
|
"conf": {
|
||||||
"name": "extractors.sift",
|
"name": "extractors.sift",
|
||||||
"max_num_keypoints": n_kpts,
|
"max_num_keypoints": n_kpts,
|
||||||
"detection_threshold": 0.0066666,
|
"backend": "pycolmap_cuda",
|
||||||
"detector": "pycolmap_cuda",
|
|
||||||
"pycolmap_options": {
|
|
||||||
"first_octave": -1,
|
|
||||||
},
|
|
||||||
"nms_radius": 3,
|
"nms_radius": 3,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
@ -208,14 +208,14 @@ def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, a=1.0, labels=None, axe
|
||||||
kpts0[:, 1],
|
kpts0[:, 1],
|
||||||
c=color,
|
c=color,
|
||||||
s=ps,
|
s=ps,
|
||||||
label=None if labels is None else labels[0],
|
label=None if labels is None or len(labels) == 0 else labels[0],
|
||||||
)
|
)
|
||||||
ax1.scatter(
|
ax1.scatter(
|
||||||
kpts1[:, 0],
|
kpts1[:, 0],
|
||||||
kpts1[:, 1],
|
kpts1[:, 1],
|
||||||
c=color,
|
c=color,
|
||||||
s=ps,
|
s=ps,
|
||||||
label=None if labels is None else labels[1],
|
label=None if labels is None or len(labels) == 0 else labels[1],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue