glue-factory-custom/gluefactory/models/extractors/sift.py

import numpy as np
import torch
import pycolmap
from scipy.spatial import KDTree
from omegaconf import OmegaConf
import cv2

from ..base_model import BaseModel

from ..utils.misc import pad_to_length

EPS = 1e-6


def sift_to_rootsift(x):
    x = x / (np.linalg.norm(x, ord=1, axis=-1, keepdims=True) + EPS)
    x = np.sqrt(x.clip(min=EPS))
    x = x / (np.linalg.norm(x, axis=-1, keepdims=True) + EPS)
    return x


# from OpenGlue
def nms_keypoints(kpts: np.ndarray, responses: np.ndarray, radius: float) -> np.ndarray:
    # TODO: add approximate tree
    kd_tree = KDTree(kpts)

    sorted_idx = np.argsort(-responses)
    kpts_to_keep_idx = []
    removed_idx = set()

    for idx in sorted_idx:
        # skip point if it was already removed
        if idx in removed_idx:
            continue

        kpts_to_keep_idx.append(idx)
        point = kpts[idx]
        neighbors = kd_tree.query_ball_point(point, r=radius)
        # Variable `neighbors` contains the `point` itself
        removed_idx.update(neighbors)

    mask = np.zeros((kpts.shape[0],), dtype=bool)
    mask[kpts_to_keep_idx] = True
    return mask


def detect_kpts_opencv(
    features: cv2.Feature2D, image: np.ndarray, describe: bool = True
) -> np.ndarray:
    """
    Detect keypoints using OpenCV Detector.
    Optionally, perform NMS and filter top-response keypoints.
    Optionally, perform description.
    Args:
        features: OpenCV based keypoints detector and descriptor
        image: Grayscale image of uint8 data type
        describe: flag indicating whether to simultaneously compute descriptors
    Returns:
        kpts: 1D array of detected cv2.KeyPoint
    """
    if describe:
        kpts, descriptors = features.detectAndCompute(image, None)
    else:
        kpts = features.detect(image, None)
    kpts = np.array(kpts)

    responses = np.array([k.response for k in kpts], dtype=np.float32)

    # select all
    top_score_idx = ...
    pts = np.array([k.pt for k in kpts], dtype=np.float32)
    scales = np.array([k.size for k in kpts], dtype=np.float32)
    angles = np.array([k.angle for k in kpts], dtype=np.float32)
    spts = np.concatenate([pts, scales[..., None], angles[..., None]], -1)

    if describe:
        return spts[top_score_idx], responses[top_score_idx], descriptors[top_score_idx]
    else:
        return spts[top_score_idx], responses[top_score_idx]


class SIFT(BaseModel):
    default_conf = {
        "has_detector": True,
        "has_descriptor": True,
        "descriptor_dim": 128,
        "pycolmap_options": {
            "first_octave": 0,
            "peak_threshold": 0.005,
            "edge_threshold": 10,
        },
        "rootsift": True,
        "nms_radius": None,
        "max_num_keypoints": -1,
        "max_num_keypoints_val": None,
        "force_num_keypoints": False,
        "randomize_keypoints_training": False,
        "detector": "pycolmap",  # ['pycolmap', 'pycolmap_cpu', 'pycolmap_cuda', 'cv2']
        "detection_threshold": None,
    }

    required_data_keys = ["image"]

    def _init(self, conf):
        self.sift = None  # lazy loading

    @torch.no_grad()
    def extract_features(self, image):
        image_np = image.cpu().numpy()[0]
        assert image.shape[0] == 1
        assert image_np.min() >= -EPS and image_np.max() <= 1 + EPS

        detector = str(self.conf.detector)

        if self.sift is None and detector.startswith("pycolmap"):
            options = OmegaConf.to_container(self.conf.pycolmap_options)
            device = (
                "auto" if detector == "pycolmap" else detector.replace("pycolmap_", "")
            )
            if self.conf.rootsift == "rootsift":
                options["normalization"] = pycolmap.Normalization.L1_ROOT
            else:
                options["normalization"] = pycolmap.Normalization.L2
            if self.conf.detection_threshold is not None:
                options["peak_threshold"] = self.conf.detection_threshold
            options["max_num_features"] = self.conf.max_num_keypoints
            self.sift = pycolmap.Sift(options=options, device=device)
        elif self.sift is None and self.conf.detector == "cv2":
            self.sift = cv2.SIFT_create(contrastThreshold=self.conf.detection_threshold)

        if detector.startswith("pycolmap"):
            keypoints, scores, descriptors = self.sift.extract(image_np)
        elif detector == "cv2":
            # TODO: Check if opencv keypoints are already in corner convention
            keypoints, scores, descriptors = detect_kpts_opencv(
                self.sift, (image_np * 255.0).astype(np.uint8)
            )

        if self.conf.nms_radius is not None:
            mask = nms_keypoints(keypoints[:, :2], scores, self.conf.nms_radius)
            keypoints = keypoints[mask]
            scores = scores[mask]
            descriptors = descriptors[mask]

        scales = keypoints[:, 2]
        oris = np.rad2deg(keypoints[:, 3])

        if self.conf.has_descriptor:
            # We still renormalize because COLMAP does not normalize well,
            # maybe due to numerical errors
            if self.conf.rootsift:
                descriptors = sift_to_rootsift(descriptors)
            descriptors = torch.from_numpy(descriptors)
        keypoints = torch.from_numpy(keypoints[:, :2])  # keep only x, y
        scales = torch.from_numpy(scales)
        oris = torch.from_numpy(oris)
        scores = torch.from_numpy(scores)

        # Keep the k keypoints with highest score
        max_kps = self.conf.max_num_keypoints

        # for val we allow different
        if not self.training and self.conf.max_num_keypoints_val is not None:
            max_kps = self.conf.max_num_keypoints_val

        if max_kps is not None and max_kps > 0:
            if self.conf.randomize_keypoints_training and self.training:
                # instead of selecting top-k, sample k by score weights
                raise NotImplementedError
            elif max_kps < scores.shape[0]:
                # TODO: check that the scores from PyCOLMAP are 100% correct,
                # follow https://github.com/mihaidusmanu/pycolmap/issues/8
                indices = torch.topk(scores, max_kps).indices
                keypoints = keypoints[indices]
                scales = scales[indices]
                oris = oris[indices]
                scores = scores[indices]
                if self.conf.has_descriptor:
                    descriptors = descriptors[indices]

        if self.conf.force_num_keypoints:
            keypoints = pad_to_length(
                keypoints,
                max_kps,
                -2,
                mode="random_c",
                bounds=(0, min(image.shape[1:])),
            )
            scores = pad_to_length(scores, max_kps, -1, mode="zeros")
            scales = pad_to_length(scales, max_kps, -1, mode="zeros")
            oris = pad_to_length(oris, max_kps, -1, mode="zeros")
            if self.conf.has_descriptor:
                descriptors = pad_to_length(descriptors, max_kps, -2, mode="zeros")

        pred = {
            "keypoints": keypoints,
            "scales": scales,
            "oris": oris,
            "keypoint_scores": scores,
        }

        if self.conf.has_descriptor:
            pred["descriptors"] = descriptors
        return pred

    @torch.no_grad()
    def _forward(self, data):
        pred = {
            "keypoints": [],
            "scales": [],
            "oris": [],
            "keypoint_scores": [],
            "descriptors": [],
        }

        image = data["image"]
        if image.shape[1] == 3:  # RGB
            scale = image.new_tensor([0.299, 0.587, 0.114]).view(1, 3, 1, 1)
            image = (image * scale).sum(1, keepdim=True).cpu()

        for k in range(image.shape[0]):
            img = image[k]
            if "image_size" in data.keys():
                # avoid extracting points in padded areas
                w, h = data["image_size"][k]
                img = img[:, :h, :w]
            p = self.extract_features(img)
            for k, v in p.items():
                pred[k].append(v)

        if (image.shape[0] == 1) or self.conf.force_num_keypoints:
            pred = {k: torch.stack(pred[k], 0) for k in pred.keys()}

        pred = {k: pred[k].to(device=data["image"].device) for k in pred.keys()}

        pred["oris"] = torch.deg2rad(pred["oris"])
        return pred

    def loss(self, pred, data):
        raise NotImplementedError