2023-10-05 16:53:51 +02:00
|
|
|
"""
|
|
|
|
ETH3D multi-view benchmark, used for line matching evaluation.
|
|
|
|
"""
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import shutil
|
2023-10-09 08:32:43 +02:00
|
|
|
import zipfile
|
|
|
|
from pathlib import Path
|
2023-10-05 16:53:51 +02:00
|
|
|
|
|
|
|
import cv2
|
2023-10-09 08:32:43 +02:00
|
|
|
import numpy as np
|
2023-10-05 16:53:51 +02:00
|
|
|
import torch
|
|
|
|
|
|
|
|
from ..geometry.wrappers import Camera, Pose
|
|
|
|
from ..settings import DATA_PATH
|
|
|
|
from ..utils.image import ImagePreprocessor, load_image
|
2023-10-09 08:32:43 +02:00
|
|
|
from .base_dataset import BaseDataset
|
|
|
|
from .utils import scale_intrinsics
|
2023-10-05 16:53:51 +02:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
def read_cameras(camera_file, scale_factor=None):
    """Read the camera intrinsics from a file in COLMAP text format.

    Every data line is expected to carry four leading fields followed by
    exactly four intrinsic parameters ``fx fy cx cy`` (the COLMAP PINHOLE
    layout). Comment lines starting with ``#`` and blank lines are ignored,
    so the parser does not depend on the exact length of the file header.

    Args:
        camera_file: path to the ``cameras.txt`` file.
        scale_factor: optional factor forwarded to ``scale_intrinsics`` for
            both image axes. When ``None`` the intrinsics are left untouched.

    Returns:
        A list with one ``Camera`` per data line, in file order.

    Raises:
        ValueError: if a data line does not hold exactly four intrinsic
            parameters or one of them is not a number.
    """
    with open(camera_file, "r") as f:
        raw_lines = f.read().splitlines()

    cameras = []
    for raw_line in raw_lines:
        entry = raw_line.strip()
        # Skip the header comments and any stray empty line.
        if not entry or entry.startswith("#"):
            continue

        # Split on any run of whitespace so that doubled or trailing blanks
        # do not produce empty fields.
        fields = entry.split()
        params = [float(value) for value in fields[4:]]
        if len(params) != 4:
            raise ValueError(
                f"Expected 4 intrinsic parameters (fx, fy, cx, cy) but got "
                f"{len(params)} in line: {entry!r}"
            )
        fx, fy, cx, cy = params

        K = np.array(
            [[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=np.float32
        )
        if scale_factor is not None:
            K = scale_intrinsics(K, np.array([scale_factor, scale_factor]))
        cameras.append(Camera.from_calibration_matrix(K).float())
    return cameras
|
|
|
|
|
|
|
|
|
|
|
|
def qvec2rotmat(qvec):
    """Turn a quaternion into its 3x3 rotation matrix.

    The first entry of ``qvec`` is used as the scalar part and the next three
    as the vector part, i.e. the ordering is ``(w, x, y, z)``. The quaternion
    is used as given; no normalization is applied.

    Args:
        qvec: indexable sequence whose first four entries are the quaternion.

    Returns:
        A ``numpy`` array of shape (3, 3).
    """
    w = qvec[0]
    x = qvec[1]
    y = qvec[2]
    z = qvec[3]

    first_row = [
        1 - 2 * y**2 - 2 * z**2,
        2 * x * y - 2 * w * z,
        2 * z * x + 2 * w * y,
    ]
    second_row = [
        2 * x * y + 2 * w * z,
        1 - 2 * x**2 - 2 * z**2,
        2 * y * z - 2 * w * x,
    ]
    third_row = [
        2 * z * x - 2 * w * y,
        2 * y * z + 2 * w * x,
        1 - 2 * x**2 - 2 * y**2,
    ]
    return np.array([first_row, second_row, third_row])
|
|
|
|
|
|
|
|
|
|
|
|
class ETH3DDataset(BaseDataset):
    """Pairs of co-visible images from the ETH3D multi-view benchmark.

    At initialization every scene folder below ``DATA_PATH / conf.data_dir``
    is parsed (images, COLMAP calibration, poses and visible 3D points) and
    all image pairs of a scene that share at least ``conf.min_covisibility``
    3D points are stored in ``self.data``. ``__getitem__`` then loads the
    images and depth maps of one such pair.

    Only ``data_dir``, ``grayscale``, ``downsize_factor``,
    ``min_covisibility`` and ``seed`` are read in this class; the remaining
    entries of ``default_conf`` are presumably consumed by ``BaseDataset`` or
    by the evaluation code (not visible here).
    """

    default_conf = {
        "data_dir": "ETH3D_undistorted",
        "grayscale": True,
        "downsize_factor": 8,
        "min_covisibility": 500,
        "batch_size": 1,
        "two_view": True,
        "min_overlap": 0.5,
        "max_overlap": 1.0,
        "sort_by_overlap": False,
        "seed": 0,
    }

    def _init(self, conf):
        """Parse all scenes and build the list of valid image pairs."""
        self.grayscale = conf.grayscale
        self.downsize_factor = conf.downsize_factor

        # Set random seeds
        np.random.seed(conf.seed)
        torch.manual_seed(conf.seed)

        # Auto-download the dataset
        if not (DATA_PATH / conf.data_dir).exists():
            logger.info("Downloading the ETH3D dataset...")
            self.download_eth3d()

        # Form pairs of images from the multiview dataset
        self.img_dir = DATA_PATH / conf.data_dir
        self.data = []
        # Sort the scenes so that the sample order does not depend on the
        # (arbitrary) directory listing order of the filesystem.
        for folder in sorted(self.img_dir.iterdir()):
            img_folder = Path(folder, "images", "dslr_images_undistorted")
            # Ignore stray files or folders that are not ETH3D scenes.
            if not img_folder.is_dir():
                continue
            depth_folder = Path(folder, "ground_truth_depth/undistorted_depth")
            depth_ext = ".png"
            names = sorted(img.name for img in img_folder.iterdir())

            # Read intrinsics and extrinsics data
            cameras = read_cameras(
                str(Path(folder, "dslr_calibration_undistorted", "cameras.txt")),
                1 / self.downsize_factor,
            )
            name_to_cam_idx = {name: {} for name in names}
            with open(
                str(Path(folder, "dslr_calibration_jpg", "images.txt")), "r"
            ) as f:
                # Skip the 4 header lines, then keep every other line: the
                # lines in between hold the 2D points of each image.
                raw_data = f.read().rstrip().split("\n")[4::2]
            for raw_line in raw_data:
                line = raw_line.split(" ")
                img_name = os.path.basename(line[-1])
                name_to_cam_idx[img_name]["dist_camera_idx"] = int(line[-2])

            T_world_to_camera = {}
            image_visible_points3D = {}
            with open(
                str(Path(folder, "dslr_calibration_undistorted", "images.txt")), "r"
            ) as f:
                lines = f.readlines()[4:]  # Skip the header
            raw_poses = [line.strip("\n").split(" ") for line in lines[::2]]
            raw_points = [line.strip("\n").split(" ") for line in lines[1::2]]
            for raw_pose, raw_pts in zip(raw_poses, raw_points):
                img_name = os.path.basename(raw_pose[-1])

                # Extract the transform from world to camera:
                # fields 1-4 are the quaternion, fields 5-7 the translation.
                target_extrinsics = list(map(float, raw_pose[1:8]))
                pose = np.eye(4, dtype=np.float32)
                pose[:3, :3] = qvec2rotmat(target_extrinsics[:4])
                pose[:3, 3] = target_extrinsics[4:]
                T_world_to_camera[img_name] = pose
                # NOTE(review): this index is stored but never read below; the
                # cameras are looked up with "dist_camera_idx" instead. Confirm
                # that this is intended.
                name_to_cam_idx[img_name]["undist_camera_idx"] = int(raw_pose[-2])

                # Extract the visible 3D points: every third token starting at
                # index 2 is a 3D point id, -1 meaning "no 3D point".
                point3D_ids = [
                    pt_id for pt_id in map(int, raw_pts[2::3]) if pt_id != -1
                ]
                image_visible_points3D[img_name] = set(point3D_ids)

            # Extract the covisibility of each image (upper triangle only, so
            # that every unordered pair is counted once).
            num_imgs = len(names)
            n_covisible_points = np.zeros((num_imgs, num_imgs))
            for i in range(num_imgs - 1):
                visible_points3D1 = image_visible_points3D[names[i]]
                for j in range(i + 1, num_imgs):
                    visible_points3D2 = image_visible_points3D[names[j]]
                    n_covisible_points[i, j] = len(
                        visible_points3D1 & visible_points3D2
                    )

            # Keep only the pairs with enough covisibility
            valid_pairs = np.where(n_covisible_points >= conf.min_covisibility)
            valid_pairs = np.stack(valid_pairs, axis=1)

            def make_view(name):
                """Gather the static (path / calibration) data of one image."""
                stem = Path(name).stem
                return {
                    "name": stem,
                    "img_path": str(Path(img_folder, name)),
                    "depth_path": str(Path(depth_folder, stem)) + depth_ext,
                    "camera": cameras[name_to_cam_idx[name]["dist_camera_idx"]],
                    "T_w2cam": Pose.from_4x4mat(T_world_to_camera[name]),
                }

            for i, j in valid_pairs:
                name0, name1 = names[i], names[j]
                T_w2c0 = T_world_to_camera[name0]
                T_w2c1 = T_world_to_camera[name1]
                self.data.append(
                    {
                        "view0": make_view(name0),
                        "view1": make_view(name1),
                        "T_world_to_ref": Pose.from_4x4mat(T_w2c0),
                        "T_world_to_target": Pose.from_4x4mat(T_w2c1),
                        "T_0to1": Pose.from_4x4mat(
                            np.float32(T_w2c1 @ np.linalg.inv(T_w2c0))
                        ),
                        "T_1to0": Pose.from_4x4mat(
                            np.float32(T_w2c0 @ np.linalg.inv(T_w2c1))
                        ),
                        "n_covisible_points": n_covisible_points[i, j],
                    }
                )

        # Print some info
        print("[Info] Successfully initialized dataset")
        print("\t Name: ETH3D")
        print("----------------------------------------")

    def download_eth3d(self):
        """Download and extract the dataset into ``DATA_PATH / conf.data_dir``.

        The archive is fetched into a temporary sibling folder, extracted
        there, and the extracted folder is then moved to its final location.
        The temporary folder (including the zip file) is removed afterwards.
        """
        data_dir = DATA_PATH / self.conf.data_dir
        tmp_dir = data_dir.parent / "ETH3D_tmp"
        # Start from a clean folder in case a previous download was aborted.
        if tmp_dir.exists():
            shutil.rmtree(tmp_dir)
        tmp_dir.mkdir(exist_ok=True, parents=True)
        url_base = "https://cvg-data.inf.ethz.ch/ETH3D_undistorted/"
        zip_name = "ETH3D_undistorted.zip"
        zip_path = tmp_dir / zip_name
        torch.hub.download_url_to_file(url_base + zip_name, zip_path)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(tmp_dir)
        shutil.move(tmp_dir / zip_name.split(".")[0], data_dir)
        # Do not leave the downloaded archive behind on disk.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    def get_dataset(self, split):
        """Return a dataset built from the same configuration.

        ``split`` is ignored: the same data is returned for any split.
        """
        return ETH3DDataset(self.conf)

    def _read_image(self, img_path):
        """Load an image and resize it according to ``downsize_factor``.

        Returns whatever ``ImagePreprocessor`` returns; ``__getitem__`` merges
        it into the view dictionary, so it is expected to be a mapping.
        """
        img = load_image(img_path, grayscale=self.grayscale)
        # assumes the last two dimensions are (height, width) -- TODO confirm
        shape = img.shape[-2:]
        # instead of INTER_AREA this does bilinear interpolation with antialiasing
        img_data = ImagePreprocessor({"resize": max(shape) // self.downsize_factor})(
            img
        )
        return img_data

    def read_depth(self, depth_path):
        """Load a depth map as a float32 array.

        Raises:
            ValueError: if ``downsize_factor`` is not 8.
            FileNotFoundError: if the depth image cannot be read.
        """
        if self.downsize_factor != 8:
            raise ValueError(
                "Undistorted depth only available for low res"
                + " images(downsize_factor = 8)."
            )
        depth_img = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        # cv2.imread signals a missing/unreadable file by returning None.
        if depth_img is None:
            raise FileNotFoundError(f"Could not read the depth map: {depth_path}")
        # presumably the PNG stores depth as fixed-point in 1/256 units -- confirm
        depth_img = depth_img.astype(np.float32) / 256

        return depth_img

    def __getitem__(self, idx):
        """Returns the data associated to a pair of images (reference, target)
        that are co-visible."""
        data = self.data[idx]

        # Build the outputs from copies: the entry stored in self.data must
        # stay intact so that the same index can be fetched again.
        view0 = {**data["view0"], **self._read_image(data["view0"]["img_path"])}
        view1 = {**data["view1"], **self._read_image(data["view1"]["img_path"])}
        view0["scales"] = np.array([1.0, 1]).astype(np.float32)
        view1["scales"] = np.array([1.0, 1]).astype(np.float32)

        # Load the depths
        view0["depth"] = self.read_depth(view0["depth_path"])
        view1["depth"] = self.read_depth(view1["depth_path"])

        pair_data = {
            key: value for key, value in data.items() if key not in ("view0", "view1")
        }
        outputs = {
            **pair_data,
            "view0": view0,
            "view1": view1,
            "name": f"{view0['name']}_{view1['name']}",
        }

        return outputs

    def __len__(self):
        """Number of image pairs in the dataset."""
        return len(self.data)
|