commit bc0498e4538b70fbd9817edc0027fd976b589ae5 Author: MobKBK <@> Date: Wed Mar 4 20:07:57 2026 +0800 init fusion lcd orin config diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..94c4ad1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +result/ +sequences/ +KITTI/ +KITTI360/ diff --git a/ALIKE/LICENSE b/ALIKE/LICENSE new file mode 100644 index 0000000..4ee705b --- /dev/null +++ b/ALIKE/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2022, Zhao Xiaoming +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ALIKE/README.md b/ALIKE/README.md new file mode 100644 index 0000000..2ba4834 --- /dev/null +++ b/ALIKE/README.md @@ -0,0 +1,117 @@ +# News: the cpp version is released [ALIKE-cpp](https://github.com/Shiaoming/ALIKE-cpp). + +# ALIKE: Accurate and Lightweight Keypoint Detection and Descriptor Extraction + +ALIKE applies a differentiable keypoint detection module to detect accurate sub-pixel keypoints. The network can run at 95 frames per second for 640 x 480 images on NVIDIA Titan X (Pascal) GPU and achieve equivalent performance with the state-of-the-arts. ALIKE benefits real-time applications in resource-limited platforms/devices. Technical details are described in [this paper](https://arxiv.org/pdf/2112.02906.pdf). + +> ``` +> Xiaoming Zhao, Xingming Wu, Jinyu Miao, Weihai Chen, Peter C. Y. Chen, Zhengguo Li, "ALIKE: Accurate and Lightweight Keypoint +> Detection and Descriptor Extraction," IEEE Transactions on Multimedia, 2022. +> ``` + +![](./assets/alike.png) + + +If you use ALIKE in an academic work, please cite: + +``` +@article{Zhao2022ALIKE, + title = {ALIKE: Accurate and Lightweight Keypoint Detection and Descriptor Extraction}, + url = {http://arxiv.org/abs/2112.02906}, + doi = {10.1109/TMM.2022.3155927}, + journal = {IEEE Transactions on Multimedia}, + author = {Zhao, Xiaoming and Wu, Xingming and Miao, Jinyu and Chen, Weihai and Chen, Peter C. Y. and Li, Zhengguo}, + month = march, + year = {2022}, +} +``` + + + +## 1. Prerequisites + +The required packages are listed in the `requirements.txt` : + +```shell +pip install -r requirements.txt +``` + + + +## 2. Models + +The off-the-shelf weights of four variant ALIKE models are provided in `models/` . + + + +## 3. Run demo + +```shell +$ python demo.py -h +usage: demo.py [-h] [--model {alike-t,alike-s,alike-n,alike-l}] + [--device DEVICE] [--top_k TOP_K] [--scores_th SCORES_TH] + [--n_limit N_LIMIT] [--no_display] [--no_sub_pixel] + input + +ALike Demo. + +positional arguments: + input Image directory or movie file or "camera0" (for + webcam0). + +optional arguments: + -h, --help show this help message and exit + --model {alike-t,alike-s,alike-n,alike-l} + The model configuration + --device DEVICE Running device (default: cuda). + --top_k TOP_K Detect top K keypoints. -1 for threshold based mode, + >0 for top K mode. (default: -1) + --scores_th SCORES_TH + Detector score threshold (default: 0.2). + --n_limit N_LIMIT Maximum number of keypoints to be detected (default: + 5000). + --no_display Do not display images to screen. Useful if running + remotely (default: False). + --no_sub_pixel Do not detect sub-pixel keypoints (default: False). +``` + + + +## 4. Examples + +### KITTI example +```shell +python demo.py assets/kitti +``` +![](./assets/kitti.gif) + +### TUM example +```shell +python demo.py assets/tum +``` +![](./assets/tum.gif) + +## 5. Efficiency and performance + +| Models | Parameters | GFLOPs(640x480) | MHA@3 on Hpatches | mAA(10°) on [IMW2020-test](https://www.cs.ubc.ca/research/image-matching-challenge/2021/leaderboard) (Stereo) | +|:---:|:---:|:---:|:-----------------:|:-------------------------------------------------------------------------------------------------------------:| +| D2-Net(MS) | 7653KB | 889.40 | 38.33% | 12.27% | +| LF-Net(MS) | 2642KB | 24.37 | 57.78% | 23.44% | +| SuperPoint | 1301KB | 26.11 | 70.19% | 28.97% | +| R2D2(MS) | 484KB | 464.55 | 71.48% | 39.02% | +| ASLFeat(MS) | 823KB | 77.58 | 73.52% | 33.65% | +| DISK | 1092KB | 98.97 | 70.56% | 51.22% | +| ALike-N | 318KB | 7.909 | 75.74% | 47.18% | +| ALike-L | 653KB | 19.685 | 76.85% | 49.58% | + +### Evaluation on Hpatches + +- Download [hpatches-sequences-release](https://hpatches.github.io/) and put it into `hseq/hpatches-sequences-release`. +- Remove the unreliable sequences as D2-Net. +- Run the following command to evaluate the performance: + ```shell + python hseq/eval.py + ``` + + +For more details, please refer to the [paper](https://arxiv.org/abs/2112.02906). diff --git a/ALIKE/__pycache__/alike.cpython-38.pyc b/ALIKE/__pycache__/alike.cpython-38.pyc new file mode 100644 index 0000000..344a849 Binary files /dev/null and b/ALIKE/__pycache__/alike.cpython-38.pyc differ diff --git a/ALIKE/__pycache__/alnet.cpython-38.pyc b/ALIKE/__pycache__/alnet.cpython-38.pyc new file mode 100644 index 0000000..5af6708 Binary files /dev/null and b/ALIKE/__pycache__/alnet.cpython-38.pyc differ diff --git a/ALIKE/__pycache__/soft_detect.cpython-38.pyc b/ALIKE/__pycache__/soft_detect.cpython-38.pyc new file mode 100644 index 0000000..0e944d6 Binary files /dev/null and b/ALIKE/__pycache__/soft_detect.cpython-38.pyc differ diff --git a/ALIKE/alike.py b/ALIKE/alike.py new file mode 100644 index 0000000..12dd193 --- /dev/null +++ b/ALIKE/alike.py @@ -0,0 +1,143 @@ +import logging +import os +import cv2 +import torch +from copy import deepcopy +import torch.nn.functional as F +from torchvision.transforms import ToTensor +import math + +from ALIKE.alnet import ALNet +from ALIKE.soft_detect import DKD +import time + +configs = { + 'alike-t': {'c1': 8, 'c2': 16, 'c3': 32, 'c4': 64, 'dim': 64, 'single_head': True, 'radius': 2, + 'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-t.pth')}, + 'alike-s': {'c1': 8, 'c2': 16, 'c3': 48, 'c4': 96, 'dim': 96, 'single_head': True, 'radius': 2, + 'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-s.pth')}, + 'alike-n': {'c1': 16, 'c2': 32, 'c3': 64, 'c4': 128, 'dim': 128, 'single_head': True, 'radius': 2, + 'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-n.pth')}, + 'alike-l': {'c1': 32, 'c2': 64, 'c3': 128, 'c4': 128, 'dim': 128, 'single_head': False, 'radius': 2, + 'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-l.pth')}, +} + + +class ALike(ALNet): + def __init__(self, + # ================================== feature encoder + c1: int = 32, c2: int = 64, c3: int = 128, c4: int = 128, dim: int = 128, + single_head: bool = False, + # ================================== detect parameters + radius: int = 2, + top_k: int = 500, scores_th: float = 0.5, + n_limit: int = 5000, + device: str = 'cpu', + model_path: str = '' + ): + super().__init__(c1, c2, c3, c4, dim, single_head) + self.radius = radius + self.top_k = top_k + self.n_limit = n_limit + self.scores_th = scores_th + self.dkd = DKD(radius=self.radius, top_k=self.top_k, + scores_th=self.scores_th, n_limit=self.n_limit) + self.device = device + + if model_path != '': + state_dict = torch.load(model_path, self.device) + self.load_state_dict(state_dict) + self.to(self.device) + self.eval() + logging.info(f'Loaded model parameters from {model_path}') + logging.info( + f"Number of model parameters: {sum(p.numel() for p in self.parameters() if p.requires_grad) / 1e3}KB") + + def extract_dense_map(self, image, ret_dict=False): + # ==================================================== + # check image size, should be integer multiples of 2^5 + # if it is not a integer multiples of 2^5, padding zeros + device = image.device + b, c, h, w = image.shape + h_ = math.ceil(h / 32) * 32 if h % 32 != 0 else h + w_ = math.ceil(w / 32) * 32 if w % 32 != 0 else w + if h_ != h: + h_padding = torch.zeros(b, c, h_ - h, w, device=device) + image = torch.cat([image, h_padding], dim=2) + if w_ != w: + w_padding = torch.zeros(b, c, h_, w_ - w, device=device) + image = torch.cat([image, w_padding], dim=3) + # ==================================================== + + scores_map, descriptor_map = super().forward(image) + + # ==================================================== + if h_ != h or w_ != w: + descriptor_map = descriptor_map[:, :, :h, :w] + scores_map = scores_map[:, :, :h, :w] # Bx1xHxW + # ==================================================== + + # BxCxHxW + descriptor_map = torch.nn.functional.normalize(descriptor_map, p=2, dim=1) + + if ret_dict: + return {'descriptor_map': descriptor_map, 'scores_map': scores_map, } + else: + return descriptor_map, scores_map + + def forward(self, img, image_size_max=99999, sort=False, sub_pixel=False): + """ + :param img: np.array HxWx3, RGB + :param image_size_max: maximum image size, otherwise, the image will be resized + :param sort: sort keypoints by scores + :param sub_pixel: whether to use sub-pixel accuracy + :return: a dictionary with 'keypoints', 'descriptors', 'scores', and 'time' + """ + H, W, three = img.shape + assert three == 3, "input image shape should be [HxWx3]" + + # ==================== image size constraint + image = deepcopy(img) + max_hw = max(H, W) + if max_hw > image_size_max: + ratio = float(image_size_max / max_hw) + image = cv2.resize(image, dsize=None, fx=ratio, fy=ratio) + + # ==================== convert image to tensor + image = torch.from_numpy(image).to(self.device).to(torch.float32).permute(2, 0, 1)[None] / 255.0 + + # ==================== extract keypoints + start = time.time() + + with torch.no_grad(): + descriptor_map, scores_map = self.extract_dense_map(image) + keypoints, descriptors, scores, _ = self.dkd(scores_map, descriptor_map, + sub_pixel=sub_pixel) + keypoints, descriptors, scores = keypoints[0], descriptors[0], scores[0] + keypoints = (keypoints + 1) / 2 * keypoints.new_tensor([[W - 1, H - 1]]) + + if sort: + indices = torch.argsort(scores, descending=True) + keypoints = keypoints[indices] + descriptors = descriptors[indices] + scores = scores[indices] + + end = time.time() + + return {'keypoints': keypoints.cpu().numpy(), + 'descriptors': descriptors.cpu().numpy(), + 'scores': scores.cpu().numpy(), + 'scores_map': scores_map.cpu().numpy(), + 'time': end - start, } + + +if __name__ == '__main__': + import numpy as np + from thop import profile + + net = ALike(c1=32, c2=64, c3=128, c4=128, dim=128, single_head=False) + + image = np.random.random((640, 480, 3)).astype(np.float32) + flops, params = profile(net, inputs=(image, 9999, False), verbose=False) + print('{:<30} {:<8} GFLops'.format('Computational complexity: ', flops / 1e9)) + print('{:<30} {:<8} KB'.format('Number of parameters: ', params / 1e3)) diff --git a/ALIKE/alnet.py b/ALIKE/alnet.py new file mode 100644 index 0000000..0182e04 --- /dev/null +++ b/ALIKE/alnet.py @@ -0,0 +1,163 @@ +import torch +from torch import nn +from torchvision.models import resnet +from typing import Optional, Callable + + +class ConvBlock(nn.Module): + def __init__(self, in_channels, out_channels, + gate: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None): + super().__init__() + if gate is None: + self.gate = nn.ReLU(inplace=True) + else: + self.gate = gate + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self.conv1 = resnet.conv3x3(in_channels, out_channels) + self.bn1 = norm_layer(out_channels) + self.conv2 = resnet.conv3x3(out_channels, out_channels) + self.bn2 = norm_layer(out_channels) + + def forward(self, x): + x = self.gate(self.bn1(self.conv1(x))) # B x in_channels x H x W + x = self.gate(self.bn2(self.conv2(x))) # B x out_channels x H x W + return x + + +# copied from torchvision\models\resnet.py#27->BasicBlock +class ResBlock(nn.Module): + expansion: int = 1 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + gate: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(ResBlock, self).__init__() + if gate is None: + self.gate = nn.ReLU(inplace=True) + else: + self.gate = gate + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('ResBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in ResBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = resnet.conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.conv2 = resnet.conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: torch.Tensor) -> torch.Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.gate(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.gate(out) + + return out + + +class ALNet(nn.Module): + def __init__(self, c1: int = 32, c2: int = 64, c3: int = 128, c4: int = 128, dim: int = 128, + single_head: bool = True, + ): + super().__init__() + self.feature_size = dim + self.gate = nn.ReLU(inplace=True) + + self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) + self.pool4 = nn.MaxPool2d(kernel_size=4, stride=4) + + self.block1 = ConvBlock(3, c1, self.gate, nn.BatchNorm2d) + + self.block2 = ResBlock(inplanes=c1, planes=c2, stride=1, + downsample=nn.Conv2d(c1, c2, 1), + gate=self.gate, + norm_layer=nn.BatchNorm2d) + self.block3 = ResBlock(inplanes=c2, planes=c3, stride=1, + downsample=nn.Conv2d(c2, c3, 1), + gate=self.gate, + norm_layer=nn.BatchNorm2d) + self.block4 = ResBlock(inplanes=c3, planes=c4, stride=1, + downsample=nn.Conv2d(c3, c4, 1), + gate=self.gate, + norm_layer=nn.BatchNorm2d) + # ================================== feature aggregation + self.conv1 = resnet.conv1x1(c1, dim // 4) + self.conv2 = resnet.conv1x1(c2, dim // 4) + self.conv3 = resnet.conv1x1(c3, dim // 4) + self.conv4 = resnet.conv1x1(c4, dim // 4) + self.upsample2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) + self.upsample4 = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True) + self.upsample8 = nn.Upsample(scale_factor=8, mode='bilinear', align_corners=True) + self.upsample32 = nn.Upsample(scale_factor=32, mode='bilinear', align_corners=True) + + # ================================== detector and descriptor head + self.single_head = single_head + if not self.single_head: + self.convhead1 = resnet.conv1x1(dim, dim) + self.convhead2 = resnet.conv1x1(dim, dim + 1) + + def forward(self, image): + # ================================== feature encoder + x1 = self.block1(image) # B x c1 x H x W + x2 = self.pool2(x1) + x2 = self.block2(x2) # B x c2 x H/2 x W/2 + x3 = self.pool4(x2) + x3 = self.block3(x3) # B x c3 x H/8 x W/8 + x4 = self.pool4(x3) + x4 = self.block4(x4) # B x dim x H/32 x W/32 + + # ================================== feature aggregation + x1 = self.gate(self.conv1(x1)) # B x dim//4 x H x W + x2 = self.gate(self.conv2(x2)) # B x dim//4 x H//2 x W//2 + x3 = self.gate(self.conv3(x3)) # B x dim//4 x H//8 x W//8 + x4 = self.gate(self.conv4(x4)) # B x dim//4 x H//32 x W//32 + x2_up = self.upsample2(x2) # B x dim//4 x H x W + x3_up = self.upsample8(x3) # B x dim//4 x H x W + x4_up = self.upsample32(x4) # B x dim//4 x H x W + x1234 = torch.cat([x1, x2_up, x3_up, x4_up], dim=1) + + # ================================== detector and descriptor head + if not self.single_head: + x1234 = self.gate(self.convhead1(x1234)) + x = self.convhead2(x1234) # B x dim+1 x H x W + + descriptor_map = x[:, :-1, :, :] + scores_map = torch.sigmoid(x[:, -1, :, :]).unsqueeze(1) + + return scores_map, descriptor_map + + +if __name__ == '__main__': + from thop import profile + + net = ALNet(c1=16, c2=32, c3=64, c4=128, dim=128, single_head=True) + + image = torch.randn(1, 3, 640, 480) + flops, params = profile(net, inputs=(image,), verbose=False) + print('{:<30} {:<8} GFLops'.format('Computational complexity: ', flops / 1e9)) + print('{:<30} {:<8} KB'.format('Number of parameters: ', params / 1e3)) diff --git a/ALIKE/demo.py b/ALIKE/demo.py new file mode 100644 index 0000000..f9feef2 --- /dev/null +++ b/ALIKE/demo.py @@ -0,0 +1,167 @@ +import copy +import os +import cv2 +import glob +import logging +import argparse +import numpy as np +from tqdm import tqdm +from alike import ALike, configs + + +class ImageLoader(object): + def __init__(self, filepath: str): + self.N = 3000 + if filepath.startswith('camera'): + camera = int(filepath[6:]) + self.cap = cv2.VideoCapture(camera) + if not self.cap.isOpened(): + raise IOError(f"Can't open camera {camera}!") + logging.info(f'Opened camera {camera}') + self.mode = 'camera' + elif os.path.exists(filepath): + if os.path.isfile(filepath): + self.cap = cv2.VideoCapture(filepath) + if not self.cap.isOpened(): + raise IOError(f"Can't open video {filepath}!") + rate = self.cap.get(cv2.CAP_PROP_FPS) + self.N = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1 + duration = self.N / rate + logging.info(f'Opened video {filepath}') + logging.info(f'Frames: {self.N}, FPS: {rate}, Duration: {duration}s') + self.mode = 'video' + else: + self.images = glob.glob(os.path.join(filepath, '*.png')) + \ + glob.glob(os.path.join(filepath, '*.jpg')) + \ + glob.glob(os.path.join(filepath, '*.ppm')) + self.images.sort() + self.N = len(self.images) + logging.info(f'Loading {self.N} images') + self.mode = 'images' + else: + raise IOError('Error filepath (camerax/path of images/path of videos): ', filepath) + + def __getitem__(self, item): + if self.mode == 'camera' or self.mode == 'video': + if item > self.N: + return None + ret, img = self.cap.read() + if not ret: + raise "Can't read image from camera" + if self.mode == 'video': + self.cap.set(cv2.CAP_PROP_POS_FRAMES, item) + elif self.mode == 'images': + filename = self.images[item] + img = cv2.imread(filename) + if img is None: + raise Exception('Error reading image %s' % filename) + return img + + def __len__(self): + return self.N + + +class SimpleTracker(object): + def __init__(self): + self.pts_prev = None + self.desc_prev = None + + def update(self, img, pts, desc): + N_matches = 0 + if self.pts_prev is None: + self.pts_prev = pts + self.desc_prev = desc + + out = copy.deepcopy(img) + for pt1 in pts: + p1 = (int(round(pt1[0])), int(round(pt1[1]))) + cv2.circle(out, p1, 1, (0, 0, 255), -1, lineType=16) + else: + matches = self.mnn_mather(self.desc_prev, desc) + mpts1, mpts2 = self.pts_prev[matches[:, 0]], pts[matches[:, 1]] + N_matches = len(matches) + + out = copy.deepcopy(img) + for pt1, pt2 in zip(mpts1, mpts2): + p1 = (int(round(pt1[0])), int(round(pt1[1]))) + p2 = (int(round(pt2[0])), int(round(pt2[1]))) + cv2.line(out, p1, p2, (0, 255, 0), lineType=16) + cv2.circle(out, p2, 1, (0, 0, 255), -1, lineType=16) + + self.pts_prev = pts + self.desc_prev = desc + + return out, N_matches + + def mnn_mather(self, desc1, desc2): + sim = desc1 @ desc2.transpose() + sim[sim < 0.9] = 0 + nn12 = np.argmax(sim, axis=1) + nn21 = np.argmax(sim, axis=0) + ids1 = np.arange(0, sim.shape[0]) + mask = (ids1 == nn21[nn12]) + matches = np.stack([ids1[mask], nn12[mask]]) + return matches.transpose() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='ALike Demo.') + parser.add_argument('--input', type=str, default=r'E:\caodanyang\dataset\KITTI\odometry\data_odometry_color\dataset\sequences\00\image_2', + help='Image directory or movie file or "camera0" (for webcam0).') + parser.add_argument('--model', choices=['alike-t', 'alike-s', 'alike-n', 'alike-l'], default="alike-t", + help="The model configuration") + parser.add_argument('--device', type=str, default='cuda', help="Running device (default: cuda).") + parser.add_argument('--top_k', type=int, default=-1, + help='Detect top K keypoints. -1 for threshold based mode, >0 for top K mode. (default: -1)') + parser.add_argument('--scores_th', type=float, default=0.2, + help='Detector score threshold (default: 0.2).') + parser.add_argument('--n_limit', type=int, default=5000, + help='Maximum number of keypoints to be detected (default: 5000).') + parser.add_argument('--no_display', action='store_true', + help='Do not display images to screen. Useful if running remotely (default: False).') + parser.add_argument('--no_sub_pixel', action='store_true', + help='Do not detect sub-pixel keypoints (default: False).') + args = parser.parse_args() + + logging.basicConfig(level=logging.INFO) + + image_loader = ImageLoader(args.input) + model = ALike(**configs[args.model], + device=args.device, + top_k=args.top_k, + scores_th=args.scores_th, + n_limit=args.n_limit) + tracker = SimpleTracker() + + if not args.no_display: + logging.info("Press 'q' to stop!") + cv2.namedWindow(args.model) + + runtime = [] + progress_bar = tqdm(image_loader) + for img in progress_bar: + if img is None: + break + + img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + pred = model(img_rgb, sub_pixel=not args.no_sub_pixel) + kpts = pred['keypoints'] + desc = pred['descriptors'] + runtime.append(pred['time']) + + out, N_matches = tracker.update(img, kpts, desc) + + ave_fps = (1. / np.stack(runtime)).mean() + status = f"Fps:{ave_fps:.1f}, Keypoints/Matches: {len(kpts)}/{N_matches}" + progress_bar.set_description(status) + + if not args.no_display: + cv2.setWindowTitle(args.model, args.model + ': ' + status) + cv2.imshow(args.model, out) + if cv2.waitKey(1) == ord('q'): + break + + logging.info('Finished!') + if not args.no_display: + logging.info('Press any key to exit!') + cv2.waitKey() diff --git a/ALIKE/models/alike-l.pth b/ALIKE/models/alike-l.pth new file mode 100644 index 0000000..723a0b7 Binary files /dev/null and b/ALIKE/models/alike-l.pth differ diff --git a/ALIKE/models/alike-n.pth b/ALIKE/models/alike-n.pth new file mode 100644 index 0000000..72e33e4 Binary files /dev/null and b/ALIKE/models/alike-n.pth differ diff --git a/ALIKE/models/alike-s.pth b/ALIKE/models/alike-s.pth new file mode 100644 index 0000000..98afc79 Binary files /dev/null and b/ALIKE/models/alike-s.pth differ diff --git a/ALIKE/models/alike-t.pth b/ALIKE/models/alike-t.pth new file mode 100644 index 0000000..e91c9db Binary files /dev/null and b/ALIKE/models/alike-t.pth differ diff --git a/ALIKE/requirements.txt b/ALIKE/requirements.txt new file mode 100644 index 0000000..14ca745 --- /dev/null +++ b/ALIKE/requirements.txt @@ -0,0 +1,6 @@ +opencv-python~=4.5.1.48 +numpy~=1.19.5 +tqdm~=4.60.0 +torch~=1.8.0 +torchvision~=0.9.0 +thop~=0.0.31-2005241907 \ No newline at end of file diff --git a/ALIKE/soft_detect.py b/ALIKE/soft_detect.py new file mode 100644 index 0000000..2d23cd1 --- /dev/null +++ b/ALIKE/soft_detect.py @@ -0,0 +1,194 @@ +import torch +from torch import nn +import torch.nn.functional as F + + +# coordinates system +# ------------------------------> [ x: range=-1.0~1.0; w: range=0~W ] +# | ----------------------------- +# | | | +# | | | +# | | | +# | | image | +# | | | +# | | | +# | | | +# | |---------------------------| +# v +# [ y: range=-1.0~1.0; h: range=0~H ] + +def simple_nms(scores, nms_radius: int): + """ Fast Non-maximum suppression to remove nearby points """ + assert (nms_radius >= 0) + + def max_pool(x): + return torch.nn.functional.max_pool2d( + x, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius) + + zeros = torch.zeros_like(scores) + max_mask = scores == max_pool(scores) + + for _ in range(2): + supp_mask = max_pool(max_mask.float()) > 0 + supp_scores = torch.where(supp_mask, zeros, scores) + new_max_mask = supp_scores == max_pool(supp_scores) + max_mask = max_mask | (new_max_mask & (~supp_mask)) + return torch.where(max_mask, scores, zeros) + + +def sample_descriptor(descriptor_map, kpts, bilinear_interp=False): + """ + :param descriptor_map: BxCxHxW + :param kpts: list, len=B, each is Nx2 (keypoints) [h,w] + :param bilinear_interp: bool, whether to use bilinear interpolation + :return: descriptors: list, len=B, each is NxD + """ + batch_size, channel, height, width = descriptor_map.shape + + descriptors = [] + for index in range(batch_size): + kptsi = kpts[index] # Nx2,(x,y) + + if bilinear_interp: + descriptors_ = torch.nn.functional.grid_sample(descriptor_map[index].unsqueeze(0), kptsi.view(1, 1, -1, 2), + mode='bilinear', align_corners=True)[0, :, 0, :] # CxN + else: + kptsi = (kptsi + 1) / 2 * kptsi.new_tensor([[width - 1, height - 1]]) + kptsi = kptsi.long() + descriptors_ = descriptor_map[index, :, kptsi[:, 1], kptsi[:, 0]] # CxN + + descriptors_ = torch.nn.functional.normalize(descriptors_, p=2, dim=0) + descriptors.append(descriptors_.t()) + + return descriptors + + +class DKD(nn.Module): + def __init__(self, radius=2, top_k=0, scores_th=0.2, n_limit=20000): + """ + Args: + radius: soft detection radius, kernel size is (2 * radius + 1) + top_k: top_k > 0: return top k keypoints + scores_th: top_k <= 0 threshold mode: scores_th > 0: return keypoints with scores>scores_th + else: return keypoints with scores > scores.mean() + n_limit: max number of keypoint in threshold mode + """ + super().__init__() + self.radius = radius + self.top_k = top_k + self.scores_th = scores_th + self.n_limit = n_limit + self.kernel_size = 2 * self.radius + 1 + self.temperature = 0.1 # tuned temperature + self.unfold = nn.Unfold(kernel_size=self.kernel_size, padding=self.radius) + + # local xy grid + x = torch.linspace(-self.radius, self.radius, self.kernel_size) + # (kernel_size*kernel_size) x 2 : (w,h) + self.hw_grid = torch.stack(torch.meshgrid([x, x])).view(2, -1).t()[:, [1, 0]] + + def detect_keypoints(self, scores_map, sub_pixel=True): + b, c, h, w = scores_map.shape + scores_nograd = scores_map.detach() + # nms_scores = simple_nms(scores_nograd, self.radius) + nms_scores = simple_nms(scores_nograd, 2) + + # remove border + nms_scores[:, :, :self.radius + 1, :] = 0 + nms_scores[:, :, :, :self.radius + 1] = 0 + nms_scores[:, :, h - self.radius:, :] = 0 + nms_scores[:, :, :, w - self.radius:] = 0 + + # detect keypoints without grad + if self.top_k > 0: + topk = torch.topk(nms_scores.view(b, -1), self.top_k) + indices_keypoints = topk.indices # B x top_k + else: + if self.scores_th > 0: + masks = nms_scores > self.scores_th + if masks.sum() == 0: + th = scores_nograd.reshape(b, -1).mean(dim=1) # th = self.scores_th + masks = nms_scores > th.reshape(b, 1, 1, 1) + else: + th = scores_nograd.reshape(b, -1).mean(dim=1) # th = self.scores_th + masks = nms_scores > th.reshape(b, 1, 1, 1) + masks = masks.reshape(b, -1) + + indices_keypoints = [] # list, B x (any size) + scores_view = scores_nograd.reshape(b, -1) + for mask, scores in zip(masks, scores_view): + indices = mask.nonzero(as_tuple=False)[:, 0] + if len(indices) > self.n_limit: + kpts_sc = scores[indices] + sort_idx = kpts_sc.sort(descending=True)[1] + sel_idx = sort_idx[:self.n_limit] + indices = indices[sel_idx] + indices_keypoints.append(indices) + + keypoints = [] + scoredispersitys = [] + kptscores = [] + if sub_pixel: + # detect soft keypoints with grad backpropagation + patches = self.unfold(scores_map) # B x (kernel**2) x (H*W) + self.hw_grid = self.hw_grid.to(patches) # to device + for b_idx in range(b): + patch = patches[b_idx].t() # (H*W) x (kernel**2) + indices_kpt = indices_keypoints[b_idx] # one dimension vector, say its size is M + patch_scores = patch[indices_kpt] # M x (kernel**2) + + # max is detached to prevent undesired backprop loops in the graph + max_v = patch_scores.max(dim=1).values.detach()[:, None] + x_exp = ((patch_scores - max_v) / self.temperature).exp() # M * (kernel**2), in [0, 1] + + # \frac{ \sum{(i,j) \times \exp(x/T)} }{ \sum{\exp(x/T)} } + xy_residual = x_exp @ self.hw_grid / x_exp.sum(dim=1)[:, None] # Soft-argmax, Mx2 + + hw_grid_dist2 = torch.norm((self.hw_grid[None, :, :] - xy_residual[:, None, :]) / self.radius, + dim=-1) ** 2 + scoredispersity = (x_exp * hw_grid_dist2).sum(dim=1) / x_exp.sum(dim=1) + + # compute result keypoints + keypoints_xy_nms = torch.stack([indices_kpt % w, indices_kpt // w], dim=1) # Mx2 + keypoints_xy = keypoints_xy_nms + xy_residual + keypoints_xy = keypoints_xy / keypoints_xy.new_tensor( + [w - 1, h - 1]) * 2 - 1 # (w,h) -> (-1~1,-1~1) + + kptscore = torch.nn.functional.grid_sample(scores_map[b_idx].unsqueeze(0), + keypoints_xy.view(1, 1, -1, 2), + mode='bilinear', align_corners=True)[0, 0, 0, :] # CxN + + keypoints.append(keypoints_xy) + scoredispersitys.append(scoredispersity) + kptscores.append(kptscore) + else: + for b_idx in range(b): + indices_kpt = indices_keypoints[b_idx] # one dimension vector, say its size is M + keypoints_xy_nms = torch.stack([indices_kpt % w, indices_kpt // w], dim=1) # Mx2 + keypoints_xy = keypoints_xy_nms / keypoints_xy_nms.new_tensor( + [w - 1, h - 1]) * 2 - 1 # (w,h) -> (-1~1,-1~1) + kptscore = torch.nn.functional.grid_sample(scores_map[b_idx].unsqueeze(0), + keypoints_xy.view(1, 1, -1, 2), + mode='bilinear', align_corners=True)[0, 0, 0, :] # CxN + keypoints.append(keypoints_xy) + scoredispersitys.append(None) + kptscores.append(kptscore) + + return keypoints, scoredispersitys, kptscores + + def forward(self, scores_map, descriptor_map, sub_pixel=False): + """ + :param scores_map: Bx1xHxW + :param descriptor_map: BxCxHxW + :param sub_pixel: whether to use sub-pixel keypoint detection + :return: kpts: list[Nx2,...]; kptscores: list[N,....] normalised position: -1.0 ~ 1.0 + """ + keypoints, scoredispersitys, kptscores = self.detect_keypoints(scores_map, + sub_pixel) + + descriptors = sample_descriptor(descriptor_map, keypoints, sub_pixel) + + # keypoints: B M 2 + # descriptors: B M D + # scoredispersitys: + return keypoints, descriptors, kptscores, scoredispersitys diff --git a/BEVNet.py b/BEVNet.py new file mode 100644 index 0000000..3924ff9 --- /dev/null +++ b/BEVNet.py @@ -0,0 +1,393 @@ +import math +import torch +import torch._utils +import torch.nn as nn +from typing import Optional, Callable +from torchvision.models import resnet + + +class RIConv2d(nn.Module): + def __init__(self, in_channel, out_channel, kernel_size=1, stride=1, padding=0, bias=True): + super().__init__() + self.padding = padding + self.stride = stride + self.use_bias = bias + idx = torch.arange(kernel_size ** 2).view(-1, 1) + row = torch.div(idx, kernel_size, rounding_mode='floor') + col = torch.fmod(idx, kernel_size) + idx = torch.cat([row, col], dim=1) + dis = (idx - 0.5 * (kernel_size - 1)).norm(dim=1) + 0.5 * (kernel_size % 2 - 1) + dis = dis.view(kernel_size, kernel_size) + dis = torch.round(dis).long() + dis[dis > 0.5 * (kernel_size - 1)] = -1 + self.mask = dis + self.number = int(torch.max(dis).item() + 1) + self.weight = torch.zeros([kernel_size, kernel_size, out_channel, in_channel]) + if bias: + self.bias = torch.nn.Parameter(torch.rand([out_channel, ])) + else: + self.bias = None + self.weight1 = torch.nn.Parameter(torch.rand([self.number, out_channel, in_channel])) + + def forward(self, x): + weight = self.weight.to(self.weight1.device) + for i in range(self.number): + mask = self.mask == i + weight[mask] = self.weight1[i] + weight = weight.permute(2, 3, 0, 1) + y = torch.nn.functional.conv2d(x, weight, self.bias, self.stride, self.padding) + return y + + def __repr__(self): + return f"RIConv2d(in_channel={self.weight.shape[3]}, out_channel={self.weight.shape[2]}," \ + f" kernel_size={self.weight.shape[0]}, stride={self.stride}, padding={self.padding}, bias={self.bias is not None})" + + +class RIMaxpool2d(nn.Module): + def __init__(self, kernel_size=1, stride=1, padding=0): + super().__init__() + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + idx = torch.arange(kernel_size ** 2).view(-1, 1) + row = torch.div(idx, kernel_size, rounding_mode='floor') + col = torch.fmod(idx, kernel_size) + idx = torch.cat([row, col], dim=1) + dis = (idx - 0.5 * (kernel_size - 1)).norm(dim=1) + 0.5 * (kernel_size % 2 - 1) + dis = dis.view(kernel_size, kernel_size) + dis = torch.round(dis) + dis[dis > 0.5 * (kernel_size - 1)] = -1 + self.mask = dis.view(-1, ) > -1 + + def forward(self, x): + B, C, H, W = x.shape + h_out = math.floor((H + 2 * self.padding - (self.kernel_size - 1) - 1) / self.stride + 1) + w_out = math.floor((W + 2 * self.padding - (self.kernel_size - 1) - 1) / self.stride + 1) + unfold_x = torch.nn.functional.unfold(x, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding) + y = unfold_x.view(B, C, self.kernel_size * self.kernel_size, h_out, w_out) + y = y.permute(2, 0, 1, 3, 4) + y1 = y[self.mask] + y_max = torch.max(y1, dim=0, keepdim=False)[0] + return y_max + + def __repr__(self): + return f"RIMaxpool2d(kernel_size={self.kernel_size}, stride={self.stride}, padding={self.padding})" + + +class RIAvgpool2d(nn.Module): + def __init__(self, kernel_size=1, stride=1, padding=0): + super().__init__() + self.padding = padding + self.stride = stride + idx = torch.arange(kernel_size ** 2).view(-1, 1) + row = torch.div(idx, kernel_size, rounding_mode='floor') + col = torch.fmod(idx, kernel_size) + idx = torch.cat([row, col], dim=1) + dis = (idx - 0.5 * (kernel_size - 1)).norm(dim=1) + 0.5 * (kernel_size % 2 - 1) + dis = dis.view(kernel_size, kernel_size) + dis = torch.round(dis) + dis[dis > 0.5 * (kernel_size - 1)] = -1 + mask = dis > -1 + self.number = torch.sum(mask) + self.weight = torch.zeros([kernel_size, kernel_size, 1, 1]) + self.weight[mask] = 1 + + def forward(self, x): + weight = self.weight.to(x.device) + weight = weight.permute(2, 3, 0, 1) + weight = weight.repeat(x.shape[1], 1, 1, 1) + sum = torch.nn.functional.conv2d(x, weight, None, self.stride, self.padding, groups=x.shape[1]) + avg = sum / self.number + return avg + + def __repr__(self): + return f"RIAvgpool2d(kernel_size={self.weight.shape[0]}, stride={self.stride}, padding={self.padding})" + + +class RIConvBlock(nn.Module): + def __init__(self, in_channels, out_channels, + gate: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None): + super().__init__() + if gate is None: + self.gate = nn.ReLU(inplace=True) + else: + self.gate = gate + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self.conv1 = RIConv2d(in_channel=in_channels, out_channel=out_channels, kernel_size=5, padding=2, bias=False) + self.bn1 = norm_layer(out_channels) + self.conv2 = RIConv2d(in_channel=out_channels, out_channel=out_channels, kernel_size=5, padding=2, bias=False) + self.bn2 = norm_layer(out_channels) + + def forward(self, x): + x = self.gate(self.bn1(self.conv1(x))) # B x in_channels x H x W + x = self.gate(self.bn2(self.conv2(x))) # B x out_channels x H x W + return x + +class RIResBlock(nn.Module): + expansion: int = 1 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + gate: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(RIResBlock, self).__init__() + if gate is None: + self.gate = nn.ReLU(inplace=True) + else: + self.gate = gate + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('ResBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in ResBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = RIConv2d(in_channel=inplanes, out_channel=planes, kernel_size=5, stride=1, padding=2, bias=False) + self.bn1 = norm_layer(planes) + self.conv2 = RIConv2d(in_channel=planes, out_channel=planes, kernel_size=5, stride=1, padding=2, bias=False) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: torch.Tensor) -> torch.Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.gate(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.gate(out) + + return out + +class RICNN(nn.Module): + def __init__(self, c1: int = 8, c2: int = 16, c3: int = 32, c4: int = 64, dim: int = 64 + ): + super().__init__() + + self.gate = nn.ReLU(inplace=True) + self.pool2 = RIMaxpool2d(kernel_size=2, stride=2) + self.pool4 = RIMaxpool2d(kernel_size=5, stride=4, padding=1) + self.block1 = RIConvBlock(3, c1, self.gate, nn.BatchNorm2d) + self.block2 = RIResBlock(inplanes=c1, planes=c2, stride=1, + downsample=nn.Conv2d(c1, c2, 1), + gate=self.gate, + norm_layer=nn.BatchNorm2d) + self.block3 = RIResBlock(inplanes=c2, planes=c3, stride=1, + downsample=nn.Conv2d(c2, c3, 1), + gate=self.gate, + norm_layer=nn.BatchNorm2d) + self.block4 = RIResBlock(inplanes=c3, planes=c4, stride=1, + downsample=nn.Conv2d(c3, c4, 1), + gate=self.gate, + norm_layer=nn.BatchNorm2d) + + self.conv1 = resnet.conv1x1(c1, dim // 4) + self.conv2 = resnet.conv1x1(c2, dim // 4) + self.conv3 = resnet.conv1x1(c3, dim // 4) + self.conv4 = resnet.conv1x1(dim, dim // 4) + self.upsample2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) + self.upsample3 = nn.Upsample(scale_factor=8, mode='bilinear', align_corners=True) + self.upsample4 = nn.Upsample(scale_factor=32, mode='bilinear', align_corners=True) + self.out = nn.Conv2d(dim, dim + 1, 1) + + def forward(self, image): + x1 = self.block1(image) + x2 = self.pool2(x1) + x2 = self.block2(x2) + x3 = self.pool4(x2) + x3 = self.block3(x3) + x4 = self.pool4(x3) + x4 = self.block4(x4) + x1 = self.gate(self.conv1(x1)) + x2 = self.gate(self.conv2(x2)) + x3 = self.gate(self.conv3(x3)) + x4 = self.gate(self.conv4(x4)) + x2_up = self.upsample2(x2) + x3_up = self.upsample3(x3) + x4_up = self.upsample4(x4) + x1234 = torch.cat([x1, x2_up, x3_up, x4_up], dim=1) + y = self.out(x1234) + descriptor_map = y[:, :-1, :, :] + scores_map = torch.sigmoid(y[:, -1, :, :]).unsqueeze(1) + return scores_map, descriptor_map + + def ri2maxpool(self, pool): + stride = pool.stride + pool_new = nn.MaxPool2d(stride) + return pool_new + + def maxpool2ri(self, pool): + kernel_size = stride = pool.stride + ds = round((math.sqrt(2) - 1) / 2 * stride - 0.25 * (stride % 2 - 1)) + kernel_size = kernel_size + ds + pool_new = RIMaxpool2d(kernel_size, stride, ds) + return pool_new + + def ri2avgpool(self, pool): + stride = pool.stride + pool_new = nn.AvgPool2d(stride) + return pool_new + + def avgpool2ri(self, pool): + kernel_size = stride = pool.stride + if stride > 3: + kernel_size = kernel_size + 1 + pool_new = RIAvgpool2d(kernel_size, stride) + return pool_new + + def ri2conv(self, conv): + ri = conv + weight = ri.weight + device = ri.weight1.device + bias = ri.bias + use_bias = bias is not None + weight_copy = weight.clone().to(device) + for i in range(ri.number): + mask = ri.mask == i + weight_copy[mask] = ri.weight1[i] + weight_copy = weight_copy.permute(2, 3, 0, 1) + in_c = weight.shape[3] + out_c = weight.shape[2] + kz = weight.shape[0] + sd = ri.stride + pd = ri.padding + conv_new = nn.Conv2d(in_channels=in_c, out_channels=out_c, kernel_size=kz, stride=sd, padding=pd, bias=use_bias) + if use_bias: + state_dict = {'weight': weight_copy, 'bias': bias} + else: + state_dict = {'weight': weight_copy} + conv_new.load_state_dict(state_dict) + return conv_new.to(device) + + def conv2ri(self, conv): + weight = conv.weight + bias = conv.bias + device = weight.device + in_c = weight.shape[1] + out_c = weight.shape[0] + kz = weight.shape[2] + if kz < 3: + return conv + sd = conv.stride + pd = conv.padding + idx = torch.arange(kz ** 2).view(-1, 1) + row = torch.div(idx, kz, rounding_mode='floor') + col = torch.fmod(idx, kz) + idx = torch.cat([row, col], dim=1) + dis = (idx - 0.5 * (kz - 1)).norm(dim=1) + 0.5 * (kz % 2 - 1) + dis = dis.view(kz, kz) + dis = torch.round(dis).long() + dis[dis > 0.5 * (kz - 1)] = -1 + mask = dis + number = int(torch.max(dis).item() + 1) + weight1 = torch.rand([number, out_c, in_c]).to(device) + weight2 = weight.clone() + weight2 = weight2.permute(2, 3, 0, 1) + used_bias = bias is not None + for i in range(number): + mask1 = mask == i + w = weight2[mask1] + weight1[i] = torch.mean(w, dim=0) + if used_bias: + state_dict = {'weight1': weight1, 'bias': bias} + else: + state_dict = {'weight1': weight1} + conv_new = RIConv2d(in_channel=in_c, out_channel=out_c, kernel_size=kz, stride=sd, padding=pd, bias=used_bias) + conv_new.load_state_dict(state_dict) + return conv_new.to(device) + + def disable_ri(self): + modules = self.__dict__['_modules'] + for key, value in modules.items(): + if isinstance(value, RIMaxpool2d): + setattr(self, key, self.ri2maxpool(value)) + if isinstance(value, RIAvgpool2d): + setattr(self, key, self.ri2avgpool(value)) + if isinstance(value, RIConv2d): + setattr(self, key, self.ri2conv(value)) + if 'block' in key: + block = value + block_modules = block.__dict__['_modules'] + for bkey, bvalue in block_modules.items(): + if isinstance(bvalue, RIMaxpool2d): + setattr(block, bkey, self.ri2maxpool(bvalue)) + if isinstance(bvalue, RIAvgpool2d): + setattr(block, bkey, self.ri2avgpool(bvalue)) + if isinstance(bvalue, RIConv2d): + setattr(block, bkey, self.ri2conv(bvalue)) + modules[key] = block_modules + setattr(self, key, block) + + def enable_ri(self): + modules = self.__dict__['_modules'] + for key, value in modules.items(): + if isinstance(value, nn.MaxPool2d): + setattr(self, key, self.maxpool2ri(value)) + if isinstance(value, nn.AvgPool2d): + setattr(self, key, self.avgpool2ri(value)) + if isinstance(value, nn.Conv2d): + setattr(self, key, self.conv2ri(value)) + if 'block' in key: + block = value + block_modules = block.__dict__['_modules'] + for bkey, bvalue in block_modules.items(): + if isinstance(bvalue, nn.MaxPool2d): + setattr(block, bkey, self.maxpool2ri(bvalue)) + if isinstance(bvalue, nn.AvgPool2d): + setattr(block, bkey, self.avgpool2ri(bvalue)) + if isinstance(bvalue, nn.Conv2d): + setattr(block, bkey, self.conv2ri(bvalue)) + modules[key] = block_modules + setattr(self, key, block) + + +class EncodePosition(nn.Module): + def __init__(self, feature_size=128): + super().__init__() + self.bins = 16 + self.conv1 = nn.Sequential( + nn.Conv1d(in_channels=self.bins, out_channels=feature_size//2, kernel_size=1, stride=1, padding=0, bias=True), nn.BatchNorm1d(feature_size//2), nn.ReLU(), + nn.Conv1d(in_channels=feature_size//2, out_channels=feature_size//2, kernel_size=1, stride=1, padding=0, bias=True), nn.BatchNorm1d(feature_size//2), nn.ReLU(), + nn.Conv1d(in_channels=feature_size//2, out_channels=feature_size, kernel_size=1, stride=1, padding=0, bias=True) + ) + # self.conv2=(nn.Conv1d(in_channels=256,out_channels=128,kernel_size=1)) + + def forward(self, x, fea): + b, n, c = x.shape + x1 = x.unsqueeze(1) + x2 = x.unsqueeze(2) + dx = x1 - x2 + distance = dx.norm(p=2, dim=3) + hists = torch.zeros([b, n, self.bins]).to(x.device) + for i in range(b): + for j in range(n): + dis = distance[i, j] + hist = torch.histc(dis, bins=self.bins, min=1, max=80) + hists[i, j] = hist + hists = hists / torch.sum(hists, dim=2, keepdim=True) + x3 = hists.permute(0, 2, 1) + x4 = self.conv1(x3) + if hasattr(self, 'conv2'): + x5 = torch.cat([fea, x4], dim=1) + y = self.conv2(x5) + else: + y = fea + x4 + return y diff --git a/README.md b/README.md new file mode 100644 index 0000000..95d8651 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# FUSION + +## Table of Contents +- [Paper](#paper) +- [Overview](#overview) +- [Prerequisites](#prerequisites) +- [Running the Code](#running-the-code) +- [Evaluation](#evaluation) +## Paper +If you find the poject helps you, you can cite our paper: + +Cao D, Yue H, Liu Z, et al. BEVLCD+: Real-Time and Rotation-Invariant Loop Closure Detection Based on BEV of Point Cloud[J]. IEEE Transactions on Instrumentation and Measurement, 2023. + +Yue H, Cao D, Liu Z, et al. Cross Fusion of Point Cloud and Learned Image for Loop Closure Detection[J]. IEEE Robotics and Automation Letters, 2024. + +## Overview +We provide code for BEV mode and fusion mode, so you can easily train and test. + +## Prerequisites +Before you can use this project, you'll need to do the following: + +1. **Download Datasets**: Download the [KITTI](https://www.cvlibs.net/datasets/kitti/eval_odometry.php) and [KITTI-360](https://www.cvlibs.net/datasets/kitti-360/download.php). + +2. **Prepare Dataset Structure**: Use `preparedataset.py` to construct a dataset structure that complies with the project's requirements. Make sure to update the necessary paths in the code. + +3. **Prepare environment**: Use the commonds on `env.txt` to create your environment. Windows and Ubuntu is OK. + +## Running the Code +To run the code, follow these steps: + +1. Configure the code to run in either BEV mode or fusion mode using the settings in `config.yaml`. + +2. If you want to load a trained model used in the paper, ensure that you update the file path accordingly. + +3. Run `python train.py` + + +## Evaluation +Evaluate the saved data using the evaluation script. + +## Others +If you have any questions please feel free to contact us. diff --git a/__pycache__/BEVNet.cpython-38.pyc b/__pycache__/BEVNet.cpython-38.pyc new file mode 100644 index 0000000..ed6a368 Binary files /dev/null and b/__pycache__/BEVNet.cpython-38.pyc differ diff --git a/__pycache__/dataset.cpython-38.pyc b/__pycache__/dataset.cpython-38.pyc new file mode 100644 index 0000000..3fe15e0 Binary files /dev/null and b/__pycache__/dataset.cpython-38.pyc differ diff --git a/__pycache__/evaluate_lcd.cpython-38.pyc b/__pycache__/evaluate_lcd.cpython-38.pyc new file mode 100644 index 0000000..e16cca3 Binary files /dev/null and b/__pycache__/evaluate_lcd.cpython-38.pyc differ diff --git a/__pycache__/loss.cpython-38.pyc b/__pycache__/loss.cpython-38.pyc new file mode 100644 index 0000000..f34b1d6 Binary files /dev/null and b/__pycache__/loss.cpython-38.pyc differ diff --git a/__pycache__/net.cpython-38.pyc b/__pycache__/net.cpython-38.pyc new file mode 100644 index 0000000..0fcd715 Binary files /dev/null and b/__pycache__/net.cpython-38.pyc differ diff --git a/__pycache__/netvlad.cpython-38.pyc b/__pycache__/netvlad.cpython-38.pyc new file mode 100644 index 0000000..5ba5736 Binary files /dev/null and b/__pycache__/netvlad.cpython-38.pyc differ diff --git a/__pycache__/tools.cpython-38.pyc b/__pycache__/tools.cpython-38.pyc new file mode 100644 index 0000000..1225cfb Binary files /dev/null and b/__pycache__/tools.cpython-38.pyc differ diff --git a/__pycache__/uot.cpython-38.pyc b/__pycache__/uot.cpython-38.pyc new file mode 100644 index 0000000..29cd6cb Binary files /dev/null and b/__pycache__/uot.cpython-38.pyc differ diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..db0031e --- /dev/null +++ b/config.yaml @@ -0,0 +1,56 @@ +'experiment' : + + # 'path_dataset' : '/mnt/data/cdy/project/dataset/FUSION' + # 'path_result': '/mnt/data/cdy/data2/results/FUSIONLCD' + + # 'path_dataset' : 'E:\work\Project\dataset\FUSION' + # 'path_result' : 'E:\work\Project\results\FUSIONLCD\bev2' + + 'path_dataset' : '/home/adlab36/chenyouyuan/FUSIONLCD' + 'path_result': '/home/adlab36/chenyouyuan/FUSIONLCD/result' + 'train_flag' : 0 + 'validate_flag' : 1 + 'test_flag' : 1 + 'flag' : 'fusion' + 'cuda' : 1 + # TRAINING + 'epochs' : 200 + 'batchsize' : 6 + 'learning_rate' : 1.e-3 + 'beta1' : 0.9 + 'beta2' : 0.999 + 'eps' : 1.e-8 + 'weight_decay' : 5.e-6 + 'load_model' : 1 + #FUSION + # 'last_model' : '/data4/caodanyang/results/FUSIONLCD/08310/models/checkpoint_079.pth.tar' + #BEV + # 'last_model' : '/data4/caodanyang/results/FUSIONLCD/bev_09030/models/checkpoint_066.pth.tar' + #BEV+EP + 'last_model' : '/home/adlab36/chenyouyuan/FUSIONLCD/result/log/models/checkpoint_199.pth.tar' + #DATASET + 'train' : 0,5,6,7,9 + 'validate' : 8,50,54,55,56,59 + 'test' : 8,50,54,55,56,59 + 'voxel_num' : 15000 + 'voxel_max_points' : 100 + 'voxel_sample' : 'top' +# 'bev_range' : -51.2,-51.2,-2.5,51.2,51.2,1.5 +# 'bev_resolution' : 0.16 +# 'bev_range' : -64,-64,-2.5,64,64,1.5 +# 'bev_resolution' : 0.2 + 'bev_range' : -32,-32,-2.5,32,32,1.5 + 'bev_resolution' : 0.2 + + # NETWORK PARAMS + 'kpts_number_bev' : 150 + 'kpts_number_img' : 150 + 'cluster_num_bev' : 16 + 'cluster_num_img' : 16 + 'cluster_num_fusion' : 16 + 'sinkhorn_iter' : 5 + 'vlad_size' : 256 + # LOSS + 'loop_file' : 'loop_GT_4m' + 'trip_margin' : 0.5 + 'negetative_selsector' : 'random' \ No newline at end of file diff --git a/dataset.py b/dataset.py new file mode 100644 index 0000000..e47b470 --- /dev/null +++ b/dataset.py @@ -0,0 +1,751 @@ +import glob +import math +import os +import pickle +from functools import reduce +import matplotlib.pylab as plt +import cv2 +import numba +import numpy as np +import torch +import yaml +from scipy.spatial.distance import cdist +from torch.utils.data import Dataset, DataLoader, ConcatDataset +from torch.utils.data.dataloader import default_collate +from torch.nn.utils.rnn import pad_sequence + +import tools + +IMG_HEIGHT = 384 +IMG_WIDTH = 1152 +EGEG_PROJ = 10 +IMAGE_SCALE = 0.5 + +def euler2mat(z, y, x): + Ms = [] + if z: + cosz = math.cos(z) + sinz = math.sin(z) + Ms.append(np.array( + [[cosz, -sinz, 0], + [sinz, cosz, 0], + [0, 0, 1]])) + if y: + cosy = math.cos(y) + siny = math.sin(y) + Ms.append(np.array( + [[cosy, 0, siny], + [0, 1, 0], + [-siny, 0, cosy]])) + if x: + cosx = math.cos(x) + sinx = math.sin(x) + Ms.append(np.array( + [[1, 0, 0], + [0, cosx, -sinx], + [0, sinx, cosx]])) + if Ms: + return reduce(np.dot, Ms[::-1]) + return np.eye(3) + + +def rt_mat(rx, ry, rz, tx, ty, tz): + rt = np.eye(4, dtype=np.float32) + r = euler2mat(rz, ry, rx) + rt[0:3, 0:3] = r + rt[0:3, 3] = [tx, ty, tz] + return rt + +@numba.jit(nopython=True) +def _points_to_voxel_reverse_kernel(points, + voxel_size, + coors_range, + num_points_per_voxel, + coor_to_voxelidx, + voxels, + coors, + max_points, + max_voxels,voxel_idx_empty,voxel_mamiz): + # put all computations to one loop. + # we shouldn't create large array in main jit code, otherwise + # reduce performance + N = points.shape[0] + # ndim = points.shape[1] - 1 + ndim = 3 + ndim_minus_1 = ndim - 1 + grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size + # np.round(grid_size) + # grid_size = np.round(grid_size).astype(np.int64)(np.int32) + grid_size = np.round(grid_size, 0, grid_size).astype(np.int32) + coor = np.zeros(shape=(3,), dtype=np.int32) + voxel_num = 0 + failed = False + for i in range(N): + failed = False + for j in range(ndim): + c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j]) + if c < 0 or c >= grid_size[j]: + failed = True + break + coor[ndim_minus_1 - j] = c + if failed: + continue + voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]#0-15000 + if voxelidx == -1: + voxelidx = voxel_num + if voxel_num >= max_voxels: + break + voxel_num += 1 + coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx + coors[voxelidx] = coor + num = num_points_per_voxel[voxelidx] + if num < max_points: + voxel_idx_empty[voxelidx,num_points_per_voxel[voxelidx]]=1 + if points[i,2]>voxel_mamiz[voxelidx,0]: + voxel_mamiz[voxelidx,0]=points[i,2] + if points[i,2]=3) to voxels. This version calculate + everything in one loop. now it takes only 4.2ms(complete point cloud) + with jit and 3.2ghz cpu.(don't calculate other features) + Note: this function in ubuntu seems faster than windows 10. + + Args: + points: [N, ndim] float tensor. points[:, :3] contain xyz points and + points[:, 3:] contain other information such as reflectivity. + voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size + coors_range: [6] list/tuple or array, float. indicate voxel range. + format: xyzxyz, minmax + max_points: int. indicate maximum points contained in a voxel. + reverse_index: boolean. indicate whether return reversed coordinates. + if points has xyz format and reverse_index is True, output + coordinates will be zyx format, but points in features always + xyz format. + max_voxels: int. indicate maximum voxels this function create. + for second, 20000 is a good choice. you should shuffle points + before call this function because max_voxels may drop some points. + + Returns: + voxels: [M, max_points, ndim] float tensor. only contain points. + coordinates: [M, 3] int32 tensor. + num_points_per_voxel: [M] int32 tensor. + """ + if not isinstance(voxel_size, np.ndarray): + voxel_size = np.array(voxel_size, dtype=points.dtype) + if not isinstance(coors_range, np.ndarray): + coors_range = np.array(coors_range, dtype=points.dtype) + voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size + voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist()) + if reverse_index: + voxelmap_shape = voxelmap_shape[::-1] + # don't create large array in jit(nopython=True) code. + num_points_per_voxel = np.zeros(shape=(max_voxels,), dtype=np.int32) + coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32) + voxels = np.zeros(shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype) + coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32) + voxel_idx_empty = np.zeros((max_voxels,max_points),dtype=np.int32) + voxel_mamiz=np.zeros((max_voxels,2),dtype=points.dtype) + voxel_mamiz[:,0]=-99 + voxel_mamiz[:,1]=99 + voxel_num = _points_to_voxel_reverse_kernel( + points, voxel_size, coors_range, num_points_per_voxel, + coor_to_voxelidx, voxels, coors, max_points, max_voxels,voxel_idx_empty,voxel_mamiz) + + # coors = coors[:voxel_num] + # voxels = voxels[:voxel_num] + # num_points_per_voxel = num_points_per_voxel[:voxel_num] + # voxels[:, :, -3:] = voxels[:, :, :3] - \ + # voxels[:, :, :3].sum(axis=1, keepdims=True)/num_points_per_voxel.reshape(-1, 1, 1) + return voxels, coors, num_points_per_voxel, coor_to_voxelidx,voxel_idx_empty,voxel_mamiz + + +@numba.jit(nopython=True) +def pixel_choose(pixel1, pixel2): + n = pixel1.shape[0] + k = pixel1.shape[1] + k1 = pixel2.shape[1] + for i in range(n): + idx = [] + for j in range(k): + if pixel1[i, j, 0] > -1: + idx.append(j) + k2 = len(idx) + idx=np.asarray(idx) + if k2 >= k1: + choice = np.random.choice(idx, k1, replace=False) + else: + choice = np.random.choice(idx, k1, replace=True) + for j in range(k1): + pixel2[i,j] = pixel1[i, choice[j]] + +def pointcloud_encoder(pointcloud=None, cfg=None): + try: + resolution = cfg['bev_resolution'] + pointcloud_range = [float(x) for x in tools.read_cfg(cfg['bev_range'])] + voxel_max_points = cfg['voxel_max_points'] + voxel_num = cfg['voxel_num'] + voxel_sample = cfg['voxel_sample'] + except: + resolution = 0.2 + pointcloud_range = [-40, -40, -2.5, 40, 40, 1.5] + voxel_max_points = 100 + voxel_num = 15000 + voxel_sample = 'top' + + pc_filter = (pointcloud[:, 0] > pointcloud_range[0]) & (pointcloud[:, 0] < pointcloud_range[3]) \ + & (pointcloud[:, 1] > pointcloud_range[1]) & (pointcloud[:, 1] < pointcloud_range[4]) \ + & (pointcloud[:, 2] > pointcloud_range[2]) & (pointcloud[:, 2] < pointcloud_range[5]) + pointcloud = pointcloud[pc_filter] + if voxel_sample == 'top': + idx = np.argsort(-pointcloud[:, 2]) + pointcloud = pointcloud[idx] + else: + idx = np.arange(len(pointcloud)) + np.random.shuffle(idx) + pointcloud = pointcloud[idx] + resolution_z=pointcloud_range[5]-pointcloud_range[2] + voxels, coors, num_points_per_voxel, coor_to_voxelidx,voxel_idx_empty,voxel_mamiz = points_to_voxel(pointcloud, + voxel_size=[ + resolution, resolution,resolution_z], + coors_range=[ + *pointcloud_range], + max_points=voxel_max_points, + max_voxels=voxel_num) + + + coor_to_voxelidx = np.squeeze(coor_to_voxelidx) + voxel_idx_empty=voxel_idx_empty.astype(np.bool_) + + voxels_center = np.sum(voxels, axis=1) + voxels_center[:, 0:4] = voxels_center[:, 0:4] / (num_points_per_voxel.reshape(-1, 1)+1e-8) + + max_z = voxel_mamiz[:,0] + min_z = voxel_mamiz[:,1] + max_z1 = max_z / resolution_z + mean_i = np.sum(voxels[:, :, 3], axis=1) / (num_points_per_voxel+1e-8) + density = np.log(np.clip(num_points_per_voxel, 1, None)) / np.log(cfg['voxel_max_points']) + + + dz = max_z - min_z + idx_not_ground = (dz > 0.05) & (num_points_per_voxel > 1) + coors1 = coors[idx_not_ground, 1:3] + relation = 0 + if voxels.shape[2] == 6: # x,y,z,i,pu,pv + have_pixel = (voxels[:, :, 4] > 0) & (voxels[:, :, 5] > 0) # each 3d point has a pixel + have_pixel = np.bitwise_or.reduce(have_pixel, axis=1) # each cell of bev has pixels,num of pixels may less than num of 3d points + + # num_voxels_pixel = np.sum(voxels_pixel, axis=1) + # num_voxels_pixel = np.clip(num_voxels_pixel, 1, None) + # voxels_center[:, 4:6] = np.int_(voxels_center[:, 4:6] / num_voxels_pixel.reshape(-1, 1)) + feature = np.concatenate([max_z1, mean_i, density, + voxels_center[:, 0], voxels_center[:, 1], voxels_center[:, 2], voxels_center[:, 3]], + axis=0).reshape(7, -1).T + pixels = voxels[idx_not_ground & have_pixel, :, 4:6] + # pixels2 = np.zeros([pixels.shape[0], 10, 2], dtype=pixels.dtype) + # pixel_choose(pixels, pixels2) + # pixels = pixels2 + + coors2 = coors[idx_not_ground & have_pixel, 1:3].reshape(-1, 1, 2) + relation = np.hstack((pixels, coors2)) + + elif voxels.shape[2] == 4: # x,y,z,i + feature = np.concatenate([max_z1, mean_i, density, + voxels_center[:, 0], voxels_center[:, 1], voxels_center[:, 2], voxels_center[:, 3]], + axis=0).reshape(7, -1).T + else: + print('ERROR VOXEL') + exit() + # eigvalues = voxel_svd(voxels) + # feature = np.hstack((feature, eigvalues)) + bev = np.zeros([coor_to_voxelidx.shape[1], coor_to_voxelidx.shape[1], feature.shape[1]], dtype=np.float32) + bev[coors1[:, 0], coors1[:, 1]] = feature[idx_not_ground] + # bev[coors1[:, 0], coors1[:, 1]] = feature[idx_not_ground] + # bev_show=np.uint8(bev[:,:,0:3]*255) + # cv2.imshow('1',bev_show) + # cv2.waitKey(0) + + return bev, relation + + +def crop_image(img, height=IMG_HEIGHT, width=IMG_WIDTH): + # img: the original image, a numpy array of size H*W*C + # height: the target height + # width: the target width + + # get the size of the original image + h, w, _ = img.shape + + # calculate the padding size if necessary + if h < height: + pad_top = (height - h) // 2 + pad_bottom = height - h - pad_top + else: + pad_top, pad_bottom = 0, 0 + if w < width: + pad_left = (width - w) // 2 + pad_right = width - w - pad_left + else: + pad_left, pad_right = 0, 0 + + # pad the original image with black pixels if necessary + img_padded = cv2.copyMakeBorder(img, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=(0, 0, 0)) + + # crop the padded image to the target size + h1 = (img_padded.shape[0] - height) // 2 + h2 = h1 + height + w1 = (img_padded.shape[1] - width) // 2 + w2 = w1 + width + img_cropped = img_padded[h1:h2, w1:w2] + dh = int((height - h) / 2) + dw = int((width - w) / 2) + if IMAGE_SCALE<1: + img_cropped = cv2.resize(img_cropped, None, None, IMAGE_SCALE, IMAGE_SCALE) + dh = int(dh * IMAGE_SCALE) + dw = int(dw * IMAGE_SCALE) + + return img_cropped, dh, dw + + +class KittiDataset(Dataset): + def __init__(self, cfg, sequence, argument=True, mode='train', flag_bev=True, flag_img=True, flag_fuse=True): + root_dataset = cfg['path_dataset'] + if (flag_img == False) & (flag_bev == False) & (flag_fuse == False): + print('No module will be used!') + exit() + if (flag_img == False) or (flag_bev == False): + flag_fuse = False + self.flag_img = flag_img + self.flag_bev = flag_bev + self.flag_fuse = flag_fuse + self.cfg = cfg + self.sequence = sequence + self.mode = mode + scans = glob.glob(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'velodyne', "*.bin")) + images = glob.glob(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'image_2', "*.png")) + if int(sequence) >= 50: + calib = np.loadtxt(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'calib.txt')) + poses = os.path.join(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'poses.npy')) + else: + calib = np.genfromtxt(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'calib.txt'))[:, 1:] + poses = os.path.join(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'poses.txt')) + f_gt = open(os.path.join(root_dataset, 'sequences', '%02d' % sequence, cfg['loop_file'] + '.pickle'), 'rb') + scans.sort() + images.sort() + self.scans = scans + self.images = images + if int(sequence) >= 50: + cam0_to_velo = np.reshape(calib, (3, 4)) + cam0_to_velo = np.vstack([cam0_to_velo, [0, 0, 0, 1]]) + cam0_to_velo = np.linalg.inv(cam0_to_velo) + self.cam0_to_velo = cam0_to_velo.astype(np.float32) + k = [552.554261, 0.000000, 682.049453, 0.000000, + 0.000000, 552.554261, 238.769549, 0.000000, + 0.000000, 0.000000, 1.000000, 0.000000] + k = np.array(k).reshape([3, 4]) + p2 = np.eye(4) + p2[:3] = k + self.p2 = p2.astype(np.float32) + poses2 = np.load(poses) + else: + p2 = np.reshape(calib[2], (3, 4)) + p2 = np.vstack([p2, [0, 0, 0, 1]]) + self.p2 = p2.astype(np.float32) + cam0_to_velo = np.reshape(calib[4], (3, 4)) + cam0_to_velo = np.vstack([cam0_to_velo, [0, 0, 0, 1]]) + self.cam0_to_velo = cam0_to_velo.astype(np.float32) + cam0_to_velo = torch.tensor(cam0_to_velo) + poses2 = [] + with open(poses, 'r') as f: + for x in f: + x = x.strip().split() + x = [float(v) for v in x] + pose = torch.zeros((4, 4), dtype=torch.float64) + pose[0, 0:4] = torch.tensor(x[0:4]) + pose[1, 0:4] = torch.tensor(x[4:8]) + pose[2, 0:4] = torch.tensor(x[8:12]) + pose[3, 3] = 1.0 + pose = cam0_to_velo.inverse() @ (pose @ cam0_to_velo) # + poses2.append(pose.float().numpy()) + poses2 = np.stack(poses2) + # for i in range(12): + # plt.subplot(3, 4, i + 1), plt.plot(np.arange(len(poses2)), poses2[:, i // 4, i % 4]) + # plt.show() + self.poses = poses2 + + gt = pickle.load(f_gt) + self.gt=gt + # gt_new=[] + # for i in range(len(gt)): + # idx=gt[i]['idx'] + # positive_idxs=gt[i]['positive_idxs'] + # for j in positive_idxs: + # sample={'idx':idx,'positive_idxs':[j]} + # gt_new.append(sample) + # self.gt=gt_new + self.argument = argument + + def __len__(self): + if self.mode == 'test': + return len(self.poses) + else: + return int(len(self.gt)) + + def __getitem__(self, idx): + if self.mode == 'test': + idx_query = idx + pose_query = self.poses[idx_query] + image_query, scan_query, bev_query, dw, dh, W, H, relation_query = 0, 0, 0, 0, 0, 0, 0, 0 + if self.flag_img: + image_query = cv2.imread(self.images[idx_query]) + # image_query = cv2.GaussianBlur(image_query, (15,15),0) + image_query, dh, dw = crop_image(image_query, IMG_HEIGHT, IMG_WIDTH) + if self.flag_bev: + print("DEBUG __len__ scans:", len(self.scans), "requested idx_query:", idx_query) + scan_query = np.fromfile(self.scans[idx_query], dtype=np.float32).reshape((-1, 4)) + # idx = np.random.choice(len(scan_query), int(len(scan_query) /4), replace=False) + # scan_query = scan_query[idx] + if self.flag_bev & self.flag_img & self.flag_fuse: + # mat_proj = np.matmul(self.p2, self.cam0_to_velo) + mat_proj = torch.matmul(torch.from_numpy(self.p2), torch.from_numpy(self.cam0_to_velo)).numpy() + pts_query = scan_query.copy() + pts_query[:, 3] = 1 + # pts_proj_query = np.matmul(mat_proj, pts_query.T).T + pts_proj_query = torch.matmul(torch.from_numpy(mat_proj), torch.from_numpy(pts_query.T)).numpy().T + z = pts_proj_query[:, 2:3] + pts_proj_query = pts_proj_query / z * IMAGE_SCALE + pts_proj_query[:, 0:2] = pts_proj_query[:, 0:2] + [dw, dh] + H, W, _ = image_query.shape + mask_query = (pts_proj_query[:, 0] >= EGEG_PROJ) & (pts_proj_query[:, 0] < W - EGEG_PROJ) & ( + pts_proj_query[:, 1] >= EGEG_PROJ) & (pts_proj_query[:, 1] < H - EGEG_PROJ) & (z[:, 0] >= 0) + pts_proj_query[~mask_query] = -1 + pixel_query = pts_proj_query[:, 0:2] + pixel_query = pixel_query[:, [1, 0]] + scan_query = np.hstack((scan_query, pixel_query)) + bev_query, relation_query = pointcloud_encoder(scan_query, self.cfg) + sample = { + 'sequence': self.sequence, + 'id_query': idx_query, + 'bev_query': bev_query, + 'img_query': image_query, + 'pose_query': pose_query, + 'relation_query': relation_query + } + else: + gt = self.gt[idx] + idx_query = gt['idx'] + idx_ps = gt['positive_idxs'] + idx_positive = np.random.choice(idx_ps) + pose_query = self.poses[idx_query] + pose_positive = self.poses[idx_positive] + image_query, scan_query, bev_query, image_positive, scan_query, bev_positive, dw, dh, W, H, pose_to_frame, \ + label_score, relation_query, relation_positive, = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + if self.flag_img: + image_query = cv2.imread(self.images[idx_query]) + image_query, dh, dw = crop_image(image_query) + image_positive = cv2.imread(self.images[idx_positive]) + image_positive, _, _ = crop_image(image_positive) + if self.flag_bev: + scan_query = np.fromfile(self.scans[idx_query], dtype=np.float32).reshape((-1, 4)) + + scan_positive = np.fromfile(self.scans[idx_positive], dtype=np.float32).reshape((-1, 4)) + + # return {'1':np.zeros(5)} + # import open3d as o3d + # pcd1 = o3d.geometry.PointCloud() + # sc1 = scan_query.copy() + # sc1[:, 3] = 1 + # sc1 = np.matmul(pose_query, sc1.T).T + # pcd1.points = o3d.utility.Vector3dVector(sc1[:, :3]) + # pcd1.colors = o3d.utility.Vector3dVector([[0, 0, 1] for i in range(len(pcd1.points))]) + # pcd2 = o3d.geometry.PointCloud() + # sc2 = scan_positive.copy() + # sc2[:, 3] = 1 + # sc2 = np.matmul(pose_positive, sc2.T).T + # pcd2.points = o3d.utility.Vector3dVector(sc2[:, :3]) + # pcd2.colors = o3d.utility.Vector3dVector([[0, 1, 0] for i in range(len(pcd2.points))]) + # vis1 = o3d.visualization.Visualizer() + # vis1.create_window(window_name='registration', width=600, height=600) # 创建窗口 + # render_option: o3d.visualization.RenderOption = vis1.get_render_option() # 设置点云渲染参数 + # render_option.background_color = np.array([1, 1, 1]) # 设置背景色(这里为黑色) + # render_option.point_size = 2 # 设置渲染点的大小 + # vis1.add_geometry(pcd1) + # vis1.add_geometry(pcd2) + # coord_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=1.0, origin=pose_query[0:3,3]) + # vis1.add_geometry(coord_frame) + # vis1.run() + + if self.argument: + # Rt1 = np.eye(4) + # idx1=np.random.random((len(scan_query),)) + # idx1=idx1>0.1 + # scan_query=scan_query[idx1] + # idx2=np.random.random((len(scan_positive),)) + # idx2=idx2>0.1 + # scan_positive=scan_positive[idx2] + rand = np.random.random(6) * 2 - 1 + Rt = rt_mat(rand[0] * 3 / 180 * np.pi, + rand[1] * 3 / 180 * np.pi, + rand[2] * 180 / 180 * np.pi, + rand[3] * 3, rand[4] * 3, rand[5] * 0.3) + Rt1 = torch.from_numpy(Rt).inverse().numpy() + ints = scan_positive[:, 3].copy() + scan_positive[:, 3] = 1 + # scan_positive = np.matmul(Rt, scan_positive.T).T + scan_positive = torch.matmul(torch.from_numpy(Rt),torch.from_numpy(scan_positive.T)).numpy().T + scan_positive[:, 3] = ints + # pose_positive = np.matmul(pose_positive, np.linalg.inv(Rt)) + pose_positive=torch.matmul(torch.from_numpy(pose_positive),torch.from_numpy(Rt).inverse()).numpy() + else: + Rt1 = np.eye(4).astype(np.float32) + if self.flag_fuse: + # mat_proj = np.matmul(self.p2, self.cam0_to_velo) + mat_proj = torch.matmul(torch.from_numpy(self.p2), torch.from_numpy(self.cam0_to_velo)).numpy() + pts_query = scan_query.copy() + pts_query[:, 3] = 1 + # pts_proj_query = np.matmul(mat_proj, pts_query.T).T + pts_proj_query = torch.matmul(torch.from_numpy(mat_proj), torch.from_numpy(pts_query.T)).numpy().T + z = pts_proj_query[:, 2:3] + pts_proj_query = pts_proj_query / z * IMAGE_SCALE + pts_proj_query[:, 0:2] = pts_proj_query[:, 0:2] + [dw, dh] + H, W, _ = image_query.shape + mask_query = (pts_proj_query[:, 0] >= EGEG_PROJ) & (pts_proj_query[:, 0] < W - EGEG_PROJ) & ( + pts_proj_query[:, 1] >= EGEG_PROJ) & (pts_proj_query[:, 1] < H - EGEG_PROJ) & (z[:, 0] >= 0) + pts_proj_query[~mask_query] = -1 + pixel_query = pts_proj_query[:, 0:2] + pixel_query = pixel_query[:, [1, 0]] # h,w + scan_query = np.hstack((scan_query, pixel_query)) + + # fig = plt.figure() + # plt.subplot(2, 1, 1), plt.imshow(image_query[:, :, [2, 1, 0]]) + # plt.subplot(2, 1, 2), plt.imshow(image_query[:, :, [2, 1, 0]]) + # plt.scatter(pixel_query[mask_query, 1], pixel_query[mask_query, 0], c=z[mask_query], cmap='jet', alpha=0.5, s=1) + # plt.show() + + # mat_proj1 = self.p2.dot(self.cam0_to_velo).dot(Rt1) + mat_proj1 = torch.matmul(torch.matmul(torch.from_numpy(self.p2), torch.from_numpy(self.cam0_to_velo)), torch.from_numpy(Rt1)).numpy() + pts_positive = scan_positive.copy() + pts_positive[:, 3] = 1 + # pts_proj_positive = np.matmul(mat_proj1, pts_positive.T).T + pts_proj_positive = torch.matmul(torch.from_numpy(mat_proj1), torch.from_numpy(pts_positive.T)).numpy().T + z = pts_proj_positive[:, 2:3] + pts_proj_positive = pts_proj_positive / z * IMAGE_SCALE + pts_proj_positive[:, 0:2] = pts_proj_positive[:, 0:2] + [dw, dh] + mask_positive = (pts_proj_positive[:, 0] >= EGEG_PROJ) & (pts_proj_positive[:, 0] < W - EGEG_PROJ) & ( + pts_proj_positive[:, 1] >= 0 - EGEG_PROJ) & (pts_proj_positive[:, 1] < H - EGEG_PROJ) & (z[:, 0] >= 0) + pts_proj_positive[~mask_positive] = -1 + pixel_positive = pts_proj_positive[:, 0:2] + pixel_positive = pixel_positive[:, [1, 0]] + scan_positive = np.hstack((scan_positive, pixel_positive)) + + # pose_to_frame = np.matmul(np.linalg.inv(pose_positive), pose_query) + pose_to_frame=torch.matmul(torch.from_numpy(pose_positive).inverse(),torch.from_numpy(pose_query)).numpy() + # scan_query1=scan_query.copy() + # scan_query1[:,3]=1 + # scan_query1=np.matmul(pose_to_frame,scan_query1.T).T + # scan_query[:,0:3]=scan_query1[:,0:3] + # scan_query[:, 3] = 1 + # scan_query = np.matmul(pose_query, scan_query.T).T + # scan_positive[:, 3] = 1 + # scan_positive = np.matmul(pose_positive, scan_positive.T).T + # plt.subplot(1, 2, 1), plt.plot(scan_query[:, 0], scan_query[:, 1], 'b.', markersize=1),plt.axis([-60,60,-60,60]) + # plt.subplot(1, 2, 2), plt.plot(scan_positive[:, 0], scan_positive[:, 1], 'b.', markersize=1),plt.axis([-60,60,-60,60]) + # plt.show() + + bev_query, relation_query = pointcloud_encoder(scan_query, self.cfg) + bev_positive, relation_positive = pointcloud_encoder(scan_positive, self.cfg) + # if self.argument: + # rand = np.random.randint(0, 9, [2, ]) + # if rand[0] > 4: + # bev_query1 = np.rot90(bev_query, 2, axes=(0, 1)) + # bev_query = bev_query1.copy() + # if rand[1] > 4: + # bev_positive1 = np.rot90(bev_positive, 2, axes=(0, 1)) + # bev_positive = bev_positive1.copy() + h_bev, w_bev, _ = bev_query.shape + label_score = np.zeros_like(bev_positive[:, :, :2]) + grid = np.array(np.meshgrid(np.arange(h_bev), np.arange(w_bev))).swapaxes(0, 2) + scan_query_sample = bev_query[:, :, 3:5] + mask_query = scan_query_sample != 0 + mask_query = mask_query[:, :, 0] | mask_query[:, :, 1] + grid_query = grid[mask_query] + scan_query_sample = scan_query_sample[mask_query] + scan_positive_sample = bev_positive[:, :, 3:5] + mask_positive = scan_positive_sample != 0 + mask_positive = mask_positive[:, :, 0] | mask_positive[:, :, 1] + grid_positive = grid[mask_positive] + scan_positive_sample = scan_positive_sample[mask_positive] + scan_query_sample1 = np.hstack((scan_query_sample, scan_query_sample * 0)) + scan_query_sample1[:, 3] = 1 + # scan_query_sample1 = np.matmul(pose_to_frame, scan_query_sample1.T).T + scan_query_sample1 = torch.matmul(torch.from_numpy(pose_to_frame), torch.from_numpy(scan_query_sample1.T)).numpy().T + + idx1, idx2, dis = tools.nn_match(scan_query_sample1[:, 0:2], scan_positive_sample[:, 0:2], 'euclidean') + if len(dis) > 50: + th1 = max([2, dis[min([256, int(len(dis) * 0.3)])]]) + idx1 = idx1[dis < th1] + idx2 = idx2[dis < th1] + else: + dis = cdist(scan_query_sample1[:, 0:2], scan_positive_sample) + min1 = np.min(dis, axis=1) + min2 = np.min(dis, axis=0) + min11 = np.sort(min1) + th1 = max([0.2, min11[min([256, int(len(min11) * 0.2)])]]) + min21 = np.sort(min2) + th2 = max([0.2, min21[min([256, int(len(min21) * 0.2)])]]) + idx1 = np.arange(len(scan_query_sample))[min1 < th1] + idx2 = np.arange(len(scan_positive_sample))[min2 < th2] + + # points1=scan_query_sample1[:, 0:2] + # points2=scan_positive_sample[:, 0:2] + # points = np.mean(np.vstack((points1, points2)), axis=0, keepdims=True) + # points1 = points1 - points + # points2 = points2 - points + # af = torch.sum(torch.from_numpy(points1) ** 2, -1, keepdim=True) + # bf = torch.sum(torch.from_numpy(points2) ** 2, -1, keepdim=True).transpose(0, 1) + # cf = af + bf - 2 * torch.mm(torch.from_numpy(points1), torch.from_numpy(points2).transpose(0, 1)) # c^2=a^2+b^2-2abcos + # c = torch.sqrt(cf) + # dis = c + # dis1 = torch.min(dis, dim=1)[0] + # dis2 = torch.min(dis, dim=0)[0] + # idx1 = torch.where(dis1 < 0.5)[0].numpy() + # idx2 = torch.where(dis2 < 0.5)[0].numpy() + # dis1=dis1.numpy() + # dis2= dis2.numpy() + + grid_query = grid_query[idx1] + grid_positive = grid_positive[idx2] + label_score[grid_query[:, 0], grid_query[:, 1], 0] = 1 + label_score[grid_positive[:, 0], grid_positive[:, 1], 1] = 1 + # fig, ax = plt.subplots(2, 2) + # ax[0,0].imshow(bev_query[:, :, 0:3]) + # ax[0,1].imshow(label_score[:, :, 0]) + # ax[1,0].imshow(bev_positive[:, :, 0:3]) + # ax[1,1].imshow(label_score[:, :, 1]) + # plt.savefig('1.png') + # plt.show() + + sample = { + 'sequence': self.sequence, + 'id_query': idx_query, + 'bev_query': bev_query, + 'img_query': image_query, + 'pose_query': pose_query, + 'id_positive': idx_positive, + 'bev_positive': bev_positive, + 'img_positive': image_positive, + 'pose_positive': pose_positive, + 'pose_to_frame': pose_to_frame, + 'label_score': label_score, + 'relation_query': relation_query, + 'relation_positive': relation_positive + } + return sample + + +def collate(samples): + relation_query = [] + relation_positive = [] + samples2 = {key: default_collate([d[key] for d in samples]) for key in samples[0] + if key != 'relation_query' and key != 'relation_positive'} + + for single_sample in samples: + try: + relation_query.append(torch.from_numpy(single_sample['relation_query'])) + except: + pass + try: + relation_positive.append(torch.from_numpy(single_sample['relation_positive'])) + except: + pass + + relation = relation_query + relation_positive + if len(relation) > 0: + relation1 = pad_sequence(relation, batch_first=True, padding_value=-1) + relation1 = relation1.float() + samples2['relation'] = relation1 + + return samples2 + + +def KittiTotalLoader(cfg): + flag = cfg['flag'] + bev = False + img = False + fuse = False + if flag == 'fusion': + bev = True + img = True + fuse = True + elif flag == 'img': + img = True + else: + bev = True + sequence_train = [int(x) for x in tools.read_cfg(cfg['train'])] + sequence_val = [int(x) for x in tools.read_cfg(cfg['validate'])] + sequence_test = [int(x) for x in tools.read_cfg(cfg['test'])] + dataset_list = [] + for sequence in sequence_train: + single_dataset = KittiDataset(cfg, sequence, flag_bev=bev, flag_img=img, flag_fuse=fuse, argument=True, mode='train') + print('===Trainloader add: sequence %02d, %04d files, %04d frames with loop' % (sequence, len(single_dataset.poses), len(single_dataset.gt))) + dataset_list.append(single_dataset) + dataset_train = ConcatDataset(dataset_list) + dataset_list = [] + for sequence in sequence_val: + single_dataset = KittiDataset(cfg, sequence, flag_bev=bev, flag_img=img, flag_fuse=fuse, argument=False, mode='train') + print('===Validationloader add: sequence %02d, %04d files, %04d frames with loop' % (sequence, len(single_dataset.poses), len(single_dataset.gt))) + dataset_list.append(single_dataset) + dataset_val = ConcatDataset(dataset_list) + dataset_list = [] + for sequence in sequence_test: + single_dataset = KittiDataset(cfg, sequence, flag_bev=bev, flag_img=img, flag_fuse=fuse, argument=False, mode='test') + print('===Testloader add: sequence %02d, %04d files' % (sequence, len(single_dataset.poses))) + dataset_list.append(single_dataset) + dataset_test = ConcatDataset(dataset_list) + loader_train = DataLoader(dataset_train, batch_size=cfg['batchsize'], shuffle=True, num_workers=6, collate_fn=collate) + loader_val = DataLoader(dataset_val, batch_size=cfg['batchsize'], shuffle=True, num_workers=6, collate_fn=collate) + loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=6, collate_fn=collate) + return loader_train, loader_val, loader_test + + +if __name__ == '__main__': + root_dir = '/media/ubuntu/Workshop/caodanyang/Project_CDY/results/mylcd' + # sequence = 0 + # dataset = KittiDataset(root_dir, sequence) + # dataloader = DataLoader(dataset, batch_size=1, num_workers=8, shuffle=False) + # t = tools.Timer(name='Loading', number=len(dataloader)) + # for data in dataloader: + # t.update() + try: + with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile: + cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader) + print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml")) + except: + with open(os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"), "r") as ymlfile: + cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader) + print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml")) + cfg = cfg['experiment'] + path_dataset = cfg['path_dataset'] + path_result = cfg['path_result'] + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + train, val, test = KittiTotalLoader(cfg) + t = tools.Timer(name='Loading') + ds = [] + for i, data in enumerate(train): + # ds.append(d) + t.update(i) + + # t = tools.Timer(name='Loading', number=len(val)) + # for i, data in enumerate(val): + # t.update() diff --git a/env.txt b/env.txt new file mode 100644 index 0000000..d7b4f6d --- /dev/null +++ b/env.txt @@ -0,0 +1,59 @@ +## 环境配置备忘 +conda create -n fusion_cyy python=3.8 + +conda activate fusion_cyy + +conda install pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 cudatoolkit=11.3 -c pytorch -c conda-forge + +pip install pykitti pytorch-metric-learning pyyaml scipy scikit-image scikit-learn tqdm open3d matplotlib numba opencv-python opencv-contrib-python pandas -i https://pypi.mirrors.ustc.edu.cn/simple/ + +conda remove -n fusion_cyy --all + +在.git的config下 + +[user] + name = MobKBK + email = 202311250413@csust.edu.cn /home/adlab8/pub_data1/Kitti360/KITTI-360 + +tmux使用备忘: +列表 +tmux ls + +tmux new -s your-session-name wget -c https://s3.eu-central-1.amazonaws.com/avg-kitti/data_odometry_color.zip +转后台: +ctrl+b d +杀死 +ctrl+d +分离: +tmux detach +连接: +tmux attach -t cyy +切换: +tmux switch -t your-session-name +重命名: +tmux rename-session -t old-session new-session +新建窗口: +tmux new-window -n your-window-name +选择窗口: +ctrl+b c: 创建一个新窗口(状态栏会显示多个窗口的信息) +ctrl+b p: 切换到上一个窗口(按照状态栏的顺序) +ctrl+b n: 切换到下一个窗口 +ctrl+b w: 从列表中选择窗口 +重命名窗口: +tmux rename-window -t old_name new_name +# 划分为上下两个窗格 +tmux split-window + +# 划分左右两个窗格 +tmux split-window -h +左右划分:ctrl+b % +上下划分:ctrl+b " + +ctrl+b [ 使用PageUp和PageDown可以实现上下翻页 + +kitti位置 +/home/adlab8/pub_data1/OpenDataLab___KITTI_Odometry_2012/raw +kitt360 + +/home/adlab8/pub_data1/KITTI-360/ +ln -s <源文件或目录的路径> <软连接的路径> \ No newline at end of file diff --git a/evaluate_fm.py b/evaluate_fm.py new file mode 100644 index 0000000..c5c4479 --- /dev/null +++ b/evaluate_fm.py @@ -0,0 +1,81 @@ +import os +import time +import torch +import tools +import numpy as np +import yaml +from dataset import KittiTotalLoader +from tqdm import tqdm + +def match_err(fea1,fea2,pose1,pose2,kpt1,kpt2): + pose_to_frame=torch.matmul(pose2.inverse(),pose1) + kpt1 = (pose_to_frame @ kpt1.permute(1, 0)).permute(1, 0) + kpt1[:,2]=0 + kpt2[:,2]=0 + idxp1,idxp2,dis_kpt=tools.nn_match(kpt1,kpt2,'euclidean') + idxf1,idxf2,dis_fea=tools.nn_match(fea1,fea2,'cosine') + kpt11=kpt1[idxf1] + kpt21=kpt2[idxf2] + dis_kpt1=(kpt11-kpt21).norm(p=2,dim=1) + mas=[] + mss=[] + reps=[] + for thr in [0.3,0.5,1,2,3]: + mas.append((torch.sum(dis_kpt1<=thr)/(len(idxf1)+1e-8)).item()) + mss.append((torch.sum(dis_kpt1<=thr)/(torch.sum(dis_kpt<=thr)+1e-8)).item()) + reps.append((torch.sum(dis_kpt<=thr)).item()/(len(fea1)+len(fea2))*2) + return mas,mss,reps + +def feature_match(loader_val=None,data=None): + if loader_val is None: + try: + with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile: + cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader) + print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml")) + except: + with open(os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"), "r") as ymlfile: + cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader) + print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml")) + cfg = cfg['experiment'] + _, loader_val, _ = KittiTotalLoader(cfg) + sequences=data['sequences'] + fea_kpt=data['fea_kpt'].cuda() + poses=data['pose_query'].cuda() + kpts=data['key_points'].cuda() + seq=torch.unique(sequences) + if __name__ == '__main__': + flag = False + else: + flag = True + for i in range(len(seq)): + idx=sequences==seq[i] + fea_kpt1=fea_kpt[idx] + poses1=poses[idx] + kpts1=kpts[idx] + gt=loader_val.dataset.datasets[i].gt + ms=[] + for j in tqdm(range(0, len(gt)), disable=flag, ncols=60, desc='feature match'): + query=gt[j]['idx'] + p_idxs=gt[j]['positive_idxs'] + for p in p_idxs: + a,s,reps=match_err(fea_kpt1[query],fea_kpt1[p],poses1[query],poses1[p],kpts1[query],kpts1[p]) + ms.append(a+s+reps) + ms=np.asarray(ms) + ms1=np.mean(ms,axis=0) + print('Feature matching, sequence %02d'%(seq[i])) + print('MA@0.3:%.3f, MA@0.5:%.3f, MA@1:%.3f, MA@2:%.3f, MA@3:%.3f' %(ms1[0], ms1[1], ms1[2], ms1[3], ms1[4])) + print('MS@0.3:%.3f, MS@0.5:%.3f, MS@1:%.3f, MS@2:%.3f, MS@3:%.3f' %(ms1[5], ms1[6], ms1[7], ms1[8], ms1[9])) + print('RP@0.3:%.3f, RP@0.5:%.3f, RP@1:%.3f, RP@2:%.3f, RP@3:%.3f' %(ms1[10], ms1[11], ms1[12], ms1[13], ms1[14])) + time.sleep(1) + + + + + + +if __name__ == '__main__': + # data = torch.load("/mnt/data2/datasets/cdy/results/FUSIONLCD/05230/database/database_149_b0.pth.tar") + # data = torch.load("/mnt/data2/datasets/cdy/results/BEVLCD/ricnn03202/database/database_xyz_000.pth.tar") + data= torch.load("/data4/caodanyang/results/FUSIONLCD/bev_07190/database/database_all.pth.tar") + feature_match(data=data) + \ No newline at end of file diff --git a/evaluate_lcd.py b/evaluate_lcd.py new file mode 100644 index 0000000..829b0d9 --- /dev/null +++ b/evaluate_lcd.py @@ -0,0 +1,265 @@ +import os +import time +import matplotlib +# set non-interactive backend for server (must be set before pyplot import) +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import numpy as np +import torch +from skimage.measure import ransac +from skimage.transform import EuclideanTransform +import tools +from tqdm import tqdm +from sklearn.metrics import auc +from sklearn.neighbors import KDTree +import warnings + +warnings.filterwarnings("ignore") + + +def recall_with_candidates(vlads, poses, sequence, recall_num=25, positive_distance=4): + recall_at_k = [0] * recall_num + num_with_loop = 0 + if __name__ == '__main__': + flag = False + else: + flag = True + for i in tqdm(range(0, len(vlads)), disable=flag, ncols=60, desc='Recall@k'): + valid_idx = list(set(range(0, len(vlads))) - set(range(max(0, i - 50), min(len(vlads), i + 50)))) + valid_idx = torch.tensor(valid_idx).to(vlads.device) + vlad_query = vlads[i].view(1, -1) + vlad_valid = vlads[valid_idx] + dis_valid = torch.linalg.norm((poses[i:i + 1, 0:3, 3] - poses[valid_idx, 0:3, 3]), dim=1) + min_dis = torch.min(dis_valid) + if min_dis > positive_distance: + continue + num_with_loop = num_with_loop + 1 + # global feature to query quickly + dis_vlad = torch.cdist(vlad_query, vlad_valid).view(-1, ) + dis, idx_cand = torch.topk(dis_vlad, recall_num, largest=False) + idx_cand = valid_idx[idx_cand] + for j in range(recall_num): + idx_cand1 = idx_cand[j] + dis = torch.linalg.norm((poses[i:i + 1, 0:3, 3] - poses[idx_cand1, 0:3, 3]), dim=1) + if dis <= positive_distance: + recall_at_k[j] = recall_at_k[j] + 1 + break + time.sleep(1) + recall_at_k = np.cumsum(recall_at_k) / float(num_with_loop) + print('Sequence %02d, Recall@' % sequence, end='') + for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 19, 24]: + if i == (len(recall_at_k) - 1): + print('%d[%.3f]' % (i + 1, recall_at_k[i])) + else: + print('%d[%.3f]' % (i + 1, recall_at_k[i]), end=', ') + return recall_at_k + + +def retrieve(vlads, feas, kpts, poses, num_cand=1, verify='ransac'): + loops = [] + if __name__ == '__main__': + flag = False + else: + flag = True + ts = [] + for i in tqdm(range(0, len(feas)), disable=flag, ncols=60, desc='retrieve loop'): + t0 = time.time() + valid_idx = list(set(range(0, len(feas))) - set(range(max(0, i - 50), min(len(feas), i + 50)))) + valid_idx = torch.tensor(valid_idx).to(vlads.device) + vlad_query = vlads[i].view(1, -1) + vlad_valid = vlads[valid_idx] + # global feature to query quickly + dis_vlad = torch.cdist(vlad_query, vlad_valid).view(-1, ) + dis, idx_cand = torch.topk(dis_vlad, num_cand, largest=False) + t_retrieve = time.time() - t0 + idx_cand = valid_idx[idx_cand] + # local feature to qverify + fea_query = feas[i] + if verify == 'ransac': + p1 = kpts[i] + p1 = p1.cpu().detach().numpy() + min_dis = torch.tensor([9999]) + idx_detect = idx_cand[0] + dis_truth = torch.tensor([9999]) + for idx_cand1 in idx_cand: + fea_cand1 = feas[idx_cand1] + p2 = kpts[idx_cand1].cpu().detach().numpy() + idx1, idx2, dis = tools.nn_match(fea_query, fea_cand1, 'cosine') + if len(idx1) < 31: + continue + idx1 = idx1.cpu().detach().numpy() + idx2 = idx2.cpu().detach().numpy() + try: + model, inliers = ransac((p1[idx1, 0:2], p2[idx2, 0:2]), model_class=EuclideanTransform, min_samples=15, max_trials=3, residual_threshold=1) + num_inlier = np.sum(inliers) + # r = model.params[0:2, 0:2] + dis_estimate = np.linalg.norm(model.params[0:2, 2]) + # rot = model.rotation + if num_inlier > 30: # ransac存在足够内点 + if min_dis > dis_estimate: + min_dis = dis_estimate + idx_detect = idx_cand1 + dis_truth = torch.linalg.norm((poses[i, 0:3, 3] - poses[idx_detect, 0:3, 3])) + except: + pass + loops.append([i, idx_detect.item(), min_dis.item(), dis_truth.item()]) + + else: + idx_detect = idx_cand[0] + dis_truth = torch.linalg.norm(poses[i, 0:3, 3] - poses[idx_detect, 0:3, 3]) + loops.append([i, idx_detect.item(), dis[0].item(), dis_truth.item()]) + t_verify = time.time() - t0 - t_retrieve + + ts.append([t_retrieve, t_verify]) + # if loops[-1][2] < 4 and loops[-1][1] < i: + # loop1.append(loops[-1][1]) + # x = poses[:, 0, 3] + # y = poses[:, 1, 3] + # x1 = x[loop1] + # y1 = y[loop1] + # plt.plot(x, y, 'b.', markersize=1) + # plt.plot(x1, y1, 'ro', markersize=2, markerfacecolor='none') + # plt.axis('equal') + # plt.show() + ts = np.array(ts) * 1000 + # np.savetxt('times.txt', ts) + # x=np.arange(len(ts)) + # plt.plot(x,ts[:,0],'b.') + # plt.plot(x,ts[:,1],'r.') + # plt.show() + loops = np.array(loops) + return loops + + +def pr_curve(poses, loops, sequence, positive_distance=4): + + map_tree_poses = KDTree(poses[:, 0:3, 3]) + reverse_loops = [] + real_loop = [] + for i in range(0,len(poses)): + min_range = max(0, i - 50) + max_range = min(i + 50, poses.shape[0]) + current_pose = poses[i] + indices = map_tree_poses.query_radius(np.expand_dims(current_pose[0:3, 3], 0), positive_distance) + valid_idxs = list(set(indices[0]) - set(range(min_range, max_range))) + valid_idxs = np.array(valid_idxs) + if len(valid_idxs) > 0: + # dis = np.linalg.norm(current_pose[0:3, 3]-poses[valid_idxs,0:3,3],axis=1) + real_loop.append(1) + r0 = poses[i, :3, :3] + rs = poses[valid_idxs, :3, :3] + dr = np.linalg.inv(r0) @ rs.swapaxes(0, 2) + angle = np.arccos(np.clip((np.trace(dr) - 1) / 2, -1, 1)) + angle = angle * 180 / np.pi + if np.min(angle) > 90: + reverse_loops.append(1) + else: + reverse_loops.append(0) + else: + real_loop.append(0) + reverse_loops.append(0) + reverse_loops = np.array(reverse_loops) + real_loop = np.array(real_loop) + # loops=np.hstack((loops,real_loop.reshape(-1,1))) + # np.savetxt('loops_bev%02d.txt'%sequence,loops,fmt='%.6f') + # print('sequence %d, %d frames, %d loops, %d reverse loops' % (sequence,len(real_loop), np.sum(real_loop), np.sum(reverse_loops))) + # # return 0 + distances = loops[:, 3] + detected_loop = loops[:, 2] + precision2 = [1] + recall2 = [0] + for thr in np.unique(detected_loop): + tp = detected_loop <= thr + tp = tp & real_loop + tp = tp & (distances <= positive_distance) + tp = tp.sum() + fp = (detected_loop <= thr).sum() - tp + fn = (real_loop.sum()) - tp + if (tp + fp) > 0.: + precision2.append(tp / (tp + fp)) + else: + precision2.append(1.) + + recall2.append(tp / (tp + fn)) + f1s = [] + for i in range(len(recall2)): + f1s.append((2 * precision2[i] * recall2[i]) / (precision2[i] + recall2[i])) + f1 = max(f1s) + recall_p1 = np.max(np.array(recall2)[np.array(precision2) == 1]) + # plt.plot(recall2, precision2, 'b-') + # plt.show() + pr = np.array(precision2 + recall2).reshape(2, -1).T + # np.save('fusion_pr_%02d.npy' % sequence, pr) + ap = auc(recall2, precision2) + idx=loops[:,2]<9999 + loops1=loops[idx] + rp=np.sum(np.abs(loops1[:,2]-loops1[:,3])<2)/len(loops1) + + print('Sequence %02d, AP %.3f, Recall@100 %.3f, F1 %.3f, RP %.3f/%d' % (sequence, ap, recall_p1, f1, rp, len(loops1))) + # if ap<0.1: + # exit() + + # --- save PR curve to file (for server usage) --- + try: + out_dir = os.path.join('/home/adlab36/chenyouyuan/FUSIONLCD', 'result', 'plots') + os.makedirs(out_dir, exist_ok=True) + plt.figure(figsize=(6, 5)) + plt.plot(recall2, precision2, 'b-', marker='o', linewidth=2) + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.title(f'Sequence {int(sequence):02d} PR (AP={ap:.3f})') + plt.grid(True, linestyle='--', alpha=0.4) + fname = os.path.join(out_dir, f'pr_sequence_{int(sequence):02d}.png') + plt.tight_layout() + plt.savefig(fname, dpi=150) + plt.close() + except Exception as e: + # 如果保存失败也不要阻塞主流程 + print(f'Warning: failed to save PR plot for sequence {sequence}: {e}') + + return ap, recall_p1, f1 + + +def lcd(data): + vlads = data['vlads'].cuda() + kpts = data['key_points'] + sequences = data['sequences'] + poses = data['pose_query'].cuda() + feas = data['fea_kpt'].cuda() + # feas = feas / torch.sqrt(torch.sum(feas ** 2, -1, keepdim=True) + 1e-8) + result = [] + recall_at_ks = [] + recall_at_k=[] + for s in torch.unique(sequences): + # if s==54: + # continue + mask = sequences == s + + vlads1 = vlads[mask] + feas1 = feas[mask] + kpts1 = kpts[mask] + poses1 = poses[mask] + poses2 = poses1.cpu().detach().numpy() + # recall_at_k = recall_with_candidates(vlads1, poses1, s) + # idx=np.arange(len(vlads1)//2) + # idx=np.tile(idx, 2) + # vlads1, feas1, kpts1, poses1 =vlads1[idx], feas1[idx], kpts1[idx], poses1[idx] + loops = retrieve(vlads1, feas1, kpts1, poses1, 1, 'ransac') + ap, recall_p1, f1 = pr_curve(poses2, loops, s, 4) + recall_at_ks.append(recall_at_k) + result.append([ap, recall_p1, f1]) + + return result, recall_at_ks + + +if __name__ == '__main__': + np.random.seed(123) + # data = torch.load('/data4/caodanyang/results/FUSIONLCD/07030/database/database_bev.pth.tar') + # lcd(data) + print('----------------------------------------------------------------------') + data= torch.load('/home/adlab36/chenyouyuan/FUSIONLCD/result/log/database/database_bevp.pth.tar') + lcd(data) + print('----------------------------------------------------------------------') + # data=torch.load('/data4/caodanyang/results/FUSIONLCD/07030/database/database_fusion.pth.tar') + # lcd(data) + diff --git a/evaluate_pose.py b/evaluate_pose.py new file mode 100644 index 0000000..19696b1 --- /dev/null +++ b/evaluate_pose.py @@ -0,0 +1,191 @@ +import math +import os +import time +import warnings + +import numpy as np +import torch +import yaml +from skimage.measure import ransac +from skimage.transform import EuclideanTransform +from tqdm import tqdm + +import net +import tools +from dataset import KittiTotalLoader + +warnings.filterwarnings("ignore") + + +def npto_XYZRPY(rotmatrix): + ''' + Usa mathutils per trasformare una matrice di trasformazione omogenea in xyzrpy + https://docs.blender.org/api/master/mathutils.html# + WARNING: funziona in 32bits quando le variabili numpy sono a 64 bit + + :param rotmatrix: np array + :return: np array with the xyzrpy + ''' + + # qui sotto corrisponde a + # quat2eul([ 0.997785 -0.0381564 0.0358964 0.041007 ],'XYZ') + roll = math.atan2(-rotmatrix[1, 2], rotmatrix[2, 2]) + pitch = math.asin(rotmatrix[0, 2]) + yaw = math.atan2(-rotmatrix[0, 1], rotmatrix[0, 0]) + x = rotmatrix[:3, 3][0] + y = rotmatrix[:3, 3][1] + z = rotmatrix[:3, 3][2] + + return np.array([x, y, z, roll, pitch, yaw]) + + +def yt_error(pose1, pose2): + distance = np.linalg.norm((pose1 - pose2)[0:3, 3]) + yaw1 = npto_XYZRPY(pose1)[-1] + yaw2 = npto_XYZRPY(pose2)[-1] + yaw1 = yaw1 % (2 * np.pi) + yaw2 = yaw2 % (2 * np.pi) + dyaw = abs(yaw1 - yaw2) % (2 * np.pi) + angle = dyaw * 180 / np.pi + if angle > 180: + angle = 360 - angle + return distance, angle + + +def rt_error(pose1, pose2): + r0 = pose1[:3, :3] + r1 = pose2[:3, :3] + dr = np.linalg.inv(r0) @ r1 + angle = np.arccos(np.clip((np.trace(dr) - 1) / 2, -1, 1)) * 180 / np.pi + distance = np.sqrt(np.sum(np.square(pose1[:3, -1] - pose2[:3, -1]))) + return distance, angle + + +def pose_err(vlads=None, dataset=None, positive_distance=4., kpts=None, feas=None, model=None, num_cand=10): + poses = dataset.poses + error_ransac = [] + error_uot = [] + gt = dataset.gt + pairs = [] + num_reverse = 0 + for i in range(len(gt)): + sample = gt[i] + idx_query = sample['idx'] + positive_idxs = sample['positive_idxs'] + for j in range(len(positive_idxs)): + idx_positive = positive_idxs[j] + if idx_query < idx_positive: + pairs.append([idx_query, idx_positive]) + _, r = yt_error(poses[idx_query], poses[idx_positive]) + if r > 90: + num_reverse = num_reverse + 1 + # print('Total %d frames, %d pair of loops, %d[%.3f] reverse'%(len(poses),len(pairs),num_reverse,num_reverse/len(pairs))) + cnt_ransca = 0 + pairs = np.asarray(pairs) + idx = np.argsort(pairs[:, 1]) + pairs = pairs[idx] + times_uot=[] + times_ransac=[] + for i in tqdm(range(len(pairs)),ncols=60): + fea_query = feas[pairs[i][0]] + p1 = kpts[pairs[i][0]].cpu().detach().numpy() + fea_cand = feas[pairs[i][1]] + p2 = kpts[pairs[i][1]].cpu().detach().numpy() + pose_to_frame = np.matmul(np.linalg.inv(poses[pairs[i][1]]), poses[pairs[i][0]]) + + st_ransac=time.time() + idx1, idx2, dis = tools.nn_match(fea_query, fea_cand, 'cosine') + ransac_flag=False + if len(idx1) >= 20: + idx1 = idx1.cpu().detach().numpy() + idx2 = idx2.cpu().detach().numpy() + try: + result_ransac, inliers = ransac((p1[idx1, 0:3], p2[idx2, 0:3]), model_class=EuclideanTransform, min_samples=15, max_trials=3, residual_threshold=1.7) + num_inlier = np.sum(inliers) + if num_inlier > 30: # ransac存在一致的点且无缩 + cnt_ransca = cnt_ransca + 1 + error_ransac.append(yt_error(pose_to_frame, result_ransac.params)) + ransac_flag=True + except: + pass + # if not ransac_flag: + # try: + # result_ransac, inliers = ransac((p1[idx1, 0:3], p2[idx2, 0:3]), model_class=EuclideanTransform,min_samples=5,max_trials=10,residual_threshold=5) + # num_inlier = np.sum(inliers) + # cnt_ransca = cnt_ransca + 1 + # error_ransac.append(yt_error(pose_to_frame, result_ransac.params)) + # except: + # pass + times_ransac.append(time.time()-st_ransac) + + fea1 = feas[pairs[i], :, :].to(model.epsilon.device).permute(0, 2, 1) + kpts1 = kpts[pairs[i], :, :].to(model.epsilon.device) + bd = {'fea_kpt': fea1, 'key_points': kpts1} + st_uot=time.time() + bd = model(bd) + times_uot.append(time.time()-st_uot) + pose_estimate1 = bd['transformation'].squeeze(0).cpu().detach().numpy() + pose_estimate1 = np.vstack((pose_estimate1, [0, 0, 0, 1])) + error_uot.append(yt_error(pose_to_frame, pose_estimate1)) + + ransac_rate = cnt_ransca / len(pairs) + error_ransac = np.asarray(error_ransac) + error_uot = np.asarray(error_uot) + # np.save('error_ransac_%02d.npy' % dataset.sequence, error_ransac) + # np.save('error_uot_%02d.npy' % dataset.sequence, error_uot) + et_ransac = np.mean(error_ransac[:, 0]) + er_ransac = np.mean(error_ransac[:, 1]) + et_uot = np.mean(error_uot[:, 0]) + er_uot = np.mean(error_uot[:, 1]) + print('uot time: ',np.array(times_uot).mean()) + print('ransac time: ',np.array(times_ransac).mean()) + return et_ransac, er_ransac, et_uot, er_uot, ransac_rate + + +def estimate_pose(database): + try: + with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile: + cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader) + print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml")) + except: + with open(os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"), "r") as ymlfile: + cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader) + print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml")) + cfg = cfg['experiment'] + path_result = cfg['path_result'] + _, _, loader_test = KittiTotalLoader(cfg) + model = net.Fusion(cfg) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = model.to(device) + checkpoint = torch.load(tools.path_join(path_result, 'models', cfg['last_model'])) + model.load_state_dict(checkpoint['model']) + uot = model.uot + # uot = model.bev.uot + + vlads = database['vlads'] + key_points = database['key_points'] + fea_kpt = database['fea_kpt'] + sequences = database['sequences'] + print() + print('****************************************************************************') + end = 0 + for i in range(len(loader_test.dataset.datasets)): + start = end + end = start + len(loader_test.dataset.datasets[i]) + # end=0 + # start=0 + et_ransac, er_ransac, et_uot, er_uot, rate = pose_err(vlads=vlads[start:end], dataset=loader_test.dataset.datasets[i], + kpts=key_points[start:end], feas=fea_kpt[start:end], model=uot) + print('Sequence %02d' % (torch.unique(sequences)[i])) + print('ransac rate:%.4f, translation error:%.4f[m], rototion error:%.4f[deg]' % (rate, float(et_ransac), float(er_ransac))) + print('uot translation error:%.4f[m], uot rototion error:%.4f[deg],' % (float(et_uot), float(er_uot))) + print('****************************************************************************') + + +if __name__ == '__main__': + # CUDA_VISIBLE_DEVICES=5 python evaluate_pose.py + # CUDA_VISIBLE_DEVICES=2 nohup python -u evaluate_pose.py >03090.log 2>&1 & + # fuser /dev/nvidia* + database = torch.load('/data4/caodanyang/results/FUSIONLCD/07030/database/database_fusion.pth.tar') + estimate_pose(database) diff --git a/loop_gt.py b/loop_gt.py new file mode 100644 index 0000000..a144f92 --- /dev/null +++ b/loop_gt.py @@ -0,0 +1,130 @@ +import argparse +import torch +from torch.utils.data import Dataset +import pykitti +import os +from sklearn.neighbors import KDTree +import pickle +import numpy as np +skip_frame=50 + +class KITTILoader3DPosesOnlyLoopPositives(Dataset): + + def __init__(self, dir, sequence, poses, positive_range=5., negative_range=25., hard_range=None): + super(KITTILoader3DPosesOnlyLoopPositives, self).__init__() + + self.positive_range = positive_range + self.negative_range = negative_range + self.hard_range = hard_range + self.dir = dir + self.sequence = sequence + + if int(sequence) > 21: + self.poses = np.load(poses) + else: + calib = np.genfromtxt(os.path.join(dir, 'sequences', sequence, 'calib.txt'))[:, 1:] + T_cam_velo = np.reshape(calib[4], (3, 4)) + T_cam_velo = np.vstack([T_cam_velo, [0, 0, 0, 1]]) + poses2 = [] + with open(poses, 'r') as f: + for x in f: + x = x.strip().split() + x = [float(v) for v in x] + pose = np.zeros((4, 4)) + pose[0, 0:4] = np.array(x[0:4]) + pose[1, 0:4] = np.array(x[4:8]) + pose[2, 0:4] = np.array(x[8:12]) + pose[3, 3] = 1.0 + pose = np.linalg.inv(T_cam_velo) @ (pose @ T_cam_velo) + poses2.append(pose) + self.poses = np.stack(poses2) + self.kdtree = KDTree(self.poses[:, :3, 3]) + + def __len__(self): + return len(self.poses) + + def __getitem__(self, idx): + + x = self.poses[idx, 0, 3] + y = self.poses[idx, 1, 3] + z = self.poses[idx, 2, 3] + r0 = self.poses[idx, :3, :3] + rs = self.poses[:, :3, :3] + dr = np.linalg.inv(r0) @ rs.swapaxes(0, 2) + angle = np.arccos(np.clip((np.trace(dr) - 1) / 2, -1, 1)) + angle = angle * 180 / np.pi + + idx_angle = np.where(angle < 99999)[0] + anchor_pose = torch.tensor([x, y, z]) + + indices = self.kdtree.query_radius(anchor_pose.unsqueeze(0).numpy(), self.positive_range, sort_results=True, return_distance=True) + indices = [indices[0][0], indices[1][0]] + min_range = max(0, idx - skip_frame) + max_range = min(idx + skip_frame, len(self.poses)) + positive_idxs = list(set(indices[0]) & set(idx_angle) - set(range(min_range, max_range))) + loop_angle = angle[positive_idxs] + reverse = 0 + if len(loop_angle) > 0: + reverse=np.sum(loop_angle>90) + if min(loop_angle) > 90: + reverse = -1*reverse + positive_idxs.sort() + num_loop = len(positive_idxs) + + indices = self.kdtree.query_radius(anchor_pose.unsqueeze(0).numpy(), self.negative_range) + indices = set(indices[0]) + negative_idxs = set(range(len(self.poses))) - indices + negative_idxs = list(negative_idxs) + negative_idxs.sort() + + hard_idxs = None + if self.hard_range is not None: + inner_indices = self.kdtree.query_radius(anchor_pose.unsqueeze(0).numpy(), self.hard_range[0]) + outer_indices = self.kdtree.query_radius(anchor_pose.unsqueeze(0).numpy(), self.hard_range[1]) + hard_idxs = set(outer_indices[0]) - set(inner_indices[0]) + pass + + return num_loop, positive_idxs, negative_idxs, hard_idxs, reverse + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('--root_folder', default='/home/adlab36/chenyouyuan/FUSIONLCD', + help='dataset directory') + args = parser.parse_args() + base_dir = args.root_folder + for sequence in ['00', '05', '06', '07', '08', '09', '50', '54', '55', '56', '59']:#'00', '05', '06', '07', '08', '09', '50', '54', '55', '56', '59', '120205','130405' + if int(sequence) < 50: + poses_file = base_dir + "/sequences/" + sequence + "/poses.txt" + elif int(sequence)<100: + poses_file = base_dir + "/sequences/" + sequence + "/poses.npy" + else: + pass + + + dataset = KITTILoader3DPosesOnlyLoopPositives(base_dir, sequence, poses_file, 4, 15, [8, 15]) + lc_gt = [] + lc_gt_file = os.path.join(base_dir, 'sequences', sequence, 'loop_GT_4m.pickle') + loop_pairs = [] + loop_files = [] + for i in range(len(dataset)): + sample, pos, neg, hard, reverse = dataset[i] + if sample > 0.: + loop_files.append([i, reverse]) + sample_dict = {} + sample_dict['idx'] = i + sample_dict['positive_idxs'] = pos + for p in pos: + if i < p: + loop_pairs.append([i, p]) + # sample_dict['negative_idxs'] = neg + # sample_dict['hard_idxs'] = hard + lc_gt.append(sample_dict) + loop_files = np.array(loop_files) + num_reverse_file = int(np.sum(loop_files[:, 1]<0)) + num_reverse_pairs = int(np.sum(np.abs(loop_files[:, 1])))/2 + with open(lc_gt_file, 'wb') as f: + pickle.dump(lc_gt, f) + print('Sequence %02d done,%05d files, %05d files with loop, %05d[%.4f] files only has reverse loop, %05d loop pairs, %05d[%.4f] reverse loop' % + (int(sequence), len(dataset), len(loop_files), num_reverse_file, num_reverse_file / len(loop_files), len(loop_pairs),num_reverse_pairs,num_reverse_pairs/len(loop_pairs))) diff --git a/loss.py b/loss.py new file mode 100644 index 0000000..c6d3c69 --- /dev/null +++ b/loss.py @@ -0,0 +1,340 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from pytorch_metric_learning import distances + + +def tr_loss(batch_dict,key): + loss1 = (batch_dict[key][:, 0:3, 3] - + batch_dict['pose_to_frame'][:, 0:3, 3]).norm(dim=1).mean() + loss2 = (torch.acos(torch.clip(batch_dict[key][:, 0, 0].view(-1, 1), -1, 1)) - + torch.acos(torch.clip(batch_dict['pose_to_frame'][:, 0, 0].view(-1, 1), -1, 1))).norm(dim=1).mean() / 3.1415 * 180 + return loss1, loss2 + + +def gen_points_loss(batch_dict): + key_points_gen = batch_dict['key_points_gen'] + key_points = batch_dict['key_points'] + key_points_gen1 = torch.cat((key_points_gen, key_points_gen * 0), dim=2) + key_points_gen1[:, :, 3] = 1 + # pose_query=batch_dict['pose_query'] + # pose_positive=batch_dict['pose_positive'] + # poses=torch.cat((pose_query,pose_positive),dim=0) + # key_points_gen2=torch.bmm(poses,key_points_gen1.permute(0,2,1)).permute(0,2,1) + # key_points2=torch.bmm(poses,key_points.permute(0,2,1)).permute(0,2,1) + # loss_gpo=(key_points_gen2[:,:,:2]-key_points2[:,:,:2]).norm(p=1,dim=2).mean() + pose_to_frame = batch_dict['pose_to_frame'] + B = pose_to_frame.shape[0] + src_pts = key_points[:B] + tgt_pts = key_points[B:] + src_pts_gen = key_points_gen1[:B] + tgt_pts_gen = key_points_gen1[B:] + srcs = torch.cat((src_pts, src_pts_gen), dim=0) + tgts = torch.cat((tgt_pts_gen, tgt_pts), dim=0) + pose_to_frame1 = torch.cat((pose_to_frame, pose_to_frame), dim=0) + srcs1 = torch.bmm(pose_to_frame1, srcs.permute(0, 2, 1)).permute(0, 2, 1) + loss = torch.mean(torch.abs(srcs1[:, :, :2] - tgts[:, :, :2])) + return loss + + +def rand_dis(x, y): + assert len(x.shape)==2 and len(y.shape)==2,'x and y must be 2 dim' + N, N = x.size() + ids=torch.arange(N).to(x.device) + idx = ids.view(1, N).repeat(N, 1) + mask = ~(idx == idx.transpose(0, 1)) + idx1 = idx[mask].view(N, N - 1) + random_indices = torch.randint(N - 1, size=(N,)).to(x.device) + rand_idx = torch.gather(idx1, 1, random_indices.view(-1, 1)) + rand_idx1 = torch.cat([ids.view(N, 1), rand_idx], dim=1) + diag = ids.view(N, 1).repeat(1, 2) + x1 = x[rand_idx1[:, 0], rand_idx1[:, 1]] + x2 = x1*0 + x3 = torch.cat([x1.view(N, 1), x2.view(N, 1)], dim=1) + y1 = y[rand_idx1[:, 0], rand_idx1[:, 1]] + y2 = y[diag[:, 0], diag[:, 1]] + y3 = torch.cat([y1.view(N, 1), y2.view(N, 1)], dim=1) + dis=torch.abs(x3-y3).mean()+F.relu(0.2-torch.abs(y1)).mean() + return dis + + +def gen_feature_loss(batch_dict): + #BCN + fea_pt_dual_gen = batch_dict['fea_pt_dual_gen'] + fea_pl_dual_gen = batch_dict['fea_pl_dual_gen'] + fea_kpt_original_gen = batch_dict['fea_kpt_original_gen'] + # fea_kpt_gen_gen=batch_dict['fea_kpt_gen_gen'] + fea_pt_dual = batch_dict['fea_pt_dual'] + fea_pl_dual = batch_dict['fea_pl_dual'] + fea_kpt_original = batch_dict['fea_kpt_original'] + # fea_pt_dual = batch_dict['fea_pt_dual'].detach() + # fea_pl_dual = batch_dict['fea_pl_dual'].detach() + # fea_kpt_original = batch_dict['fea_kpt_original'].detach() + + b = fea_pl_dual.shape[0] + loss0 = 0 + loss1 = 0 + loss2 = 0 + loss3 = 0 + relation = batch_dict['relation'] + nums=0 + for i in range(b): + cnt = torch.sum((relation[i, :, -1, 0] > 0) & (relation[i, :, -1, 1] > 0)) + nums+=cnt + fea_pt_dual1 = fea_pt_dual[i, :, :cnt] # 匹配点云特征,CN + fea_pt_dual_gen1 = fea_pt_dual_gen[i, :, :cnt] # 匹配点云特征,生成于图像 + fea_pl_dual1 = fea_pl_dual[i, :, :cnt] # 匹配图像特征 + fea_pl_dual_gen1 = fea_pl_dual_gen[i, :, :cnt] # 匹配图像特征,生成于点云 + + # loss0 = loss0 + torch.abs(fea_pt_dual1 - fea_pt_dual_gen1).mean() + loss0 = loss0 + (1 - F.cosine_similarity(fea_pt_dual1,fea_pt_dual_gen1,dim=0)).mean() + # loss0 = loss0 + F.mse_loss(fea_pt_dual1, fea_pt_dual_gen1) + # loss0 = loss0 + ((fea_pt_dual1 - fea_pt_dual_gen1).norm(p=2, dim=0)).mean() + # sims00=tools.batch_distance(fea_pt_dual1.unsqueeze(0).permute(0,2,1),fea_pt_dual1.unsqueeze(0).permute(0,2,1),'cosine') + # sims01=tools.batch_distance(fea_pt_dual_gen1.unsqueeze(0).permute(0,2,1),fea_pt_dual_gen1.unsqueeze(0).permute(0,2,1),'cosine') + # loss0 = loss0 + torch.abs(sims00-sims01).mean() + + # loss1 = loss1 + torch.abs(fea_pl_dual1 - fea_pl_dual_gen1).mean() + loss1 = loss1 + (1 - F.cosine_similarity(fea_pl_dual1,fea_pl_dual_gen1,dim=0)).mean() + # loss1 = loss1 + F.mse_loss(fea_pl_dual1, fea_pl_dual_gen1) + # loss1 = loss1 + ((fea_pl_dual1 - fea_pl_dual_gen1).norm(p=2, dim=0)).mean() + # sims10=tools.batch_distance(fea_pl_dual1.unsqueeze(0).permute(0,2,1),fea_pl_dual1.unsqueeze(0).permute(0,2,1),'cosine') + # sims11=tools.batch_distance(fea_pl_dual_gen1.unsqueeze(0).permute(0,2,1),fea_pl_dual_gen1.unsqueeze(0).permute(0,2,1),'cosine') + # loss1 = loss1 + torch.abs(sims10-sims11).mean() + + #全景特征生成模块损失计算 + # loss2 = loss2 + torch.abs(fea_kpt_original[i] - fea_kpt_original_gen[i]).mean() + loss2= loss2 + (1-F.cosine_similarity(fea_kpt_original[i], fea_kpt_original_gen[i],dim=0)).mean() + # loss2 = loss2 + F.mse_loss(fea_kpt_original[i], fea_kpt_original_gen[i]) + # loss2 = loss2 + ((fea_kpt_original[i] - fea_kpt_original_gen[i]).norm(p=2, dim=0)).mean() + # sims20=tools.batch_distance(fea_kpt_original[i:i+1].permute(0,2,1),fea_kpt_original[i:i+1].permute(0,2,1),'cosine') + # sims21=tools.batch_distance(fea_kpt_original_gen[i:i+1].permute(0,2,1),fea_kpt_original_gen[i:i+1].permute(0,2,1),'cosine') + # loss2 = loss2 + torch.abs(sims20-sims21).mean() + loss0 = loss0 / b + loss1 = loss1 / b + loss2 = loss2 / b + return loss0, loss1, loss2, loss3 + + +def sinkhorn_matches_loss(batch_dict,key): + project_kpts = batch_dict[key] # calculated from corrspondence of kpts + src_coords = batch_dict['key_points'] + pose_to_frame = batch_dict['pose_to_frame'] + src_coords = src_coords.clone().view(batch_dict['batch_size'], -1, 4) + B, N_POINT, _ = src_coords.shape + B = B // 2 + src_coords = src_coords[:B, :, [0, 1, 2, 3]] + src_coords[:, :, -1] = 1. + gt_dst_coords = torch.bmm(pose_to_frame, src_coords.permute(0, 2, 1)) # True project kpts + gt_dst_coords = gt_dst_coords.permute(0, 2, 1)[:, :, :3] + loss = (gt_dst_coords - project_kpts).norm(dim=2).mean() + return loss + + + +def score_loss(batch_dict): + score = batch_dict['score_bev'] + label_score = batch_dict['label_score'] + label_score = torch.cat([label_score[:, :, :, 0], label_score[:, :, :, 1]], dim=0) + mask1 = score > 1e-8 + # mask2 = label_score > 1e-8 + # mask = mask1 | mask2 + score = score[mask1] + label_score = label_score[mask1] + loss = nn.functional.mse_loss(score, label_score) + + return loss + + +def pose_loss(batch_dict,key): + src_coords = batch_dict['key_points'] + src_coords = src_coords.clone().view(batch_dict['batch_size'], -1, 4) + delta_pose = batch_dict['pose_to_frame'] + B, N_POINT, _ = src_coords.shape + B = B // 2 + src_coords = src_coords[:B] + gt_dst_coords = torch.bmm(delta_pose, src_coords.permute(0, 2, 1)).float() + gt_dst_coords = gt_dst_coords.permute(0, 2, 1)[:, :, :3] + + transformation = batch_dict[key] + pred_dst_coords = torch.bmm(transformation, src_coords.permute(0, 2, 1)) + pred_dst_coords = pred_dst_coords.permute(0, 2, 1)[:, :, :3] + loss = torch.mean(torch.abs(pred_dst_coords - gt_dst_coords)) + return loss + + +def get_all_triplets(dist_mat, pos_mask, neg_mask, is_inverted=False, margin=0.5, different_embedding=False): + if not different_embedding: + pos_mask = torch.triu(pos_mask, 1) + triplets = pos_mask.unsqueeze(2) * neg_mask.unsqueeze(1) + return torch.where(triplets) + + +def hardest_negative_selector(dist_mat, pos_mask, neg_mask, is_inverted, margin=0.5, different_embedding=False): + if not different_embedding: + pos_mask = torch.triu(pos_mask, 1) + a, p = torch.where(pos_mask) + if neg_mask.sum() == 0: + return a, p, None + if is_inverted: + dist_neg = dist_mat * neg_mask + n = torch.max(dist_neg, dim=1) + else: + dist_neg = dist_mat.clone() + dist_neg[~neg_mask] = dist_neg.max() + 1. + _, n = torch.min(dist_neg, dim=1) + n = n[a] + return a, p, n + + +def random_negative_selector(dist_mat, pos_mask, neg_mask, is_inverted, margin=0.5, different_embedding=False): + if not different_embedding: + pos_mask = torch.triu(pos_mask, 1) + a, p = torch.where(pos_mask) + selected_negs = [] + for i in range(a.shape[0]): + possible_negs = torch.where(neg_mask[a[i]])[0] + if len(possible_negs) == 0: + return a, p, None + + dist_neg = dist_mat[a[i], possible_negs] + if is_inverted: + curr_loss = -dist_mat[a[i], p[i]] + dist_neg + margin + else: + curr_loss = dist_mat[a[i], p[i]] - dist_neg + margin + + if len(possible_negs[curr_loss > 0]) > 0: + possible_negs = possible_negs[curr_loss > 0] + random_neg = np.random.choice(possible_negs.cpu().numpy()) + selected_negs.append(random_neg) + n = torch.tensor(selected_negs, dtype=a.dtype, device=a.device) + return a, p, n + + +def semihard_negative_selector(dist_mat, pos_mask, neg_mask, is_inverted, margin=0.5, different_embedding=False): + if not different_embedding: + pos_mask = torch.triu(pos_mask, 1) + a, p = torch.where(pos_mask) + selected_negs = [] + for i in range(a.shape[0]): + possible_negs = torch.where(neg_mask[a[i]])[0] + if len(possible_negs) == 0: + return a, p, None + + dist_neg = dist_mat[a[i], possible_negs] + if is_inverted: + curr_loss = -dist_mat[a[i], p[i]] + dist_neg + margin + else: + curr_loss = dist_mat[a[i], p[i]] - dist_neg + margin + + semihard_idxs = (curr_loss > 0) & (curr_loss < margin) + if len(possible_negs[semihard_idxs]) > 0: + possible_negs = possible_negs[semihard_idxs] + random_neg = np.random.choice(possible_negs.cpu().numpy()) + selected_negs.append(random_neg) + n = torch.tensor(selected_negs, dtype=a.dtype, device=a.device) + return a, p, n + + +class TripletLoss(nn.Module): + def __init__(self, margin: float, triplet_selector, distance: distances.BaseDistance): + super(TripletLoss, self).__init__() + self.margin = margin + self.triplet_selector = triplet_selector + self.distance = distance + + def forward(self, embeddings, pos_mask, neg_mask, other_embeddings=None): + if other_embeddings is None: + other_embeddings = embeddings + dist_mat = self.distance(embeddings, other_embeddings) + triplets = self.triplet_selector( + dist_mat, pos_mask, neg_mask, self.distance.is_inverted) + distance_positive = dist_mat[triplets[0], triplets[1]] + if triplets[-1] is None: + if self.distance.is_inverted: + return F.relu(1 - distance_positive).mean() + else: + return F.relu(distance_positive).mean() + distance_negative = dist_mat[triplets[0], triplets[2]] + curr_margin = self.distance.margin( + distance_positive, distance_negative) + loss = F.relu(curr_margin + self.margin) + return loss.mean() + + +def _pairwise_distance(x, squared=False, eps=1e-16): + # Compute the 2D matrix of distances between all the embeddings. + + cor_mat = torch.matmul(x, x.t()) + norm_mat = cor_mat.diag() + distances = norm_mat.unsqueeze(1) - 2 * cor_mat + norm_mat.unsqueeze(0) + distances = F.relu(distances) + + if not squared: + mask = torch.eq(distances, 0.0).float() + distances = distances + mask * eps + distances = torch.sqrt(distances) + distances = distances * (1.0 - mask) + + return distances + + +class TotalLoss(nn.Module): + def __init__(self, cfg): + super(TotalLoss, self).__init__() + if 'hardest' == cfg['negetative_selsector']: + neg_selector = hardest_negative_selector + elif 'semihard' == cfg['negetative_selsector']: + neg_selector = semihard_negative_selector + else: + neg_selector = random_negative_selector + self.trip_fun = TripletLoss(margin=cfg['trip_margin'], triplet_selector=neg_selector, distance=distances.LpDistance()) + self.negetative_distcance = 50 + + def forward(self, batch_dict): + l_pose=l_score=l_match=l_tra=l_rot=l_gb=l_gi=l_gpa=l_gpo=l_kpl = 0 + if 'key_points' in batch_dict.keys(): + l_score = score_loss(batch_dict) + l_match1,l_pose1,l_match2,l_pose2,l_tra1,l_rot1,l_tra2,l_rot2=0,0,0,0,0,0,0,0 + + if 'transformation_original' in batch_dict.keys(): + l_match1 = sinkhorn_matches_loss(batch_dict,'project_kpts_original') + l_tra1, l_rot1 = tr_loss(batch_dict,'transformation_original') + l_pose1 = pose_loss(batch_dict,'transformation_original') + if 'transformation_fusion' in batch_dict.keys(): + l_match2 = sinkhorn_matches_loss(batch_dict,'project_kpts_fusion') + l_tra2, l_rot2 = tr_loss(batch_dict,'transformation_fusion') + l_pose2 = pose_loss(batch_dict,'transformation_fusion') + cnt=1 + if min(l_rot1,l_rot2)>0: + cnt=2 + l_match=(l_match1+l_match2)/cnt + l_pose=(l_pose1+l_pose2)/cnt + l_tra=(l_tra1+l_tra2)/cnt + l_rot=(l_rot1+l_rot2)/cnt + if ('fea_pt_dual_gen' in batch_dict.keys()) or ('fea_pl_dual_gen' in batch_dict.keys()): + l_gb, l_gi, l_gpa,l_kpl = gen_feature_loss(batch_dict) + if 'key_points_gen' in batch_dict.keys(): + l_gpo = gen_points_loss(batch_dict) + if 'sequence' in batch_dict: + neg_mask = batch_dict['sequence'].view(1, -1) != batch_dict['sequence'].view(-1, 1) + else: + neg_mask = torch.zeros((batch_dict['pose_query'].shape[0] * 2, batch_dict['pose_query'].shape[0] * 2), dtype=torch.bool) + pair_dist = _pairwise_distance(batch_dict['pose_query'][:, 0:3, 3]) + neg_mask = ((pair_dist > self.negetative_distcance) | neg_mask.to(pair_dist.device)) + neg_mask = neg_mask.repeat(2, 2) + batch_size = batch_dict['batch_size'] + pos_mask = torch.zeros((batch_size, batch_size), device=neg_mask.device) + + for i in range(batch_size // 2): + pos_mask[i, i + batch_size // 2] = 1 + pos_mask[i + batch_size // 2, i] = 1 + l_triplet = self.trip_fun(batch_dict['vlads'], pos_mask, neg_mask) + l_total = l_score + l_pose + 0.05 * l_match + l_triplet + (l_gb + l_gi + l_gpa + l_kpl) + loss = [l_total, l_pose, l_score, l_match, l_triplet, l_tra, l_rot, l_gb, l_gi, l_gpa, l_gpo,l_kpl] + for i in range(len(loss)): + if loss[i]==0: + loss[i]=loss[0]*0 + batch_dict['loss']=loss + return batch_dict \ No newline at end of file diff --git a/models/checkpoint_bev.pth.tar b/models/checkpoint_bev.pth.tar new file mode 100644 index 0000000..2e87bfb Binary files /dev/null and b/models/checkpoint_bev.pth.tar differ diff --git a/models/checkpoint_bevp.pth.tar b/models/checkpoint_bevp.pth.tar new file mode 100644 index 0000000..047cc76 Binary files /dev/null and b/models/checkpoint_bevp.pth.tar differ diff --git a/models/checkpoint_fusion.pth.tar b/models/checkpoint_fusion.pth.tar new file mode 100644 index 0000000..e8af4ae Binary files /dev/null and b/models/checkpoint_fusion.pth.tar differ diff --git a/net.py b/net.py new file mode 100644 index 0000000..18a7ef5 --- /dev/null +++ b/net.py @@ -0,0 +1,628 @@ +import cv2 +import math +import matplotlib.pyplot as plt +import numpy as np +import torch +import torch._utils +import torch.nn as nn +import torch.nn.functional as F +from uot import UOTHead +from netvlad import NetVLAD, NetVLADLoupe +from ALIKE.alike import configs +from ALIKE.alnet import ALNet +from BEVNet import RICNN, EncodePosition, RIAvgpool2d, RIMaxpool2d +import tools + + +def simple_nms(scores, nms_radius=2, itertation=2, mode='1'): + """ Fast Non-maximum suppression to remove nearby points """ + assert (nms_radius >= 0) + if mode == 'ri': + max_pool = RIMaxpool2d(kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius) + else: + max_pool = nn.MaxPool2d(kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius) + + zeros = torch.zeros_like(scores) + max_mask = scores == max_pool(scores) + + for _ in range(itertation): + supp_mask = max_pool(max_mask.float()) > 0 + supp_scores = torch.where(supp_mask, zeros, scores) + new_max_mask = supp_scores == max_pool(supp_scores) + max_mask = max_mask | (new_max_mask & (~supp_mask)) + return torch.where(max_mask, scores, zeros) + + +class BEVHead(nn.Module): + def __init__(self, alnet='alike-n', iter=5, num_kpt=100, cluster_num=16, vlad_size=256): + super(BEVHead, self).__init__() + cfg = configs[alnet] + self.feature_extractor = ALNet(c1=cfg['c1'], c2=cfg['c2'], c3=cfg['c3'], c4=cfg['c4'], dim=cfg['dim'], + single_head=cfg['single_head']) + self.feature_size = int(self.feature_extractor.feature_size) + self.select = 'maxpool' + self.num_kpt = num_kpt + self.ep = EncodePosition(feature_size=self.feature_size) + self.uot = UOTHead(nb_iter=iter,name='original') + self.netvlad_bev = NetVLAD(self.feature_size, cluster_num) + # state_dict=torch.load('/data4/caodanyang/results/FUSIONLCD/bev_07250/models/checkpoint_049.pth.tar', map_location='cpu')['model'] + # state_dict_new={} + # for k,v in state_dict.items(): + # state_dict_new[k[4:]]=v + # self.load_state_dict(state_dict_new) + # for param in self.parameters(): + # param.requires_grad = False + + def forward(self, batch_dict): + assert type(batch_dict) is dict, 'Input should be a dict' + bev = batch_dict['bev'] + guider = (bev[:, 2:3] > 0).float() + b, c, h_bev, w_bev = bev.shape + x = bev[:, 0:3, :, :] + points = bev[:, 3:7, :, :] # xyzi + points[:, 2] = 0 + points[:, 3] = 1 + + score_bev, feature_bev = self.feature_extractor(x) + score_bev = score_bev * guider + + if self.select == 'avgpool': + avgpool = RIAvgpool2d(kernel_size=5, stride=4, padding=1) + grid = np.array(np.meshgrid(np.arange(h_bev), np.arange(w_bev))).swapaxes(0, 2) + grid = torch.from_numpy(grid).to(x.device).permute(2, 0, 1).unsqueeze(0).repeat(b, 1, 1, 1) + score_bev_avg = avgpool(score_bev) + grid_avg = avgpool(grid.float() * score_bev) / (score_bev_avg + 1e-8) + grid_avg = torch.round(grid_avg).long().permute(0, 2, 3, 1) + points_avg = avgpool(score_bev * points) / (score_bev_avg + 1e-8) + feature_bev_avg = avgpool(feature_bev * score_bev) / (score_bev_avg + 1e-8) + score_bev = score_bev.view(b, h_bev, w_bev) + score_bev_avg = score_bev_avg.squeeze(1) + kpts = [] + feas_kpt = [] + pixels_kpt = [] + # cnt=0 + for i in range(b): + uv = list(torch.where(score_bev_avg[i] > 0)) + num_kpt = int(self.num_kpt) + if num_kpt == 0: + print('NO BEV key point') + exit() + while len(uv[0]) < num_kpt: + uv[0] = torch.cat([uv[0], uv[0][:(num_kpt - len(uv[0]))]]) + uv[1] = torch.cat([uv[1], uv[1][:(num_kpt - len(uv[1]))]]) + score_bev0 = score_bev_avg[i, uv[0], uv[1]] + score_bev1, idx = torch.topk(score_bev0, k=self.num_kpt) + # cnt=max(cnt,len(uv[0])) + # idx=torch.arange(len(uv[0])).to(x.device) + pc = points_avg[i, :, uv[0], uv[1]].permute(1, 0) + # pc = torch.cat([pc, pc * 0], dim=1) + kpt = pc[idx] + fea_kpt = feature_bev_avg[i, :, uv[0][idx], uv[1][idx]] + pixel_kpt = grid_avg[i, uv[0][idx], uv[1][idx]] + pixels_kpt.append(pixel_kpt) + kpts.append(kpt.unsqueeze(0)) + feas_kpt.append(fea_kpt.unsqueeze(0)) + else: + score_bev_max = simple_nms(score_bev, nms_radius=3) + score_bev = score_bev.view(b, h_bev, w_bev) + score_bev_max = score_bev_max.view(b, h_bev, w_bev) + kpts = [] + feas_kpt = [] + pixels_kpt = [] + for i in range(b): + uv = list(torch.where((score_bev[i] == score_bev_max[i]) & (score_bev[i] > 0))) + num_kpt = int(self.num_kpt) + if num_kpt == 0: + print('NO BEV key point') + exit() + while len(uv[0]) < num_kpt: + uv[0] = torch.cat([uv[0], uv[0][:(num_kpt - len(uv[0]))]]) + uv[1] = torch.cat([uv[1], uv[1][:(num_kpt - len(uv[1]))]]) + score_bev0 = score_bev[i, uv[0], uv[1]] + # sc0 = score_bev0.cpu().detach().numpy() + score_bev1, idx = torch.topk(score_bev0, k=self.num_kpt) + pc = points[i, :, uv[0], uv[1]].permute(1, 0) + # pc = torch.cat([pc, pc * 0], dim=1) + kpt = pc[idx] + fea_kpt = feature_bev[i, :, uv[0][idx], uv[1][idx]] + pixel_kpt = torch.cat([uv[0][idx], uv[1][idx]]).view(2, -1).T + pixels_kpt.append(pixel_kpt.unsqueeze(0)) + kpts.append(kpt.unsqueeze(0)) + feas_kpt.append(fea_kpt.unsqueeze(0)) + + # kpts1=torch.zeros((b,cnt,kpt.shape[1])).to(x.device) + # feas_kpt1=torch.zeros((b,fea_kpt.shape[0],cnt)).to(x.device) + # for i in range(b): + # kpts1[i,:kpts[i].shape[1]]=kpts[i].squeeze(0) + # feas_kpt1[i,:,:feas_kpt[i].shape[2]]=feas_kpt[i].squeeze(0) + + kpts = torch.cat(kpts) + feas_kpt = torch.cat(feas_kpt) + pixels_kpt = torch.cat(pixels_kpt) + if hasattr(self, 'ep'): + feas_kpt = self.ep(kpts, feas_kpt) + batch_dict['pixels_kpt'] = pixels_kpt + batch_dict['score_bev'] = score_bev + batch_dict['fea_kpt_original'] = feas_kpt + batch_dict['fea_bev'] = feature_bev + batch_dict['key_points'] = kpts + if hasattr(self, 'netvlad_bev'): + try: + vlad_bev = self.netvlad_bev(feas_kpt.transpose(1, 2).contiguous()) + except: + vlad_bev = self.netvlad_bev(feas_kpt.unsqueeze(3)) + batch_dict['vlad_bev'] = vlad_bev + if ('pose_to_frame' in batch_dict.keys()) and (hasattr(self, 'uot')): + self.uot(batch_dict) + #################################### show bev and kpt ############################################ + if 0: + for i in range(b): + bevshow = x[i].permute(1, 2, 0).cpu().detach().numpy() + bevshow = np.ascontiguousarray(bevshow[:, :, 0:3] * 255, dtype=np.uint8) + bevshow1 = bevshow.copy() + bevshow1[:, 1] = [255, 255, 255] + for j in range(kpt.shape[0]): + center = (int(uv[1][idx[j]].cpu().detach().numpy()), int(uv[0][idx[j]].cpu().detach().numpy())) + cv2.circle(bevshow1, center, 2, (0, 0, 255), -1, cv2.LINE_AA) + bevshow2 = np.hstack((bevshow, bevshow1)) + # cv2.namedWindow('2', cv2.WINDOW_NORMAL) + # cv2.imshow('2', bevshow2) + # cv2.waitKey(0) + fig = plt.figure() + plt.imshow(bevshow2) + plt.show() + ######################################################################################################### + + #################################### show match ############################################ + if 0: + for i in range(b // 2): + kpt1 = kpts[i] + pose_to_frame = batch_dict['pose_to_frame'][i] + # pose_to_frame = batch_dict['transformation'][i] + # pose_to_frame = torch.cat((pose_to_frame, torch.tensor([0, 0, 0, 1]).view(1, 4).to(pose_to_frame.device))) + kpt1 = (pose_to_frame @ kpt1.permute(1, 0)).permute(1, 0) + kpt2 = kpts[i + b // 2] + bev1 = batch_dict['bev'][i][0:3].permute(1, 2, 0) + bev1 = np.ascontiguousarray(bev1.cpu().detach().numpy() * 255, dtype=np.uint8) + bev2 = batch_dict['bev'][i + b // 2][0:3].permute(1, 2, 0) + bev2 = np.ascontiguousarray(bev2.cpu().detach().numpy() * 255, dtype=np.uint8) + pixel1 = pixels_kpt[i].cpu().detach().numpy() + pixel2 = pixels_kpt[i + b // 2].cpu().detach().numpy() + fea1 = feas_kpt[i].permute(1, 0).cpu().detach().numpy() + fea2 = feas_kpt[i + b // 2].permute(1, 0).cpu().detach().numpy() + idx1, idx2, dis = tools.nn_match(fea1, fea2, 'cosine') + # idx11, idx21, dis1 = tools.nn_match(kpt1, kpt2, 'euclidean') + # idx1 = idx1[dis < 0.1] + # idx2 = idx2[dis < 0.1] + h, w, _ = bev1.shape + img = np.hstack((bev1, bev2)) + img[:, w] = [255, 255, 255] + tp = 0 + img1 = img.copy() + for j in range(len(pixel1)): + center1 = (int(pixel1[j, 1]), int(pixel1[j, 0])) + center2 = (int(pixel2[j, 1]) + w, int(pixel2[j, 0])) + cv2.circle(img, center1, 2, (155, 155, 155), -1, cv2.LINE_AA) + cv2.circle(img, center2, 2, (155, 155, 155), -1, cv2.LINE_AA) + for j in range(len(idx1)): + center1 = (int(pixel1[idx1[j], 1]), int(pixel1[idx1[j], 0])) + center2 = (int(pixel2[idx2[j], 1]) + w, int(pixel2[idx2[j], 0])) + dis_kpt = (kpt1[idx1[j]] - kpt2[idx2[j]]).norm(p=2) + if dis_kpt < 2: + tp = tp + 1 + cv2.line(img, center1, center2, (0, 166, 0), 1, cv2.LINE_AA) + else: + cv2.line(img, center1, center2, (0, 0, 188), 1, cv2.LINE_AA) + cv2.circle(img, center1, 2, (255, 255, 255), -1, cv2.LINE_AA) + cv2.circle(img, center2, 2, (255, 255, 255), -1, cv2.LINE_AA) + # print(np.arccos(pose_to_frame.cpu().detach().numpy()[0, 0]) / np.pi * 180, (tp / len(idx1))) + img2 = np.vstack((img1, img)) + img2[h, :] = [255, 255, 255] + cv2.namedWindow('bev match %.3f,%.1fdeg' % (tp / len(idx1), np.arccos(pose_to_frame.cpu().detach().numpy()[0, 0]) / np.pi * 180)) + cv2.imshow('bev match %.3f,%.1fdeg' % (tp / len(idx1), np.arccos(pose_to_frame.cpu().detach().numpy()[0, 0]) / np.pi * 180), img2) + cv2.waitKey(0) + ##################################################################################################### + + ############################################ ICP ################################################## + if 0: + import open3d as o3d + for i in range(b // 2): + pose_to_frame = batch_dict['pose_to_frame'][i].cpu().detach().numpy() + print('angle', np.arccos(pose_to_frame[0, 0]) / 3.14 * 180) + transformation = batch_dict['transformation'][i].cpu().detach().numpy() + transformation = np.vstack((transformation, [0, 0, 0, 1])) + scan1 = batch_dict['scan_query'][i].cpu().detach().numpy() + scan2 = batch_dict['scan_positive'][i].cpu().detach().numpy() + + pcd1 = o3d.geometry.PointCloud() + pcd1.points = o3d.utility.Vector3dVector(scan1[:, :3]) + pcd1.colors = o3d.utility.Vector3dVector([[0, 0, 1] for i in range(len(pcd1.points))]) + + pcd11 = o3d.geometry.PointCloud() + pcd11.points = o3d.utility.Vector3dVector(scan1[:, :3]) + pcd11.colors = o3d.utility.Vector3dVector([[0, 0, 1] for i in range(len(pcd1.points))]) + + pcd2 = o3d.geometry.PointCloud() + pcd2.points = o3d.utility.Vector3dVector(scan2[:, :3]) + pcd2.colors = o3d.utility.Vector3dVector([[0, 1, 0] for i in range(len(pcd2.points))]) + + icp_config = o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=200, relative_fitness=1e-6, + relative_rmse=1e-6) + trans_init = transformation + threshold = 2 + estimation_method = o3d.pipelines.registration.TransformationEstimationPointToPoint() + registration_result = o3d.pipelines.registration.registration_icp(pcd1, pcd2, threshold, trans_init, + estimation_method, icp_config) + + # 将待配准点云应用变换 + pcd1.transform(registration_result.transformation) + + vis1 = o3d.visualization.Visualizer() + vis1.create_window(window_name='registration', width=600, height=600) # 创建窗口 + render_option: o3d.visualization.RenderOption = vis1.get_render_option() # 设置点云渲染参数 + render_option.background_color = np.array([1, 1, 1]) # 设置背景色(这里为黑色) + render_option.point_size = 2 # 设置渲染点的大小 + vis1.add_geometry(pcd11) + vis1.run() + + vis2 = o3d.visualization.Visualizer() + vis2.create_window(window_name='registration', width=600, height=600) # 创建窗口 + render_option: o3d.visualization.RenderOption = vis2.get_render_option() # 设置点云渲染参数 + render_option.background_color = np.array([1, 1, 1]) # 设置背景色(这里为黑色) + render_option.point_size = 2 # 设置渲染点的大小 + vis2.add_geometry(pcd2) + vis2.run() + + vis = o3d.visualization.Visualizer() + vis.create_window(window_name='registration', width=600, height=600) # 创建窗口 + render_option: o3d.visualization.RenderOption = vis.get_render_option() # 设置点云渲染参数 + render_option.background_color = np.array([1, 1, 1]) # 设置背景色(这里为黑色) + render_option.point_size = 2 # 设置渲染点的大小 + vis.add_geometry(pcd1) + vis.add_geometry(pcd2) + vis.run() + ####################################################################################################### + + return batch_dict + + +class ImgHead(nn.Module): + def __init__(self, alnet='alike-n', num_kpt=150, cluster_num=0,vlad_size=256): + super(ImgHead, self).__init__() + cfg = configs[alnet] + self.feature_extractor = ALNet(c1=cfg['c1'], c2=cfg['c2'], c3=cfg['c3'], c4=cfg['c4'], dim=cfg['dim'], + single_head=cfg['single_head']) + self.feature_size = int(self.feature_extractor.feature_size) + # try: + # model_path = cfg['model_path'] + # except: + # model_path = '' + # if model_path != '': + # state_dict = torch.load(model_path) + # self.feature_extractor.load_state_dict(state_dict) + # for param in self.feature_extractor.parameters(): + # param.requires_grad = False + if num_kpt>0: + self.num_kpt = num_kpt + + def forward(self, batch_dict): + x = batch_dict['img'][:, 0:3].float() / 255.0 + # x=x[:,:,:,384:768,] + # pixels = batch_dict['img'][:, 3:5] + b, c, h, w = x.shape + pixel_features = [] + kpts = [] + scores = [] + score_img, feature_img = self.feature_extractor(x) + # feature_img=feature_img*0 + if hasattr(self,'num_kpt') : + score_img = simple_nms(score_img, 2, 2) + s_thr = 0.1 + for i in range(b): + score_global1 = score_img[i, 0] + values, indices = torch.topk(score_global1.view(-1), k=self.num_kpt, dim=0, largest=True) + if torch.max(values) < s_thr: + print('0 pixel') + exit() + num_low_value = torch.sum(values < s_thr) + if num_low_value > 0: + indices1 = indices.clone() + indices1[(self.num_kpt - num_low_value):] = indices[:num_low_value] + indices = indices1 + row = torch.div(indices, score_global1.shape[1], rounding_mode='trunc') + col = indices % score_global1.shape[1] + pixel_feature = feature_img[i:i + 1, :, row, col] + pixel_features.append(pixel_feature) + kpts.append(torch.cat([row.view(1, -1, 1), col.view(1, -1, 1)], dim=2)) + scores.append(values.view(1, -1)) + pixel_features = torch.cat(pixel_features) + kpts = torch.cat(kpts) + scores = torch.cat(scores) + #################################### show match ############################################ + if 0: + for i in range(b // 2): + img1 = batch_dict['img'][i][0:3].permute(1, 2, 0) + img1 = np.ascontiguousarray(img1.cpu().detach().numpy(), dtype=np.uint8) + img2 = batch_dict['img'][i + b // 2][0:3].permute(1, 2, 0) + img2 = np.ascontiguousarray(img2.cpu().detach().numpy(), dtype=np.uint8) + pixel1 = kpts[i].cpu().detach().numpy() + pixel2 = kpts[i + b // 2].cpu().detach().numpy() + fea1 = pixel_features[i].permute(1, 0).cpu().detach().numpy() + fea2 = pixel_features[i + b // 2].permute(1, 0).cpu().detach().numpy() + idx1, idx2, dis = tools.nn_match(fea1, fea2, 'euclidean') + idx1 = idx1[dis < 10] + idx2 = idx2[dis < 10] + h, w, _ = img1.shape + img = np.vstack((img1, img2)) + img[h, :] = [255, 255, 255] + for i in range(len(idx1)): + center1 = (int(pixel1[idx1[i], 1]), int(pixel1[idx1[i], 0])) + center2 = (int(pixel2[idx2[i], 1]), int(pixel2[idx2[i], 0] + h)) + cv2.line(img, center1, center2, (0, 188, 0), 1, cv2.LINE_AA) + cv2.circle(img, center1, 2, (0, 0, 255), -1, cv2.LINE_AA) + cv2.circle(img, center2, 2, (0, 0, 255), -1, cv2.LINE_AA) + fig = plt.figure() + plt.imshow(img[:, :, [2, 1, 0]]) + plt.show() + # cv2.namedWindow('img match') + # cv2.imshow('img match', img) + # cv2.waitKey(0) + + ######################################################################################################### + batch_dict['key_pixels'] = kpts + batch_dict['fea_kpl'] = pixel_features + batch_dict['fea_img'] = feature_img + batch_dict['score_img'] = score_img + if hasattr(self, 'netvlad_img'): + vlad = self.netvlad_img(pixel_features.transpose(1, 2).contiguous()) + batch_dict['vlad_img'] = vlad + + return batch_dict + +class LocalPool(nn.Module): + def __init__(self, in_c): + super().__init__() + self.conv1 = nn.Conv2d(100, 10, 1, 1, 0, bias=True) + self.mp=nn.MaxPool2d((1, 10)) + + def forward(self, x): + b, c, n, k = x.shape #k=100 + x1 = x.permute(0, 3, 2, 1) # b,k,n,c + x2=self.conv1(x1) + x3=x2.permute(0,3,2,1) + x4=self.mp(x3) + return x4 # bcn1 + +class TransformerEncoder(nn.Module): + def __init__(self, in_c=128, num_heads=4, dropout=0.1, num_layers=2): + super().__init__() + encoder_layer = nn.TransformerEncoderLayer(d_model=in_c, nhead=num_heads, dropout=dropout, batch_first=True) + self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers) + + def forward(self, x): + y = self.encoder(x) + return y + + + +class Attention(nn.Module): + def __init__(self, d_model): + super(Attention, self).__init__() + self.d_model = d_model + self.w_q = nn.Linear(d_model, d_model) + self.w_k = nn.Linear(d_model, d_model) + self.w_v = nn.Linear(d_model, d_model) + self.fnn = nn.Linear(d_model, d_model) + # self.dp=nn.Dropout(0.1) + + def forward(self, q, k=None, v=None): + proj_q = self.w_q(q) # BNC + proj_k = self.w_k(k) + proj_w = self.w_v(v) + # proj_q=self.dp(proj_q) + # proj_k=self.dp(proj_k) + # proj_w=self.dp(proj_w) + weights = nn.functional.softmax(torch.matmul(proj_q, proj_k.transpose(-2, -1)) / (self.d_model ** 0.5), dim=-1) + attn_output = torch.matmul(weights, proj_w).contiguous() + output = self.fnn(attn_output) + return output, weights + + +class Generator(nn.Module): + def __init__(self, in_c=128, num=150): + super().__init__() + self.mha = Attention(in_c) + self.conv1 = nn.Sequential( + nn.ConvTranspose1d(in_c, in_c, kernel_size=3, stride=3, padding=0), + nn.AdaptiveMaxPool1d(num) + ) + + def forward(self, x): + b, c, n = x.shape + # x=x.detach() + x1 = x.permute(0, 2, 1) # BNC + x2, _ = self.mha(x1, x1, x1) + x2 = x2.permute(0, 2, 1) + x3 = self.conv1(x2) + return x3 + + +class Converter(nn.Module): + def __init__(self, in_c=128): + super().__init__() + self.mha = Attention(in_c) + self.conv1 = nn.Sequential( + nn.Conv1d(in_c, in_c, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c), nn.ReLU(), + nn.Conv1d(in_c, in_c // 4, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c // 4), nn.ReLU(), + nn.Conv1d(in_c // 4, in_c // 8, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c // 8), nn.ReLU(), + nn.Conv1d(in_c // 8, in_c // 4, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c // 4), nn.ReLU(), + nn.Conv1d(in_c // 4, in_c, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c), nn.ReLU(), + nn.Conv1d(in_c, in_c, kernel_size=1, stride=1, padding=0) + ) + self.conv2 = nn.Conv1d(in_c * 2, in_c, 1, 1, 0, bias=False) + + def forward(self, x): + # return x + b, c, n = x.shape + # x=x.detach() + mask = (x == 0).all(dim=1) + x1 = x.permute(0, 2, 1) # BNC + x2, _ = self.mha(x1, x1, x1) + x2 = x2.permute(0, 2, 1) + x3 = self.conv1(x) + x4=torch.cat([x2,x3],dim=1) + x5=self.conv2(x4) + x5 = x5.masked_fill(mask.unsqueeze(1), 0) + return x5 + +class FusionHead(nn.Module): + def __init__(self, in_c=128): + super().__init__() + self.mha1 = Attention(in_c) + self.mha2 = Attention(in_c) + self.conv1 = nn.Conv1d(in_c * 2, in_c, 1) + + def forward(self, x): + fea_kpt = x[:, :, 0] + fea_kpl_gen = x[:, :, 3] + B, C, K, N = x.shape + x1 = x[:, :, :3] # BC3N + x2 = x1.permute(0, 3, 2, 1).contiguous()#BN3C + x3 = x2.view(B * N, 3, C) + x4, _ = self.mha1(x3, x3, x3) + x5 = torch.max(x4, dim=1)[0]#B*N 3 C + x6=x5.view(B,N,C) + x7, _ = self.mha2(x6, fea_kpl_gen.permute(0, 2, 1), fea_kpl_gen.permute(0, 2, 1)) + x7 = x7.permute(0, 2, 1) + x8 = torch.cat([fea_kpt, x7] ,dim=1) + x9 = self.conv1(x8) + return x9 + + +def cosine_similarity(feature1, feature2): + # BNC + feature1 = feature1 / torch.sqrt(torch.sum(feature1 ** 2, -1, keepdim=True) + 1e-8) + feature2 = feature2 / torch.sqrt(torch.sum(feature2 ** 2, -1, keepdim=True) + 1e-8) + C = torch.bmm(feature1, feature2.transpose(1, 2)) + # distance_matrix = torch.sum(feature1 ** 2, -1, keepdim=True) + # distance_matrix = distance_matrix + torch.sum(feature2 ** 2, -1, keepdim=True).transpose(1, 2) + # distance_matrix = distance_matrix - 2 * torch.bmm(feature1, feature2.transpose(1, 2)) # c^2=a^2+b^2-2abcos + # C = distance_matrix ** 0.5 + return C + + +class Fusion(nn.Module): + def __init__(self, cfg): + super().__init__() + flag = cfg['flag'] + self.flag = flag + if flag == 'fusion': + self.img = ImgHead(alnet='alike-n', num_kpt=cfg['kpts_number_img'], + cluster_num=cfg['cluster_num_img'], vlad_size=cfg['vlad_size']) + self.bev = BEVHead(alnet='alike-n', iter=cfg['sinkhorn_iter'], + num_kpt=cfg['kpts_number_bev'], cluster_num=cfg['cluster_num_bev'], vlad_size=cfg['vlad_size']) + assert self.img.feature_size == self.bev.feature_size, 'img feature and image feature should be the same size' + feature_size = self.img.feature_size + self.localpool = LocalPool(feature_size) + self.cvt_img = Converter(feature_size) + self.cvt_bev = Converter(feature_size) + self.gen_pan = Generator(feature_size, cfg['kpts_number_bev']) + self.att_fusion = FusionHead(feature_size) + # self.netvlad_fusion = NetVLADLoupe(feature_size, cfg['cluster_num_fusion'], cfg['vlad_size']) + self.netvlad_fusion = NetVLAD(feature_size, cfg['cluster_num_fusion']) + self.uot = UOTHead(nb_iter=cfg['sinkhorn_iter'],name='fusion') + self.vlad='fusion' + self.w= torch.nn.Parameter(torch.zeros(1)) + + if flag == 'bev': + self.bev = BEVHead(alnet='alike-n',iter=cfg['sinkhorn_iter'], num_kpt=cfg['kpts_number_bev'], cluster_num=cfg['cluster_num_bev'], vlad_size=256) + if flag == 'img': + self.img = ImgHead(alnet='alike-n', num_kpt=cfg['kpts_number_img'], cluster_num=cfg['cluster_num_img'], vlad_size=cfg['vlad_size']) + + def forward(self, batch_dict): + if self.flag == 'fusion': + batch_dict = self.img(batch_dict) + batch_dict = self.bev(batch_dict) + fea_img = batch_dict['fea_img'] + fea_bev = batch_dict['fea_bev'] + relation = batch_dict['relation'] + fea_kpt_original = batch_dict['fea_kpt_original'] + # fea_kpl = batch_dict['fea_kpl'] + # pixel_kpt = batch_dict['pixels_kpt'] + b, n1, n2, _ = relation.shape + n2 = n2 - 1 + # ns=torch.sum((relation[:,:,-1]>0).all(dim=2),dim=1) + # n_least=torch.min(ns) + # n_least=min(n_least,256) + # relation1=[] + # for i in range(b): + # idx=torch.randperm(ns[i])[:n_least].to(relation.device) + # relation1.append(relation[i:i+1,idx]) + # relation1=torch.cat(relation1) + # relation=relation1 + pixel_img = relation[:, :, 0:n2].clone() + grid_img = pixel_img[:, :, :, [1, 0]].float() / torch.tensor([fea_img.shape[3] - 1, fea_img.shape[2] - 1]).to(fea_img.device).float() * 2 - 1 + fea_pl_dual = F.grid_sample(fea_img, grid_img, align_corners=True, mode='bilinear', padding_mode='zeros') + fea_pl_dual = self.localpool(fea_pl_dual).squeeze(3) + fea_pt_dual_gen = self.cvt_bev(fea_pl_dual) + + if 'pose_to_frame' in batch_dict.keys() and hasattr(self, 'uot'): + pixel_bev = relation[:, :, n2:n2 + 1, 0:2].clone() + grid_bev = pixel_bev[:, :, :, [1, 0]].float() / torch.tensor([fea_bev.shape[3] - 1, fea_bev.shape[2] - 1]).to(fea_bev.device).float() * 2 - 1 + fea_pt_dual = (F.grid_sample(fea_bev, grid_bev, align_corners=True, mode='bilinear', padding_mode='zeros')).squeeze(3) + fea_pl_dual_gen = self.cvt_img(fea_pt_dual) + batch_dict['fea_pt_dual_gen'] = fea_pt_dual_gen + batch_dict['fea_pl_dual_gen'] = fea_pl_dual_gen + batch_dict['fea_pt_dual'] = fea_pt_dual + batch_dict['fea_pl_dual'] = fea_pl_dual + + fea_kpt_original_gen = self.gen_pan(fea_pt_dual_gen) + batch_dict['fea_kpt_original_gen'] = fea_kpt_original_gen + fea_kpl_gen = self.cvt_img(fea_kpt_original) + fea_kpt_gen_gen = self.cvt_bev(fea_kpl_gen) + batch_dict['fea_kpt_gen_gen'] = fea_kpt_gen_gen + batch_dict['fea_kpl_gen']=fea_kpl_gen + fea_kpts = torch.cat([fea_kpt_original.unsqueeze(2), fea_kpt_original_gen.unsqueeze(2), fea_kpt_gen_gen.unsqueeze(2), fea_kpl_gen.unsqueeze(2)], dim=2) + fea_kpt_fusion = self.att_fusion(fea_kpts) + batch_dict['fea_kpt_fusion'] = fea_kpt_original + + # sim10 = cosine_similarity(fea_pt_dual.permute(0, 2, 1), fea_pt_dual.permute(0, 2, 1))[0].cpu().detach().numpy() + # sim11 = cosine_similarity(fea_pt_dual_gen.permute(0, 2, 1), fea_pt_dual_gen.permute(0, 2, 1))[0].cpu().detach().numpy() + # sim20 = cosine_similarity(fea_pl_dual.permute(0, 2, 1), fea_pl_dual.permute(0, 2, 1))[0].cpu().detach().numpy() + # sim21 = cosine_similarity(fea_pl_dual_gen.permute(0, 2, 1), fea_pl_dual_gen.permute(0, 2, 1))[0].cpu().detach().numpy() + # sim30 = cosine_similarity(fea_kpt_original.permute(0, 2, 1), fea_kpt_original.permute(0, 2, 1))[0].cpu().detach().numpy() + # sim31 = cosine_similarity(fea_kpt_original_gen.permute(0, 2, 1), fea_kpt_original_gen.permute(0, 2, 1))[0].cpu().detach().numpy() + # sim32 = cosine_similarity(fea_kpt_gen_gen.permute(0, 2, 1), fea_kpt_gen_gen.permute(0, 2, 1))[0].cpu().detach().numpy() + # fig=plt.figure() + # plt.subplot(2, 4, 1), plt.imshow(sim10), plt.title('points') + # plt.subplot(2, 4, 5), plt.imshow(sim11), plt.title('gen points') + # plt.subplot(2, 4, 2), plt.imshow(sim20), plt.title('pixel') + # plt.subplot(2, 4, 6), plt.imshow(sim21), plt.title('gen pixel') + # plt.subplot(2, 4, 3), plt.imshow(sim30), plt.title('kpt orig') + # plt.subplot(2, 4, 7), plt.imshow(sim31), plt.title('pan kpt') + # plt.subplot(2, 4, 4), plt.imshow(sim30), plt.title('kpt orig') + # plt.subplot(2, 4, 8), plt.imshow(sim32), plt.title('kpt gen gen') + # plt.show() + + if 'pose_to_frame' in batch_dict.keys() and hasattr(self, 'uot'): + self.uot(batch_dict) + + vlad_fusion = self.netvlad_fusion(fea_kpt_fusion.unsqueeze(3)) + if self.vlad=='bev': + batch_dict['vlads']=batch_dict['vlad_bev'] + if self.vlad=='fusion': + if 'vlad_bev' in batch_dict.keys(): + batch_dict['vlads']=torch.sigmoid(self.w)*vlad_fusion + (1-torch.sigmoid(self.w))*batch_dict['vlad_bev'] + else: + batch_dict['vlads']=vlad_fusion + if self.flag == 'bev': + batch_dict = self.bev(batch_dict) + batch_dict['vlads'] = batch_dict['vlad_bev'] + if self.flag == 'img': + batch_dict = self.img(batch_dict) + batch_dict['vlads'] = batch_dict['vlad_img'] + return batch_dict + + +if __name__ == '__main__': + b=BEVHead() \ No newline at end of file diff --git a/netvlad.py b/netvlad.py new file mode 100644 index 0000000..0de480e --- /dev/null +++ b/netvlad.py @@ -0,0 +1,155 @@ +import math +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class NetVLAD(nn.Module): + def __init__(self, fea_size=128, num_clusters=16): + super(NetVLAD, self).__init__() + self.num_clusters = num_clusters + self.conv = nn.Conv2d(fea_size, num_clusters, kernel_size=(1, 1), bias=True) + self.centroids = nn.Parameter(torch.randn(num_clusters, fea_size)) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + """ + x: B, C, H, W,W=1 + """ + soft_assign = self.conv(x) # (B, num_clusters, H, W) + soft_assign = self.relu(soft_assign) + soft_assign = torch.nn.functional.softmax(soft_assign, dim=1) # (B, num_clusters, H, W) + + # reshape for broadcasting + B, C, H, W = x.shape + soft_assign = soft_assign.view(B, self.num_clusters, -1) # (B, num_clusters, H * W) + x_flatten = x.view(B, C, -1) # (B, C, H * W) + + # compute residuals + x_flatten1 = x_flatten.unsqueeze(1).permute(0, 1, 3, 2) # (B, 1, H*W, C) + centroids = self.centroids.unsqueeze(0).unsqueeze(2) # (1, num_cluster, 1, C) + residual = x_flatten1 - centroids # (B, num_clusters, H * W, C) + + residual *= soft_assign.unsqueeze(-1) # (B, num_clusters, H * W, C) + + # sum residuals and assign + vlad = residual.sum(dim=-2) # (B, num_clusters, C) + vlad = nn.functional.normalize(vlad, p=2, dim=2) # (B, num_clusters, C) + + vlad = vlad.view(B, -1) + vlad = nn.functional.normalize(vlad, p=2, dim=1) # (B, num_clusters * C) + return vlad + + +class NetVLADLoupe(nn.Module): + """ + Original Tensorflow implementation: https://github.com/antoine77340/LOUPE + """ + + def __init__(self, feature_size, cluster_size, output_dim, + gating=True, add_norm=True, is_training=True, normalization='batch'): + super(NetVLADLoupe, self).__init__() + self.feature_size = feature_size + # output_dim=cluster_size * feature_size + self.output_dim = output_dim + self.is_training = is_training + self.gating = gating + self.add_batch_norm = add_norm + self.cluster_size = cluster_size + if normalization == 'instance': + norm = lambda x: nn.LayerNorm(x) + elif normalization == 'group': + norm = lambda x: nn.GroupNorm(8, x) + else: + norm = lambda x: nn.BatchNorm1d(x) + self.softmax = nn.Softmax(dim=-1) + self.cluster_weights = nn.Parameter(torch.randn(feature_size, cluster_size) * 1 / math.sqrt(feature_size)) + self.cluster_weights2 = nn.Parameter(torch.randn(1, feature_size, cluster_size) * 1 / math.sqrt(feature_size)) + self.hidden1_weights = nn.Parameter(torch.randn(cluster_size * feature_size, output_dim) * 1 / math.sqrt(feature_size)) + if add_norm: + self.cluster_biases = None + self.bn1 = norm(cluster_size) + else: + self.cluster_biases = nn.Parameter(torch.randn(cluster_size) * 1 / math.sqrt(feature_size)) + self.bn1 = None + + self.bn2 = norm(output_dim) + + if gating: + self.context_gating = GatingContext(output_dim, add_batch_norm=add_norm, normalization=normalization) + + def forward(self, x): + """ + x: B N C + """ + max_samples = x.shape[1] + activation = torch.matmul(x, self.cluster_weights) + if self.add_batch_norm: + activation = activation.view(-1, self.cluster_size) + activation = self.bn1(activation) + activation = activation.view(-1, max_samples, self.cluster_size) + else: + activation = activation + self.cluster_biases + activation = self.softmax(activation) + + a_sum = activation.sum(-2, keepdim=True) + a = a_sum * self.cluster_weights2 + + activation = torch.transpose(activation, 2, 1) + x = x.view((-1, max_samples, self.feature_size)) + vlad = torch.matmul(activation, x) + vlad = torch.transpose(vlad, 2, 1).contiguous() + vlad0 = vlad - a + + vlad1 = F.normalize(vlad0, dim=1, p=2, eps=1e-6) + vlad2 = vlad1.view((-1, self.cluster_size * self.feature_size)) + vlad = F.normalize(vlad2, dim=1, p=2, eps=1e-6) + + vlad = torch.matmul(vlad, self.hidden1_weights) + + vlad = self.bn2(vlad) + + if self.gating: + vlad = self.context_gating(vlad) + # vlad = vlad / vlad.norm(dim=1, keepdim=True) + return vlad + + +class GatingContext(nn.Module): + """ + Original Tensorflow implementation: https://github.com/antoine77340/LOUPE + """ + + def __init__(self, dim, add_batch_norm=True, normalization='batch'): + super(GatingContext, self).__init__() + self.dim = dim + self.add_batch_norm = add_batch_norm + if normalization == 'instance': + norm = lambda x: nn.LayerNorm(x) + elif normalization == 'group': + norm = lambda x: nn.GroupNorm(8, x) + else: + norm = lambda x: nn.BatchNorm1d(x) + self.gating_weights = nn.Parameter(torch.randn(dim, dim) * 1 / math.sqrt(dim)) + self.sigmoid = nn.Sigmoid() + + if add_batch_norm: + self.gating_biases = None + self.bn1 = norm(dim) + else: + self.gating_biases = nn.Parameter(torch.randn(dim) * 1 / math.sqrt(dim)) + self.bn1 = None + + def forward(self, x): + gates = torch.matmul(x, self.gating_weights) + + if self.add_batch_norm: + gates = self.bn1(gates) + else: + gates = gates + self.gating_biases + + gates = self.sigmoid(gates) + + activation = x * gates + + return activation diff --git a/preparedataset.py b/preparedataset.py new file mode 100644 index 0000000..eb6a21c --- /dev/null +++ b/preparedataset.py @@ -0,0 +1,105 @@ +import os + +import numpy as np +import torch +import tqdm + + +def k3602k(k, k360): + for src in [0, 3, 4, 5, 6, 7, 9, 10]: + tgt = src + 50 + path_pose = k360 + '/data_poses/2013_05_28_drive_%04d_sync/cam0_to_world.txt' % src + path_velo = k360 + '/data_3d_raw/2013_05_28_drive_%04d_sync/velodyne_points/data' % src + path_calib = k360 + '/calibration/calib_cam_to_velo.txt' + path_img = k360 + '/data_2d_raw/2013_05_28_drive_%04d_sync/image_00/data_rect' % src + + path_pose1 = k + '/data_odometry_poses/dataset/poses/%02d.npy' % tgt + path_velo1 = k + '/data_odometry_velodyne/dataset/sequences/%02d/velodyne' % tgt + path_calib1 = k + '/data_odometry_calib/dataset/sequences/%02d' % tgt + path_img1 = k + '/data_odometry_color/dataset/sequences/%02d/image_2/' % tgt + + if not os.path.exists(path_velo1): + os.makedirs(path_velo1) + if not os.path.exists(path_img1): + os.makedirs(path_img1) + if not os.path.exists(path_calib1): + os.makedirs(path_calib1) + + if not os.path.exists(path_calib1 + '/calib.txt'): + os.symlink(path_calib, path_calib1 + '/calib.txt') + + with open(path_calib, 'r') as f: + for line in f.readlines(): + data = np.array([float(x) for x in line.split()]) + cam0_to_velo = np.reshape(data, (3, 4)) + cam0_to_velo = np.vstack([cam0_to_velo, [0, 0, 0, 1]]) + cam0_to_velo = torch.tensor(cam0_to_velo) + poses2 = [] + ids = [] + with open(path_pose, 'r') as f: + for x in f: + x = x.strip().split() + x = [float(v) for v in x] + ids.append(int(x[0])) + pose = torch.zeros((4, 4), dtype=torch.float64) + pose[0, 0:4] = torch.tensor(x[1:5]) + pose[1, 0:4] = torch.tensor(x[5:9]) + pose[2, 0:4] = torch.tensor(x[9:13]) + pose[3, 3] = 1.0 + pose = pose @ cam0_to_velo.inverse() + poses2.append(pose.float().numpy()) + pose = np.stack(poses2) + np.save(path_pose1, pose) + + cnt = 0 + for i in tqdm.tqdm(ids, desc='%02d:' % src): + path_velo_now = os.path.join(path_velo, '0000%06d.bin' % i) + path_img_now = os.path.join(path_img, '0000%06d.png' % i) + if os.path.exists(path_velo_now) and os.path.exists(path_img_now): + pass + else: + break + path_velo_now1 = os.path.join(path_velo1, '%06d.bin' % cnt) + path_img_now1 = os.path.join(path_img1, '%06d.png' % cnt) + if not os.path.exists(path_velo_now1): + os.symlink(path_velo_now, path_velo_now1) + if not os.path.exists(path_img_now1): + os.symlink(path_img_now, path_img_now1) + cnt = cnt + 1 + + +def todataset(kitti_root, dataset_root): + sequences = [0, 5, 6, 7, 8, 9, 50, 54, 55, 56, 59] + for s in sequences: + if s >= 50: + suffix = '.npy' + else: + suffix = '.txt' + kitti_velo_dir = kitti_root + '/data_odometry_velodyne/dataset/sequences/%02d/velodyne' % s + kitti_img_dir = kitti_root + '/data_odometry_color/dataset/sequences/%02d/image_2' % s + kitti_pose_dir = kitti_root + '/data_odometry_poses/dataset/poses/%02d' % s + suffix + kitti_calib_dir = kitti_root + '/data_odometry_calib/dataset/sequences/%02d/calib.txt' % s + dataset_path = dataset_root + '/%02d' % s + if not os.path.exists(dataset_path): + os.makedirs(dataset_path) + dataset_velo_dir = dataset_path + '/velodyne' + dataset_img_dir = dataset_path + '/image_2' + dataset_pose_dir = dataset_path + '/poses' + suffix + dataset_calib_dir = dataset_path + '/calib.txt' + if not os.path.exists(dataset_velo_dir): + os.symlink(kitti_velo_dir, dataset_velo_dir) + if not os.path.exists(dataset_img_dir): + os.symlink(kitti_img_dir, dataset_img_dir) + if not os.path.exists(dataset_pose_dir): + os.symlink(kitti_pose_dir, dataset_pose_dir) + if not os.path.exists(dataset_calib_dir): + os.symlink(kitti_calib_dir, dataset_calib_dir) + + +if __name__ == '__main__': + k360 = '/home/coop8/chenyouyuan/FUSIONLCD/KITTI360' + k = '/home/coop8/chenyouyuan/FUSIONLCD/KITTI' + k3602k(k, k360) + kitti_root = '/home/coop8/chenyouyuan/FUSIONLCD/KITTI' + dataset_root = '/home/coop8/chenyouyuan/FUSIONLCD/sequences' + todataset(kitti_root, dataset_root) diff --git a/tools.py b/tools.py new file mode 100644 index 0000000..2b76cdd --- /dev/null +++ b/tools.py @@ -0,0 +1,149 @@ +import threading +import torch +import os +import time + + +def farthest_point_sample(xyz, npoint): + """Iterative farthest point sampling + + Args: + xyz: pointcloud data_loader, [B, N, C] + npoint: number of samples + Returns: + centroids: sampled pointcloud index, [B, npoint] + """ + device = xyz.device + B, N, C = xyz.shape + centroids = torch.zeros(B, npoint, dtype=torch.long).to(device) + distance = torch.ones(B, N).to(device) * 1e10 + farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device) + batch_indices = torch.arange(B, dtype=torch.long).to(device) + for i in range(npoint): + centroids[:, i] = farthest + centroid = xyz[batch_indices, farthest, :].view(B, 1, 3) + dist = torch.sum((xyz - centroid) ** 2, -1) + mask = dist < distance + distance[mask] = dist[mask] + farthest = torch.max(distance, -1)[1] + return centroids + + +def batch_distance(feature1,feature2,mode='cosine'): + if mode == 'cosine': + # Transport cost matrix + feature1 = feature1 / torch.sqrt(torch.sum(feature1 ** 2, -1, keepdim=True) + 1e-8) + feature2 = feature2 / torch.sqrt(torch.sum(feature2 ** 2, -1, keepdim=True) + 1e-8) + dis = 1.0 - torch.bmm(feature1, feature2.transpose(1, 2)) + elif mode == 'euclidean': + feature=torch.cat([feature1,feature2],dim=1) + feature_mean=torch.mean(feature,dim=1,keepdim=True) + feature1=feature1-feature_mean + feature2=feature2-feature_mean + distance_matrix = torch.sum(feature1 ** 2, -1, keepdim=True) + distance_matrix = distance_matrix + torch.sum(feature2 ** 2, -1, keepdim=True).transpose(1, 2) + distance_matrix = distance_matrix - 2 * torch.bmm(feature1, feature2.transpose(1, 2)) # c^2=a^2+b^2-2abcos + distance_matrix = distance_matrix ** 0.5 + dis = distance_matrix + return dis + +def nn_match(fea1, fea2, matrix='cosine'): + assert len(fea1.shape) == 2 and len(fea2.shape) == 2, 'nnmatch error' + if not isinstance(fea1, torch.Tensor): + fea1 = torch.tensor(fea1) + if not isinstance(fea2, torch.Tensor): + fea2 = torch.tensor(fea2) + if matrix == 'cosine': + # Transport cost matrix + fea1 = fea1 / torch.sqrt(torch.sum(fea1 ** 2, -1, keepdim=True) + 1e-8) + fea2 = fea2 / torch.sqrt(torch.sum(fea2 ** 2, -1, keepdim=True) + 1e-8) + dis = 1.0 - torch.mm(fea1, fea2.transpose(0, 1)) + elif matrix == 'euclidean': + distance_matrix = torch.sum(fea1 ** 2, -1, keepdim=True) + distance_matrix = distance_matrix + torch.sum(fea2 ** 2, -1, keepdim=True).transpose(0, 1) + distance_matrix = distance_matrix - 2 * torch.mm(fea1, fea2.transpose(0, 1)) # c^2=a^2+b^2-2abcos + dis = distance_matrix ** 0.5 + else: + dis = 0 + print('Invalid matrix') + idx0_min = torch.argmin(dis, dim=0) + idx1_min = torch.argmin(dis, dim=1) + ids1 = torch.arange(0, dis.shape[1]).to(fea1.device) + idx = idx1_min[idx0_min] + idx_match = ids1 == idx + idx1 = ids1[idx_match] + idx2 = idx0_min[idx_match] + dis_min = dis[idx2, idx1] + + return idx2, idx1, dis_min + + +def path_join(*args): + names = list(args) + path = names[0] + for i in range(len(names) - 1): + path = os.path.join(path, names[i + 1]) + path = list(path) + while "\\" in path: + idx = path.index("\\") + path[idx] = "/" + path = ''.join(path) + return path + + +def make_save_path(*args): + path = path_join(*args) + if not os.path.exists(path): + os.makedirs(path) + return path + + + +def read_cfg(data): + if type(data) is int: + result = [data] + else: + result = data.split(',') + return result + + +class Timer: + """A module to record the program running time""" + + def __init__(self, name="Now"): + self.strat = time.time() + self.cnt = 0 + self.end = time.time() + self.avg = 0 + self.all = 0 + self.now = 0 + self.name = name + time_now = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) + print('Init timer: ',time_now) + + def update(self, name=None): + if name is not None: + self.name = name + self.cnt = self.cnt + 1 + self.end = time.time() + self.avg = (self.end - self.strat) / self.cnt + self.now = self.end - self.all - self.strat + self.all = self.end - self.strat + time_now = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) + if self.avg<1: + print("%s | %s | using %d | each %.3f" % + (time_now, self.name, self.all, self.now)) + elif self.avg<10: + print("%s | %s | using %d | each %.2f" % + (time_now, self.name, self.all, self.now)) + elif self.avg<100: + print("%s | %s | using %d | each %.1f" % + (time_now, self.name, self.all, self.now)) + else: + print("%s | %s | using %d | each %d" % + (time_now, self.name, self.all, self.now)) + + +if __name__ == '__main__': + # draw_trace() + pass diff --git a/train.py b/train.py new file mode 100644 index 0000000..cdcf6a0 --- /dev/null +++ b/train.py @@ -0,0 +1,432 @@ +import argparse +import os +import time + +import numpy as np +import torch +import torch.optim as optim +import yaml + +import net +import tools +from dataset import KittiTotalLoader +from evaluate_lcd import lcd +from loss import TotalLoss + +test_step = 10 # 保存测试点的步长 + +def save_checkpoint(model, optimizer, loss_total_fun, epoch, iter_train, path_result): + if (epoch + 1) % test_step == 0 and epoch+1>=test_step: + + time_now = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) + checkpoint = {'time': time_now, + 'epoch': epoch, + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict()} + savepath = tools.make_save_path(path_result, 'models') + torch.save(checkpoint, savepath + '/checkpoint_%03d.pth.tar' % epoch) + print(savepath + '/checkpoint_%03d.pth.tar is saved' % epoch) + +class log_result(): + def __init__(self,path_result): + self.path=path_result + if not os.path.exists(path_result): + with open(path_result, 'w') as file: + file.write('Time Sequence Epoch AP R100 F1 R@1 R@2 R@3 R@4 R@5') + file.write(' R@6 R@7 R@8 R@9 R@10 R@15 R@20 R@25\n') + for i in range(300): + file.write('\n') + def write(self,seq,epoch,row,x): + with open(self.path, 'r') as file: + lines = file.readlines() + time_now = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())) + new_content='%s %08d %06d'%(time_now,seq,epoch) + for x1 in x: + new_content=new_content + ' %.3f'%x1 + + lines[row] = new_content+'\n' + with open(self.path, 'w') as file: + file.writelines(lines) + +def train(model, optimizer, loss_total_fun, data, device): + model.train() + sequences = data['sequence'] + id_query = data['id_query'] + id_positive = data['id_positive'] + batchsize = len(id_query) + + bev_query = data['bev_query'].to(device) + bev_positive = data['bev_positive'].to(device) + + pose_query = data['pose_query'].to(device) + pose_positive = data['pose_positive'].to(device) + + pose_to_frame = data['pose_to_frame'].to(device) + label_score = data['label_score'].to(device) + + img_query = data['img_query'].to(device) + img_positive = data['img_positive'].to(device) + try: + bev = torch.cat([bev_query, bev_positive], dim=0) + bev = bev.permute(0, 3, 1, 2) + except: + bev = 0 + try: + img = torch.cat([img_query, img_positive], dim=0) + img = img.permute(0, 3, 1, 2) + except: + img = 0 + try: + relation = data['relation'].to(device) + except: + relation = 0 + + batch_dict = {'bev': bev, + 'label_score': label_score, + 'img': img, + 'relation': relation, + 'id_query': id_query, + 'sequence': sequences, + 'id_positive': id_positive, + 'pose_to_frame': pose_to_frame, + 'pose_query': pose_query, + 'pose_positive': pose_positive, + 'batch_size': int(batchsize * 2)} + + model(batch_dict) + + loss_total_fun(batch_dict) + l_total = batch_dict['loss'][0] + + optimizer.zero_grad() + l_total.backward() + optimizer.step() + for p in model.parameters(): + if torch.isnan(p).any(): + print('Model NAN, ', p.shape) + exit() + return batch_dict + + +def validate(model, loss_total_fun, data, device): + model.eval() + with torch.no_grad(): + sequences = data['sequence'] + id_query = data['id_query'] + id_positive = data['id_positive'] + batchsize = len(id_query) + + bev_query = data['bev_query'].to(device) + bev_positive = data['bev_positive'].to(device) + + pose_query = data['pose_query'].to(device) + pose_positive = data['pose_positive'].to(device) + + pose_to_frame = data['pose_to_frame'].to(device) + label_score = data['label_score'].to(device) + + img_query = data['img_query'].to(device) + img_positive = data['img_positive'].to(device) + try: + bev = torch.cat([bev_query, bev_positive], dim=0) + bev = bev.permute(0, 3, 1, 2) + except: + bev = 0 + try: + img = torch.cat([img_query, img_positive], dim=0) + img = img.permute(0, 3, 1, 2) + except: + img = 0 + try: + relation = data['relation'].to(device) + except: + relation = 0 + + batch_dict = {'bev': bev, + 'label_score': label_score, + 'img': img, + 'relation': relation, + 'id_query': id_query, + 'sequence': sequences, + 'id_positive': id_positive, + 'pose_to_frame': pose_to_frame, + 'pose_query': pose_query, + 'pose_positive': pose_positive, + 'batch_size': int(batchsize * 2)} + model(batch_dict) + loss_total_fun(batch_dict) + + return batch_dict + + +def test(model, data, device): + model.eval() + with torch.no_grad(): + sequences = data['sequence'] + id_query = data['id_query'] + batchsize = len(id_query) + bev_query = data['bev_query'].to(device) + pose_query = data['pose_query'].to(device) + + img_query = data['img_query'].to(device) + try: + bev = bev_query + bev = bev.permute(0, 3, 1, 2) + except: + bev = 0 + try: + img = img_query + img = img.permute(0, 3, 1, 2) + except: + img = 0 + try: + relation = data['relation'].to(device) + except: + relation = 0 + + batch_dict = {'bev': bev, + 'img': img, + 'relation': relation, + 'id_query': id_query, + 'sequence': sequences, + 'pose_query': pose_query, + 'batch_size': int(batchsize * 2)} + + model(batch_dict) + + return batch_dict + + +def main(args): + try: + with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile: + cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader) + print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml")) + except: + with open(os.path.join(os.getcwd(), "project/FUSIONLCD/config.yaml"), "r") as ymlfile: + cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader) + print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml")) + cfg = cfg['experiment'] + for k, v in cfg.items(): + print(k, ':', v) + path_result = os.path.join(cfg['path_result'],args.result_name) + lres=log_result(os.path.join(os.getcwd(),'result',args.result_name+'.txt')) + device = torch.device("cuda" if torch.cuda.is_available() and cfg['cuda'] else "cpu") + start_epoch = 0 + iter_train = 0 + epochs = cfg['epochs'] + model = net.Fusion(cfg) + print(model) + model = model.to(device) + loss_total_fun = TotalLoss(cfg).to(device) + print("Model params: %.6fM" % (sum(p.numel() for p in model.parameters()) / 1e6)) + optimizer = optim.Adam(model.parameters(), lr=cfg['learning_rate'], betas=(cfg['beta1'], cfg['beta2']), eps=cfg['eps'], weight_decay=cfg['weight_decay']) + # optimizer = optim.Adam([{'params': model.bev.parameters(), 'lr': 0.0002}, + # {'params': model.img.parameters(), 'lr': 0.0001}, + # {'params': model.vlad_fusion_layer.parameters(), 'lr': 0.0001}], + # betas=(cfg['beta1'], cfg['beta2']), eps=cfg['eps'], weight_decay=cfg['weight_decay']) + # print(optimizer) + loader_train, loader_val, loader_test = KittiTotalLoader(cfg) + # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1, 3, 5, 10, 50, 100], gamma=0.5, last_epoch=start_epoch - 1) + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.99) + # scheduler = warmup(optimizer, 5, 1e-6, cfg['learning_rate']) + # writer = SummaryWriter(tools.make_save_path(path_result, 'tensorboard_log')) + t = tools.Timer() + test_best = np.zeros([len(loader_test.dataset.datasets), 3]) + if cfg['load_model']: + checkpoint = torch.load((cfg['last_model'])) + start_epoch = checkpoint['epoch'] + 1 * cfg['train_flag'] + state_dict_saved = checkpoint['model'] + model.load_state_dict(state_dict_saved) + optimizer.load_state_dict(checkpoint['optimizer']) + print('loaded %s' % cfg['last_model']) + if not cfg['train_flag']: + print_frequency = 1e9 + else: + print_frequency = 1 + for epoch in range(start_epoch, epochs): + torch.cuda.empty_cache() + ''' + ============================== train =============================== + ''' + if cfg['train_flag']: + if epoch - start_epoch == 0: + pf = print_frequency + print_frequency = min(len(loader_train), print_frequency * 10) + else: + print_frequency = pf + l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10,l11 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + step_print = max(1, int(len(loader_train) / print_frequency)) + step_now = 0 + optimizer.zero_grad() + for id_sample, data in enumerate(loader_train): + batch_dict = train(model, optimizer, loss_total_fun, data, device) + + # if (id_sample+1)%4==0 or (id_sample+1)==len(loader_train): + # optimizer.step() + # optimizer.zero_grad() + if step_now < step_print: + step_now = step_now + 1 + l0 = l0 + batch_dict['loss'][0] + l1 = l1 + batch_dict['loss'][1] + l2 = l2 + batch_dict['loss'][2] + l3 = l3 + batch_dict['loss'][3] + l4 = l4 + batch_dict['loss'][4] + l5 = l5 + batch_dict['loss'][5] + l6 = l6 + batch_dict['loss'][6] + l7 = l7 + batch_dict['loss'][7] + l8 = l8 + batch_dict['loss'][8] + l9 = l9 + batch_dict['loss'][9] + l10 = l10 + batch_dict['loss'][10] + l11 = l11 + batch_dict['loss'][11] + if step_now == step_print: + step_now = 0 + info = 'loss a%.3f p%.3f s%.3f m%.3f t%.3f tr%.3f_%.1f genb%.3f geni%.3f genpa%.3f genpo%.3f genkpl%.3f' % ( + l0 / step_print, l1 / step_print, l2 / step_print, l3 / step_print, + l4 / step_print, l5 / step_print, l6 / step_print, l7 / step_print, + l8 / step_print, l9 / step_print, l10 / step_print, l11 / step_print) + t.update("Epoch %03d | train %04d/%04d | %s" % + (epoch, id_sample, len(loader_train) - 1, info)) + l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + save_checkpoint(model, optimizer, loss_total_fun, epoch, iter_train, path_result) + scheduler.step() + ''' + ============================= validate ============================= + ''' + if cfg['validate_flag'] and (epoch + 1) % test_step == 0: + l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10,l11 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + step_print = max(1, int(len(loader_val) / print_frequency)) + step_now = 0 + for id_sample, data in enumerate(loader_val): + batch_dict = validate(model, loss_total_fun, data, device) + if step_now < step_print: + step_now = step_now + 1 + l0 = l0 + batch_dict['loss'][0] + l1 = l1 + batch_dict['loss'][1] + l2 = l2 + batch_dict['loss'][2] + l3 = l3 + batch_dict['loss'][3] + l4 = l4 + batch_dict['loss'][4] + l5 = l5 + batch_dict['loss'][5] + l6 = l6 + batch_dict['loss'][6] + l7 = l7 + batch_dict['loss'][7] + l8 = l8 + batch_dict['loss'][8] + l9 = l9 + batch_dict['loss'][9] + l10 = l10 + batch_dict['loss'][10] + l11 = l11 + batch_dict['loss'][11] + if step_now == step_print: + step_now = 0 + info = 'loss a%.3f p%.3f s%.3f m%.3f t%.3f tr%.3f_%.1f genb%.3f geni%.3f genpa%.3f genpo%.3f genkpl%.3f' % ( + l0 / step_print, l1 / step_print, l2 / step_print, l3 / step_print, + l4 / step_print, l5 / step_print, l6 / step_print, l7 / step_print, + l8 / step_print, l9 / step_print, l10 / step_print, l11 / step_print) + t.update("Epoch %03d | validate %04d/%04d | %s" % + (epoch, id_sample, len(loader_val) - 1, info)) + l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10,l11 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + ''' + ============================== test ================================ + ''' + if cfg['test_flag'] and (epoch + 1) % test_step == 0: + step_print = max(1, int(len(loader_test) / print_frequency)) + step_now = 0 + vlads = [] + kpts = [] + feas_original = [] + feas_fusion = [] + sequences = [] + poses = [] + for id_sample, data in enumerate(loader_test): + batch_dict = test(model, data, device) + # save_figure(batch_dict, epoch, path_result, cfg) + sequences.append((batch_dict['sequence']).detach().cpu()) + vlads.append(batch_dict['vlads'].detach().cpu()) + poses.append(batch_dict['pose_query'].detach().cpu()) + kpts.append(batch_dict['key_points'].detach().cpu()) + if 'fea_kpt_fusion' in batch_dict.keys(): + feas_fusion.append(batch_dict['fea_kpt_fusion'].detach().cpu().permute(0, 2, 1)) + feas_original.append(batch_dict['fea_kpt_original'].detach().cpu().permute(0, 2, 1)) + + if step_now < step_print: + step_now = step_now + 1 + if step_now == step_print: + step_now = 0 + t.update("Epoch %03d | test %05d/%05d" % (epoch, id_sample, len(loader_test))) + vlads = torch.cat(vlads) + kpts = torch.cat(kpts) + feas_original = torch.cat(feas_original) + if 'fea_kpt_fusion' in batch_dict.keys(): + feas_fusion = torch.cat(feas_fusion) + else: + feas_fusion=feas_original + + poses = torch.cat(poses) + sequences = torch.cat(sequences) + + database = {'vlads': vlads, + 'key_points': kpts, + 'fea_kpt_original': feas_original, + 'fea_kpt_fusion': feas_fusion, + 'fea_kpt': feas_fusion, + 'sequences': sequences, + 'pose_query': poses} + savepath = tools.make_save_path(path_result, 'database') + + torch.save(database, savepath + '/database_bevp.pth.tar') + # print('save ' + savepath + '/database_%03d.pth.tar' % epoch) + # exit() + # database = torch.load('/data4/caodanyang/results/FUSIONLCD/07250/database/database_159.pth.tar') + print() + print('***************************************************************************************************************************************') + print('Epoch %03d' % epoch) + # feature_match(loader_val,database) + result,recall_at_k = lcd(database) + seq = torch.unique(sequences) + for i in range((test_best.shape[0])): + recall_at_k1=recall_at_k[i] + for j in range((test_best.shape[1])): + test_best[i, j] = max([test_best[i, j], result[i][j]]) + print('Best, sequence %02d, AP=%.3f, R100=%.3f, F1=%.3f' % (seq[i], test_best[i, 0], test_best[i, 1],test_best[i, 2])) + lres.write(seq[i],epoch,(epoch+1)//test_step+i*(epochs)//test_step, + [result[i][0],result[i][1],result[i][2],recall_at_k1[0],recall_at_k1[1],recall_at_k1[2],recall_at_k1[3],recall_at_k1[4], + recall_at_k1[5],recall_at_k1[6],recall_at_k1[7],recall_at_k1[8],recall_at_k1[9],recall_at_k1[14],recall_at_k1[19],recall_at_k1[24] + ]) + # print('Sequence %02d, AP=%.3f[%.3f], R100=%.3f[%.3f], F1=%.3f[%.3f], Recall@1[%.3f] 2[%.3f] 5[%.3f] 10[%.3f] 15[%.3f] 25[%.3f]' % + # (sequences[i], result[i][0], test_best[i, 0], result[i][1], test_best[i, 1], result[i][2], test_best[i, 2], + # recall_at_k1[0],recall_at_k1[1],recall_at_k1[4],recall_at_k1[9],recall_at_k1[14],recall_at_k1[24])) + print('***************************************************************************************************************************************') + print() + if cfg['train_flag']: + pass + else: + exit() + # exit() + +if __name__ == '__main__': + # CUDA_VISIBLE_DEVICES=2 nohup python -u train.py --result_name=08280 --info=cosim >log/08280.log 2>&1 & + # fuser /dev/nvidia* + parser = argparse.ArgumentParser() + parser.add_argument('--result_name', type=str, default='log', help='log name of result') + parser.add_argument('--pro_name', type=str, default='python', help='name of process') + parser.add_argument('--info', type=str, default='python', help='name of process') + parser.add_argument('--gpu', type=str, default=None, help="GPU id(s), e.g. '0' or '0,1'. Use 'cpu' to force CPU.") + args = parser.parse_args() + + # set visible GPUs before any CUDA call / seed + if args.gpu: + if args.gpu.lower() == 'cpu': + # force CPU by hiding GPUs + os.environ['CUDA_VISIBLE_DEVICES'] = '' + else: + os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu + + np.random.seed(123) + torch.manual_seed(123) + # only call cuda seed if CUDA visible + if torch.cuda.is_available(): + torch.cuda.manual_seed(123) + + print(args.info) + try: + print("Using GPU device:", os.environ.get("CUDA_VISIBLE_DEVICES", "")) + except: + pass + main(args) \ No newline at end of file diff --git a/uot.py b/uot.py new file mode 100644 index 0000000..072e958 --- /dev/null +++ b/uot.py @@ -0,0 +1,150 @@ +import math +import torch +import torch._utils +import torch.nn as nn +import torch.nn.functional as F +from torchvision.models import resnet +from typing import Optional, Callable + + +def compute_rigid_transform(points1, points2, weights): + """Compute rigid transforms between two point clouds via weighted SVD. + Adapted from https://github.com/yewzijian/RPMNet/ + Args: + points1 (torch.Tensor): (B, M, 3) coordinates of the first point cloud + points2 (torch.Tensor): (B, N, 3) coordinates of the second point cloud + weights (torch.Tensor): (B, M) + Returns: + Transform T (B, 3, 4) to get from points1 to points2, i.e. T*points1 = points2 + """ + + weights_normalized = weights[..., None] / (torch.sum(weights[..., None], dim=1, keepdim=True) + 1e-5) + centroid_a = torch.sum(points1 * weights_normalized, dim=1) + centroid_b = torch.sum(points2 * weights_normalized, dim=1) + a_centered = points1 - centroid_a[:, None, :] + b_centered = points2 - centroid_b[:, None, :] + cov = a_centered.transpose(-2, -1) @ (b_centered * weights_normalized) + + # Compute rotation using Kabsch algorithm. Will compute two copies with +/-V[:,:3] + # and choose based on determinant to avoid flips + u, s, v = torch.svd(cov, some=False, compute_uv=True) + rot_mat_pos = v @ u.transpose(-1, -2) + v_neg = v.clone() + v_neg[:, :, 2] *= -1 + rot_mat_neg = v_neg @ u.transpose(-1, -2) + rot_mat = torch.where(torch.det(rot_mat_pos)[:, None, None] > 0, rot_mat_pos, rot_mat_neg) + assert torch.all(torch.det(rot_mat) > 0) + + # Compute translation (uncenter centroid) + translation = -rot_mat @ centroid_a[:, :, None] + centroid_b[:, :, None] + + transform = torch.cat((rot_mat, translation), dim=2) + return transform + + +def sinkhorn_unbalanced(feature1, feature2, epsilon, gamma, max_iter, matrix='cosine'): + """ + Sinkhorn algorithm for Unbalanced Optimal Transport. + Modified from https://github.com/valeoai/FLOT/ + Args: + feature1 (torch.Tensor): + (B, N, C) Point-wise features for points cloud 1. + feature2 (torch.Tensor): + (B, M, C) Point-wise features for points cloud 2. + epsilon (torch.Tensor): + Entropic regularization. + gamma (torch.Tensor): + Mass regularization. + max_iter (int): + Number of iteration of the Sinkhorn algorithm. + Returns: + T (torch.Tensor): + (B, N, M) Transport plan between point cloud 1 and 2. + """ + if matrix == 'cosine': + # Transport cost matrix + feature1 = feature1 / torch.sqrt(torch.sum(feature1 ** 2, -1, keepdim=True) + 1e-8) + feature2 = feature2 / torch.sqrt(torch.sum(feature2 ** 2, -1, keepdim=True) + 1e-8) + C = 1.0 - torch.bmm(feature1, feature2.transpose(1, 2)) + elif matrix == 'euclidean': + distance_matrix = torch.sum(feature1 ** 2, -1, keepdim=True) + distance_matrix = distance_matrix + torch.sum(feature2 ** 2, -1, keepdim=True).transpose(1, 2) + distance_matrix = distance_matrix - 2 * torch.bmm(feature1, feature2.transpose(1, 2)) # c^2=a^2+b^2-2abcos + distance_matrix = distance_matrix ** 0.5 + # d_max, _ = torch.max(distance_matrix, dim=2, keepdim=True) + C = distance_matrix + + # Entropic regularisation + K = torch.exp(-C / epsilon) # * support + + # Early return if no iteration + if max_iter == 0: + return K + + # Init. of Sinkhorn algorithm + power = gamma / (gamma + epsilon + 1e-8) + a = (torch.ones((K.shape[0], K.shape[1], 1), device=feature1.device, dtype=feature1.dtype) / K.shape[1]) + prob1 = (torch.ones((K.shape[0], K.shape[1], 1), device=feature1.device, dtype=feature1.dtype) / K.shape[1]) + prob2 = (torch.ones((K.shape[0], K.shape[2], 1), device=feature2.device, dtype=feature2.dtype) / K.shape[2]) + + # Sinkhorn algorithm + for _ in range(max_iter): + # Update b + KTa = torch.bmm(K.transpose(1, 2), a) + b = torch.pow(prob2 / (KTa + 1e-8), power) + # Update a + Kb = torch.bmm(K, b) + a = torch.pow(prob1 / (Kb + 1e-8), power) + + # Transportation map + T = torch.mul(torch.mul(a, K), b.transpose(1, 2)) + return T + + +class UOTHead(nn.Module): + + def __init__(self, nb_iter=5,name='original'): + super().__init__() + self.epsilon = torch.nn.Parameter(torch.zeros(1)) # Entropic regularisation + self.gamma = torch.nn.Parameter(torch.zeros(1)) # Mass regularisation + self.nb_iter = nb_iter + self.name=name + + def forward(self, batch_dict, src_coords=None, mode='pairs'): + + feats = batch_dict['fea_kpt_'+self.name].squeeze(-1) + + B, C, NUM = feats.shape + + assert B % 2 == 0, "Batch size must be multiple of 2: B anchor + B positive samples" + B = B // 2 + feat1 = feats[:B] + feat2 = feats[B:] + + coords = batch_dict['key_points'] + coords1 = coords[:B, :, 0:3] + coords2 = coords[B:, :, 0:3] + + correspondences_feature = sinkhorn_unbalanced( + feat1.permute(0, 2, 1), + feat2.permute(0, 2, 1), + epsilon=torch.exp(self.epsilon) + 0.03, + gamma=torch.exp(self.gamma), + max_iter=self.nb_iter, + matrix='cosine', + ) + + feature_corr_sum = correspondences_feature.sum(-1, keepdim=True) + project_kpts = (correspondences_feature @ coords2) / (feature_corr_sum + 1e-8) + project_feas = (correspondences_feature @ feat2.permute(0, 2, 1)) / (feature_corr_sum + 1e-8) + + batch_dict['project_kpts_'+self.name] = project_kpts + batch_dict['project_feas_'+self.name] = project_feas.permute(0, 2, 1) + # batch_dict['project_coord_kpts'] = project_coord_kpts + batch_dict['correspondences_feature_'+self.name] = correspondences_feature + # batch_dict['correspondences_coord'] = correspondences_coord + + transformation = compute_rigid_transform(coords1, project_kpts, feature_corr_sum.squeeze(-1)) + batch_dict['transformation_'+self.name] = transformation + + return batch_dict \ No newline at end of file