init fusion lcd orin config

2026-03-04 20:07:57 +08:00
commit bc0498e453
42 changed files with 4750 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,4 @@
 result/
 sequences/
 KITTI/
 KITTI360/
--- a/ALIKE/LICENSE
+++ b/ALIKE/LICENSE
@@ -0,0 +1,29 @@
 BSD 3-Clause License
 Copyright (c) 2022, Zhao Xiaoming
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/ALIKE/README.md
+++ b/ALIKE/README.md
@@ -0,0 +1,117 @@
 # News: the cpp version is released [ALIKE-cpp](https://github.com/Shiaoming/ALIKE-cpp).
 # ALIKE: Accurate and Lightweight Keypoint Detection and Descriptor Extraction
 ALIKE applies a differentiable keypoint detection module to detect accurate sub-pixel keypoints. The network runs at 95 frames per second on 640 x 480 images on an NVIDIA Titan X (Pascal) GPU and achieves performance comparable to the state of the art. ALIKE benefits real-time applications on resource-limited platforms and devices. Technical details are described in [this paper](https://arxiv.org/pdf/2112.02906.pdf).
 > ```
 > Xiaoming Zhao, Xingming Wu, Jinyu Miao, Weihai Chen, Peter C. Y. Chen, Zhengguo Li, "ALIKE: Accurate and Lightweight Keypoint
 > Detection and Descriptor Extraction," IEEE Transactions on Multimedia, 2022.
 > ```
 ![](./assets/alike.png)
 If you use ALIKE in an academic work, please cite:
 ```
@article{Zhao2022ALIKE,
    title = {ALIKE: Accurate and Lightweight Keypoint Detection and Descriptor Extraction},
    url = {http://arxiv.org/abs/2112.02906},
    doi = {10.1109/TMM.2022.3155927},
    journal = {IEEE Transactions on Multimedia},
    author = {Zhao, Xiaoming and Wu, Xingming and Miao, Jinyu and Chen, Weihai and Chen, Peter C. Y. and Li, Zhengguo},
    month = march,
    year = {2022},
 }
 ```
 ## 1. Prerequisites
 The required packages are listed in the `requirements.txt` :
 ```shell
 pip install -r requirements.txt
 ```
 ## 2. Models
 Off-the-shelf weights for the four ALIKE model variants are provided in `models/`.
 ## 3. Run demo
 ```shell
 $ python demo.py -h
 usage: demo.py [-h] [--model {alike-t,alike-s,alike-n,alike-l}]
               [--device DEVICE] [--top_k TOP_K] [--scores_th SCORES_TH]
               [--n_limit N_LIMIT] [--no_display] [--no_sub_pixel]
               input
 ALike Demo.
 positional arguments:
  input                 Image directory or movie file or "camera0" (for
                        webcam0).
 optional arguments:
  -h, --help            show this help message and exit
  --model {alike-t,alike-s,alike-n,alike-l}
                        The model configuration
  --device DEVICE       Running device (default: cuda).
  --top_k TOP_K         Detect top K keypoints. -1 for threshold based mode,
                        >0 for top K mode. (default: -1)
  --scores_th SCORES_TH
                        Detector score threshold (default: 0.2).
  --n_limit N_LIMIT     Maximum number of keypoints to be detected (default:
                        5000).
  --no_display          Do not display images to screen. Useful if running
                        remotely (default: False).
  --no_sub_pixel        Do not detect sub-pixel keypoints (default: False).
 ```
 ## 4. Examples
 ### KITTI example
 ```shell
 python demo.py assets/kitti 
 ```
 ![](./assets/kitti.gif)
 ### TUM example
 ```shell
 python demo.py assets/tum 
 ```
 ![](./assets/tum.gif)
 ## 5. Efficiency and performance
 | Models | Parameters | GFLOPs(640x480) | MHA@3 on Hpatches | mAA(10°) on [IMW2020-test](https://www.cs.ubc.ca/research/image-matching-challenge/2021/leaderboard) (Stereo) |
 |:---:|:---:|:---:|:-----------------:|:-------------------------------------------------------------------------------------------------------------:|
 | D2-Net(MS) | 7653KB | 889.40 |      38.33%       |                                                    12.27%                                                     |
 | LF-Net(MS) | 2642KB | 24.37 |      57.78%       |                                                    23.44%                                                     |
 | SuperPoint | 1301KB | 26.11 |      70.19%       |                                                    28.97%                                                     |
 | R2D2(MS) | 484KB | 464.55 |      71.48%       |                                                    39.02%                                                     |
 | ASLFeat(MS) | 823KB | 77.58 |      73.52%       |                                                    33.65%                                                     |
 | DISK | 1092KB | 98.97 |      70.56%       |                                                    51.22%                                                     |
 | ALike-N | 318KB | 7.909 |      75.74%       |                                                    47.18%                                                     |
 | ALike-L | 653KB | 19.685 |      76.85%       |                                                    49.58%                                                     |
 ### Evaluation on Hpatches
 - Download [hpatches-sequences-release](https://hpatches.github.io/) and put it into `hseq/hpatches-sequences-release`.
 - Remove the unreliable sequences, as done in D2-Net.
 - Run the following command to evaluate the performance:
  ```shell  
  python hseq/eval.py
  ```
 For more details, please refer to the [paper](https://arxiv.org/abs/2112.02906).
--- a/ALIKE/pycache/alike.cpython-38.pyc
+++ b/ALIKE/pycache/alike.cpython-38.pyc
--- a/ALIKE/pycache/alnet.cpython-38.pyc
+++ b/ALIKE/pycache/alnet.cpython-38.pyc
--- a/ALIKE/pycache/soft_detect.cpython-38.pyc
+++ b/ALIKE/pycache/soft_detect.cpython-38.pyc
--- a/ALIKE/alike.py
+++ b/ALIKE/alike.py
@@ -0,0 +1,143 @@
 import logging
 import os
 import cv2
 import torch
 from copy import deepcopy
 import torch.nn.functional as F
 from torchvision.transforms import ToTensor
 import math
 from ALIKE.alnet import ALNet
 from ALIKE.soft_detect import DKD
 import time
 # Constructor keyword arguments for each published ALIKE variant. Every entry can be
 # splatted straight into ``ALike(**configs[name], ...)``; ``model_path`` points at the
 # checkpoint stored in the ``models`` directory next to this file.
 configs = {
    'alike-t': dict(c1=8, c2=16, c3=32, c4=64, dim=64, single_head=True, radius=2,
                    model_path=os.path.join(os.path.dirname(__file__), 'models', 'alike-t.pth')),
    'alike-s': dict(c1=8, c2=16, c3=48, c4=96, dim=96, single_head=True, radius=2,
                    model_path=os.path.join(os.path.dirname(__file__), 'models', 'alike-s.pth')),
    'alike-n': dict(c1=16, c2=32, c3=64, c4=128, dim=128, single_head=True, radius=2,
                    model_path=os.path.join(os.path.dirname(__file__), 'models', 'alike-n.pth')),
    'alike-l': dict(c1=32, c2=64, c3=128, c4=128, dim=128, single_head=False, radius=2,
                    model_path=os.path.join(os.path.dirname(__file__), 'models', 'alike-l.pth')),
 }
 class ALike(ALNet):
    """ALNet feature network combined with the DKD keypoint detector.
    ``extract_dense_map`` runs the network on an image tensor and returns dense maps;
    ``forward`` is the end-to-end entry point that takes a numpy image and returns
    keypoints, descriptors and scores as numpy arrays.
    """
    def __init__(self,
                 # ================================== feature encoder
                 c1: int = 32, c2: int = 64, c3: int = 128, c4: int = 128, dim: int = 128,
                 single_head: bool = False,
                 # ================================== detect parameters
                 radius: int = 2,
                 top_k: int = 500, scores_th: float = 0.5,
                 n_limit: int = 5000,
                 device: str = 'cpu',
                 model_path: str = ''
                 ):
        """
        :param c1, c2, c3, c4, dim, single_head: forwarded unchanged to ``ALNet``
        :param radius, top_k, scores_th, n_limit: forwarded unchanged to ``DKD``
        :param device: device the weights live on and to which input images are moved
        :param model_path: checkpoint to load; an empty string keeps the freshly
                           initialised weights (and leaves the module in training mode)
        """
        super().__init__(c1, c2, c3, c4, dim, single_head)
        self.radius = radius
        self.top_k = top_k
        self.n_limit = n_limit
        self.scores_th = scores_th
        self.dkd = DKD(radius=self.radius, top_k=self.top_k,
                       scores_th=self.scores_th, n_limit=self.n_limit)
        self.device = device
        if model_path != '':
            state_dict = torch.load(model_path, self.device)
            self.load_state_dict(state_dict)
            self.eval()
            logging.info(f'Loaded model parameters from {model_path}')
            logging.info(
                f"Number of model parameters: {sum(p.numel() for p in self.parameters() if p.requires_grad) / 1e3}KB")
        # Move the weights even when no checkpoint is loaded: forward() always sends the
        # image to self.device, so weights left on the CPU would cause a device mismatch.
        self.to(self.device)
    def extract_dense_map(self, image, ret_dict=False):
        """
        Run the network on an image tensor and return dense descriptor and score maps.
        :param image: tensor BxCxHxW
        :param ret_dict: return a dict with keys 'descriptor_map' and 'scores_map'
                         instead of a tuple
        :return: (descriptor_map, scores_map), cropped back to HxW; descriptors are
                 L2-normalised along the channel dimension
        """
        # ====================================================
        # the network needs H and W to be integer multiples of 2^5;
        # if they are not, zero-pad at the bottom/right up to the next multiple
        b, c, h, w = image.shape
        h_ = math.ceil(h / 32) * 32
        w_ = math.ceil(w / 32) * 32
        if h_ != h or w_ != w:
            image = F.pad(image, (0, w_ - w, 0, h_ - h))
        # ====================================================
        scores_map, descriptor_map = super().forward(image)
        # ====================================================
        # crop the padding away again
        if h_ != h or w_ != w:
            descriptor_map = descriptor_map[:, :, :h, :w]
            scores_map = scores_map[:, :, :h, :w]  # Bx1xHxW
        # ====================================================
        # BxCxHxW
        descriptor_map = torch.nn.functional.normalize(descriptor_map, p=2, dim=1)
        if ret_dict:
            return {'descriptor_map': descriptor_map, 'scores_map': scores_map, }
        else:
            return descriptor_map, scores_map
    def forward(self, img, image_size_max=99999, sort=False, sub_pixel=False):
        """
        :param img: np.array HxWx3, RGB
        :param image_size_max: maximum image size, otherwise, the image will be resized
        :param sort: sort keypoints by scores
        :param sub_pixel: whether to use sub-pixel accuracy
        :return: a dictionary with 'keypoints' (pixel coordinates of the original image),
                 'descriptors', 'scores', 'scores_map' and 'time' (seconds spent in
                 detection, measured with time.time())
        """
        H, W, three = img.shape
        assert three == 3, "input image shape should be [HxWx3]"
        # ==================== image size constraint
        image = deepcopy(img)
        max_hw = max(H, W)
        if max_hw > image_size_max:
            ratio = float(image_size_max / max_hw)
            image = cv2.resize(image, dsize=None, fx=ratio, fy=ratio)
        # ==================== convert image to tensor
        image = torch.from_numpy(image).to(self.device).to(torch.float32).permute(2, 0, 1)[None] / 255.0
        # ==================== extract keypoints
        start = time.time()
        with torch.no_grad():
            descriptor_map, scores_map = self.extract_dense_map(image)
            keypoints, descriptors, scores, _ = self.dkd(scores_map, descriptor_map,
                                                         sub_pixel=sub_pixel)
            keypoints, descriptors, scores = keypoints[0], descriptors[0], scores[0]
            # DKD returns coordinates normalised to [-1, 1]; map them to pixel
            # coordinates of the ORIGINAL (un-resized) image
            keypoints = (keypoints + 1) / 2 * keypoints.new_tensor([[W - 1, H - 1]])
        if sort:
            indices = torch.argsort(scores, descending=True)
            keypoints = keypoints[indices]
            descriptors = descriptors[indices]
            scores = scores[indices]
        end = time.time()
        return {'keypoints': keypoints.cpu().numpy(),
                'descriptors': descriptors.cpu().numpy(),
                'scores': scores.cpu().numpy(),
                'scores_map': scores_map.cpu().numpy(),
                'time': end - start, }
 if __name__ == '__main__':
    # Report FLOPs and parameter count of the largest ALIKE variant on a random image.
    import numpy as np
    from thop import profile
    model = ALike(c1=32, c2=64, c3=128, c4=128, dim=128, single_head=False)
    dummy = np.random.random((640, 480, 3)).astype(np.float32)
    flops, params = profile(model, inputs=(dummy, 9999, False), verbose=False)
    print(f"{'Computational complexity: ':<30}  {flops / 1e9:<8} GFLops")
    print(f"{'Number of parameters: ':<30}  {params / 1e3:<8} KB")
--- a/ALIKE/alnet.py
+++ b/ALIKE/alnet.py
@@ -0,0 +1,163 @@
 import torch
 from torch import nn
 from torchvision.models import resnet
 from typing import Optional, Callable
 class ConvBlock(nn.Module):
    """Two consecutive 3x3 convolution -> normalisation -> activation stages."""
    def __init__(self, in_channels, out_channels,
                 gate: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        """
        :param in_channels: channels of the input feature map
        :param out_channels: channels produced by both convolutions
        :param gate: activation module; an in-place ReLU is used when None
        :param norm_layer: factory for the normalisation layers; BatchNorm2d when None
        """
        super().__init__()
        self.gate = nn.ReLU(inplace=True) if gate is None else gate
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        self.conv1 = resnet.conv3x3(in_channels, out_channels)
        self.bn1 = make_norm(out_channels)
        self.conv2 = resnet.conv3x3(out_channels, out_channels)
        self.bn2 = make_norm(out_channels)
    def forward(self, x):
        # B x in_channels x H x W  ->  B x out_channels x H x W
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            x = self.gate(norm(conv(x)))
        return x
 # copied from torchvision\models\resnet.py#27->BasicBlock
 class ResBlock(nn.Module):
    """Residual block: two 3x3 conv+norm stages with a skip connection.
    Unlike torchvision's BasicBlock, the activation (``gate``) is configurable.
    """
    expansion: int = 1
    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            groups: int = 1,
            base_width: int = 64,
            dilation: int = 1,
            gate: Optional[Callable[..., nn.Module]] = None,
            norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        """
        :param inplanes: input channels
        :param planes: output channels
        :param stride: stride of the first convolution
        :param downsample: optional module applied to the input before it is added
                           back as the skip connection
        :param groups, base_width, dilation: accepted for signature compatibility;
                                             only groups=1, base_width=64, dilation<=1 work
        :param gate: activation module; an in-place ReLU is used when None
        :param norm_layer: factory for the normalisation layers; BatchNorm2d when None
        :raises ValueError: if groups != 1 or base_width != 64
        :raises NotImplementedError: if dilation > 1
        """
        super().__init__()
        self.gate = nn.ReLU(inplace=True) if gate is None else gate
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError('ResBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in ResBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = resnet.conv3x3(inplanes, planes, stride)
        self.bn1 = make_norm(planes)
        self.conv2 = resnet.conv3x3(planes, planes)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # main branch: conv -> norm -> gate -> conv -> norm
        out = self.gate(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # skip branch: identity, or the projection when one was supplied
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.gate(out)
 class ALNet(nn.Module):
    """Feature network producing a dense score map and a dense descriptor map.
    Four encoder stages at resolutions 1, 1/2, 1/8 and 1/32 are each reduced to
    ``dim // 4`` channels, upsampled back to full resolution, concatenated, and fed
    to a 1x1 head that emits ``dim`` descriptor channels plus one score channel.
    """
    def __init__(self, c1: int = 32, c2: int = 64, c3: int = 128, c4: int = 128, dim: int = 128,
                 single_head: bool = True,
                 ):
        """
        :param c1, c2, c3, c4: channel widths of the four encoder stages
        :param dim: descriptor dimension
        :param single_head: when False, an extra 1x1 conv + gate precedes the output head
        """
        super().__init__()
        self.feature_size = dim
        self.gate = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool4 = nn.MaxPool2d(kernel_size=4, stride=4)
        def residual_stage(cin, cout):
            # residual stage with a 1x1 conv projecting the skip path to `cout` channels
            return ResBlock(inplanes=cin, planes=cout, stride=1,
                            downsample=nn.Conv2d(cin, cout, 1),
                            gate=self.gate,
                            norm_layer=nn.BatchNorm2d)
        self.block1 = ConvBlock(3, c1, self.gate, nn.BatchNorm2d)
        self.block2 = residual_stage(c1, c2)
        self.block3 = residual_stage(c2, c3)
        self.block4 = residual_stage(c3, c4)
        # ================================== feature aggregation
        quarter = dim // 4
        self.conv1 = resnet.conv1x1(c1, quarter)
        self.conv2 = resnet.conv1x1(c2, quarter)
        self.conv3 = resnet.conv1x1(c3, quarter)
        self.conv4 = resnet.conv1x1(c4, quarter)
        bilinear = dict(mode='bilinear', align_corners=True)
        self.upsample2 = nn.Upsample(scale_factor=2, **bilinear)
        self.upsample4 = nn.Upsample(scale_factor=4, **bilinear)
        self.upsample8 = nn.Upsample(scale_factor=8, **bilinear)
        self.upsample32 = nn.Upsample(scale_factor=32, **bilinear)
        # ================================== detector and descriptor head
        self.single_head = single_head
        if not self.single_head:
            self.convhead1 = resnet.conv1x1(dim, dim)
        self.convhead2 = resnet.conv1x1(dim, dim + 1)
    def forward(self, image):
        """
        :param image: tensor Bx3xHxW
        :return: (scores_map Bx1xHxW with sigmoid applied, descriptor_map BxdimxHxW)
        """
        # ================================== feature encoder
        enc1 = self.block1(image)  # B x c1 x H x W
        enc2 = self.block2(self.pool2(enc1))  # B x c2 x H/2 x W/2
        enc3 = self.block3(self.pool4(enc2))  # B x c3 x H/8 x W/8
        enc4 = self.block4(self.pool4(enc3))  # B x c4 x H/32 x W/32
        # ================================== feature aggregation
        # each stage: 1x1 conv to dim//4 channels, gate, upsample to H x W
        agg1 = self.gate(self.conv1(enc1))
        agg2 = self.upsample2(self.gate(self.conv2(enc2)))
        agg3 = self.upsample8(self.gate(self.conv3(enc3)))
        agg4 = self.upsample32(self.gate(self.conv4(enc4)))
        fused = torch.cat([agg1, agg2, agg3, agg4], dim=1)  # B x dim x H x W
        # ================================== detector and descriptor head
        if not self.single_head:
            fused = self.gate(self.convhead1(fused))
        head = self.convhead2(fused)  # B x dim+1 x H x W
        # the last channel is the keypoint score, everything before it the descriptor
        descriptor_map = head[:, :-1, :, :]
        scores_map = torch.sigmoid(head[:, -1, :, :]).unsqueeze(1)
        return scores_map, descriptor_map
 if __name__ == '__main__':
    # Report FLOPs and parameter count of the alike-n sized network on a random input.
    from thop import profile
    model = ALNet(c1=16, c2=32, c3=64, c4=128, dim=128, single_head=True)
    dummy = torch.randn(1, 3, 640, 480)
    flops, params = profile(model, inputs=(dummy,), verbose=False)
    print(f"{'Computational complexity: ':<30}  {flops / 1e9:<8} GFLops")
    print(f"{'Number of parameters: ':<30}  {params / 1e3:<8} KB")
--- a/ALIKE/demo.py
+++ b/ALIKE/demo.py
@@ -0,0 +1,167 @@
 import copy
 import os
 import cv2
 import glob
 import logging
 import argparse
 import numpy as np
 from tqdm import tqdm
 from alike import ALike, configs
 class ImageLoader(object):
    """Index-based frame source over a webcam, a video file or an image directory.
    ``loader[i]`` returns frame ``i`` as read by OpenCV. In camera and video mode it
    returns None once ``i`` exceeds ``len(loader)``; in image mode an out-of-range
    index raises IndexError, which ends a plain ``for`` loop.
    """
    def __init__(self, filepath: str):
        """
        :param filepath: "camera<N>" for webcam N, a path to a video file, or a path
                         to a directory containing *.png / *.jpg / *.ppm images
        :raises IOError: if the camera/video cannot be opened or the path does not exist
        """
        # frame budget for camera mode; replaced by the real count for videos and images
        self.N = 3000
        if filepath.startswith('camera'):
            camera = int(filepath[6:])
            self.cap = cv2.VideoCapture(camera)
            if not self.cap.isOpened():
                raise IOError(f"Can't open camera {camera}!")
            logging.info(f'Opened camera {camera}')
            self.mode = 'camera'
        elif os.path.exists(filepath):
            if os.path.isfile(filepath):
                self.cap = cv2.VideoCapture(filepath)
                if not self.cap.isOpened():
                    raise IOError(f"Can't open video {filepath}!")
                rate = self.cap.get(cv2.CAP_PROP_FPS)
                self.N = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
                duration = self.N / rate
                logging.info(f'Opened video {filepath}')
                logging.info(f'Frames: {self.N}, FPS: {rate}, Duration: {duration}s')
                self.mode = 'video'
            else:
                self.images = glob.glob(os.path.join(filepath, '*.png')) + \
                              glob.glob(os.path.join(filepath, '*.jpg')) + \
                              glob.glob(os.path.join(filepath, '*.ppm'))
                self.images.sort()
                self.N = len(self.images)
                logging.info(f'Loading {self.N} images')
                self.mode = 'images'
        else:
            # a single message string: IOError(a, b) would be parsed as (errno, strerror)
            raise IOError(f'Error filepath (camerax/path of images/path of videos): {filepath}')
    def __getitem__(self, item):
        """
        :param item: frame index
        :return: the frame, or None when ``item`` is past the end in camera/video mode
        :raises IOError: if the camera/video delivers no frame
        :raises Exception: if an image file cannot be decoded
        """
        if self.mode == 'camera' or self.mode == 'video':
            if item > self.N:
                return None
            if self.mode == 'video':
                # seek BEFORE reading so that index `item` really yields frame `item`
                # (seeking after the read returned frame 0 twice and lagged by one)
                self.cap.set(cv2.CAP_PROP_POS_FRAMES, item)
            ret, img = self.cap.read()
            if not ret:
                # raising a bare string is itself a TypeError in Python 3
                raise IOError(f"Can't read image from {self.mode}")
        elif self.mode == 'images':
            filename = self.images[item]
            img = cv2.imread(filename)
            if img is None:
                raise Exception('Error reading image %s' % filename)
        return img
    def __len__(self):
        return self.N
 class SimpleTracker(object):
    """Frame-to-frame matcher that draws keypoints and their matches to the previous frame."""
    def __init__(self):
        self.pts_prev = None
        self.desc_prev = None
    @staticmethod
    def _as_pixel(pt):
        # nearest integer (x, y) position, as required by the cv2 drawing calls
        return (int(round(pt[0])), int(round(pt[1])))
    def update(self, img, pts, desc):
        """
        Match the new keypoints against the previous frame and draw the result.
        :param img: image to draw on (a deep copy is drawn, the input is untouched)
        :param pts: keypoints of the current frame, indexable as pts[i] -> (x, y)
        :param desc: descriptors of the current frame, one row per keypoint
        :return: (annotated image, number of matches); the first call has no previous
                 frame, so it only draws the keypoints and reports 0 matches
        """
        canvas = copy.deepcopy(img)
        if self.pts_prev is None:
            n_matched = 0
            for pt in pts:
                cv2.circle(canvas, self._as_pixel(pt), 1, (0, 0, 255), -1, lineType=16)
        else:
            pairs = self.mnn_mather(self.desc_prev, desc)
            prev_pts, cur_pts = self.pts_prev[pairs[:, 0]], pts[pairs[:, 1]]
            n_matched = len(pairs)
            for prev_pt, cur_pt in zip(prev_pts, cur_pts):
                start = self._as_pixel(prev_pt)
                end = self._as_pixel(cur_pt)
                cv2.line(canvas, start, end, (0, 255, 0), lineType=16)
                cv2.circle(canvas, end, 1, (0, 0, 255), -1, lineType=16)
        # the current frame becomes the reference for the next call
        self.pts_prev = pts
        self.desc_prev = desc
        return canvas, n_matched
    def mnn_mather(self, desc1, desc2):
        """
        Mutual-nearest-neighbour matching on descriptor similarity.
        :param desc1: N1xD array
        :param desc2: N2xD array
        :return: Mx2 integer array of (index into desc1, index into desc2) pairs that
                 are each other's best match; similarities below 0.9 are zeroed first
        """
        sim = np.matmul(desc1, desc2.T)
        sim[sim < 0.9] = 0
        best_in_2 = sim.argmax(axis=1)
        best_in_1 = sim.argmax(axis=0)
        rows = np.arange(0, sim.shape[0])
        mutual = best_in_1[best_in_2] == rows
        pairs = np.stack([rows[mutual], best_in_2[mutual]])
        return pairs.transpose()
 if __name__ == '__main__':
    # Demo: extract ALIKE keypoints on every frame of the input, match them to the
    # previous frame and (optionally) display the result.
    cli = argparse.ArgumentParser(description='ALike Demo.')
    cli.add_argument('--input', type=str, default=r'E:\caodanyang\dataset\KITTI\odometry\data_odometry_color\dataset\sequences\00\image_2',
                     help='Image directory or movie file or "camera0" (for webcam0).')
    cli.add_argument('--model', choices=['alike-t', 'alike-s', 'alike-n', 'alike-l'], default="alike-t",
                     help="The model configuration")
    cli.add_argument('--device', type=str, default='cuda', help="Running device (default: cuda).")
    cli.add_argument('--top_k', type=int, default=-1,
                     help='Detect top K keypoints. -1 for threshold based mode, >0 for top K mode. (default: -1)')
    cli.add_argument('--scores_th', type=float, default=0.2,
                     help='Detector score threshold (default: 0.2).')
    cli.add_argument('--n_limit', type=int, default=5000,
                     help='Maximum number of keypoints to be detected (default: 5000).')
    cli.add_argument('--no_display', action='store_true',
                     help='Do not display images to screen. Useful if running remotely (default: False).')
    cli.add_argument('--no_sub_pixel', action='store_true',
                     help='Do not detect sub-pixel keypoints (default: False).')
    opts = cli.parse_args()
    logging.basicConfig(level=logging.INFO)
    frames = ImageLoader(opts.input)
    extractor = ALike(**configs[opts.model],
                      device=opts.device,
                      top_k=opts.top_k,
                      scores_th=opts.scores_th,
                      n_limit=opts.n_limit)
    tracker = SimpleTracker()
    show = not opts.no_display
    use_sub_pixel = not opts.no_sub_pixel
    if show:
        logging.info("Press 'q' to stop!")
        cv2.namedWindow(opts.model)
    timings = []
    bar = tqdm(frames)
    for frame_bgr in bar:
        # the loader yields None once a camera/video source is exhausted
        if frame_bgr is None:
            break
        result = extractor(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB), sub_pixel=use_sub_pixel)
        kpts, desc = result['keypoints'], result['descriptors']
        timings.append(result['time'])
        canvas, n_matches = tracker.update(frame_bgr, kpts, desc)
        # mean of the per-frame FPS values seen so far
        mean_fps = (1. / np.stack(timings)).mean()
        caption = f"Fps:{mean_fps:.1f}, Keypoints/Matches: {len(kpts)}/{n_matches}"
        bar.set_description(caption)
        if not show:
            continue
        cv2.setWindowTitle(opts.model, opts.model + ': ' + caption)
        cv2.imshow(opts.model, canvas)
        if cv2.waitKey(1) == ord('q'):
            break
    logging.info('Finished!')
    if show:
        logging.info('Press any key to exit!')
        cv2.waitKey()
--- a/ALIKE/models/alike-l.pth
+++ b/ALIKE/models/alike-l.pth
--- a/ALIKE/models/alike-n.pth
+++ b/ALIKE/models/alike-n.pth
--- a/ALIKE/models/alike-s.pth
+++ b/ALIKE/models/alike-s.pth
--- a/ALIKE/models/alike-t.pth
+++ b/ALIKE/models/alike-t.pth
--- a/ALIKE/requirements.txt
+++ b/ALIKE/requirements.txt
@@ -0,0 +1,6 @@
 opencv-python~=4.5.1.48
 numpy~=1.19.5
 tqdm~=4.60.0
 torch~=1.8.0
 torchvision~=0.9.0
 thop~=0.0.31-2005241907
--- a/ALIKE/soft_detect.py
+++ b/ALIKE/soft_detect.py
@@ -0,0 +1,194 @@
 import torch
 from torch import nn
 import torch.nn.functional as F
 # coordinates system
 #  ------------------------------>  [ x: range=-1.0~1.0; w: range=0~W ]
 #  | -----------------------------
 #  | |                           |
 #  | |                           |
 #  | |                           |
 #  | |         image             |
 #  | |                           |
 #  | |                           |
 #  | |                           |
 #  | |---------------------------|
 #  v
 # [ y: range=-1.0~1.0; h: range=0~H ]
 def simple_nms(scores, nms_radius: int):
    """Fast non-maximum suppression: keep local maxima, zero everything else.
    :param scores: score tensor accepted by max_pool2d (e.g. Bx1xHxW)
    :param nms_radius: suppression radius; the pooling window is 2 * nms_radius + 1
    :return: tensor shaped like ``scores`` holding the original value at kept maxima
             and 0 elsewhere
    """
    assert (nms_radius >= 0)
    window = nms_radius * 2 + 1
    def local_max(t):
        return torch.nn.functional.max_pool2d(
            t, kernel_size=window, stride=1, padding=nms_radius)
    blank = torch.zeros_like(scores)
    keep = scores == local_max(scores)
    # two refinement rounds: mask out the neighbourhood of the maxima found so far and
    # accept points that are maxima among what is left
    for _ in range(2):
        suppressed = local_max(keep.float()) > 0
        remaining = torch.where(suppressed, blank, scores)
        extra = (remaining == local_max(remaining)) & (~suppressed)
        keep = keep | extra
    return torch.where(keep, scores, blank)
 def sample_descriptor(descriptor_map, kpts, bilinear_interp=False):
    """
    Look up one L2-normalised descriptor per keypoint.
    :param descriptor_map: BxCxHxW
    :param kpts: list, len=B, each is Nx2 keypoints as (x, y) normalised to -1.0 ~ 1.0
    :param bilinear_interp: bool, True samples with bilinear interpolation,
                            False reads the pixel obtained by truncating the coordinates
    :return: descriptors: list, len=B, each is NxD
    """
    batch_size, channel, height, width = descriptor_map.shape
    sampled = []
    for index in range(batch_size):
        pts = kpts[index]  # Nx2,(x,y)
        if not bilinear_interp:
            # normalised (-1~1) -> integer pixel coordinates
            pixels = ((pts + 1) / 2 * pts.new_tensor([[width - 1, height - 1]])).long()
            desc = descriptor_map[index, :, pixels[:, 1], pixels[:, 0]]  # CxN
        else:
            grid = pts.view(1, 1, -1, 2)
            desc = torch.nn.functional.grid_sample(descriptor_map[index].unsqueeze(0), grid,
                                                   mode='bilinear', align_corners=True)[0, :, 0, :]  # CxN
        desc = torch.nn.functional.normalize(desc, p=2, dim=0)
        sampled.append(desc.t())
    return sampled
 class DKD(nn.Module):
    """Keypoint detector working on a dense score map.
    Picks keypoints from the score map after non-maximum suppression (top-k or
    threshold mode), optionally refines them to sub-pixel positions with a soft-argmax
    over the local patch, and samples a descriptor for each keypoint.
    """
    def __init__(self, radius=2, top_k=0, scores_th=0.2, n_limit=20000):
        """
        Args:
            radius: soft detection radius, kernel size is (2 * radius + 1)
            top_k: top_k > 0: return top k keypoints
            scores_th: top_k <= 0 threshold mode:  scores_th > 0: return keypoints with scores>scores_th
                                                   else: return keypoints with scores > scores.mean()
            n_limit: max number of keypoint in threshold mode
        """
        super().__init__()
        self.radius = radius
        self.top_k = top_k
        self.scores_th = scores_th
        self.n_limit = n_limit
        self.kernel_size = 2 * self.radius + 1
        self.temperature = 0.1  # tuned temperature
        # extracts the (kernel_size x kernel_size) patch around every pixel
        self.unfold = nn.Unfold(kernel_size=self.kernel_size, padding=self.radius)
        # local xy grid
        x = torch.linspace(-self.radius, self.radius, self.kernel_size)
        # (kernel_size*kernel_size) x 2 : (w,h)
        # plain tensor attribute (not a registered buffer); detect_keypoints moves it
        # to the device/dtype of the patches before use
        self.hw_grid = torch.stack(torch.meshgrid([x, x])).view(2, -1).t()[:, [1, 0]]
    def detect_keypoints(self, scores_map, sub_pixel=True):
        """
        Detect keypoints on a score map.
        :param scores_map: Bx1xHxW
        :param sub_pixel: True: refine each keypoint with a soft-argmax over its local
                          patch; False: keep the integer NMS positions
        :return: three lists of length B:
                 keypoints: each Mx2, (x, y) normalised to -1.0 ~ 1.0
                 scoredispersitys: each a length-M tensor when sub_pixel, otherwise None
                 kptscores: each a length-M tensor, score map sampled at the keypoints
        """
        b, c, h, w = scores_map.shape
        scores_nograd = scores_map.detach()
        # NOTE: the NMS radius is fixed to 2 here; the variant using self.radius is
        # kept below, commented out
        # nms_scores = simple_nms(scores_nograd, self.radius)
        nms_scores = simple_nms(scores_nograd, 2)
        # remove border: the first (radius + 1) and the last radius rows/columns
        nms_scores[:, :, :self.radius + 1, :] = 0
        nms_scores[:, :, :, :self.radius + 1] = 0
        nms_scores[:, :, h - self.radius:, :] = 0
        nms_scores[:, :, :, w - self.radius:] = 0
        # detect keypoints without grad
        if self.top_k > 0:
            topk = torch.topk(nms_scores.view(b, -1), self.top_k)
            indices_keypoints = topk.indices  # B x top_k
        else:
            if self.scores_th > 0:
                masks = nms_scores > self.scores_th
                # nothing in the whole batch passes the threshold: fall back to the
                # per-image mean score as threshold
                if masks.sum() == 0:
                    th = scores_nograd.reshape(b, -1).mean(dim=1)  # th = self.scores_th
                    masks = nms_scores > th.reshape(b, 1, 1, 1)
            else:
                th = scores_nograd.reshape(b, -1).mean(dim=1)  # th = self.scores_th
                masks = nms_scores > th.reshape(b, 1, 1, 1)
            masks = masks.reshape(b, -1)
            indices_keypoints = []  # list, B x (any size)
            scores_view = scores_nograd.reshape(b, -1)
            for mask, scores in zip(masks, scores_view):
                indices = mask.nonzero(as_tuple=False)[:, 0]
                # too many candidates: keep the n_limit with the highest scores
                if len(indices) > self.n_limit:
                    kpts_sc = scores[indices]
                    sort_idx = kpts_sc.sort(descending=True)[1]
                    sel_idx = sort_idx[:self.n_limit]
                    indices = indices[sel_idx]
                indices_keypoints.append(indices)
        keypoints = []
        scoredispersitys = []
        kptscores = []
        if sub_pixel:
            # detect soft keypoints with grad backpropagation
            patches = self.unfold(scores_map)  # B x (kernel**2) x (H*W)
            self.hw_grid = self.hw_grid.to(patches)  # to device
            for b_idx in range(b):
                patch = patches[b_idx].t()  # (H*W) x (kernel**2)
                indices_kpt = indices_keypoints[b_idx]  # one dimension vector, say its size is M
                patch_scores = patch[indices_kpt]  # M x (kernel**2)
                # max is detached to prevent undesired backprop loops in the graph
                max_v = patch_scores.max(dim=1).values.detach()[:, None]
                x_exp = ((patch_scores - max_v) / self.temperature).exp()  # M * (kernel**2), in [0, 1]
                # \frac{ \sum{(i,j) \times \exp(x/T)} }{ \sum{\exp(x/T)} }
                xy_residual = x_exp @ self.hw_grid / x_exp.sum(dim=1)[:, None]  # Soft-argmax, Mx2
                # weighted mean of the squared (radius-normalised) distance between the
                # patch cells and the soft-argmax position
                hw_grid_dist2 = torch.norm((self.hw_grid[None, :, :] - xy_residual[:, None, :]) / self.radius,
                                           dim=-1) ** 2
                scoredispersity = (x_exp * hw_grid_dist2).sum(dim=1) / x_exp.sum(dim=1)
                # compute result keypoints
                # flat index -> (x, y) pixel position, then add the sub-pixel offset
                keypoints_xy_nms = torch.stack([indices_kpt % w, indices_kpt // w], dim=1)  # Mx2
                keypoints_xy = keypoints_xy_nms + xy_residual
                keypoints_xy = keypoints_xy / keypoints_xy.new_tensor(
                    [w - 1, h - 1]) * 2 - 1  # (w,h) -> (-1~1,-1~1)
                kptscore = torch.nn.functional.grid_sample(scores_map[b_idx].unsqueeze(0),
                                                           keypoints_xy.view(1, 1, -1, 2),
                                                           mode='bilinear', align_corners=True)[0, 0, 0, :]  # M
                keypoints.append(keypoints_xy)
                scoredispersitys.append(scoredispersity)
                kptscores.append(kptscore)
        else:
            for b_idx in range(b):
                indices_kpt = indices_keypoints[b_idx]  # one dimension vector, say its size is M
                # flat index -> (x, y) pixel position
                keypoints_xy_nms = torch.stack([indices_kpt % w, indices_kpt // w], dim=1)  # Mx2
                keypoints_xy = keypoints_xy_nms / keypoints_xy_nms.new_tensor(
                    [w - 1, h - 1]) * 2 - 1  # (w,h) -> (-1~1,-1~1)
                kptscore = torch.nn.functional.grid_sample(scores_map[b_idx].unsqueeze(0),
                                                           keypoints_xy.view(1, 1, -1, 2),
                                                           mode='bilinear', align_corners=True)[0, 0, 0, :]  # M
                keypoints.append(keypoints_xy)
                scoredispersitys.append(None)
                kptscores.append(kptscore)
        return keypoints, scoredispersitys, kptscores
    def forward(self, scores_map, descriptor_map, sub_pixel=False):
        """
        :param scores_map:  Bx1xHxW
        :param descriptor_map: BxCxHxW
        :param sub_pixel: whether to use sub-pixel keypoint detection; also selects
                          bilinear interpolation when sampling the descriptors
        :return: (keypoints, descriptors, kptscores, scoredispersitys), each a list of
                 length B; kpts: list[Nx2,...]; kptscores: list[N,....]
                 normalised position: -1.0 ~ 1.0
        """
        keypoints, scoredispersitys, kptscores = self.detect_keypoints(scores_map,
                                                                       sub_pixel)
        descriptors = sample_descriptor(descriptor_map, keypoints, sub_pixel)
        # keypoints: B M 2
        # descriptors: B M D
        # scoredispersitys:
        return keypoints, descriptors, kptscores, scoredispersitys
--- a/BEVNet.py
+++ b/BEVNet.py
@@ -0,0 +1,393 @@
 import math
 import torch
 import torch._utils
 import torch.nn as nn
 from typing import Optional, Callable
 from torchvision.models import resnet
 class RIConv2d(nn.Module):
    """2-D convolution whose kernel is shared along rings around its centre.

    Every kernel tap is assigned a ring index: its distance to the kernel
    centre, rounded to the nearest integer. All taps of one ring share a single
    learnable ``out_channel x in_channel`` weight (``weight1[ring]``). Taps whose
    ring index exceeds ``0.5 * (kernel_size - 1)`` are marked ``-1`` and stay zero.

    Attributes:
        mask: ``kernel_size x kernel_size`` long tensor of ring indices (``-1`` = unused tap).
        number: number of rings, i.e. the first dimension of ``weight1``.
        weight: all-zero ``[k, k, out, in]`` template; only its shape is used.
        weight1: learnable per-ring weights, ``[number, out, in]``.
        bias: learnable bias of length ``out_channel`` or ``None``.
    """
    def __init__(self, in_channel, out_channel, kernel_size=1, stride=1, padding=0, bias=True):
        super().__init__()
        self.padding = padding
        self.stride = stride
        self.use_bias = bias
        # (row, col) coordinate of every kernel tap, flattened row-major.
        idx = torch.arange(kernel_size ** 2).view(-1, 1)
        row = torch.div(idx, kernel_size, rounding_mode='floor')
        col = torch.fmod(idx, kernel_size)
        idx = torch.cat([row, col], dim=1)
        # Distance to the kernel centre; even kernel sizes are shifted by -0.5
        # before rounding.
        dis = (idx - 0.5 * (kernel_size - 1)).norm(dim=1) + 0.5 * (kernel_size % 2 - 1)
        dis = dis.view(kernel_size, kernel_size)
        dis = torch.round(dis).long()
        dis[dis > 0.5 * (kernel_size - 1)] = -1
        self.mask = dis
        self.number = int(torch.max(dis).item() + 1)
        # Plain tensor (not a parameter/buffer) so it never shows up in the
        # state_dict; it is kept as a shape template and must not be modified.
        self.weight = torch.zeros([kernel_size, kernel_size, out_channel, in_channel])
        if bias:
            self.bias = torch.nn.Parameter(torch.rand([out_channel, ]))
        else:
            self.bias = None
        self.weight1 = torch.nn.Parameter(torch.rand([self.number, out_channel, in_channel]))
    def forward(self, x):
        """Expand the per-ring weights into a dense kernel and convolve ``x`` with it."""
        # Build a fresh kernel on every call. Filling ``self.weight`` in place
        # (which happened whenever it already lived on the parameter's device)
        # attached autograd history to the template, so the next training step
        # tried to backpropagate through the previous, already freed, graph.
        weight = self.weight1.new_zeros(self.weight.shape)
        for i in range(self.number):
            mask = self.mask == i
            weight[mask] = self.weight1[i]
        # [k, k, out, in] -> [out, in, k, k] as expected by conv2d.
        weight = weight.permute(2, 3, 0, 1)
        y = torch.nn.functional.conv2d(x, weight, self.bias, self.stride, self.padding)
        return y
    def __repr__(self):
        return f"RIConv2d(in_channel={self.weight.shape[3]}, out_channel={self.weight.shape[2]}," \
               f" kernel_size={self.weight.shape[0]}, stride={self.stride}, padding={self.padding}, bias={self.bias is not None})"
 class RIMaxpool2d(nn.Module):
    """Max pooling over a roughly circular window.

    A tap of the ``kernel_size x kernel_size`` window takes part in the maximum
    only if its rounded distance to the window centre does not exceed
    ``0.5 * (kernel_size - 1)``; the remaining (corner) taps are ignored.
    """
    def __init__(self, kernel_size=1, stride=1, padding=0):
        super().__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        half_extent = 0.5 * (kernel_size - 1)
        flat = torch.arange(kernel_size ** 2).view(-1, 1)
        # (row, col) of each tap in row-major order.
        coords = torch.cat([torch.div(flat, kernel_size, rounding_mode='floor'),
                            torch.fmod(flat, kernel_size)], dim=1)
        radius = (coords - half_extent).norm(dim=1) + 0.5 * (kernel_size % 2 - 1)
        ring = torch.round(radius.view(kernel_size, kernel_size))
        ring[ring > half_extent] = -1  # -1 marks taps outside the window
        # Flat boolean selector over the k*k taps.
        self.mask = ring.view(-1, ) > -1
    def forward(self, x):
        batch, channels, height, width = x.shape
        k, s, p = self.kernel_size, self.stride, self.padding

        def out_len(size):
            # Same output-size formula as a regular strided window operation.
            return math.floor((size + 2 * p - (k - 1) - 1) / s + 1)

        patches = torch.nn.functional.unfold(x, kernel_size=k, stride=s, padding=p)
        patches = patches.view(batch, channels, k * k, out_len(height), out_len(width))
        # Bring the tap axis to the front so the mask can select whole taps.
        taps_first = patches.permute(2, 0, 1, 3, 4)
        selected = taps_first[self.mask]
        return torch.max(selected, dim=0, keepdim=False)[0]
    def __repr__(self):
        return f"RIMaxpool2d(kernel_size={self.kernel_size}, stride={self.stride}, padding={self.padding})"
 class RIAvgpool2d(nn.Module):
    """Average pooling over a roughly circular window.

    The window is expressed as a 0/1 kernel: taps whose rounded distance to the
    centre does not exceed ``0.5 * (kernel_size - 1)`` are 1, all others 0. The
    per-channel windowed sum is divided by the number of active taps.
    """
    def __init__(self, kernel_size=1, stride=1, padding=0):
        super().__init__()
        self.padding = padding
        self.stride = stride
        half_extent = 0.5 * (kernel_size - 1)
        flat = torch.arange(kernel_size ** 2).view(-1, 1)
        coords = torch.cat([torch.div(flat, kernel_size, rounding_mode='floor'),
                            torch.fmod(flat, kernel_size)], dim=1)
        radius = (coords - half_extent).norm(dim=1) + 0.5 * (kernel_size % 2 - 1)
        ring = torch.round(radius.view(kernel_size, kernel_size))
        ring[ring > half_extent] = -1  # -1 marks taps outside the window
        active = ring > -1
        # Number of active taps (0-dim tensor), used as the divisor.
        self.number = torch.sum(active)
        self.weight = torch.zeros([kernel_size, kernel_size, 1, 1])
        self.weight[active] = 1
    def forward(self, x):
        channels = x.shape[1]
        # [k, k, 1, 1] -> [1, 1, k, k], then one copy per channel for a
        # depthwise (groups == channels) convolution.
        kernel = self.weight.to(x.device).permute(2, 3, 0, 1).repeat(channels, 1, 1, 1)
        total = torch.nn.functional.conv2d(x, kernel, None, self.stride, self.padding, groups=channels)
        return total / self.number
    def __repr__(self):
        return f"RIAvgpool2d(kernel_size={self.weight.shape[0]}, stride={self.stride}, padding={self.padding})"
 class RIConvBlock(nn.Module):
    """Two stacked ``RIConv2d(5x5, padding=2, no bias) -> norm -> gate`` stages.

    ``gate`` defaults to an in-place ReLU and ``norm_layer`` to ``nn.BatchNorm2d``.
    """
    def __init__(self, in_channels, out_channels,
                 gate: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super().__init__()
        self.gate = nn.ReLU(inplace=True) if gate is None else gate
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        # First stage changes the channel count, second keeps it.
        self.conv1 = RIConv2d(in_channel=in_channels, out_channel=out_channels, kernel_size=5, padding=2, bias=False)
        self.bn1 = make_norm(out_channels)
        self.conv2 = RIConv2d(in_channel=out_channels, out_channel=out_channels, kernel_size=5, padding=2, bias=False)
        self.bn2 = make_norm(out_channels)
    def forward(self, x):
        stage1 = self.gate(self.bn1(self.conv1(x)))  # B x out_channels x H x W
        stage2 = self.gate(self.bn2(self.conv2(stage1)))  # B x out_channels x H x W
        return stage2
 class RIResBlock(nn.Module):
    """Residual block built from two 5x5 ``RIConv2d`` layers.

    The main path is ``conv1 -> bn1 -> gate -> conv2 -> bn2``; the (optionally
    projected) input is added and the gate is applied once more. Only
    ``groups=1``, ``base_width=64`` and ``dilation=1`` are accepted.
    """
    expansion: int = 1
    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            groups: int = 1,
            base_width: int = 64,
            dilation: int = 1,
            gate: Optional[Callable[..., nn.Module]] = None,
            norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(RIResBlock, self).__init__()
        self.gate = nn.ReLU(inplace=True) if gate is None else gate
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        unsupported_width = groups != 1 or base_width != 64
        if unsupported_width:
            raise ValueError('ResBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in ResBlock")
        # Both convolutions run with stride 1; ``stride`` is only stored on the
        # instance and is not applied to any layer of this block.
        self.conv1 = RIConv2d(in_channel=inplanes, out_channel=planes, kernel_size=5, stride=1, padding=2, bias=False)
        self.bn1 = make_norm(planes)
        self.conv2 = RIConv2d(in_channel=planes, out_channel=planes, kernel_size=5, stride=1, padding=2, bias=False)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = self.gate(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Shortcut: project the input when a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.gate(out)
 class RICNN(nn.Module):
    """Four-stage feature extractor built from the RI convolution / pooling layers.

    ``forward`` returns a one-channel score map (sigmoid) and a ``dim``-channel
    descriptor map, both at the resolution of the input image. The helper
    methods convert between the RI layers and their standard ``torch.nn``
    counterparts (``enable_ri`` / ``disable_ri``).

    Args:
        c1, c2, c3, c4: channel counts of the four stages.
        dim: descriptor dimension; each stage contributes ``dim // 4`` channels.
    """
    def __init__(self, c1: int = 8, c2: int = 16, c3: int = 32, c4: int = 64, dim: int = 64
                 ):
        super().__init__()
        self.gate = nn.ReLU(inplace=True)
        self.pool2 = RIMaxpool2d(kernel_size=2, stride=2)
        self.pool4 = RIMaxpool2d(kernel_size=5, stride=4, padding=1)
        self.block1 = RIConvBlock(3, c1, self.gate, nn.BatchNorm2d)
        self.block2 = RIResBlock(inplanes=c1, planes=c2, stride=1,
                                 downsample=nn.Conv2d(c1, c2, 1),
                                 gate=self.gate,
                                 norm_layer=nn.BatchNorm2d)
        self.block3 = RIResBlock(inplanes=c2, planes=c3, stride=1,
                                 downsample=nn.Conv2d(c2, c3, 1),
                                 gate=self.gate,
                                 norm_layer=nn.BatchNorm2d)
        self.block4 = RIResBlock(inplanes=c3, planes=c4, stride=1,
                                 downsample=nn.Conv2d(c3, c4, 1),
                                 gate=self.gate,
                                 norm_layer=nn.BatchNorm2d)
        self.conv1 = resnet.conv1x1(c1, dim // 4)
        self.conv2 = resnet.conv1x1(c2, dim // 4)
        self.conv3 = resnet.conv1x1(c3, dim // 4)
        # conv4 consumes the output of block4, which has c4 channels. It used to
        # be built with ``dim`` input channels, which only worked when c4 == dim.
        self.conv4 = resnet.conv1x1(c4, dim // 4)
        # Upsampling factors undo the cumulative pooling strides (2, 2*4, 2*4*4).
        self.upsample2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.upsample3 = nn.Upsample(scale_factor=8, mode='bilinear', align_corners=True)
        self.upsample4 = nn.Upsample(scale_factor=32, mode='bilinear', align_corners=True)
        self.out = nn.Conv2d(dim, dim + 1, 1)
    def forward(self, image):
        """Return ``(scores_map, descriptor_map)`` for ``image``.

        ``image`` must have 3 channels (block1 is built for 3 input channels).
        NOTE(review): H and W presumably need to be multiples of 32 so the
        upsampled stage outputs can be concatenated - confirm with callers.
        """
        x1 = self.block1(image)
        x2 = self.pool2(x1)
        x2 = self.block2(x2)
        x3 = self.pool4(x2)
        x3 = self.block3(x3)
        x4 = self.pool4(x3)
        x4 = self.block4(x4)
        # Project every stage to dim // 4 channels.
        x1 = self.gate(self.conv1(x1))
        x2 = self.gate(self.conv2(x2))
        x3 = self.gate(self.conv3(x3))
        x4 = self.gate(self.conv4(x4))
        x2_up = self.upsample2(x2)
        x3_up = self.upsample3(x3)
        x4_up = self.upsample4(x4)
        x1234 = torch.cat([x1, x2_up, x3_up, x4_up], dim=1)
        y = self.out(x1234)
        # Last channel is the keypoint score, the rest is the descriptor.
        descriptor_map = y[:, :-1, :, :]
        scores_map = torch.sigmoid(y[:, -1, :, :]).unsqueeze(1)
        return scores_map, descriptor_map
    def ri2maxpool(self, pool):
        """Replace an RI max pool by ``nn.MaxPool2d`` whose kernel size is the RI stride."""
        stride = pool.stride
        pool_new = nn.MaxPool2d(stride)
        return pool_new
    def maxpool2ri(self, pool):
        """Replace ``nn.MaxPool2d`` by an RI max pool with an enlarged, padded window."""
        kernel_size = stride = pool.stride
        # Extra taps (also used as padding) so the circular window covers the
        # original square one.
        ds = round((math.sqrt(2) - 1) / 2 * stride - 0.25 * (stride % 2 - 1))
        kernel_size = kernel_size + ds
        pool_new = RIMaxpool2d(kernel_size, stride, ds)
        return pool_new
    def ri2avgpool(self, pool):
        """Replace an RI average pool by ``nn.AvgPool2d`` whose kernel size is the RI stride."""
        stride = pool.stride
        pool_new = nn.AvgPool2d(stride)
        return pool_new
    def avgpool2ri(self, pool):
        """Replace ``nn.AvgPool2d`` by an RI average pool (kernel grows by 1 for stride > 3)."""
        kernel_size = stride = pool.stride
        if stride > 3:
            kernel_size = kernel_size + 1
        pool_new = RIAvgpool2d(kernel_size, stride)
        return pool_new
    def ri2conv(self, conv):
        """Bake the per-ring weights of an ``RIConv2d`` into an equivalent ``nn.Conv2d``."""
        ri = conv
        weight = ri.weight
        device = ri.weight1.device
        bias = ri.bias
        use_bias = bias is not None
        # Work on a copy so the RI layer's template tensor is left untouched.
        weight_copy = weight.clone().to(device)
        for i in range(ri.number):
            mask = ri.mask == i
            weight_copy[mask] = ri.weight1[i]
        # [k, k, out, in] -> [out, in, k, k]
        weight_copy = weight_copy.permute(2, 3, 0, 1)
        in_c = weight.shape[3]
        out_c = weight.shape[2]
        kz = weight.shape[0]
        sd = ri.stride
        pd = ri.padding
        conv_new = nn.Conv2d(in_channels=in_c, out_channels=out_c, kernel_size=kz, stride=sd, padding=pd, bias=use_bias)
        if use_bias:
            state_dict = {'weight': weight_copy, 'bias': bias}
        else:
            state_dict = {'weight': weight_copy}
        conv_new.load_state_dict(state_dict)
        return conv_new.to(device)
    def conv2ri(self, conv):
        """Turn an ``nn.Conv2d`` into an ``RIConv2d`` by averaging its weights per ring.

        Kernels smaller than 3 are returned unchanged.
        """
        weight = conv.weight
        bias = conv.bias
        device = weight.device
        in_c = weight.shape[1]
        out_c = weight.shape[0]
        kz = weight.shape[2]
        if kz < 3:
            return conv
        sd = conv.stride
        pd = conv.padding
        # Same ring-index computation as in RIConv2d.__init__.
        idx = torch.arange(kz ** 2).view(-1, 1)
        row = torch.div(idx, kz, rounding_mode='floor')
        col = torch.fmod(idx, kz)
        idx = torch.cat([row, col], dim=1)
        dis = (idx - 0.5 * (kz - 1)).norm(dim=1) + 0.5 * (kz % 2 - 1)
        dis = dis.view(kz, kz)
        dis = torch.round(dis).long()
        dis[dis > 0.5 * (kz - 1)] = -1
        mask = dis
        number = int(torch.max(dis).item() + 1)
        weight1 = torch.rand([number, out_c, in_c]).to(device)
        weight2 = weight.clone()
        # [out, in, k, k] -> [k, k, out, in] so the ring mask indexes the taps.
        weight2 = weight2.permute(2, 3, 0, 1)
        used_bias = bias is not None
        for i in range(number):
            mask1 = mask == i
            w = weight2[mask1]
            weight1[i] = torch.mean(w, dim=0)
        if used_bias:
            state_dict = {'weight1': weight1, 'bias': bias}
        else:
            state_dict = {'weight1': weight1}
        conv_new = RIConv2d(in_channel=in_c, out_channel=out_c, kernel_size=kz, stride=sd, padding=pd, bias=used_bias)
        conv_new.load_state_dict(state_dict)
        return conv_new.to(device)
    @staticmethod
    def _convert_children(parent, converters):
        """Replace each direct child of ``parent`` matching a ``(type, convert)`` pair."""
        # Iterate over a snapshot because setattr rewrites parent._modules.
        for name, child in list(parent._modules.items()):
            for kind, convert in converters:
                if isinstance(child, kind):
                    setattr(parent, name, convert(child))
    def _convert_layers(self, converters):
        """Apply ``converters`` to this module's children and to the children of its blocks."""
        self._convert_children(self, converters)
        for name, child in list(self._modules.items()):
            # Only sub-modules whose attribute name contains 'block' are
            # descended into (one level deep).
            if 'block' in name:
                self._convert_children(child, converters)
    def disable_ri(self):
        """Swap every RI pooling / convolution layer for its standard torch.nn counterpart."""
        self._convert_layers((
            (RIMaxpool2d, self.ri2maxpool),
            (RIAvgpool2d, self.ri2avgpool),
            (RIConv2d, self.ri2conv),
        ))
    def enable_ri(self):
        """Swap every standard pooling / convolution layer for its RI counterpart."""
        self._convert_layers((
            (nn.MaxPool2d, self.maxpool2ri),
            (nn.AvgPool2d, self.avgpool2ri),
            (nn.Conv2d, self.conv2ri),
        ))
 class EncodePosition(nn.Module):
    """Encode each point by a histogram of its distances to all points of the same sample.

    For every point a ``bins``-bin histogram of its Euclidean distances to the
    points of the same batch element is built over the range [1, 80],
    normalised to sum to one, and mapped to ``feature_size`` channels by a
    stack of 1x1 ``Conv1d`` layers. The result is added to ``fea`` (or, if a
    ``conv2`` attribute exists, concatenated with ``fea`` and passed through it).
    """
    def __init__(self, feature_size=128):
        super().__init__()
        self.bins = 16
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=self.bins, out_channels=feature_size//2, kernel_size=1, stride=1, padding=0, bias=True), nn.BatchNorm1d(feature_size//2), nn.ReLU(),
            nn.Conv1d(in_channels=feature_size//2, out_channels=feature_size//2, kernel_size=1, stride=1, padding=0, bias=True), nn.BatchNorm1d(feature_size//2), nn.ReLU(),
            nn.Conv1d(in_channels=feature_size//2, out_channels=feature_size, kernel_size=1, stride=1, padding=0, bias=True)
        )
        # self.conv2=(nn.Conv1d(in_channels=256,out_channels=128,kernel_size=1))
    def forward(self, x, fea):
        """
        :param x: point coordinates, B x N x C
        :param fea: per-point features, B x feature_size x N
        :return: features of the same shape as ``fea`` (when no ``conv2`` is attached)
        """
        b, n, c = x.shape
        # Pairwise distances: distance[i, j, k] = ||x[i, k] - x[i, j]||.
        x1 = x.unsqueeze(1)
        x2 = x.unsqueeze(2)
        dx = x1 - x2
        distance = dx.norm(p=2, dim=3)
        hists = torch.zeros([b, n, self.bins]).to(x.device)
        for i in range(b):
            for j in range(n):
                dis = distance[i, j]
                # histc ignores values outside [min, max], so the zero distance
                # of a point to itself never counts.
                hist = torch.histc(dis, bins=self.bins, min=1, max=80)
                hists[i, j] = hist
        # A point with no neighbour inside [1, 80] has an all-zero histogram.
        # Counts are integers, so clamping the denominator to 1 leaves every
        # non-empty histogram unchanged and turns the former 0 / 0 = NaN rows
        # into zeros.
        hists = hists / torch.sum(hists, dim=2, keepdim=True).clamp(min=1)
        x3 = hists.permute(0, 2, 1)
        x4 = self.conv1(x3)
        if hasattr(self, 'conv2'):
            x5 = torch.cat([fea, x4], dim=1)
            y = self.conv2(x5)
        else:
            y = fea + x4
        return y
--- a/README.md
+++ b/README.md
@@ -0,0 +1,42 @@
 # FUSION
 ## Table of Contents
 - [Paper](#paper)
 - [Overview](#overview)
 - [Prerequisites](#prerequisites)
 - [Running the Code](#running-the-code)
 - [Evaluation](#evaluation)
 ## Paper
 If you find this project helpful, please cite our papers:
 Cao D, Yue H, Liu Z, et al. BEVLCD+: Real-Time and Rotation-Invariant Loop Closure Detection Based on BEV of Point Cloud[J]. IEEE Transactions on Instrumentation and Measurement, 2023.
 Yue H, Cao D, Liu Z, et al. Cross Fusion of Point Cloud and Learned Image for Loop Closure Detection[J]. IEEE Robotics and Automation Letters, 2024.
 ## Overview
 We provide code for BEV mode and fusion mode, so you can easily train and test.
 ## Prerequisites
 Before you can use this project, you'll need to do the following:
 1. **Download Datasets**: Download the [KITTI](https://www.cvlibs.net/datasets/kitti/eval_odometry.php) and [KITTI-360](https://www.cvlibs.net/datasets/kitti-360/download.php).
 2. **Prepare Dataset Structure**: Use `preparedataset.py` to construct a dataset structure that complies with the project's requirements. Make sure to update the necessary paths in the code.
 3. **Prepare Environment**: Use the commands in `env.txt` to create your environment. Both Windows and Ubuntu are supported.
 ## Running the Code
 To run the code, follow these steps:
 1. Configure the code to run in either BEV mode or fusion mode using the settings in `config.yaml`.
 2. If you want to load a trained model used in the paper, ensure that you update the file path accordingly.
 3. Run `python train.py`
 ## Evaluation
 Evaluate the saved data using the evaluation script.
 ## Others
 If you have any questions please feel free to contact us.
--- a/pycache/BEVNet.cpython-38.pyc
+++ b/pycache/BEVNet.cpython-38.pyc
--- a/pycache/dataset.cpython-38.pyc
+++ b/pycache/dataset.cpython-38.pyc
--- a/pycache/evaluate_lcd.cpython-38.pyc
+++ b/pycache/evaluate_lcd.cpython-38.pyc
--- a/pycache/loss.cpython-38.pyc
+++ b/pycache/loss.cpython-38.pyc
--- a/pycache/net.cpython-38.pyc
+++ b/pycache/net.cpython-38.pyc
--- a/pycache/netvlad.cpython-38.pyc
+++ b/pycache/netvlad.cpython-38.pyc
--- a/pycache/tools.cpython-38.pyc
+++ b/pycache/tools.cpython-38.pyc
--- a/pycache/uot.cpython-38.pyc
+++ b/pycache/uot.cpython-38.pyc
--- a/config.yaml
+++ b/config.yaml
@@ -0,0 +1,56 @@
 'experiment' :
  # 'path_dataset' : '/mnt/data/cdy/project/dataset/FUSION'
  # 'path_result': '/mnt/data/cdy/data2/results/FUSIONLCD'
  # 'path_dataset' : 'E:\work\Project\dataset\FUSION'
  # 'path_result' : 'E:\work\Project\results\FUSIONLCD\bev2'
  'path_dataset' : '/home/adlab36/chenyouyuan/FUSIONLCD'
  'path_result': '/home/adlab36/chenyouyuan/FUSIONLCD/result'
  'train_flag' : 0
  'validate_flag' : 1
  'test_flag' : 1
  'flag' : 'fusion' 
  'cuda' : 1
  # TRAINING
  'epochs' : 200
  'batchsize' : 6
  'learning_rate' : 1.e-3
  'beta1' : 0.9
  'beta2' : 0.999
  'eps' : 1.e-8
  'weight_decay' : 5.e-6
  'load_model' : 1
  #FUSION
  # 'last_model' : '/data4/caodanyang/results/FUSIONLCD/08310/models/checkpoint_079.pth.tar'
  #BEV
  # 'last_model' : '/data4/caodanyang/results/FUSIONLCD/bev_09030/models/checkpoint_066.pth.tar'
  #BEV+EP
  'last_model' : '/home/adlab36/chenyouyuan/FUSIONLCD/result/log/models/checkpoint_199.pth.tar'
  #DATASET
  'train' : 0,5,6,7,9
  'validate' : 8,50,54,55,56,59
  'test' : 8,50,54,55,56,59
  'voxel_num' : 15000
  'voxel_max_points' : 100
  'voxel_sample' : 'top'
 #  'bev_range' : -51.2,-51.2,-2.5,51.2,51.2,1.5
 #  'bev_resolution' : 0.16
 #  'bev_range' : -64,-64,-2.5,64,64,1.5
 #  'bev_resolution' : 0.2
  'bev_range' : -32,-32,-2.5,32,32,1.5
  'bev_resolution' : 0.2
  # NETWORK PARAMS
  'kpts_number_bev' : 150
  'kpts_number_img' : 150
  'cluster_num_bev' : 16
  'cluster_num_img' : 16
  'cluster_num_fusion' : 16
  'sinkhorn_iter' : 5
  'vlad_size' : 256
  # LOSS
  'loop_file' :  'loop_GT_4m'
  'trip_margin' : 0.5
  'negetative_selsector' : 'random'
--- a/dataset.py
+++ b/dataset.py
@@ -0,0 +1,751 @@
 import glob
 import math
 import os
 import pickle
 from functools import reduce
 import matplotlib.pylab as plt
 import cv2
 import numba
 import numpy as np
 import torch
 import yaml
 from scipy.spatial.distance import cdist
 from torch.utils.data import Dataset, DataLoader, ConcatDataset
 from torch.utils.data.dataloader import default_collate
 from torch.nn.utils.rnn import pad_sequence
 import tools
 # NOTE(review): presumably the image height/width in pixels used by the data
 # pipeline - confirm at the use sites.
 IMG_HEIGHT = 384
 IMG_WIDTH = 1152
 # NOTE(review): meaning not evident from this part of the file; the name may be
 # a misspelling of "EDGE_PROJ" - confirm at the use sites before renaming.
 EGEG_PROJ = 10
 # NOTE(review): presumably a resize factor applied to images - confirm.
 IMAGE_SCALE = 0.5
 def euler2mat(z, y, x):
    """Build a 3x3 rotation matrix from rotations about the z, y and x axes.

    The result is the product ``Rx . Ry . Rz`` of the elementary rotations;
    an angle that is zero (falsy) contributes no factor. With all three angles
    zero the identity matrix is returned.

    :param z: rotation about the z axis, in radians
    :param y: rotation about the y axis, in radians
    :param x: rotation about the x axis, in radians
    :return: 3x3 numpy array
    """
    # Factors are collected directly in multiplication order: x, then y, then z.
    factors = []
    if x:
        c, s = math.cos(x), math.sin(x)
        factors.append(np.array(
            [[1, 0, 0],
             [0, c, -s],
             [0, s, c]]))
    if y:
        c, s = math.cos(y), math.sin(y)
        factors.append(np.array(
            [[c, 0, s],
             [0, 1, 0],
             [-s, 0, c]]))
    if z:
        c, s = math.cos(z), math.sin(z)
        factors.append(np.array(
            [[c, -s, 0],
             [s, c, 0],
             [0, 0, 1]]))
    if not factors:
        return np.eye(3)
    return reduce(np.dot, factors)
 def rt_mat(rx, ry, rz, tx, ty, tz):
    """Assemble a 4x4 float32 homogeneous transform.

    The upper-left 3x3 block is ``euler2mat(rz, ry, rx)`` and the last column
    holds the translation ``(tx, ty, tz)``.
    """
    transform = np.eye(4, dtype=np.float32)
    transform[:3, :3] = euler2mat(rz, ry, rx)
    transform[:3, 3] = [tx, ty, tz]
    return transform
@numba.jit(nopython=True)
 def _points_to_voxel_reverse_kernel(points,
                                    voxel_size,
                                    coors_range,
                                    num_points_per_voxel,
                                    coor_to_voxelidx,
                                    voxels,
                                    coors,
                                    max_points,
                                    max_voxels,voxel_idx_empty,voxel_mamiz):
    """Scatter points into voxels in a single pass, filling the output arrays in place.

    The first three columns of ``points`` are binned with ``voxel_size`` relative
    to ``coors_range[:3]``; the resulting grid coordinate is stored in reversed
    axis order (third point column first). All array arguments except
    ``points``, ``voxel_size`` and ``coors_range`` are outputs:

    - ``coor_to_voxelidx``: grid -> voxel index lookup, ``-1`` where no voxel exists yet.
    - ``coors``: reversed grid coordinate of every created voxel.
    - ``voxels`` / ``num_points_per_voxel``: stored points and their count per voxel.
    - ``voxel_idx_empty``: set to 1 for every (voxel, slot) that received a point.
    - ``voxel_mamiz``: running max (column 0) and min (column 1) of ``points[:, 2]`` per voxel.

    Returns the number of voxels created.
    """
    # put all computations to one loop.
    # we shouldn't create large array in main jit code, otherwise
    # reduce performance
    N = points.shape[0]
    # ndim = points.shape[1] - 1
    ndim = 3
    ndim_minus_1 = ndim - 1
    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
    # np.round(grid_size)
    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)
    # Round into grid_size itself (third argument is the output array), then cast.
    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
    coor = np.zeros(shape=(3,), dtype=np.int32)
    voxel_num = 0
    failed = False
    for i in range(N):
        failed = False
        for j in range(ndim):
            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
            # Points falling outside the grid along any axis are dropped.
            if c < 0 or c >= grid_size[j]:
                failed = True
                break
            # Reversed axis order: point column j goes to coordinate slot 2 - j.
            coor[ndim_minus_1 - j] = c
        if failed:
            continue
        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]  # -1 or an existing voxel index
        if voxelidx == -1:
            voxelidx = voxel_num
            # Voxel budget exhausted: stop processing, all remaining points are ignored.
            if voxel_num >= max_voxels:
                break
            voxel_num += 1
            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
            coors[voxelidx] = coor
        num = num_points_per_voxel[voxelidx]
        # Points beyond max_points per voxel are silently discarded.
        if num < max_points:
            voxel_idx_empty[voxelidx,num_points_per_voxel[voxelidx]]=1
            if points[i,2]>voxel_mamiz[voxelidx,0]:
                voxel_mamiz[voxelidx,0]=points[i,2]
            if points[i,2]<voxel_mamiz[voxelidx,1]:
                voxel_mamiz[voxelidx,1]=points[i,2]
            voxels[voxelidx, num] = points[i]
            num_points_per_voxel[voxelidx] += 1
    return voxel_num
 def points_to_voxel(points,
                    voxel_size,
                    coors_range,
                    max_points=35,
                    reverse_index=True,
                    max_voxels=20000):
    """Convert a point array (N, >=3) to voxels in a single pass over the points.

    Allocates the output buffers and lets ``_points_to_voxel_reverse_kernel``
    fill them. The buffers are returned at full size (``max_voxels`` rows);
    they are not truncated to the number of voxels actually created.

    Args:
        points: [N, ndim] float array. points[:, :3] contain xyz points and
            points[:, 3:] contain other information such as reflectivity.
        voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size
        coors_range: [6] list/tuple or array, float. indicate voxel range.
            format: xyzxyz, minmax
        max_points: int. indicate maximum points contained in a voxel.
        reverse_index: boolean. whether the lookup grid is allocated in
            reversed (zyx) axis order. The kernel always indexes the grid
            with reversed coordinates.
        max_voxels: int. indicate maximum voxels this function create.
            Points that would open a voxel beyond this limit stop the
            kernel, so order the points before calling.
    Returns:
        voxels: [max_voxels, max_points, ndim] array of stored points.
        coors: [max_voxels, 3] int32 array of voxel grid coordinates.
        num_points_per_voxel: [max_voxels] int32 array.
        coor_to_voxelidx: int32 grid mapping a grid cell to its voxel index
            (-1 where no voxel exists).
        voxel_idx_empty: [max_voxels, max_points] int32 array, 1 for every
            slot that holds a point.
        voxel_mamiz: [max_voxels, 2] array with the per-voxel max (column 0,
            initialised to -99) and min (column 1, initialised to 99) of the
            third point column.
    """
    point_dtype = points.dtype
    if not isinstance(voxel_size, np.ndarray):
        voxel_size = np.array(voxel_size, dtype=point_dtype)
    if not isinstance(coors_range, np.ndarray):
        coors_range = np.array(coors_range, dtype=point_dtype)
    extent = coors_range[3:] - coors_range[:3]
    grid_shape = tuple(np.round(extent / voxel_size).astype(np.int32).tolist())
    if reverse_index:
        grid_shape = grid_shape[::-1]
    # don't create large array in jit(nopython=True) code.
    voxels = np.zeros(shape=(max_voxels, max_points, points.shape[-1]), dtype=point_dtype)
    coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)
    num_points_per_voxel = np.zeros(shape=(max_voxels,), dtype=np.int32)
    coor_to_voxelidx = np.full(grid_shape, -1, dtype=np.int32)
    voxel_idx_empty = np.zeros((max_voxels, max_points), dtype=np.int32)
    # Sentinels so the first stored point always updates both extrema.
    voxel_mamiz = np.zeros((max_voxels, 2), dtype=point_dtype)
    voxel_mamiz[:, 0] = -99
    voxel_mamiz[:, 1] = 99
    # The kernel's return value (number of voxels created) is not used here.
    _points_to_voxel_reverse_kernel(
        points, voxel_size, coors_range, num_points_per_voxel,
        coor_to_voxelidx, voxels, coors, max_points, max_voxels,
        voxel_idx_empty, voxel_mamiz)
    return voxels, coors, num_points_per_voxel, coor_to_voxelidx, voxel_idx_empty, voxel_mamiz
@numba.jit(nopython=True)
def pixel_choose(pixel1, pixel2):
    """Fill ``pixel2`` with a fixed-size random sample of valid entries of ``pixel1``.

    For every row ``i``, the indices ``j`` along axis 1 with
    ``pixel1[i, j, 0] > -1`` are collected. ``pixel2.shape[1]`` of them are
    drawn (without replacement when enough are available, with replacement
    otherwise) and the selected entries are copied into ``pixel2[i]``.

    Rows without any valid entry are skipped and ``pixel2[i]`` is left
    untouched, because ``np.random.choice`` cannot sample from an empty
    population.

    Args:
        pixel1: 3-D array; axis 2 is compared against -1 on its first element.
        pixel2: 3-D output array, modified in place.

    Returns:
        None. The result is written into ``pixel2``.
    """
    n = pixel1.shape[0]
    k = pixel1.shape[1]
    k1 = pixel2.shape[1]
    for i in range(n):
        idx = []
        for j in range(k):
            if pixel1[i, j, 0] > -1:
                idx.append(j)
        k2 = len(idx)
        if k2 == 0:
            # Nothing to sample from for this row.
            continue
        # A separate name keeps the list and the array as distinct variables
        # instead of rebinding one name to two different types.
        candidates = np.asarray(idx)
        if k2 >= k1:
            choice = np.random.choice(candidates, k1, replace=False)
        else:
            choice = np.random.choice(candidates, k1, replace=True)
        for j in range(k1):
            pixel2[i, j] = pixel1[i, choice[j]]
 def pointcloud_encoder(pointcloud=None, cfg=None):
    """Encode a point cloud as a bird's-eye-view (BEV) feature grid.

    The cloud is cropped to the configured range, reordered (descending z
    for ``voxel_sample == 'top'``, random otherwise) and binned by
    ``points_to_voxel`` into cells that span the whole z range. Only cells
    holding more than one point and a z extent above 0.05 are written into
    the output grid.

    Args:
        pointcloud: 2-D array whose columns are ``x, y, z, i`` or
            ``x, y, z, i, pu, pv`` (two extra pixel columns).
        cfg: mapping with ``bev_resolution``, ``bev_range``,
            ``voxel_max_points``, ``voxel_num`` and ``voxel_sample``. If any
            of them cannot be read (including ``cfg=None``) the built-in
            defaults below are used for all of them.

    Returns:
        tuple ``(bev, relation)``:
            bev: float32 array ``(rows, cols, 7)``. Channels are
                max z / z range, mean of column 3, log density, and the
                per-cell means of columns 0..3.
            relation: ``0`` for 4-column input. For 6-column input, an array
                stacking each selected cell's per-point pixel pairs with
                that cell's grid coordinate along axis 1.
    """
    try:
        resolution = cfg['bev_resolution']
        pointcloud_range = [float(x) for x in tools.read_cfg(cfg['bev_range'])]
        voxel_max_points = cfg['voxel_max_points']
        voxel_num = cfg['voxel_num']
        voxel_sample = cfg['voxel_sample']
    except Exception:
        # Deliberate best-effort: a missing or unreadable config falls back
        # to defaults instead of aborting.
        resolution = 0.2
        pointcloud_range = [-40, -40, -2.5, 40, 40, 1.5]
        voxel_max_points = 100
        voxel_num = 15000
        voxel_sample = 'top'
    pc_filter = (pointcloud[:, 0] > pointcloud_range[0]) & (pointcloud[:, 0] < pointcloud_range[3]) \
                & (pointcloud[:, 1] > pointcloud_range[1]) & (pointcloud[:, 1] < pointcloud_range[4]) \
                & (pointcloud[:, 2] > pointcloud_range[2]) & (pointcloud[:, 2] < pointcloud_range[5])
    pointcloud = pointcloud[pc_filter]
    # The voxelizer keeps the first points it sees per cell, so the ordering
    # decides which points survive the per-cell cap.
    if voxel_sample == 'top':
        idx = np.argsort(-pointcloud[:, 2])
    else:
        idx = np.arange(len(pointcloud))
        np.random.shuffle(idx)
    pointcloud = pointcloud[idx]
    # A single voxel layer covering the full height range.
    resolution_z = pointcloud_range[5] - pointcloud_range[2]
    voxels, coors, num_points_per_voxel, coor_to_voxelidx, voxel_idx_empty, voxel_mamiz = points_to_voxel(
        pointcloud,
        voxel_size=[resolution, resolution, resolution_z],
        coors_range=[*pointcloud_range],
        max_points=voxel_max_points,
        max_voxels=voxel_num)
    coor_to_voxelidx = np.squeeze(coor_to_voxelidx)
    voxels_center = np.sum(voxels, axis=1)
    voxels_center[:, 0:4] = voxels_center[:, 0:4] / (num_points_per_voxel.reshape(-1, 1) + 1e-8)
    max_z = voxel_mamiz[:, 0]
    min_z = voxel_mamiz[:, 1]
    max_z1 = max_z / resolution_z
    mean_i = np.sum(voxels[:, :, 3], axis=1) / (num_points_per_voxel + 1e-8)
    # Use the resolved local value: indexing cfg here crashed whenever the
    # fallback defaults above were in effect (e.g. cfg=None).
    density = np.log(np.clip(num_points_per_voxel, 1, None)) / np.log(voxel_max_points)
    dz = max_z - min_z
    idx_not_ground = (dz > 0.05) & (num_points_per_voxel > 1)
    coors1 = coors[idx_not_ground, 1:3]

    num_columns = voxels.shape[2]
    if num_columns not in (4, 6):
        print('ERROR VOXEL')
        exit()
    feature = np.concatenate([max_z1, mean_i, density,
                              voxels_center[:, 0], voxels_center[:, 1], voxels_center[:, 2], voxels_center[:, 3]],
                             axis=0).reshape(7, -1).T
    relation = 0
    if num_columns == 6:  # x,y,z,i,pu,pv
        # A cell takes part in the relation when at least one of its points
        # has both pixel coordinates strictly positive.
        have_pixel = ((voxels[:, :, 4] > 0) & (voxels[:, :, 5] > 0)).any(axis=1)
        selected = idx_not_ground & have_pixel
        pixels = voxels[selected, :, 4:6]
        coors2 = coors[selected, 1:3].reshape(-1, 1, 2)
        relation = np.hstack((pixels, coors2))
    # Size the canvas from both grid dimensions so that non-square ranges
    # work; for square ranges this equals the previous shape.
    bev = np.zeros([coor_to_voxelidx.shape[0], coor_to_voxelidx.shape[1], feature.shape[1]], dtype=np.float32)
    bev[coors1[:, 0], coors1[:, 1]] = feature[idx_not_ground]
    return bev, relation
 def crop_image(img, height=IMG_HEIGHT, width=IMG_WIDTH):
    """Centre-pad and centre-crop ``img`` to ``height`` x ``width``.

    Dimensions smaller than the target are padded with black pixels (the odd
    pixel goes to the bottom/right); larger dimensions are cropped around the
    centre. When ``IMAGE_SCALE < 1`` the result and the returned offsets are
    scaled by ``IMAGE_SCALE``.

    Args:
        img: H x W x C image array.
        height: target height before scaling.
        width: target width before scaling.

    Returns:
        tuple ``(image, dh, dw)`` where ``dh`` / ``dw`` are half the signed
        height / width difference (target minus source), truncated to int.
    """
    src_h, src_w, _ = img.shape

    # Amount of padding required per axis (zero when the source is big enough).
    missing_h = max(height - src_h, 0)
    missing_w = max(width - src_w, 0)
    top = missing_h // 2
    left = missing_w // 2
    padded = cv2.copyMakeBorder(img, top, missing_h - top, left, missing_w - left,
                                cv2.BORDER_CONSTANT, value=(0, 0, 0))

    # Centre crop of the (possibly padded) image.
    row0 = (padded.shape[0] - height) // 2
    col0 = (padded.shape[1] - width) // 2
    cropped = padded[row0:row0 + height, col0:col0 + width]

    dh = int((height - src_h) / 2)
    dw = int((width - src_w) / 2)
    if IMAGE_SCALE < 1:
        cropped = cv2.resize(cropped, None, None, IMAGE_SCALE, IMAGE_SCALE)
        dh = int(dh * IMAGE_SCALE)
        dw = int(dw * IMAGE_SCALE)
    return cropped, dh, dw
 class KittiDataset(Dataset):
    """Per-sequence dataset of LiDAR BEV grids, camera images and poses.

    Files are read from ``<cfg['path_dataset']>/sequences/<NN>/``:
    ``velodyne/*.bin`` scans, ``image_2/*.png`` images, ``calib.txt``, a pose
    file (``poses.npy`` for sequences >= 50, ``poses.txt`` otherwise) and the
    loop ground truth ``<cfg['loop_file']>.pickle``.

    With ``mode == 'test'`` every pose is one sample. Otherwise every entry
    of the ground-truth list is one sample, paired with a randomly drawn
    positive frame.
    """

    def __init__(self, cfg, sequence, argument=True, mode='train', flag_bev=True, flag_img=True, flag_fuse=True):
        """Index the sequence files and load calibration, poses and loops.

        Args:
            cfg: mapping with at least ``path_dataset`` and ``loop_file``;
                it is also forwarded to ``pointcloud_encoder``.
            sequence: integer sequence id.
            argument: when true, a random rigid transform is applied to the
                positive scan in training samples.
            mode: ``'test'`` for single-frame samples, anything else for
                query/positive pairs.
            flag_bev: load LiDAR scans and build BEV grids.
            flag_img: load camera images.
            flag_fuse: project LiDAR points into the image; forced to False
                unless both ``flag_bev`` and ``flag_img`` are set.
        """
        root_dataset = cfg['path_dataset']
        if not (flag_img or flag_bev or flag_fuse):
            print('No module will be used!')
            exit()
        if not (flag_img and flag_bev):
            flag_fuse = False
        self.flag_img = flag_img
        self.flag_bev = flag_bev
        self.flag_fuse = flag_fuse
        self.cfg = cfg
        self.sequence = sequence
        self.mode = mode
        self.argument = argument

        seq_dir = os.path.join(root_dataset, 'sequences', '%02d' % sequence)
        self.scans = sorted(glob.glob(os.path.join(seq_dir, 'velodyne', "*.bin")))
        self.images = sorted(glob.glob(os.path.join(seq_dir, 'image_2', "*.png")))

        if int(sequence) >= 50:
            # calib.txt holds a single 3x4 transform whose inverse is used;
            # the projection matrix is hard-coded rather than read from file.
            calib = np.loadtxt(os.path.join(seq_dir, 'calib.txt'))
            cam0_to_velo = np.vstack([np.reshape(calib, (3, 4)), [0, 0, 0, 1]])
            cam0_to_velo = np.linalg.inv(cam0_to_velo)
            self.cam0_to_velo = cam0_to_velo.astype(np.float32)
            k = [552.554261, 0.000000, 682.049453, 0.000000,
                 0.000000, 552.554261, 238.769549, 0.000000,
                 0.000000, 0.000000, 1.000000, 0.000000]
            p2 = np.eye(4)
            p2[:3] = np.array(k).reshape([3, 4])
            self.p2 = p2.astype(np.float32)
            poses2 = np.load(os.path.join(seq_dir, 'poses.npy'))
        else:
            # First column of calib.txt is the row label; row 2 and row 4
            # are used as the projection and the camera/LiDAR transform.
            calib = np.genfromtxt(os.path.join(seq_dir, 'calib.txt'))[:, 1:]
            p2 = np.vstack([np.reshape(calib[2], (3, 4)), [0, 0, 0, 1]])
            self.p2 = p2.astype(np.float32)
            cam0_to_velo = np.vstack([np.reshape(calib[4], (3, 4)), [0, 0, 0, 1]])
            self.cam0_to_velo = cam0_to_velo.astype(np.float32)
            cam0_to_velo = torch.tensor(cam0_to_velo)
            # Loop-invariant, so computed once instead of per pose line.
            cam0_to_velo_inv = cam0_to_velo.inverse()
            poses2 = []
            with open(os.path.join(seq_dir, 'poses.txt'), 'r') as f:
                for line in f:
                    values = [float(v) for v in line.strip().split()]
                    if not values:
                        continue  # tolerate blank lines
                    pose = torch.zeros((4, 4), dtype=torch.float64)
                    pose[0, 0:4] = torch.tensor(values[0:4])
                    pose[1, 0:4] = torch.tensor(values[4:8])
                    pose[2, 0:4] = torch.tensor(values[8:12])
                    pose[3, 3] = 1.0
                    # Re-express the pose through the calibration transform.
                    pose = cam0_to_velo_inv @ (pose @ cam0_to_velo)
                    poses2.append(pose.float().numpy())
            poses2 = np.stack(poses2)
        self.poses = poses2

        # NOTE(review): pickle.load can execute arbitrary code; only point
        # cfg['loop_file'] at trusted files.
        with open(os.path.join(seq_dir, cfg['loop_file'] + '.pickle'), 'rb') as f_gt:
            self.gt = pickle.load(f_gt)

    def __len__(self):
        """Return the number of poses in test mode, else the number of loop entries."""
        if self.mode == 'test':
            return len(self.poses)
        return int(len(self.gt))

    def _append_pixels(self, scan, mat_proj, dh, dw, height, width):
        """Return ``scan`` with two extra columns: each point's (row, col) pixel.

        Points are projected with ``mat_proj``, divided by depth, scaled by
        ``IMAGE_SCALE`` and shifted by the crop offsets. Points that land
        within ``EGEG_PROJ`` of the image border, outside the image, or with
        negative depth get ``-1`` in both pixel columns.
        """
        pts = scan.copy()
        pts[:, 3] = 1  # homogeneous coordinate replaces intensity
        # torch.matmul is kept on purpose; the file replaced np.matmul with it.
        proj = torch.matmul(torch.from_numpy(mat_proj), torch.from_numpy(pts.T)).numpy().T
        depth = proj[:, 2:3]
        proj = proj / depth * IMAGE_SCALE
        proj[:, 0:2] = proj[:, 0:2] + [dw, dh]
        visible = (proj[:, 0] >= EGEG_PROJ) & (proj[:, 0] < width - EGEG_PROJ) & (
                proj[:, 1] >= EGEG_PROJ) & (proj[:, 1] < height - EGEG_PROJ) & (depth[:, 0] >= 0)
        proj[~visible] = -1
        # Swap to (h, w) order before appending.
        return np.hstack((scan, proj[:, [1, 0]]))

    def _label_score(self, bev_query, bev_positive, pose_to_frame):
        """Mark BEV cells of query and positive that correspond to each other.

        Occupied cells (non-zero channel 3 or 4) of the query are moved into
        the positive frame with ``pose_to_frame`` and matched against the
        occupied cells of the positive. Channel 0 of the result flags matched
        query cells, channel 1 matched positive cells.
        """
        h_bev, w_bev, _ = bev_query.shape
        label_score = np.zeros_like(bev_positive[:, :, :2])
        # grid[i, j] == (i, j)
        grid = np.array(np.meshgrid(np.arange(h_bev), np.arange(w_bev))).swapaxes(0, 2)

        def occupied(bev):
            xy = bev[:, :, 3:5]
            mask = (xy[:, :, 0] != 0) | (xy[:, :, 1] != 0)
            return grid[mask], xy[mask]

        grid_query, scan_query_sample = occupied(bev_query)
        grid_positive, scan_positive_sample = occupied(bev_positive)

        # Build homogeneous (x, y, 0, 1) points and move them to the positive frame.
        scan_query_sample1 = np.hstack((scan_query_sample, scan_query_sample * 0))
        scan_query_sample1[:, 3] = 1
        scan_query_sample1 = torch.matmul(torch.from_numpy(pose_to_frame),
                                          torch.from_numpy(scan_query_sample1.T)).numpy().T
        idx1, idx2, dis = tools.nn_match(scan_query_sample1[:, 0:2], scan_positive_sample[:, 0:2], 'euclidean')
        if len(dis) > 50:
            # presumably nn_match returns distances sorted ascending, so this
            # picks a rank-based threshold of at least 2 -- TODO confirm
            th1 = max([2, dis[min([256, int(len(dis) * 0.3)])]])
            keep = dis < th1
            idx1 = idx1[keep]
            idx2 = idx2[keep]
        else:
            # Few mutual matches: fall back to one-sided nearest distances.
            dis = cdist(scan_query_sample1[:, 0:2], scan_positive_sample)
            min1 = np.min(dis, axis=1)
            min2 = np.min(dis, axis=0)
            min11 = np.sort(min1)
            th1 = max([0.2, min11[min([256, int(len(min11) * 0.2)])]])
            min21 = np.sort(min2)
            th2 = max([0.2, min21[min([256, int(len(min21) * 0.2)])]])
            idx1 = np.arange(len(scan_query_sample))[min1 < th1]
            idx2 = np.arange(len(scan_positive_sample))[min2 < th2]
        grid_query = grid_query[idx1]
        grid_positive = grid_positive[idx2]
        label_score[grid_query[:, 0], grid_query[:, 1], 0] = 1
        label_score[grid_positive[:, 0], grid_positive[:, 1], 1] = 1
        return label_score

    def __getitem__(self, idx):
        """Build one sample dict.

        Entries belonging to a disabled modality are left as the integer 0.
        """
        if self.mode == 'test':
            idx_query = idx
            pose_query = self.poses[idx_query]
            image_query, bev_query, relation_query, dh, dw = 0, 0, 0, 0, 0
            if self.flag_img:
                image_query = cv2.imread(self.images[idx_query])
                image_query, dh, dw = crop_image(image_query, IMG_HEIGHT, IMG_WIDTH)
            if self.flag_bev:
                scan_query = np.fromfile(self.scans[idx_query], dtype=np.float32).reshape((-1, 4))
                if self.flag_bev & self.flag_img & self.flag_fuse:
                    mat_proj = torch.matmul(torch.from_numpy(self.p2), torch.from_numpy(self.cam0_to_velo)).numpy()
                    H, W, _ = image_query.shape
                    scan_query = self._append_pixels(scan_query, mat_proj, dh, dw, H, W)
                bev_query, relation_query = pointcloud_encoder(scan_query, self.cfg)
            return {
                'sequence': self.sequence,
                'id_query': idx_query,
                'bev_query': bev_query,
                'img_query': image_query,
                'pose_query': pose_query,
                'relation_query': relation_query
            }

        gt = self.gt[idx]
        idx_query = gt['idx']
        idx_positive = np.random.choice(gt['positive_idxs'])
        pose_query = self.poses[idx_query]
        pose_positive = self.poses[idx_positive]
        image_query, image_positive, bev_query, bev_positive = 0, 0, 0, 0
        pose_to_frame, label_score, relation_query, relation_positive = 0, 0, 0, 0
        dh, dw = 0, 0
        if self.flag_img:
            image_query = cv2.imread(self.images[idx_query])
            image_query, dh, dw = crop_image(image_query)
            image_positive = cv2.imread(self.images[idx_positive])
            image_positive, _, _ = crop_image(image_positive)
        if self.flag_bev:
            scan_query = np.fromfile(self.scans[idx_query], dtype=np.float32).reshape((-1, 4))
            scan_positive = np.fromfile(self.scans[idx_positive], dtype=np.float32).reshape((-1, 4))
            if self.argument:
                # Random rigid transform: up to +-3 deg about the first two
                # axes, +-180 deg about the third, +-3 / +-3 / +-0.3 translation.
                rand = np.random.random(6) * 2 - 1
                Rt = rt_mat(rand[0] * 3 / 180 * np.pi,
                            rand[1] * 3 / 180 * np.pi,
                            rand[2] * 180 / 180 * np.pi,
                            rand[3] * 3, rand[4] * 3, rand[5] * 0.3)
                Rt_inv = torch.from_numpy(Rt).inverse()
                Rt1 = Rt_inv.numpy()
                ints = scan_positive[:, 3].copy()
                scan_positive[:, 3] = 1
                scan_positive = torch.matmul(torch.from_numpy(Rt), torch.from_numpy(scan_positive.T)).numpy().T
                scan_positive[:, 3] = ints
                # Keep the pose consistent with the transformed scan.
                pose_positive = torch.matmul(torch.from_numpy(pose_positive), Rt_inv).numpy()
            else:
                Rt1 = np.eye(4).astype(np.float32)
            if self.flag_fuse:
                H, W, _ = image_query.shape
                base_proj = torch.matmul(torch.from_numpy(self.p2), torch.from_numpy(self.cam0_to_velo))
                scan_query = self._append_pixels(scan_query, base_proj.numpy(), dh, dw, H, W)
                # Undo the augmentation before projecting the positive scan.
                # The same border mask as for the query is applied (the
                # positive previously used ``>= 0 - EGEG_PROJ`` on one bound).
                mat_proj1 = torch.matmul(base_proj, torch.from_numpy(Rt1)).numpy()
                scan_positive = self._append_pixels(scan_positive, mat_proj1, dh, dw, H, W)
            pose_to_frame = torch.matmul(torch.from_numpy(pose_positive).inverse(),
                                         torch.from_numpy(pose_query)).numpy()
            bev_query, relation_query = pointcloud_encoder(scan_query, self.cfg)
            bev_positive, relation_positive = pointcloud_encoder(scan_positive, self.cfg)
            label_score = self._label_score(bev_query, bev_positive, pose_to_frame)
        return {
            'sequence': self.sequence,
            'id_query': idx_query,
            'bev_query': bev_query,
            'img_query': image_query,
            'pose_query': pose_query,
            'id_positive': idx_positive,
            'bev_positive': bev_positive,
            'img_positive': image_positive,
            'pose_positive': pose_positive,
            'pose_to_frame': pose_to_frame,
            'label_score': label_score,
            'relation_query': relation_query,
            'relation_positive': relation_positive
        }
 def collate(samples):
    """Batch sample dicts, padding the variable-length relation arrays.

    Every key except ``relation_query`` / ``relation_positive`` goes through
    ``default_collate``. Relation entries that are numpy arrays are gathered
    (all query relations first, then all positive ones), padded with ``-1``
    along their first axis and stored as a float tensor under ``relation``.
    Relation entries that are missing or are not arrays (the dataset uses the
    integer 0 as a placeholder) are skipped.

    Args:
        samples: list of sample dicts as produced by the dataset.

    Returns:
        dict of batched values; contains ``relation`` only when at least one
        sample carried a relation array.
    """
    ragged_keys = ('relation_query', 'relation_positive')
    samples2 = {key: default_collate([d[key] for d in samples]) for key in samples[0]
                if key not in ragged_keys}

    def gather(key):
        # Explicit type check instead of swallowing every exception.
        return [torch.from_numpy(single_sample[key]) for single_sample in samples
                if isinstance(single_sample.get(key), np.ndarray)]

    relation = gather('relation_query') + gather('relation_positive')
    if len(relation) > 0:
        relation1 = pad_sequence(relation, batch_first=True, padding_value=-1)
        samples2['relation'] = relation1.float()
    return samples2
 def KittiTotalLoader(cfg):
    """Create the train, validation and test ``DataLoader`` objects.

    ``cfg['flag']`` selects the modalities: ``'fusion'`` enables BEV, image
    and fusion, ``'img'`` enables the image only, and any other value enables
    BEV only. The sequence ids come from ``cfg['train']``,
    ``cfg['validate']`` and ``cfg['test']``.

    Returns:
        tuple ``(loader_train, loader_val, loader_test)``.
    """
    modality = cfg['flag']
    use_fuse = modality == 'fusion'
    use_img = use_fuse or modality == 'img'
    use_bev = modality != 'img'

    ids_train = [int(x) for x in tools.read_cfg(cfg['train'])]
    ids_val = [int(x) for x in tools.read_cfg(cfg['validate'])]
    ids_test = [int(x) for x in tools.read_cfg(cfg['test'])]

    def build(sequence_ids, augment, mode, report):
        # One KittiDataset per sequence, concatenated into a single dataset.
        parts = []
        for seq_id in sequence_ids:
            part = KittiDataset(cfg, seq_id, flag_bev=use_bev, flag_img=use_img, flag_fuse=use_fuse,
                                argument=augment, mode=mode)
            report(seq_id, part)
            parts.append(part)
        return ConcatDataset(parts)

    dataset_train = build(
        ids_train, True, 'train',
        lambda s, d: print('===Trainloader add: sequence %02d, %04d files, %04d frames with loop' % (s, len(d.poses), len(d.gt))))
    dataset_val = build(
        ids_val, False, 'train',
        lambda s, d: print('===Validationloader add: sequence %02d, %04d files, %04d frames with loop' % (s, len(d.poses), len(d.gt))))
    dataset_test = build(
        ids_test, False, 'test',
        lambda s, d: print('===Testloader add: sequence %02d, %04d files' % (s, len(d.poses))))

    batch = cfg['batchsize']
    loader_train = DataLoader(dataset_train, batch_size=batch, shuffle=True, num_workers=6, collate_fn=collate)
    loader_val = DataLoader(dataset_val, batch_size=batch, shuffle=True, num_workers=6, collate_fn=collate)
    loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=6, collate_fn=collate)
    return loader_train, loader_val, loader_test
 if __name__ == '__main__':
    # Smoke test: load the experiment config, build the loaders and time one
    # full pass over the training loader.
    try:
        with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile:
            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
        print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml"))
    except OSError:
        # Only a missing/unreadable file triggers the fallback location;
        # a malformed YAML file is reported instead of being masked.
        with open(os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"), "r") as ymlfile:
            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
        print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"))
    cfg = cfg['experiment']
    train, val, test = KittiTotalLoader(cfg)
    t = tools.Timer(name='Loading')
    for i, _ in enumerate(train):
        t.update(i)
--- a/env.txt
+++ b/env.txt
@@ -0,0 +1,59 @@
 ## 环境配置备忘
 conda create -n fusion_cyy python=3.8
 conda activate fusion_cyy
 conda install pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 cudatoolkit=11.3 -c pytorch -c conda-forge
 pip install pykitti pytorch-metric-learning pyyaml scipy scikit-image scikit-learn tqdm open3d matplotlib numba opencv-python opencv-contrib-python pandas -i https://pypi.mirrors.ustc.edu.cn/simple/
 conda remove -n fusion_cyy --all
 在.git的config下
 [user]
    name = MobKBK
    email = 202311250413@csust.edu.cn
 /home/adlab8/pub_data1/Kitti360/KITTI-360
 tmux使用备忘：
 列表
 tmux ls
 新建：
 tmux new -s your-session-name
 wget -c https://s3.eu-central-1.amazonaws.com/avg-kitti/data_odometry_color.zip
 转后台：
 ctrl+b d
 杀死
 ctrl+d
 分离：
 tmux detach
 连接：
 tmux attach -t cyy
 切换：
 tmux switch -t your-session-name
 重命名：
 tmux rename-session -t old-session new-session
 新建窗口：
 tmux new-window -n your-window-name
 选择窗口：
 ctrl+b c: 创建一个新窗口（状态栏会显示多个窗口的信息）
 ctrl+b p: 切换到上一个窗口（按照状态栏的顺序）
 ctrl+b n: 切换到下一个窗口
 ctrl+b w: 从列表中选择窗口
 重命名窗口：
 tmux rename-window -t old_name new_name
 # 划分为上下两个窗格
 tmux split-window
 # 划分左右两个窗格
 tmux split-window -h
 左右划分：ctrl+b %
 上下划分：ctrl+b "
 ctrl+b [ 使用PageUp和PageDown可以实现上下翻页
 kitti位置
 /home/adlab8/pub_data1/OpenDataLab___KITTI_Odometry_2012/raw
 kitti360位置
 /home/adlab8/pub_data1/KITTI-360/
 ln -s <源文件或目录的路径> <软连接的路径>
--- a/evaluate_fm.py
+++ b/evaluate_fm.py
@@ -0,0 +1,81 @@
 import os
 import time
 import torch
 import tools
 import numpy as np
 import yaml
 from dataset import KittiTotalLoader
 from tqdm import tqdm
 def match_err(fea1,fea2,pose1,pose2,kpt1,kpt2):
    """Score descriptor matching between two frames against pose ground truth.

    ``kpt1`` is moved into the frame of ``kpt2`` with ``pose2^-1 @ pose1`` and
    the third coordinate of both key point sets is zeroed. Descriptor matches
    (cosine) are then compared with geometric matches (euclidean) at the
    distance thresholds 0.3, 0.5, 1, 2 and 3.

    :param fea1: descriptors of frame 1 (one entry per key point).
    :param fea2: descriptors of frame 2.
    :param pose1: pose matrix of frame 1.
    :param pose2: pose matrix of frame 2.
    :param kpt1: key points of frame 1 -- assumed (N, 4) homogeneous rows, TODO confirm.
    :param kpt2: key points of frame 2, same layout. Not modified by this call.
    :return: ``(mas, mss, reps)``; each is a list with one float per threshold:
        ``mas`` is the share of descriptor matches whose key points lie within
        the threshold, ``mss`` divides that count by the number of geometric
        matches within the threshold, and ``reps`` is twice the number of
        geometric matches within the threshold over ``len(fea1) + len(fea2)``.
    """
    pose_to_frame = torch.matmul(pose2.inverse(), pose1)
    kpt1 = (pose_to_frame @ kpt1.permute(1, 0)).permute(1, 0)
    kpt1[:, 2] = 0
    # Clone before zeroing: the caller usually passes a view into its key point
    # tensor, and an in-place write would corrupt that frame for later pairs.
    kpt2 = kpt2.clone()
    kpt2[:, 2] = 0
    # NOTE(review): tools.nn_match is assumed to return (idx1, idx2, distances)
    # for the matched pairs -- confirm against tools.py.
    _, _, dis_kpt = tools.nn_match(kpt1, kpt2, 'euclidean')
    idxf1, idxf2, _ = tools.nn_match(fea1, fea2, 'cosine')
    dis_matched = (kpt1[idxf1] - kpt2[idxf2]).norm(p=2, dim=1)
    mas = []
    mss = []
    reps = []
    for thr in [0.3, 0.5, 1, 2, 3]:
        num_correct = torch.sum(dis_matched <= thr)
        num_repeat = torch.sum(dis_kpt <= thr)
        # 1e-8 keeps the ratios finite when nothing was matched
        mas.append((num_correct / (len(idxf1) + 1e-8)).item())
        mss.append((num_correct / (num_repeat + 1e-8)).item())
        reps.append(num_repeat.item() / (len(fea1) + len(fea2)) * 2)
    return mas, mss, reps
 def feature_match(loader_val=None,data=None):
    """Print feature matching statistics for every sequence in ``data``.

    For each sequence, every ground-truth (query, positive) pair is scored with
    ``match_err`` and the per-threshold means are printed as MA / MS / RP.

    :param loader_val: validation loader whose ``dataset.datasets[i].gt`` holds
        the loop ground truth of the i-th sequence. When ``None`` it is built
        from ``config.yaml`` in the working directory, falling back to
        ``project/BevNvLcd/config.yaml``.
    :param data: database dict with the keys ``'sequences'``, ``'fea_kpt'``,
        ``'pose_query'`` and ``'key_points'``. Required.
    :raises ValueError: if ``data`` is ``None``.
    """
    if data is None:
        raise ValueError('feature_match requires the feature database in `data`')
    if loader_val is None:
        try:
            with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile:
                cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
            print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml"))
        except (OSError, yaml.YAMLError):
            # Only a missing/unreadable/invalid local config triggers the fallback
            with open(os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"), "r") as ymlfile:
                cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
            print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"))
        cfg = cfg['experiment']
        _, loader_val, _ = KittiTotalLoader(cfg)
    # Same device selection as the other evaluation scripts; uses CUDA when present
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    sequences = data['sequences']
    fea_kpt = data['fea_kpt'].to(device)
    poses = data['pose_query'].to(device)
    kpts = data['key_points'].to(device)
    seq = torch.unique(sequences)
    # Progress bars are shown only when this file is run as a script
    flag = __name__ != '__main__'
    for i in range(len(seq)):
        idx = sequences == seq[i]
        fea_kpt1 = fea_kpt[idx]
        poses1 = poses[idx]
        kpts1 = kpts[idx]
        # NOTE(review): assumes loader datasets are ordered like the sorted
        # sequence ids in `data` -- confirm against KittiTotalLoader.
        gt = loader_val.dataset.datasets[i].gt
        ms = []
        for j in tqdm(range(0, len(gt)), disable=flag, ncols=60, desc='feature match'):
            query = gt[j]['idx']
            for p in gt[j]['positive_idxs']:
                a, s, reps = match_err(fea_kpt1[query], fea_kpt1[p], poses1[query], poses1[p], kpts1[query], kpts1[p])
                ms.append(a + s + reps)
        if not ms:
            # Nothing to average; indexing the mean below would fail
            print('Feature matching, sequence %02d: no loop pairs, skipped' % (seq[i]))
            continue
        ms1 = np.mean(np.asarray(ms), axis=0)
        print('Feature matching, sequence %02d'%(seq[i]))
        print('MA@0.3:%.3f, MA@0.5:%.3f, MA@1:%.3f, MA@2:%.3f, MA@3:%.3f' %(ms1[0], ms1[1], ms1[2], ms1[3], ms1[4]))
        print('MS@0.3:%.3f, MS@0.5:%.3f, MS@1:%.3f, MS@2:%.3f, MS@3:%.3f' %(ms1[5], ms1[6], ms1[7], ms1[8], ms1[9]))
        print('RP@0.3:%.3f, RP@0.5:%.3f, RP@1:%.3f, RP@2:%.3f, RP@3:%.3f' %(ms1[10], ms1[11], ms1[12], ms1[13], ms1[14]))
        time.sleep(1)
 if __name__ == '__main__':
    # Script entry: load a saved feature database and report matching statistics.
    # Databases used in earlier experiments:
    #   /mnt/data2/datasets/cdy/results/FUSIONLCD/05230/database/database_149_b0.pth.tar
    #   /mnt/data2/datasets/cdy/results/BEVLCD/ricnn03202/database/database_xyz_000.pth.tar
    database_path = "/data4/caodanyang/results/FUSIONLCD/bev_07190/database/database_all.pth.tar"
    feature_match(data=torch.load(database_path))
--- a/evaluate_lcd.py
+++ b/evaluate_lcd.py
@@ -0,0 +1,265 @@
 import os
 import time
 import matplotlib
 # set non-interactive backend for server (must be set before pyplot import)
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
 from skimage.measure import ransac
 from skimage.transform import EuclideanTransform
 import tools
 from tqdm import tqdm
 from sklearn.metrics import auc
 from sklearn.neighbors import KDTree
 import warnings
 warnings.filterwarnings("ignore")
 def recall_with_candidates(vlads, poses, sequence, recall_num=25, positive_distance=4):
    """Compute and print Recall@k of global-descriptor retrieval for one sequence.

    For every frame that has a true loop (another frame at least 50 frames away
    whose position is within ``positive_distance``), the ``recall_num`` nearest
    global descriptors are retrieved and the rank of the first true loop among
    them is recorded.

    :param vlads: global descriptors, one row per frame.
    :param poses: pose matrices, indexed as ``poses[frame, 0:3, 3]`` for position.
    :param sequence: sequence id, used only in the printed line.
    :param recall_num: number of candidates retrieved per query.
    :param positive_distance: maximum position distance of a true loop.
    :return: array of length ``recall_num`` with the cumulative recall; all NaN
        when no frame of the sequence has a true loop.
    """
    recall_at_k = [0] * recall_num
    num_with_loop = 0
    # Progress bars are shown only when this file is run as a script
    flag = __name__ != '__main__'
    num_frames = len(vlads)
    frame_ids = torch.arange(num_frames, device=vlads.device)
    for i in tqdm(range(0, num_frames), disable=flag, ncols=60, desc='Recall@k'):
        # Candidates exclude the temporal neighbourhood [i - 50, i + 50)
        valid_idx = frame_ids[(frame_ids < i - 50) | (frame_ids >= i + 50)]
        if len(valid_idx) == 0:
            continue
        position_query = poses[i:i + 1, 0:3, 3]
        dis_valid = torch.linalg.norm(position_query - poses[valid_idx, 0:3, 3], dim=1)
        if torch.min(dis_valid) > positive_distance:
            continue
        num_with_loop = num_with_loop + 1
        # global feature to query quickly
        dis_vlad = torch.cdist(vlads[i].view(1, -1), vlads[valid_idx]).view(-1, )
        # topk fails when asked for more entries than exist
        num_cand = min(recall_num, len(valid_idx))
        _, idx_cand = torch.topk(dis_vlad, num_cand, largest=False)
        idx_cand = valid_idx[idx_cand]
        dis_cand = torch.linalg.norm(position_query - poses[idx_cand, 0:3, 3], dim=1)
        hits = torch.nonzero(dis_cand <= positive_distance).view(-1)
        if len(hits) > 0:
            # Only the best-ranked true loop counts; cumsum below spreads it to larger k
            recall_at_k[hits[0].item()] += 1
    # NOTE(review): presumably lets the tqdm output settle before printing -- confirm
    time.sleep(1)
    if num_with_loop > 0:
        recall_at_k = np.cumsum(recall_at_k) / float(num_with_loop)
    else:
        recall_at_k = np.full(recall_num, np.nan)
    shown = [k for k in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 19, 24] if k < recall_num]
    print('Sequence %02d, Recall@' % sequence, end='')
    print(', '.join('%d[%.3f]' % (k + 1, recall_at_k[k]) for k in shown))
    return recall_at_k
 def retrieve(vlads, feas, kpts, poses, num_cand=1, verify='ransac'):
    """Detect one loop candidate per frame and score it.

    Each frame queries the ``num_cand`` nearest global descriptors among frames
    outside its temporal neighbourhood ``[i - 50, i + 50)``. With
    ``verify='ransac'`` every candidate is checked by matching local descriptors
    and fitting a 2-D ``EuclideanTransform`` with RANSAC; the accepted candidate
    with the smallest estimated translation wins. Otherwise the nearest
    descriptor is taken as is.

    Every frame needs at least ``num_cand`` frames outside its neighbourhood,
    otherwise ``torch.topk`` raises.

    :param vlads: global descriptors, one row per frame.
    :param feas: local descriptors per frame.
    :param kpts: key points per frame; columns 0:2 are used for RANSAC.
    :param poses: pose matrices; ``poses[frame, 0:3, 3]`` is the position.
    :param num_cand: number of retrieved candidates per query.
    :param verify: ``'ransac'`` for geometric verification, anything else for
        plain nearest-descriptor retrieval.
    :return: float array with one row per frame:
        ``[query index, detected index, score, true position distance]``.
        In RANSAC mode the score is the estimated translation norm and both
        score and distance stay at 9999 when no candidate was verified; in the
        other mode the score is the global descriptor distance.
    """
    loops = []
    # Progress bars are shown only when this file is run as a script
    flag = __name__ != '__main__'
    num_frames = len(feas)
    frame_ids = torch.arange(num_frames, device=vlads.device)
    for i in tqdm(range(0, num_frames), disable=flag, ncols=60, desc='retrieve loop'):
        valid_idx = frame_ids[(frame_ids < i - 50) | (frame_ids >= i + 50)]
        # global feature to query quickly
        dis_vlad = torch.cdist(vlads[i].view(1, -1), vlads[valid_idx]).view(-1, )
        dis_topk, idx_cand = torch.topk(dis_vlad, num_cand, largest=False)
        idx_cand = valid_idx[idx_cand]
        if verify == 'ransac':
            # local feature to verify
            fea_query = feas[i]
            p1 = kpts[i].cpu().detach().numpy()
            min_dis = 9999.
            dis_truth = 9999.
            idx_detect = idx_cand[0]
            for idx_cand1 in idx_cand:
                idx1, idx2, _ = tools.nn_match(fea_query, feas[idx_cand1], 'cosine')
                if len(idx1) < 31:
                    continue
                p2 = kpts[idx_cand1].cpu().detach().numpy()
                idx1 = idx1.cpu().detach().numpy()
                idx2 = idx2.cpu().detach().numpy()
                try:
                    model, inliers = ransac((p1[idx1, 0:2], p2[idx2, 0:2]), model_class=EuclideanTransform, min_samples=15, max_trials=3, residual_threshold=1)
                    num_inlier = np.sum(inliers)
                    dis_estimate = float(np.linalg.norm(model.params[0:2, 2]))
                except Exception:
                    # Best effort: a failed fit (e.g. no model returned) only
                    # rejects this candidate; interrupts still propagate.
                    continue
                # Require enough RANSAC inliers before trusting the estimate
                if num_inlier > 30 and dis_estimate < min_dis:
                    min_dis = dis_estimate
                    idx_detect = idx_cand1
                    dis_truth = torch.linalg.norm(poses[i, 0:3, 3] - poses[idx_detect, 0:3, 3]).item()
            loops.append([i, idx_detect.item(), min_dis, dis_truth])
        else:
            idx_detect = idx_cand[0]
            dis_truth = torch.linalg.norm(poses[i, 0:3, 3] - poses[idx_detect, 0:3, 3])
            loops.append([i, idx_detect.item(), dis_topk[0].item(), dis_truth.item()])
    loops = np.array(loops)
    return loops
 def pr_curve(poses, loops, sequence, positive_distance=4, plot_dir=None):
    """Evaluate detected loops with a precision-recall curve and save its plot.

    A frame counts as a real loop when another frame at least 50 frames away
    lies within ``positive_distance`` of its position. The detection score
    (``loops[:, 2]``) is swept over all of its distinct values; a detection is a
    true positive when the frame is a real loop and the detected frame is within
    ``positive_distance`` (``loops[:, 3]``).

    :param poses: numpy array of pose matrices; ``poses[:, 0:3, 3]`` are positions.
    :param loops: array produced by ``retrieve`` with one row per frame:
        ``[query, detected, score, true distance]``.
    :param sequence: sequence id, used for the printed line and the file name.
    :param positive_distance: distance threshold of a true loop.
    :param plot_dir: directory for the PR plot; ``None`` keeps the original
        hard-coded location.
    :return: ``(ap, recall_p1, f1)``: area under the PR curve, the best recall
        at precision 1, and the best F1 score.
    """
    map_tree_poses = KDTree(poses[:, 0:3, 3])
    real_loop = []
    for i in range(0, len(poses)):
        min_range = max(0, i - 50)
        max_range = min(i + 50, poses.shape[0])
        indices = map_tree_poses.query_radius(np.expand_dims(poses[i, 0:3, 3], 0), positive_distance)
        valid_idxs = set(indices[0]) - set(range(min_range, max_range))
        real_loop.append(1 if len(valid_idxs) > 0 else 0)
    real_loop = np.array(real_loop)
    distances = loops[:, 3]
    detected_loop = loops[:, 2]
    num_real = real_loop.sum()
    # The curve starts at (recall 0, precision 1)
    precision2 = [1]
    recall2 = [0]
    for thr in np.unique(detected_loop):
        accepted = detected_loop <= thr
        tp = (accepted & (real_loop > 0) & (distances <= positive_distance)).sum()
        fp = accepted.sum() - tp
        precision2.append(tp / (tp + fp) if (tp + fp) > 0 else 1.)
        # tp + fn equals the number of real loops
        recall2.append(tp / num_real if num_real > 0 else 0.)
    # 0/0 (no precision and no recall) counts as F1 = 0 instead of producing NaN
    f1s = [(2 * p * r) / (p + r) if (p + r) > 0 else 0. for p, r in zip(precision2, recall2)]
    f1 = max(f1s)
    recall_p1 = np.max(np.array(recall2)[np.array(precision2) == 1])
    ap = auc(recall2, precision2)
    # Rows that still carry the 9999 placeholder score are left out of RP
    loops1 = loops[loops[:, 2] < 9999]
    if len(loops1) > 0:
        rp = np.sum(np.abs(loops1[:, 2] - loops1[:, 3]) < 2) / len(loops1)
    else:
        rp = float('nan')
    print('Sequence %02d, AP %.3f, Recall@100 %.3f, F1 %.3f, RP %.3f/%d' % (sequence, ap, recall_p1, f1, rp, len(loops1)))
    # --- save PR curve to file (for server usage) ---
    try:
        if plot_dir is None:
            out_dir = os.path.join('/home/adlab36/chenyouyuan/FUSIONLCD', 'result', 'plots')
        else:
            out_dir = plot_dir
        os.makedirs(out_dir, exist_ok=True)
        plt.figure(figsize=(6, 5))
        plt.plot(recall2, precision2, 'b-', marker='o', linewidth=2)
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title(f'Sequence {int(sequence):02d} PR (AP={ap:.3f})')
        plt.grid(True, linestyle='--', alpha=0.4)
        fname = os.path.join(out_dir, f'pr_sequence_{int(sequence):02d}.png')
        plt.tight_layout()
        plt.savefig(fname, dpi=150)
        plt.close()
    except Exception as e:
        # A failed plot must not block the evaluation itself
        print(f'Warning: failed to save PR plot for sequence {sequence}: {e}')
    return ap, recall_p1, f1
 def lcd(data):
    """Run loop closure detection and PR evaluation for every sequence in ``data``.

    :param data: database dict with the keys ``'vlads'``, ``'key_points'``,
        ``'sequences'``, ``'pose_query'`` and ``'fea_kpt'``.
    :return: ``(result, recall_at_ks)`` where ``result`` holds one
        ``[ap, recall_p1, f1]`` entry per sequence. Recall@k is not evaluated
        here, so ``recall_at_ks`` holds the same empty list once per sequence.
    """
    vlads = data['vlads'].cuda()
    kpts = data['key_points']
    sequences = data['sequences']
    poses = data['pose_query'].cuda()
    feas = data['fea_kpt'].cuda()
    result, recall_at_ks = [], []
    # Placeholder shared by all sequences (recall_with_candidates is not called)
    recall_at_k = []
    for seq_id in torch.unique(sequences):
        in_seq = sequences == seq_id
        poses_seq = poses[in_seq]
        loops = retrieve(vlads[in_seq], feas[in_seq], kpts[in_seq], poses_seq, num_cand=1, verify='ransac')
        # pr_curve works on numpy poses
        ap, recall_p1, f1 = pr_curve(poses_seq.cpu().detach().numpy(), loops, seq_id, positive_distance=4)
        recall_at_ks.append(recall_at_k)
        result.append([ap, recall_p1, f1])
    return result, recall_at_ks
 if __name__ == '__main__':
    # Script entry: evaluate loop closure detection on one saved database.
    np.random.seed(123)
    separator = '----------------------------------------------------------------------'
    # Databases evaluated in earlier experiments:
    #   /data4/caodanyang/results/FUSIONLCD/07030/database/database_bev.pth.tar
    #   /data4/caodanyang/results/FUSIONLCD/07030/database/database_fusion.pth.tar
    database_path = '/home/adlab36/chenyouyuan/FUSIONLCD/result/log/database/database_bevp.pth.tar'
    print(separator)
    lcd(torch.load(database_path))
    print(separator)
--- a/evaluate_pose.py
+++ b/evaluate_pose.py
@@ -0,0 +1,191 @@
 import math
 import os
 import time
 import warnings
 import numpy as np
 import torch
 import yaml
 from skimage.measure import ransac
 from skimage.transform import EuclideanTransform
 from tqdm import tqdm
 import net
 import tools
 from dataset import KittiTotalLoader
 warnings.filterwarnings("ignore")
 def npto_XYZRPY(rotmatrix):
    '''
    Convert a homogeneous transformation matrix into x, y, z, roll, pitch, yaw.

    The angles are read directly from the rotation part:
    roll = atan2(-R[1, 2], R[2, 2]), pitch = asin(R[0, 2]),
    yaw = atan2(-R[0, 1], R[0, 0]). The original author notes this corresponds
    to ``quat2eul(..., 'XYZ')``.

    :param rotmatrix: np array, indexed up to [2, 3] (e.g. a 4x4 pose)
    :return: np array with the xyzrpy (angles in radians)
    '''
    roll = math.atan2(-rotmatrix[1, 2], rotmatrix[2, 2])
    # Clamp before asin: a numerically imperfect rotation can leave the entry
    # slightly outside [-1, 1], which would raise "math domain error".
    pitch = math.asin(np.clip(rotmatrix[0, 2], -1.0, 1.0))
    yaw = math.atan2(-rotmatrix[0, 1], rotmatrix[0, 0])
    x, y, z = rotmatrix[:3, 3]
    return np.array([x, y, z, roll, pitch, yaw])
 def yt_error(pose1, pose2):
    """Return the translation distance and the absolute yaw difference of two poses.

    :param pose1: pose matrix (np array).
    :param pose2: pose matrix (np array).
    :return: ``(distance, angle)``; ``distance`` is the norm of the difference of
        the translation columns, ``angle`` is the yaw difference in degrees,
        folded into [0, 180].
    """
    distance = np.linalg.norm((pose1 - pose2)[0:3, 3])
    full_turn = 2 * np.pi
    # Bring both yaw angles into [0, 2*pi) before comparing them
    yaw1 = npto_XYZRPY(pose1)[-1] % full_turn
    yaw2 = npto_XYZRPY(pose2)[-1] % full_turn
    angle = (abs(yaw1 - yaw2) % full_turn) * 180 / np.pi
    # Differences beyond half a turn are measured the short way round
    return distance, (360 - angle if angle > 180 else angle)
 def rt_error(pose1, pose2):
    """Return the translation distance and the rotation angle between two poses.

    :param pose1: pose matrix (np array); rotation in [:3, :3], translation in
        the last column.
    :param pose2: pose matrix with the same layout.
    :return: ``(distance, angle)``; ``angle`` is the angle of the relative
        rotation ``R1^-1 @ R2`` in degrees.
    """
    translation_gap = pose1[:3, -1] - pose2[:3, -1]
    distance = np.sqrt(np.sum(np.square(translation_gap)))
    relative_rot = np.linalg.inv(pose1[:3, :3]) @ pose2[:3, :3]
    # trace(R) = 1 + 2 * cos(angle); the clip absorbs round-off outside [-1, 1]
    cos_angle = np.clip((np.trace(relative_rot) - 1) / 2, -1, 1)
    angle = np.arccos(cos_angle) * 180 / np.pi
    return distance, angle
 def pose_err(vlads=None, dataset=None, positive_distance=4., kpts=None, feas=None, model=None, num_cand=10):
    """Compare RANSAC and model-based relative pose estimates on ground-truth loops.

    Every ground-truth pair with ``idx_query < idx_positive`` is evaluated once.
    The reference transform is ``poses[positive]^-1 @ poses[query]``; both
    estimates are scored with ``yt_error`` (translation distance, yaw in degrees).

    :param vlads: unused, kept for interface compatibility.
    :param dataset: object with ``poses`` and ``gt``; each gt sample is a dict
        with ``'idx'`` and ``'positive_idxs'``.
    :param positive_distance: unused, kept for interface compatibility.
    :param kpts: key points per frame (torch tensor).
    :param feas: local descriptors per frame (torch tensor).
    :param model: pose estimation module; called with a dict holding
        ``'fea_kpt'`` and ``'key_points'`` and expected to add
        ``'transformation'``. Its ``epsilon`` attribute selects the device.
    :param num_cand: unused, kept for interface compatibility.
    :return: ``(et_ransac, er_ransac, et_uot, er_uot, ransac_rate)``: mean
        translation / rotation errors of both methods and the share of pairs
        accepted by RANSAC. RANSAC errors are NaN when it never succeeded; with
        no pairs at all the result is ``(nan, nan, nan, nan, 0.0)``.
    """
    poses = dataset.poses
    nan = float('nan')
    pairs = []
    for sample in dataset.gt:
        idx_query = sample['idx']
        for idx_positive in sample['positive_idxs']:
            if idx_query < idx_positive:
                pairs.append([idx_query, idx_positive])
    if not pairs:
        return nan, nan, nan, nan, 0.
    pairs = np.asarray(pairs)
    pairs = pairs[np.argsort(pairs[:, 1])]
    error_ransac = []
    error_uot = []
    cnt_ransca = 0
    times_uot = []
    times_ransac = []
    for i in tqdm(range(len(pairs)), ncols=60):
        fea_query = feas[pairs[i][0]]
        p1 = kpts[pairs[i][0]].cpu().detach().numpy()
        fea_cand = feas[pairs[i][1]]
        p2 = kpts[pairs[i][1]].cpu().detach().numpy()
        pose_to_frame = np.matmul(np.linalg.inv(poses[pairs[i][1]]), poses[pairs[i][0]])
        st_ransac = time.time()
        idx1, idx2, _ = tools.nn_match(fea_query, fea_cand, 'cosine')
        if len(idx1) >= 20:
            idx1 = idx1.cpu().detach().numpy()
            idx2 = idx2.cpu().detach().numpy()
            try:
                result_ransac, inliers = ransac((p1[idx1, 0:3], p2[idx2, 0:3]), model_class=EuclideanTransform, min_samples=15, max_trials=3, residual_threshold=1.7)
                # Accept the fit only with enough consistent points
                if np.sum(inliers) > 30:
                    error_ransac.append(yt_error(pose_to_frame, result_ransac.params))
                    cnt_ransca = cnt_ransca + 1
            except Exception:
                # Best effort: a failed fit just leaves this pair out of the
                # RANSAC statistics; interrupts still propagate.
                pass
        times_ransac.append(time.time() - st_ransac)
        fea1 = feas[pairs[i], :, :].to(model.epsilon.device).permute(0, 2, 1)
        kpts1 = kpts[pairs[i], :, :].to(model.epsilon.device)
        bd = {'fea_kpt': fea1, 'key_points': kpts1}
        st_uot = time.time()
        bd = model(bd)
        times_uot.append(time.time() - st_uot)
        pose_estimate1 = bd['transformation'].squeeze(0).cpu().detach().numpy()
        # Append the homogeneous row -- assumes the model returns a 3x4 matrix, TODO confirm
        pose_estimate1 = np.vstack((pose_estimate1, [0, 0, 0, 1]))
        error_uot.append(yt_error(pose_to_frame, pose_estimate1))
    ransac_rate = cnt_ransca / len(pairs)
    if error_ransac:
        error_ransac = np.asarray(error_ransac)
        et_ransac = np.mean(error_ransac[:, 0])
        er_ransac = np.mean(error_ransac[:, 1])
    else:
        # Indexing an empty error array would raise; report "no data" instead
        et_ransac = nan
        er_ransac = nan
    error_uot = np.asarray(error_uot)
    et_uot = np.mean(error_uot[:, 0])
    er_uot = np.mean(error_uot[:, 1])
    print('uot time: ',np.array(times_uot).mean())
    print('ransac time: ',np.array(times_ransac).mean())
    return et_ransac, er_ransac, et_uot, er_uot, ransac_rate
 def estimate_pose(database):
    """Evaluate relative pose estimation on every test sequence and print the errors.

    The configuration is read from ``config.yaml`` in the working directory,
    falling back to ``project/BevNvLcd/config.yaml``. The model checkpoint
    ``cfg['last_model']`` is loaded from ``<path_result>/models`` and its ``uot``
    sub-module is used as the pose estimator.

    :param database: dict with the keys ``'vlads'``, ``'key_points'``,
        ``'fea_kpt'`` and ``'sequences'``; entries are assumed to be stored
        sequence after sequence in the order of the test loader -- TODO confirm.
    """
    try:
        with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile:
            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
        print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml"))
    except (OSError, yaml.YAMLError):
        # Only a missing/unreadable/invalid local config triggers the fallback
        with open(os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"), "r") as ymlfile:
            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
        print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"))
    cfg = cfg['experiment']
    path_result = cfg['path_result']
    _, _, loader_test = KittiTotalLoader(cfg)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = net.Fusion(cfg).to(device)
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine
    checkpoint = torch.load(tools.path_join(path_result, 'models', cfg['last_model']), map_location=device)
    model.load_state_dict(checkpoint['model'])
    uot = model.uot
    vlads = database['vlads']
    key_points = database['key_points']
    fea_kpt = database['fea_kpt']
    sequence_ids = torch.unique(database['sequences'])
    print()
    print('****************************************************************************')
    end = 0
    for i, dataset in enumerate(loader_test.dataset.datasets):
        # The database is sliced by consecutive dataset lengths
        start = end
        end = start + len(dataset)
        et_ransac, er_ransac, et_uot, er_uot, rate = pose_err(vlads=vlads[start:end], dataset=dataset,
                                                              kpts=key_points[start:end], feas=fea_kpt[start:end], model=uot)
        print('Sequence %02d' % (sequence_ids[i]))
        print('ransac rate:%.4f, translation error:%.4f[m], rototion error:%.4f[deg]' % (rate, float(et_ransac), float(er_ransac)))
        print('uot translation error:%.4f[m], uot rototion error:%.4f[deg],' % (float(et_uot), float(er_uot)))
    print('****************************************************************************')
 if __name__ == '__main__':
    # Script entry: evaluate pose estimation on a saved feature database.
    # Usage notes from the original author:
    #   CUDA_VISIBLE_DEVICES=5 python evaluate_pose.py
    #   CUDA_VISIBLE_DEVICES=2 nohup python -u evaluate_pose.py >03090.log 2>&1 &
    #   fuser /dev/nvidia*
    database_path = '/data4/caodanyang/results/FUSIONLCD/07030/database/database_fusion.pth.tar'
    estimate_pose(torch.load(database_path))
--- a/loop_gt.py
+++ b/loop_gt.py
@@ -0,0 +1,130 @@
 import argparse
 import torch
 from torch.utils.data import Dataset
 import pykitti
 import os
 from sklearn.neighbors import KDTree
 import pickle
 import numpy as np
 # Half-width (in frames) of the temporal window that is never counted as a loop
 skip_frame = 50
 class KITTILoader3DPosesOnlyLoopPositives(Dataset):
    """Pose-only dataset that yields loop closure ground truth for each frame."""
    def __init__(self, dir, sequence, poses, positive_range=5., negative_range=25., hard_range=None):
        """Load the poses of one sequence and index their positions in a KD-tree.

        :param dir: dataset root; ``<dir>/sequences/<sequence>/calib.txt`` is read
            for sequences up to 21.
        :param sequence: sequence id as a string.
        :param poses: path of the pose file; loaded with ``np.load`` for sequence
            ids above 21, otherwise parsed as text with 12 values per line.
        :param positive_range: radius within which a frame is a loop positive.
        :param negative_range: radius outside of which a frame is a negative.
        :param hard_range: optional ``[inner, outer]`` radii of the hard set.
        """
        super().__init__()
        self.dir = dir
        self.sequence = sequence
        self.positive_range = positive_range
        self.negative_range = negative_range
        self.hard_range = hard_range
        if int(sequence) > 21:
            self.poses = np.load(poses)
        else:
            calib_path = os.path.join(dir, 'sequences', sequence, 'calib.txt')
            calib = np.genfromtxt(calib_path)[:, 1:]
            # Row 4 of the calibration file is used as the 3x4 camera/velodyne transform
            T_cam_velo = np.vstack([np.reshape(calib[4], (3, 4)), [0, 0, 0, 1]])
            T_velo_cam = np.linalg.inv(T_cam_velo)
            poses2 = []
            with open(poses, 'r') as f:
                for line in f:
                    values = [float(v) for v in line.strip().split()]
                    pose = np.zeros((4, 4))
                    for row in range(3):
                        pose[row, 0:4] = np.array(values[4 * row:4 * row + 4])
                    pose[3, 3] = 1.0
                    # Conjugate with the calibration transform
                    poses2.append(T_velo_cam @ (pose @ T_cam_velo))
            self.poses = np.stack(poses2)
        self.kdtree = KDTree(self.poses[:, :3, 3])
    def __len__(self):
        """Return the number of poses in the sequence."""
        return len(self.poses)
    def __getitem__(self, idx):
        """Return the loop ground truth of frame ``idx``.

        :return: ``(num_loop, positive_idxs, negative_idxs, hard_idxs, reverse)``.
            ``positive_idxs`` are the sorted frames within ``positive_range`` and
            outside the ``skip_frame`` window; ``negative_idxs`` the sorted frames
            farther than ``negative_range``; ``hard_idxs`` the set of frames
            between the two ``hard_range`` radii (``None`` without
            ``hard_range``). ``reverse`` counts positives rotated by more than
            90 degrees and is negated when every positive is.
        """
        num_poses = len(self.poses)
        # Rotation angle (degrees) between frame idx and every frame: the trace
        # over the first two axes yields trace(R_idx^-1 @ R_k) for each k.
        rel_rot = np.linalg.inv(self.poses[idx, :3, :3]) @ self.poses[:, :3, :3].swapaxes(0, 2)
        angle = np.arccos(np.clip((np.trace(rel_rot) - 1) / 2, -1, 1)) * 180 / np.pi
        # The angle bound is large enough to keep every finite angle
        idx_angle = np.where(angle < 99999)[0]
        anchor_pose = torch.tensor([self.poses[idx, 0, 3], self.poses[idx, 1, 3], self.poses[idx, 2, 3]])
        query_point = anchor_pose.unsqueeze(0).numpy()
        near_idx, _ = self.kdtree.query_radius(query_point, self.positive_range, sort_results=True, return_distance=True)
        temporal_window = set(range(max(0, idx - skip_frame), min(idx + skip_frame, num_poses)))
        positive_idxs = list(set(near_idx[0]) & (set(idx_angle) - temporal_window))
        loop_angle = angle[positive_idxs]
        reverse = 0
        if len(loop_angle) > 0:
            reverse = np.sum(loop_angle > 90)
            if min(loop_angle) > 90:
                reverse = -1 * reverse
        positive_idxs.sort()
        within_negative = self.kdtree.query_radius(query_point, self.negative_range)
        negative_idxs = sorted(set(range(num_poses)) - set(within_negative[0]))
        hard_idxs = None
        if self.hard_range is not None:
            inner_indices = self.kdtree.query_radius(query_point, self.hard_range[0])
            outer_indices = self.kdtree.query_radius(query_point, self.hard_range[1])
            hard_idxs = set(outer_indices[0]) - set(inner_indices[0])
        return len(positive_idxs), positive_idxs, negative_idxs, hard_idxs, reverse
 if __name__ == '__main__':
    # Script entry: build the 4 m loop closure ground truth for each sequence
    # and store it as <root>/sequences/<sequence>/loop_GT_4m.pickle.
    parser = argparse.ArgumentParser()
    parser.add_argument('--root_folder', default='/home/adlab36/chenyouyuan/FUSIONLCD',
                        help='dataset directory')
    args = parser.parse_args()
    base_dir = args.root_folder
    # Further sequence ids used by the original author: '120205', '130405'
    for sequence in ['00', '05', '06', '07', '08', '09', '50', '54', '55', '56', '59']:
        if int(sequence) < 50:
            poses_file = base_dir + "/sequences/" + sequence + "/poses.txt"
        elif int(sequence) < 100:
            poses_file = base_dir + "/sequences/" + sequence + "/poses.npy"
        else:
            # Previously fell through and reused the pose file of the last sequence
            raise ValueError('no pose file layout known for sequence %s' % sequence)
        dataset = KITTILoader3DPosesOnlyLoopPositives(base_dir, sequence, poses_file, 4, 15, [8, 15])
        lc_gt = []
        lc_gt_file = os.path.join(base_dir, 'sequences', sequence, 'loop_GT_4m.pickle')
        loop_pairs = []
        loop_files = []
        for i in range(len(dataset)):
            num_loop, pos, _, _, reverse = dataset[i]
            if num_loop > 0:
                loop_files.append([i, reverse])
                lc_gt.append({'idx': i, 'positive_idxs': pos})
                # Count every pair once, from its smaller frame index
                loop_pairs.extend([i, p] for p in pos if i < p)
        with open(lc_gt_file, 'wb') as f:
            pickle.dump(lc_gt, f)
        if not loop_files or not loop_pairs:
            # The statistics below divide by these counts
            print('Sequence %02d done,%05d files, no loop found' % (int(sequence), len(dataset)))
            continue
        loop_files = np.array(loop_files)
        # A negative reverse count marks a file whose loops are all reverse loops
        num_reverse_file = int(np.sum(loop_files[:, 1]<0))
        # Reverse positives are seen from both frames of a pair, hence the halving
        num_reverse_pairs = int(np.sum(np.abs(loop_files[:, 1])))/2
        print('Sequence %02d done,%05d files, %05d files with loop, %05d[%.4f] files only has reverse loop, %05d loop pairs, %05d[%.4f] reverse loop' %
              (int(sequence), len(dataset), len(loop_files), num_reverse_file, num_reverse_file / len(loop_files), len(loop_pairs),num_reverse_pairs,num_reverse_pairs/len(loop_pairs)))
--- a/loss.py
+++ b/loss.py
@@ -0,0 +1,340 @@
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from pytorch_metric_learning import distances
 def tr_loss(batch_dict,key):
    """Translation and rotation error between a predicted and the reference pose.

    Args:
        batch_dict: dict holding the predicted transforms under ``key`` and the
            reference transforms under ``'pose_to_frame'``. Both are indexed as
            ``[:, 0:3, 3]`` (translation column) and ``[:, 0, 0]`` (first
            rotation entry).
        key: name of the predicted transformation entry in ``batch_dict``.

    Returns:
        ``(loss1, loss2)``: the mean Euclidean distance between the translation
        columns, and the mean absolute difference, in degrees, between
        ``acos`` of the ``[0, 0]`` entries.
    """
    pred = batch_dict[key]
    gt = batch_dict['pose_to_frame']
    loss1 = (pred[:, 0:3, 3] - gt[:, 0:3, 3]).norm(dim=1).mean()
    # clip keeps acos defined when an entry drifts marginally outside [-1, 1]
    angle_pred = torch.acos(torch.clip(pred[:, 0, 0].view(-1, 1), -1, 1))
    angle_gt = torch.acos(torch.clip(gt[:, 0, 0].view(-1, 1), -1, 1))
    # exact radians -> degrees conversion (previously "/ 3.1415 * 180")
    loss2 = torch.rad2deg((angle_pred - angle_gt).norm(dim=1).mean())
    return loss1, loss2
 def gen_points_loss(batch_dict):
    """Mean absolute x/y gap between generated and detected key points.

    Reads ``'key_points_gen'``, ``'key_points'`` and ``'pose_to_frame'`` from
    ``batch_dict``. The first ``pose_to_frame.shape[0]`` entries of each point
    tensor are treated as sources and the remaining ones as targets. Detected
    sources are compared with generated targets and generated sources with
    detected targets, after moving the sources with ``pose_to_frame``.

    Returns:
        Scalar tensor: mean absolute difference of the first two coordinates.
    """
    generated = batch_dict['key_points_gen']
    detected = batch_dict['key_points']
    # widen the generated points by a zeroed copy of their own columns and set
    # column 3 to one (presumably (x, y) -> (x, y, 0, 1); confirm with producer)
    generated_h = torch.cat((generated, generated * 0), dim=2)
    generated_h[:, :, 3] = 1
    gt_pose = batch_dict['pose_to_frame']
    n_pairs = gt_pose.shape[0]
    sources = torch.cat((detected[:n_pairs], generated_h[:n_pairs]), dim=0)
    targets = torch.cat((generated_h[n_pairs:], detected[n_pairs:]), dim=0)
    stacked_pose = torch.cat((gt_pose, gt_pose), dim=0)
    moved = torch.bmm(stacked_pose, sources.transpose(1, 2)).transpose(1, 2)
    return torch.abs(moved[:, :, :2] - targets[:, :, :2]).mean()
 def rand_dis(x, y):
    """Compare ``y`` with ``x`` at one random off-diagonal entry per row.

    For every row a column different from the row index is drawn uniformly.
    The result is the mean of ``|x - y|`` at the drawn entries together with
    ``|y|`` on the diagonal, plus a hinge ``relu(0.2 - |y|)`` averaged over
    the drawn entries.

    Args:
        x, y: 2-D tensors; the number of columns of ``x`` sets how many rows
            are sampled (assumes square, equally sized inputs - TODO confirm).

    Returns:
        Scalar tensor.
    """
    assert len(x.shape)==2 and len(y.shape)==2,'x and y must be 2 dim'
    n_cols = x.size(1)
    rows = torch.arange(n_cols).to(x.device)
    picks = torch.randint(n_cols - 1, size=(n_cols,)).to(x.device)
    # the k-th column that differs from the row index r is k for k < r, else k + 1
    cols = picks + (picks >= rows).long()
    x_off = x[rows, cols]
    y_off = y[rows, cols]
    y_diag = y[rows, rows]
    x_pair = torch.stack((x_off, x_off * 0), dim=1)
    y_pair = torch.stack((y_off, y_diag), dim=1)
    return torch.abs(x_pair - y_pair).mean() + F.relu(0.2 - torch.abs(y_off)).mean()
 def gen_feature_loss(batch_dict):
    """Cosine losses between extracted features and their generated counterparts.

    Reads ``'fea_pt_dual'``, ``'fea_pl_dual'``, ``'fea_kpt_original'``, the
    matching ``*_gen`` entries and ``'relation'`` from ``batch_dict``. Feature
    tensors are indexed as ``[sample, :, :cnt]`` and compared along dim 0 of
    each per-sample slice (the original comments label the layout B, C, N).

    For every sample, ``cnt`` is the number of rows of ``relation`` whose last
    entry along dim 2 is positive in both trailing channels; only the first
    ``cnt`` columns of the dual features are compared. A sample without any
    valid match contributes nothing to the dual losses; previously it produced
    the mean of an empty tensor, i.e. NaN.

    Returns:
        ``(loss0, loss1, loss2, loss3)``: point-cloud dual loss, image dual
        loss and key-point feature loss, each divided by the batch size, plus
        ``loss3`` which is always the integer 0.
    """
    # layout B, C, N according to the original comment
    fea_pt_dual_gen = batch_dict['fea_pt_dual_gen']
    fea_pl_dual_gen = batch_dict['fea_pl_dual_gen']
    fea_kpt_original_gen = batch_dict['fea_kpt_original_gen']
    fea_pt_dual = batch_dict['fea_pt_dual']
    fea_pl_dual = batch_dict['fea_pl_dual']
    fea_kpt_original = batch_dict['fea_kpt_original']
    relation = batch_dict['relation']
    b = fea_pl_dual.shape[0]
    loss0 = 0
    loss1 = 0
    loss2 = 0
    loss3 = 0
    for i in range(b):
        valid = (relation[i, :, -1, 0] > 0) & (relation[i, :, -1, 1] > 0)
        cnt = int(torch.sum(valid))
        if cnt > 0:
            # matched point-cloud features vs. the ones generated from the image
            loss0 = loss0 + (1 - F.cosine_similarity(fea_pt_dual[i, :, :cnt], fea_pt_dual_gen[i, :, :cnt], dim=0)).mean()
            # matched image features vs. the ones generated from the point cloud
            loss1 = loss1 + (1 - F.cosine_similarity(fea_pl_dual[i, :, :cnt], fea_pl_dual_gen[i, :, :cnt], dim=0)).mean()
        # loss of the panoramic feature generation module
        loss2 = loss2 + (1 - F.cosine_similarity(fea_kpt_original[i], fea_kpt_original_gen[i], dim=0)).mean()
    loss0 = loss0 / b
    loss1 = loss1 / b
    loss2 = loss2 / b
    return loss0, loss1, loss2, loss3
 def sinkhorn_matches_loss(batch_dict,key):
    """Mean distance between matched key points and their reference positions.

    The first half of ``batch_dict['key_points']`` (reshaped to
    ``(batch_size, -1, 4)``) gets its fourth coordinate set to one, is moved by
    ``batch_dict['pose_to_frame']`` and compared with ``batch_dict[key]``.

    Args:
        batch_dict: dict with ``key``, ``'key_points'``, ``'pose_to_frame'``
            and ``'batch_size'``.
        key: entry holding the projected key points obtained from the matches.

    Returns:
        Scalar tensor: mean Euclidean distance over the first three coordinates.
    """
    matched = batch_dict[key]
    coords = batch_dict['key_points']
    gt_pose = batch_dict['pose_to_frame']
    # work on a private copy so the caller's key points stay untouched
    coords = coords.clone().view(batch_dict['batch_size'], -1, 4)
    n_query = coords.shape[0] // 2
    query = coords[:n_query]
    query[:, :, -1] = 1.
    reference = torch.bmm(gt_pose, query.transpose(1, 2)).transpose(1, 2)[:, :, :3]
    return (reference - matched).norm(dim=2).mean()
 def score_loss(batch_dict):
    """MSE between predicted BEV scores and their labels where a score exists.

    ``batch_dict['label_score']`` carries two label maps in its last axis;
    they are stacked along the batch axis so that they line up with
    ``batch_dict['score_bev']``. Only positions whose predicted score exceeds
    ``1e-8`` enter the loss.

    Returns:
        Scalar tensor with the mean squared error over the selected positions.
    """
    predicted = batch_dict['score_bev']
    labels = batch_dict['label_score']
    labels = torch.cat([labels.select(3, 0), labels.select(3, 1)], dim=0)
    scored = predicted > 1e-8
    return F.mse_loss(predicted[scored], labels[scored])
 def pose_loss(batch_dict,key):
    """Mean absolute gap between key points moved by two transforms.

    The first half of ``batch_dict['key_points']`` (reshaped to
    ``(batch_size, -1, 4)``) is transformed once with
    ``batch_dict['pose_to_frame']`` and once with ``batch_dict[key]``; the
    first three coordinates of both results are compared.

    Args:
        batch_dict: dict with ``'key_points'``, ``'batch_size'``,
            ``'pose_to_frame'`` and the entry named by ``key``.
        key: name of the predicted transformation entry.

    Returns:
        Scalar tensor with the mean absolute coordinate difference.
    """
    coords = batch_dict['key_points'].clone().view(batch_dict['batch_size'], -1, 4)
    gt_pose = batch_dict['pose_to_frame']
    n_query = coords.shape[0] // 2
    query_t = coords[:n_query].permute(0, 2, 1)
    expected = torch.bmm(gt_pose, query_t).float().permute(0, 2, 1)[:, :, :3]
    predicted = torch.bmm(batch_dict[key], query_t).permute(0, 2, 1)[:, :, :3]
    return torch.abs(predicted - expected).mean()
 def get_all_triplets(dist_mat, pos_mask, neg_mask, is_inverted=False, margin=0.5, different_embedding=False):
    """Enumerate every (anchor, positive, negative) index combination.

    Args:
        dist_mat, is_inverted, margin: accepted for signature parity with the
            other selectors; not used here.
        pos_mask: square mask, non-zero where a pair is a positive.
        neg_mask: square mask, non-zero where a pair is a negative.
        different_embedding: when False only the strict upper triangle of
            ``pos_mask`` is used, so each unordered positive pair counts once.

    Returns:
        Tuple of three index tensors ``(anchors, positives, negatives)``.
    """
    positives = pos_mask if different_embedding else torch.triu(pos_mask, 1)
    # combos[a, p, n] is non-zero iff (a, p) is positive and (a, n) is negative
    combos = positives.unsqueeze(2) * neg_mask.unsqueeze(1)
    return torch.nonzero(combos, as_tuple=True)
 def hardest_negative_selector(dist_mat, pos_mask, neg_mask, is_inverted, margin=0.5, different_embedding=False):
    """Pair every positive couple with the hardest negative of its anchor.

    The hardest negative is the admissible column with the smallest entry of
    ``dist_mat`` (or the largest one when ``is_inverted`` is true, i.e. when
    larger values mean "closer").

    Args:
        dist_mat: square matrix of pairwise distances / similarities.
        pos_mask: square mask, non-zero where a pair is a positive.
        neg_mask: square mask, non-zero/True where a pair is a negative.
        is_inverted: True when ``dist_mat`` holds similarities.
        margin: unused, kept for signature parity with the other selectors.
        different_embedding: when False only the strict upper triangle of
            ``pos_mask`` is used.

    Returns:
        ``(a, p, n)`` index tensors, or ``(a, p, None)`` when ``neg_mask`` has
        no admissible entry at all.
    """
    if not different_embedding:
        pos_mask = torch.triu(pos_mask, 1)
    a, p = torch.where(pos_mask)
    if neg_mask.sum() == 0:
        return a, p, None
    not_negative = ~neg_mask.bool()
    masked = dist_mat.clone()
    if is_inverted:
        # Push non-negatives below every real value. Multiplying by the mask
        # would leave zeros that beat negative similarities, and torch.max
        # returns (values, indices) - only the indices are wanted.
        masked[not_negative] = masked.min() - 1.
        _, n = torch.max(masked, dim=1)
    else:
        masked[not_negative] = masked.max() + 1.
        _, n = torch.min(masked, dim=1)
    # NOTE: an anchor whose own row has no negative still receives an index
    n = n[a]
    return a, p, n
 def random_negative_selector(dist_mat, pos_mask, neg_mask, is_inverted, margin=0.5, different_embedding=False):
    """Draw one random negative for every positive pair.

    For each (anchor, positive) pair the negatives of the anchor are narrowed
    to those with a positive triplet loss when at least one such negative
    exists; one of the remaining candidates is drawn with ``np.random.choice``.

    Args:
        dist_mat: square matrix of pairwise distances / similarities.
        pos_mask: square mask, non-zero where a pair is a positive.
        neg_mask: square mask, non-zero where a pair is a negative.
        is_inverted: True when larger values in ``dist_mat`` mean "closer".
        margin: triplet margin used when testing for a positive loss.
        different_embedding: when False only the strict upper triangle of
            ``pos_mask`` is used.

    Returns:
        ``(a, p, n)`` index tensors, or ``(a, p, None)`` as soon as one anchor
        has no negative.
    """
    if not different_embedding:
        pos_mask = torch.triu(pos_mask, 1)
    a, p = torch.where(pos_mask)
    chosen = []
    for anchor, positive in zip(a.tolist(), p.tolist()):
        candidates = torch.where(neg_mask[anchor])[0]
        if len(candidates) == 0:
            return a, p, None
        d_an = dist_mat[anchor, candidates]
        d_ap = dist_mat[anchor, positive]
        curr_loss = (-d_ap + d_an + margin) if is_inverted else (d_ap - d_an + margin)
        violating = candidates[curr_loss > 0]
        if len(violating) > 0:
            candidates = violating
        chosen.append(np.random.choice(candidates.cpu().numpy()))
    n = torch.tensor(chosen, dtype=a.dtype, device=a.device)
    return a, p, n
 def semihard_negative_selector(dist_mat, pos_mask, neg_mask, is_inverted, margin=0.5, different_embedding=False):
    """Draw one semi-hard negative for every positive pair.

    A negative is semi-hard when its triplet loss lies strictly between 0 and
    ``margin``. If an anchor has semi-hard negatives one of them is drawn with
    ``np.random.choice``; otherwise the draw is made among all its negatives.

    Args:
        dist_mat: square matrix of pairwise distances / similarities.
        pos_mask: square mask, non-zero where a pair is a positive.
        neg_mask: square mask, non-zero where a pair is a negative.
        is_inverted: True when larger values in ``dist_mat`` mean "closer".
        margin: triplet margin.
        different_embedding: when False only the strict upper triangle of
            ``pos_mask`` is used.

    Returns:
        ``(a, p, n)`` index tensors, or ``(a, p, None)`` as soon as one anchor
        has no negative.
    """
    if not different_embedding:
        pos_mask = torch.triu(pos_mask, 1)
    a, p = torch.where(pos_mask)
    chosen = []
    for anchor, positive in zip(a.tolist(), p.tolist()):
        candidates = torch.where(neg_mask[anchor])[0]
        if len(candidates) == 0:
            return a, p, None
        d_an = dist_mat[anchor, candidates]
        d_ap = dist_mat[anchor, positive]
        curr_loss = (-d_ap + d_an + margin) if is_inverted else (d_ap - d_an + margin)
        semihard = candidates[(curr_loss > 0) & (curr_loss < margin)]
        if len(semihard) > 0:
            candidates = semihard
        chosen.append(np.random.choice(candidates.cpu().numpy()))
    n = torch.tensor(chosen, dtype=a.dtype, device=a.device)
    return a, p, n
 class TripletLoss(nn.Module):
    """Triplet margin loss with a pluggable distance and triplet selector.

    Attributes are set in ``__init__``:
        margin: additive margin of the hinge.
        triplet_selector: callable ``(dist_mat, pos_mask, neg_mask, is_inverted)``
            returning index tensors ``(anchors, positives, negatives)``; the
            last element may be ``None`` when no negative is available.
        distance: distance object called on two embedding sets; its
            ``is_inverted`` attribute and ``margin`` method are used.
    """
    def __init__(self, margin: float, triplet_selector, distance: distances.BaseDistance):
        super().__init__()
        self.margin = margin
        self.triplet_selector = triplet_selector
        self.distance = distance
    def forward(self, embeddings, pos_mask, neg_mask, other_embeddings=None):
        """Return the mean hinge loss over the selected triplets.

        Distances are measured between ``embeddings`` and ``other_embeddings``
        (or ``embeddings`` itself when the latter is ``None``). Without any
        negative the loss only acts on the positive pairs: ``relu(1 - d)`` for
        inverted distances, ``relu(d)`` otherwise.
        """
        reference = embeddings if other_embeddings is None else other_embeddings
        dist_mat = self.distance(embeddings, reference)
        selected = self.triplet_selector(
            dist_mat, pos_mask, neg_mask, self.distance.is_inverted)
        d_ap = dist_mat[selected[0], selected[1]]
        if selected[-1] is None:
            positive_term = (1 - d_ap) if self.distance.is_inverted else d_ap
            return F.relu(positive_term).mean()
        d_an = dist_mat[selected[0], selected[2]]
        violation = self.distance.margin(d_ap, d_an) + self.margin
        return F.relu(violation).mean()
 def _pairwise_distance(x, squared=False, eps=1e-16):
    # Compute the 2D matrix of distances between all the embeddings.
    cor_mat = torch.matmul(x, x.t())
    norm_mat = cor_mat.diag()
    distances = norm_mat.unsqueeze(1) - 2 * cor_mat + norm_mat.unsqueeze(0)
    distances = F.relu(distances)
    if not squared:
        mask = torch.eq(distances, 0.0).float()
        distances = distances + mask * eps
        distances = torch.sqrt(distances)
        distances = distances * (1.0 - mask)
    return distances
 class TotalLoss(nn.Module):
    """Combine all training losses and store them in ``batch_dict['loss']``.

    The negative selector of the triplet loss is chosen through
    ``cfg['negetative_selsector']`` (``'hardest'``, ``'semihard'``, anything
    else selects the random selector); the triplet margin comes from
    ``cfg['trip_margin']``. The config key and the attribute
    ``negetative_distcance`` keep their existing spelling on purpose.
    """
    def __init__(self, cfg):
        super(TotalLoss, self).__init__()
        selector_name = cfg['negetative_selsector']
        if selector_name == 'hardest':
            neg_selector = hardest_negative_selector
        elif selector_name == 'semihard':
            neg_selector = semihard_negative_selector
        else:
            neg_selector = random_negative_selector
        self.trip_fun = TripletLoss(margin=cfg['trip_margin'], triplet_selector=neg_selector, distance=distances.LpDistance())
        # query poses further apart than this are treated as negatives
        self.negetative_distcance = 50
    def forward(self, batch_dict):
        """Compute every available loss term and attach the list to ``batch_dict``.

        Optional terms are only computed when their inputs are present in
        ``batch_dict``; ``'pose_query'``, ``'batch_size'`` and ``'vlads'`` are
        always required.

        Returns:
            ``batch_dict`` with ``batch_dict['loss']`` set to
            ``[l_total, l_pose, l_score, l_match, l_triplet, l_tra, l_rot,
            l_gb, l_gi, l_gpa, l_gpo, l_kpl]``; terms equal to zero are
            replaced by ``l_total * 0``.
        """
        l_score = l_gb = l_gi = l_gpa = l_gpo = l_kpl = 0
        if 'key_points' in batch_dict.keys():
            l_score = score_loss(batch_dict)
        l_match = l_pose = l_tra = l_rot = 0
        # Count the registration heads that are actually present instead of
        # inferring the count from the rotation errors being positive.
        num_heads = 0
        for head in ('original', 'fusion'):
            trans_key = 'transformation_' + head
            if trans_key not in batch_dict.keys():
                continue
            num_heads += 1
            l_match = l_match + sinkhorn_matches_loss(batch_dict, 'project_kpts_' + head)
            head_tra, head_rot = tr_loss(batch_dict, trans_key)
            l_tra = l_tra + head_tra
            l_rot = l_rot + head_rot
            l_pose = l_pose + pose_loss(batch_dict, trans_key)
        cnt = max(num_heads, 1)
        l_match = l_match / cnt
        l_pose = l_pose / cnt
        l_tra = l_tra / cnt
        l_rot = l_rot / cnt
        if ('fea_pt_dual_gen' in batch_dict.keys()) or ('fea_pl_dual_gen' in batch_dict.keys()):
            l_gb, l_gi, l_gpa,l_kpl = gen_feature_loss(batch_dict)
        if 'key_points_gen' in batch_dict.keys():
            l_gpo = gen_points_loss(batch_dict)
        num_pairs = batch_dict['pose_query'].shape[0]
        pair_dist = _pairwise_distance(batch_dict['pose_query'][:, 0:3, 3])
        if 'sequence' in batch_dict:
            # samples from different sequences are always negatives
            # (assumes one sequence id per query pose - TODO confirm)
            neg_mask = batch_dict['sequence'].view(1, -1) != batch_dict['sequence'].view(-1, 1)
        else:
            # One row/column per query so the mask matches pair_dist; it is
            # tiled to the full batch below. It used to be allocated at twice
            # this size, which cannot be combined with pair_dist.
            neg_mask = torch.zeros((num_pairs, num_pairs), dtype=torch.bool)
        neg_mask = ((pair_dist > self.negetative_distcance) | neg_mask.to(pair_dist.device))
        neg_mask = neg_mask.repeat(2, 2)
        batch_size = batch_dict['batch_size']
        half = batch_size // 2
        pos_mask = torch.zeros((batch_size, batch_size), device=neg_mask.device)
        first = torch.arange(half, device=neg_mask.device)
        # sample i and sample i + half form a positive pair
        pos_mask[first, first + half] = 1
        pos_mask[first + half, first] = 1
        l_triplet = self.trip_fun(batch_dict['vlads'], pos_mask, neg_mask)
        # l_gpo is reported below but is not part of the optimised total
        l_total = l_score + l_pose + 0.05 * l_match + l_triplet + (l_gb + l_gi + l_gpa + l_kpl)
        loss = [l_total, l_pose, l_score, l_match, l_triplet, l_tra, l_rot, l_gb, l_gi, l_gpa, l_gpo,l_kpl]
        # give every entry the type of l_total, including terms that were skipped
        zero = l_total * 0
        loss = [zero if term == 0 else term for term in loss]
        batch_dict['loss']=loss
        return batch_dict
--- a/models/checkpoint_bev.pth.tar
+++ b/models/checkpoint_bev.pth.tar
--- a/models/checkpoint_bevp.pth.tar
+++ b/models/checkpoint_bevp.pth.tar
--- a/models/checkpoint_fusion.pth.tar
+++ b/models/checkpoint_fusion.pth.tar
--- a/net.py
+++ b/net.py
@@ -0,0 +1,628 @@
 import cv2
 import math
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch._utils
 import torch.nn as nn
 import torch.nn.functional as F
 from uot import UOTHead
 from netvlad import NetVLAD, NetVLADLoupe
 from ALIKE.alike import configs
 from ALIKE.alnet import ALNet
 from BEVNet import RICNN, EncodePosition, RIAvgpool2d, RIMaxpool2d
 import tools
 def simple_nms(scores, nms_radius=2, itertation=2, mode='1'):
    """Fast non-maximum suppression that zeroes scores near a stronger one.

    Args:
        scores: score map accepted by the pooling layer.
        nms_radius: suppression radius; the pooling window is
            ``2 * nms_radius + 1`` wide.
        itertation: number of refinement rounds in which maxima of the not yet
            suppressed area are added.
        mode: ``'ri'`` selects ``RIMaxpool2d``, anything else ``nn.MaxPool2d``.

    Returns:
        Tensor shaped like ``scores`` that keeps the score at retained maxima
        and is zero elsewhere.
    """
    assert (nms_radius >= 0)
    window = nms_radius * 2 + 1
    pool_cls = RIMaxpool2d if mode == 'ri' else nn.MaxPool2d
    local_max = pool_cls(kernel_size=window, stride=1, padding=nms_radius)
    blank = torch.zeros_like(scores)
    keep = scores == local_max(scores)
    for _ in range(itertation):
        # everything inside the window of an already kept maximum is suppressed
        suppressed = local_max(keep.float()) > 0
        remaining = torch.where(suppressed, blank, scores)
        extra = (remaining == local_max(remaining)) & (~suppressed)
        keep = keep | extra
    return torch.where(keep, scores, blank)
 class BEVHead(nn.Module):
    """Key-point detection and description head working on BEV images.

    An ``ALNet`` backbone yields a score map and a dense feature map for the
    first three BEV channels. Key points are picked from the score map, their
    features are passed through ``EncodePosition``, aggregated by ``NetVLAD``
    and, when a reference pose is available, handed to ``UOTHead``.
    """
    def __init__(self, alnet='alike-n', iter=5, num_kpt=100, cluster_num=16, vlad_size=256):
        """Build the sub-modules.

        Args:
            alnet: key into the ALIKE ``configs`` table selecting the backbone.
            iter: forwarded to ``UOTHead`` as ``nb_iter``.
            num_kpt: number of key points kept per BEV image.
            cluster_num: second argument of ``NetVLAD``.
            vlad_size: accepted but not used by this class.
        """
        super(BEVHead, self).__init__()
        cfg = configs[alnet]
        self.feature_extractor = ALNet(c1=cfg['c1'], c2=cfg['c2'], c3=cfg['c3'], c4=cfg['c4'], dim=cfg['dim'],
                                       single_head=cfg['single_head'])
        self.feature_size = int(self.feature_extractor.feature_size)
        # key-point selection strategy: 'avgpool' or anything else (NMS maxima)
        self.select = 'maxpool'
        self.num_kpt = num_kpt
        self.ep = EncodePosition(feature_size=self.feature_size)
        self.uot = UOTHead(nb_iter=iter,name='original')
        self.netvlad_bev = NetVLAD(self.feature_size, cluster_num)
    def forward(self, batch_dict):
        """Detect key points on ``batch_dict['bev']`` and store the results.

        Channels 0:3 of ``bev`` feed the backbone, channel 2 (> 0) gates the
        score map and channels 3:7 provide per-pixel points ("xyzi" in the
        original comment). Channels 5 and 6 of ``batch_dict['bev']`` are
        overwritten in place with 0 and 1.

        Writes ``'pixels_kpt'``, ``'score_bev'``, ``'fea_kpt_original'``,
        ``'fea_bev'``, ``'key_points'`` and ``'vlad_bev'`` into ``batch_dict``
        and calls ``self.uot`` when ``'pose_to_frame'`` is present.

        Raises:
            RuntimeError: if ``num_kpt`` is 0 or an image has no candidate key
                point (padding an empty candidate list could never finish).

        Returns:
            The updated ``batch_dict``.
        """
        assert type(batch_dict) is dict, 'Input should be a dict'
        bev = batch_dict['bev']
        guider = (bev[:, 2:3] > 0).float()
        b, c, h_bev, w_bev = bev.shape
        x = bev[:, 0:3, :, :]
        points = bev[:, 3:7, :, :]  # xyzi
        # `points` is a view: these writes also modify batch_dict['bev']
        points[:, 2] = 0
        points[:, 3] = 1
        score_bev, feature_bev = self.feature_extractor(x)
        score_bev = score_bev * guider
        if self.select == 'avgpool':
            avgpool = RIAvgpool2d(kernel_size=5, stride=4, padding=1)
            grid = np.array(np.meshgrid(np.arange(h_bev), np.arange(w_bev))).swapaxes(0, 2)
            grid = torch.from_numpy(grid).to(x.device).permute(2, 0, 1).unsqueeze(0).repeat(b, 1, 1, 1)
            score_bev_avg = avgpool(score_bev)
            # score-weighted averages of pixel position, point and feature
            grid_avg = avgpool(grid.float() * score_bev) / (score_bev_avg + 1e-8)
            grid_avg = torch.round(grid_avg).long().permute(0, 2, 3, 1)
            points_avg = avgpool(score_bev * points) / (score_bev_avg + 1e-8)
            feature_bev_avg = avgpool(feature_bev * score_bev) / (score_bev_avg + 1e-8)
            score_bev = score_bev.view(b, h_bev, w_bev)
            score_bev_avg = score_bev_avg.squeeze(1)
            kpts = []
            feas_kpt = []
            pixels_kpt = []
            for i in range(b):
                uv = list(torch.where(score_bev_avg[i] > 0))
                num_kpt = int(self.num_kpt)
                if num_kpt == 0 or len(uv[0]) == 0:
                    raise RuntimeError('NO BEV key point')
                # repeat candidates until at least num_kpt are available
                while len(uv[0]) < num_kpt:
                    uv[0] = torch.cat([uv[0], uv[0][:(num_kpt - len(uv[0]))]])
                    uv[1] = torch.cat([uv[1], uv[1][:(num_kpt - len(uv[1]))]])
                score_bev0 = score_bev_avg[i, uv[0], uv[1]]
                score_bev1, idx = torch.topk(score_bev0, k=num_kpt)
                pc = points_avg[i, :, uv[0], uv[1]].permute(1, 0)
                kpt = pc[idx]
                fea_kpt = feature_bev_avg[i, :, uv[0][idx], uv[1][idx]]
                pixel_kpt = grid_avg[i, uv[0][idx], uv[1][idx]]
                # keep a leading batch axis, as the NMS branch does
                pixels_kpt.append(pixel_kpt.unsqueeze(0))
                kpts.append(kpt.unsqueeze(0))
                feas_kpt.append(fea_kpt.unsqueeze(0))
        else:
            score_bev_max = simple_nms(score_bev, nms_radius=3)
            score_bev = score_bev.view(b, h_bev, w_bev)
            score_bev_max = score_bev_max.view(b, h_bev, w_bev)
            kpts = []
            feas_kpt = []
            pixels_kpt = []
            for i in range(b):
                uv = list(torch.where((score_bev[i] == score_bev_max[i]) & (score_bev[i] > 0)))
                num_kpt = int(self.num_kpt)
                if num_kpt == 0 or len(uv[0]) == 0:
                    raise RuntimeError('NO BEV key point')
                # repeat candidates until at least num_kpt are available
                while len(uv[0]) < num_kpt:
                    uv[0] = torch.cat([uv[0], uv[0][:(num_kpt - len(uv[0]))]])
                    uv[1] = torch.cat([uv[1], uv[1][:(num_kpt - len(uv[1]))]])
                score_bev0 = score_bev[i, uv[0], uv[1]]
                score_bev1, idx = torch.topk(score_bev0, k=num_kpt)
                pc = points[i, :, uv[0], uv[1]].permute(1, 0)
                kpt = pc[idx]
                fea_kpt = feature_bev[i, :, uv[0][idx], uv[1][idx]]
                pixel_kpt = torch.cat([uv[0][idx], uv[1][idx]]).view(2, -1).T
                pixels_kpt.append(pixel_kpt.unsqueeze(0))
                kpts.append(kpt.unsqueeze(0))
                feas_kpt.append(fea_kpt.unsqueeze(0))
        kpts = torch.cat(kpts)
        feas_kpt = torch.cat(feas_kpt)
        pixels_kpt = torch.cat(pixels_kpt)
        if hasattr(self, 'ep'):
            feas_kpt = self.ep(kpts, feas_kpt)
        batch_dict['pixels_kpt'] = pixels_kpt
        batch_dict['score_bev'] = score_bev
        batch_dict['fea_kpt_original'] = feas_kpt
        batch_dict['fea_bev'] = feature_bev
        batch_dict['key_points'] = kpts
        if hasattr(self, 'netvlad_bev'):
            # two input layouts are tried; presumably for the two NetVLAD
            # variants imported by this file - confirm against netvlad.py
            try:
                vlad_bev = self.netvlad_bev(feas_kpt.transpose(1, 2).contiguous())
            except Exception:
                vlad_bev = self.netvlad_bev(feas_kpt.unsqueeze(3))
            batch_dict['vlad_bev'] = vlad_bev
        if ('pose_to_frame' in batch_dict.keys()) and (hasattr(self, 'uot')):
            self.uot(batch_dict)
        ####################################    show bev and kpt     ############################################
        # debug visualisation, disabled; uses kpt / uv / idx left over from the last loop iteration
        if 0:
            for i in range(b):
                bevshow = x[i].permute(1, 2, 0).cpu().detach().numpy()
                bevshow = np.ascontiguousarray(bevshow[:, :, 0:3] * 255, dtype=np.uint8)
                bevshow1 = bevshow.copy()
                bevshow1[:, 1] = [255, 255, 255]
                for j in range(kpt.shape[0]):
                    center = (int(uv[1][idx[j]].cpu().detach().numpy()), int(uv[0][idx[j]].cpu().detach().numpy()))
                    cv2.circle(bevshow1, center, 2, (0, 0, 255), -1, cv2.LINE_AA)
                bevshow2 = np.hstack((bevshow, bevshow1))
                fig = plt.figure()
                plt.imshow(bevshow2)
                plt.show()
        #########################################################################################################
        ####################################        show match       ############################################
        # debug visualisation, disabled: nearest-neighbour feature matches between both halves of the batch
        if 0:
            for i in range(b // 2):
                kpt1 = kpts[i]
                pose_to_frame = batch_dict['pose_to_frame'][i]
                kpt1 = (pose_to_frame @ kpt1.permute(1, 0)).permute(1, 0)
                kpt2 = kpts[i + b // 2]
                bev1 = batch_dict['bev'][i][0:3].permute(1, 2, 0)
                bev1 = np.ascontiguousarray(bev1.cpu().detach().numpy() * 255, dtype=np.uint8)
                bev2 = batch_dict['bev'][i + b // 2][0:3].permute(1, 2, 0)
                bev2 = np.ascontiguousarray(bev2.cpu().detach().numpy() * 255, dtype=np.uint8)
                pixel1 = pixels_kpt[i].cpu().detach().numpy()
                pixel2 = pixels_kpt[i + b // 2].cpu().detach().numpy()
                fea1 = feas_kpt[i].permute(1, 0).cpu().detach().numpy()
                fea2 = feas_kpt[i + b // 2].permute(1, 0).cpu().detach().numpy()
                idx1, idx2, dis = tools.nn_match(fea1, fea2, 'cosine')
                h, w, _ = bev1.shape
                img = np.hstack((bev1, bev2))
                img[:, w] = [255, 255, 255]
                tp = 0
                img1 = img.copy()
                for j in range(len(pixel1)):
                    center1 = (int(pixel1[j, 1]), int(pixel1[j, 0]))
                    center2 = (int(pixel2[j, 1]) + w, int(pixel2[j, 0]))
                    cv2.circle(img, center1, 2, (155, 155, 155), -1, cv2.LINE_AA)
                    cv2.circle(img, center2, 2, (155, 155, 155), -1, cv2.LINE_AA)
                for j in range(len(idx1)):
                    center1 = (int(pixel1[idx1[j], 1]), int(pixel1[idx1[j], 0]))
                    center2 = (int(pixel2[idx2[j], 1]) + w, int(pixel2[idx2[j], 0]))
                    dis_kpt = (kpt1[idx1[j]] - kpt2[idx2[j]]).norm(p=2)
                    # a match counts as correct when the aligned points are closer than 2
                    if dis_kpt < 2:
                        tp = tp + 1
                        cv2.line(img, center1, center2, (0, 166, 0), 1, cv2.LINE_AA)
                    else:
                        cv2.line(img, center1, center2, (0, 0, 188), 1, cv2.LINE_AA)
                    cv2.circle(img, center1, 2, (255, 255, 255), -1, cv2.LINE_AA)
                    cv2.circle(img, center2, 2, (255, 255, 255), -1, cv2.LINE_AA)
                img2 = np.vstack((img1, img))
                img2[h, :] = [255, 255, 255]
                cv2.namedWindow('bev match %.3f,%.1fdeg' % (tp / len(idx1), np.arccos(pose_to_frame.cpu().detach().numpy()[0, 0]) / np.pi * 180))
                cv2.imshow('bev match %.3f,%.1fdeg' % (tp / len(idx1), np.arccos(pose_to_frame.cpu().detach().numpy()[0, 0]) / np.pi * 180), img2)
                cv2.waitKey(0)
        #####################################################################################################
        ############################################  ICP  ##################################################
        # debug visualisation, disabled: refine the predicted transform with ICP and display the clouds
        if 0:
            import open3d as o3d
            for i in range(b // 2):
                pose_to_frame = batch_dict['pose_to_frame'][i].cpu().detach().numpy()
                print('angle', np.arccos(pose_to_frame[0, 0]) / 3.14 * 180)
                transformation = batch_dict['transformation'][i].cpu().detach().numpy()
                transformation = np.vstack((transformation, [0, 0, 0, 1]))
                scan1 = batch_dict['scan_query'][i].cpu().detach().numpy()
                scan2 = batch_dict['scan_positive'][i].cpu().detach().numpy()
                pcd1 = o3d.geometry.PointCloud()
                pcd1.points = o3d.utility.Vector3dVector(scan1[:, :3])
                pcd1.colors = o3d.utility.Vector3dVector([[0, 0, 1] for i in range(len(pcd1.points))])
                pcd11 = o3d.geometry.PointCloud()
                pcd11.points = o3d.utility.Vector3dVector(scan1[:, :3])
                pcd11.colors = o3d.utility.Vector3dVector([[0, 0, 1] for i in range(len(pcd1.points))])
                pcd2 = o3d.geometry.PointCloud()
                pcd2.points = o3d.utility.Vector3dVector(scan2[:, :3])
                pcd2.colors = o3d.utility.Vector3dVector([[0, 1, 0] for i in range(len(pcd2.points))])
                icp_config = o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=200, relative_fitness=1e-6,
                                                                               relative_rmse=1e-6)
                trans_init = transformation
                threshold = 2
                estimation_method = o3d.pipelines.registration.TransformationEstimationPointToPoint()
                registration_result = o3d.pipelines.registration.registration_icp(pcd1, pcd2, threshold, trans_init,
                                                                                  estimation_method, icp_config)
                # apply the estimated transform to the cloud being registered
                pcd1.transform(registration_result.transformation)
                vis1 = o3d.visualization.Visualizer()
                vis1.create_window(window_name='registration', width=600, height=600)  # create the window
                render_option: o3d.visualization.RenderOption = vis1.get_render_option()  # point-cloud render options
                render_option.background_color = np.array([1, 1, 1])  # background colour
                render_option.point_size = 2  # size of the rendered points
                vis1.add_geometry(pcd11)
                vis1.run()
                vis2 = o3d.visualization.Visualizer()
                vis2.create_window(window_name='registration', width=600, height=600)  # create the window
                render_option: o3d.visualization.RenderOption = vis2.get_render_option()  # point-cloud render options
                render_option.background_color = np.array([1, 1, 1])  # background colour
                render_option.point_size = 2  # size of the rendered points
                vis2.add_geometry(pcd2)
                vis2.run()
                vis = o3d.visualization.Visualizer()
                vis.create_window(window_name='registration', width=600, height=600)  # create the window
                render_option: o3d.visualization.RenderOption = vis.get_render_option()  # point-cloud render options
                render_option.background_color = np.array([1, 1, 1])  # background colour
                render_option.point_size = 2  # size of the rendered points
                vis.add_geometry(pcd1)
                vis.add_geometry(pcd2)
                vis.run()
        #######################################################################################################
        return batch_dict
 class ImgHead(nn.Module):
    """Image branch: dense ALNet score/feature maps plus top-k keypoint sampling.

    ``forward`` runs the ALNet feature extractor on the first three channels of
    ``batch_dict['img']`` and, when keypoint sampling is enabled, gathers the
    descriptors at the ``num_kpt`` highest-scoring pixels of every image.
    """
    def __init__(self, alnet='alike-n', num_kpt=150, cluster_num=0,vlad_size=256):
        """
        Args:
            alnet: key into the module-level ``configs`` dict that selects the ALNet variant.
            num_kpt: keypoints kept per image; ``self.num_kpt`` is only set when this is > 0,
                which is what enables the sampling branch in ``forward``.
            cluster_num: accepted but not used by this class.
            vlad_size: accepted but not used by this class.
        """
        super(ImgHead, self).__init__()
        cfg = configs[alnet]
        self.feature_extractor = ALNet(c1=cfg['c1'], c2=cfg['c2'], c3=cfg['c3'], c4=cfg['c4'], dim=cfg['dim'],
                                       single_head=cfg['single_head'])
        self.feature_size = int(self.feature_extractor.feature_size)
        # Disabled: optional loading (and freezing) of pretrained ALNet weights.
        # try:
        #     model_path = cfg['model_path']
        # except:
        #     model_path = ''
        # if model_path != '':
        #     state_dict = torch.load(model_path)
        #     self.feature_extractor.load_state_dict(state_dict)
            # for param in self.feature_extractor.parameters():
            #     param.requires_grad = False
        if num_kpt>0:
            self.num_kpt = num_kpt
    def forward(self, batch_dict):
        """Add image features and keypoints to ``batch_dict`` and return it.

        Reads ``batch_dict['img']`` and writes ``'key_pixels'``, ``'fea_kpl'``,
        ``'fea_img'`` and ``'score_img'``. When ``self.num_kpt`` is not set, ``'key_pixels'``
        and ``'fea_kpl'`` are left as empty Python lists.
        """
        # Channels 0..2 are scaled from [0, 255] to [0, 1] before entering the network.
        x = batch_dict['img'][:, 0:3].float() / 255.0
        # x=x[:,:,:,384:768,]
        # pixels = batch_dict['img'][:, 3:5]
        b, c, h, w = x.shape
        pixel_features = []
        kpts = []
        scores = []
        score_img, feature_img = self.feature_extractor(x)
        # feature_img=feature_img*0
        if hasattr(self,'num_kpt') :
            # presumably non-maximum suppression on the score map; verify against simple_nms
            score_img = simple_nms(score_img, 2, 2)
            s_thr = 0.1  # minimum score for a pixel to count as a usable keypoint
            for i in range(b):
                score_global1 = score_img[i, 0]
                values, indices = torch.topk(score_global1.view(-1), k=self.num_kpt, dim=0, largest=True)
                if torch.max(values) < s_thr:
                    # NOTE(review): exit() terminates the whole process when an image has no
                    # pixel above the threshold; callers cannot recover from this.
                    print('0 pixel')
                    exit()
                num_low_value = torch.sum(values < s_thr)
                if num_low_value > 0:
                    # Overwrite the last `num_low_value` picks with copies of the first
                    # `num_low_value` picks, so low-score pixels are replaced by duplicates.
                    # NOTE(review): `values` is not updated, so `scores` below still holds the
                    # scores of the replaced entries.
                    indices1 = indices.clone()
                    indices1[(self.num_kpt - num_low_value):] = indices[:num_low_value]
                    indices = indices1
                # Convert flat indices back to (row, col) positions in the score map.
                row = torch.div(indices, score_global1.shape[1], rounding_mode='trunc')
                col = indices % score_global1.shape[1]
                pixel_feature = feature_img[i:i + 1, :, row, col]
                pixel_features.append(pixel_feature)
                kpts.append(torch.cat([row.view(1, -1, 1), col.view(1, -1, 1)], dim=2))
                scores.append(values.view(1, -1))
            # Stack the per-image results along the batch dimension.
            pixel_features = torch.cat(pixel_features)
            kpts = torch.cat(kpts)
            scores = torch.cat(scores)
        ####################################        show match       ############################################
        # Debug visualisation, disabled by the constant condition. It pairs image i with image
        # i + b // 2 and draws mutual nearest-neighbour matches. Note that it reuses `i`, `h`
        # and `w` from the enclosing scope.
        if 0:
            for i in range(b // 2):
                img1 = batch_dict['img'][i][0:3].permute(1, 2, 0)
                img1 = np.ascontiguousarray(img1.cpu().detach().numpy(), dtype=np.uint8)
                img2 = batch_dict['img'][i + b // 2][0:3].permute(1, 2, 0)
                img2 = np.ascontiguousarray(img2.cpu().detach().numpy(), dtype=np.uint8)
                pixel1 = kpts[i].cpu().detach().numpy()
                pixel2 = kpts[i + b // 2].cpu().detach().numpy()
                fea1 = pixel_features[i].permute(1, 0).cpu().detach().numpy()
                fea2 = pixel_features[i + b // 2].permute(1, 0).cpu().detach().numpy()
                idx1, idx2, dis = tools.nn_match(fea1, fea2, 'euclidean')
                idx1 = idx1[dis < 10]
                idx2 = idx2[dis < 10]
                h, w, _ = img1.shape
                img = np.vstack((img1, img2))
                img[h, :] = [255, 255, 255]
                for i in range(len(idx1)):
                    center1 = (int(pixel1[idx1[i], 1]), int(pixel1[idx1[i], 0]))
                    center2 = (int(pixel2[idx2[i], 1]), int(pixel2[idx2[i], 0] + h))
                    cv2.line(img, center1, center2, (0, 188, 0), 1, cv2.LINE_AA)
                    cv2.circle(img, center1, 2, (0, 0, 255), -1, cv2.LINE_AA)
                    cv2.circle(img, center2, 2, (0, 0, 255), -1, cv2.LINE_AA)
                fig = plt.figure()
                plt.imshow(img[:, :, [2, 1, 0]])
                plt.show()
                # cv2.namedWindow('img match')
                # cv2.imshow('img match', img)
                # cv2.waitKey(0)
        #########################################################################################################
        batch_dict['key_pixels'] = kpts
        batch_dict['fea_kpl'] = pixel_features
        batch_dict['fea_img'] = feature_img
        batch_dict['score_img'] = score_img
        # `netvlad_img` is never assigned inside this class, so this branch only runs if the
        # attribute is attached from outside.
        if hasattr(self, 'netvlad_img'):
            vlad = self.netvlad_img(pixel_features.transpose(1, 2).contiguous())
            batch_dict['vlad_img'] = vlad
        return batch_dict
 class LocalPool(nn.Module):
    """Collapse a fixed set of 100 samples per point into a single value per channel.

    A 1x1 convolution mixes the 100 sample slots down to 10, and a max-pool over those
    10 slots leaves one entry per (channel, point).
    """
    def __init__(self, in_c):
        # `in_c` is accepted but not used: the layer sizes below are fixed.
        super(LocalPool, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=100, out_channels=10, kernel_size=1, stride=1, padding=0, bias=True)
        self.mp = nn.MaxPool2d(kernel_size=(1, 10))
    def forward(self, x):
        """Map a ``(b, c, n, 100)`` tensor to ``(b, c, n, 1)``."""
        batch, channels, points, slots = x.shape  # input must be 4-D; slots is expected to be 100
        slots_first = x.permute(0, 3, 2, 1)  # (b, k, n, c): the sample slots become conv channels
        mixed = self.conv1(slots_first)  # (b, 10, n, c)
        channels_first = mixed.permute(0, 3, 2, 1)  # (b, c, n, 10)
        return self.mp(channels_first)  # (b, c, n, 1)
 class TransformerEncoder(nn.Module):
    """Thin wrapper around ``nn.TransformerEncoder`` using batch-first inputs."""
    def __init__(self, in_c=128, num_heads=4, dropout=0.1, num_layers=2):
        super(TransformerEncoder, self).__init__()
        layer = nn.TransformerEncoderLayer(
            d_model=in_c,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(layer, num_layers=num_layers)
    def forward(self, x):
        """Encode ``x`` (batch dimension first) and return a tensor of the same shape."""
        return self.encoder(x)
 class Attention(nn.Module):
    """Single-head scaled dot-product attention with learned Q/K/V and output projections."""
    def __init__(self, d_model):
        """
        Args:
            d_model: feature width of the queries, keys, values and of the output.
        """
        super(Attention, self).__init__()
        self.d_model = d_model
        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)
        self.fnn = nn.Linear(d_model, d_model)
        # self.dp=nn.Dropout(0.1)
    def forward(self, q, k=None, v=None):
        """Attend from ``q`` over ``k``/``v``.

        Args:
            q: queries, ``(..., N, d_model)``.
            k: keys, ``(..., M, d_model)``. Defaults to ``q`` (self-attention).
            v: values, ``(..., M, d_model)``. Defaults to ``k``.

        Returns:
            ``(output, weights)`` where ``output`` is ``(..., N, d_model)`` and ``weights``
            is the ``(..., N, M)`` softmax attention map.
        """
        # The signature advertises optional k/v; without these fallbacks a call such as
        # `att(x)` would pass None straight into nn.Linear and fail.
        if k is None:
            k = q
        if v is None:
            v = k
        proj_q = self.w_q(q)  # BNC
        proj_k = self.w_k(k)
        proj_w = self.w_v(v)
        # Scale by sqrt(d_model) to keep the logits in a range where softmax is not saturated.
        logits = torch.matmul(proj_q, proj_k.transpose(-2, -1)) / (self.d_model ** 0.5)
        weights = nn.functional.softmax(logits, dim=-1)
        attn_output = torch.matmul(weights, proj_w).contiguous()
        output = self.fnn(attn_output)
        return output, weights
 class Generator(nn.Module):
    """Self-attention followed by a transposed convolution and a resize to ``num`` positions."""
    def __init__(self, in_c=128, num=150):
        super(Generator, self).__init__()
        self.mha = Attention(in_c)
        upsample = nn.ConvTranspose1d(in_c, in_c, kernel_size=3, stride=3, padding=0)
        resize = nn.AdaptiveMaxPool1d(num)
        self.conv1 = nn.Sequential(upsample, resize)
    def forward(self, x):
        """Map ``(b, c, n)`` features to ``(b, c, num)``."""
        batch, channels, length = x.shape  # input must be 3-D
        tokens = x.permute(0, 2, 1)  # BNC, the layout the attention layer works on
        attended, _ = self.mha(tokens, tokens, tokens)
        attended = attended.permute(0, 2, 1)  # back to BCN for the 1-D convolution
        return self.conv1(attended)
 class Converter(nn.Module):
    """Translate per-point features with an attention path and a bottleneck conv path.

    The two paths are concatenated along channels and fused by a bias-free 1x1 convolution.
    Positions whose input is zero in every channel are forced back to zero in the output.
    """
    def __init__(self, in_c=128):
        super(Converter, self).__init__()
        self.mha = Attention(in_c)
        # Channel widths of the 1x1 conv stack: in_c -> in_c -> /4 -> /8 -> /4 -> in_c -> in_c.
        widths = [in_c, in_c, in_c // 4, in_c // 8, in_c // 4, in_c, in_c]
        stack = [
            nn.Conv1d(src, dst, kernel_size=1, stride=1, padding=0)
            for src, dst in zip(widths[:-1], widths[1:])
        ]
        self.conv1 = nn.Sequential(*stack)
        self.conv2 = nn.Conv1d(in_c * 2, in_c, 1, 1, 0, bias=False)
    def forward(self, x):
        """Map ``(b, c, n)`` features to converted ``(b, c, n)`` features."""
        batch, channels, length = x.shape  # input must be 3-D
        empty = (x == 0).all(dim=1)  # (b, n): True where every channel of a position is zero
        tokens = x.permute(0, 2, 1)  # BNC
        attended, _ = self.mha(tokens, tokens, tokens)
        attended = attended.permute(0, 2, 1)
        bottleneck = self.conv1(x)
        fused = self.conv2(torch.cat([attended, bottleneck], dim=1))
        return fused.masked_fill(empty.unsqueeze(1), 0)
 class FusionHead(nn.Module):
    """Fuse four stacked feature variants per keypoint into a single descriptor."""
    def __init__(self, in_c=128):
        super(FusionHead, self).__init__()
        self.mha1 = Attention(in_c)
        self.mha2 = Attention(in_c)
        self.conv1 = nn.Conv1d(in_c * 2, in_c, 1)
    def forward(self, x):
        """Fuse ``x`` of shape ``(B, C, K, N)`` into ``(B, C, N)``.

        Slot 0 along ``K`` is concatenated with the attended result at the end, slots 0..2
        are mixed by self-attention, and slot 3 serves as key/value of a second attention.
        """
        anchor = x[:, :, 0]  # (B, C, N)
        memory = x[:, :, 3]  # (B, C, N)
        B, C, K, N = x.shape
        triplet = x[:, :, :3].permute(0, 3, 2, 1).contiguous()  # (B, N, 3, C)
        triplet = triplet.view(B * N, 3, C)
        mixed, _ = self.mha1(triplet, triplet, triplet)  # (B*N, 3, C)
        pooled = mixed.max(dim=1)[0]  # (B*N, C): max over the three variants
        queries = pooled.view(B, N, C)
        memory_tokens = memory.permute(0, 2, 1)  # (B, N, C)
        attended, _ = self.mha2(queries, memory_tokens, memory.permute(0, 2, 1))
        attended = attended.permute(0, 2, 1)  # (B, C, N)
        stacked = torch.cat([anchor, attended], dim=1)  # (B, 2C, N)
        return self.conv1(stacked)
 def cosine_similarity(feature1, feature2):
    """Return the batched pairwise cosine similarity of two feature sets.

    Args:
        feature1: ``(B, N, C)`` tensor.
        feature2: ``(B, M, C)`` tensor.

    Returns:
        ``(B, N, M)`` tensor; an all-zero feature yields similarity 0 thanks to the 1e-8 term.
    """
    def _unit(feature):
        # L2-normalise along the channel axis; the epsilon keeps zero vectors finite.
        return feature / torch.sqrt(torch.sum(feature ** 2, -1, keepdim=True) + 1e-8)
    return torch.bmm(_unit(feature1), _unit(feature2).transpose(1, 2))
 class Fusion(nn.Module):
    """Top-level model that runs the image head, the BEV head, or both with feature fusion.

    ``cfg['flag']`` selects the mode: ``'fusion'``, ``'bev'`` or ``'img'``. In every mode the
    final global descriptor is written to ``batch_dict['vlads']``.
    """
    def __init__(self, cfg):
        """
        Args:
            cfg: dict-like configuration. Always reads ``'flag'``; depending on the mode also
                ``'kpts_number_img'``, ``'kpts_number_bev'``, ``'cluster_num_img'``,
                ``'cluster_num_bev'``, ``'cluster_num_fusion'``, ``'vlad_size'`` and
                ``'sinkhorn_iter'``.
        """
        super().__init__()
        flag = cfg['flag']
        self.flag = flag
        if flag == 'fusion':
            self.img = ImgHead(alnet='alike-n', num_kpt=cfg['kpts_number_img'],
                               cluster_num=cfg['cluster_num_img'], vlad_size=cfg['vlad_size'])
            self.bev = BEVHead(alnet='alike-n', iter=cfg['sinkhorn_iter'],
                               num_kpt=cfg['kpts_number_bev'], cluster_num=cfg['cluster_num_bev'], vlad_size=cfg['vlad_size'])
            # Both branches must produce descriptors of the same width to be fused.
            assert self.img.feature_size == self.bev.feature_size, 'img feature and image feature should be the same size'
            feature_size = self.img.feature_size
            self.localpool = LocalPool(feature_size)
            # Two converters: cvt_img is applied to BEV-side features and cvt_bev to
            # image-side features in forward().
            self.cvt_img = Converter(feature_size)
            self.cvt_bev = Converter(feature_size)
            self.gen_pan = Generator(feature_size, cfg['kpts_number_bev'])
            self.att_fusion = FusionHead(feature_size)
            # self.netvlad_fusion = NetVLADLoupe(feature_size, cfg['cluster_num_fusion'], cfg['vlad_size'])
            self.netvlad_fusion = NetVLAD(feature_size, cfg['cluster_num_fusion'])
            self.uot = UOTHead(nb_iter=cfg['sinkhorn_iter'],name='fusion')
            # Selects which descriptor becomes 'vlads' in forward(); only 'fusion' is ever
            # assigned here, so the 'bev' branch there needs an external override.
            self.vlad='fusion'
            # Learnable scalar; sigmoid(w) blends the fusion and BEV descriptors (0.5 at init).
            self.w= torch.nn.Parameter(torch.zeros(1))
        if flag == 'bev':
            self.bev = BEVHead(alnet='alike-n',iter=cfg['sinkhorn_iter'], num_kpt=cfg['kpts_number_bev'], cluster_num=cfg['cluster_num_bev'], vlad_size=256)
        if flag == 'img':
            self.img = ImgHead(alnet='alike-n', num_kpt=cfg['kpts_number_img'], cluster_num=cfg['cluster_num_img'], vlad_size=cfg['vlad_size'])
    def forward(self, batch_dict):
        """Run the configured branch(es) and store the global descriptor in ``batch_dict['vlads']``.

        In ``'fusion'`` mode this also writes the intermediate generated/converted features
        (``'fea_kpt_original_gen'``, ``'fea_kpt_gen_gen'``, ``'fea_kpl_gen'``,
        ``'fea_kpt_fusion'`` and, when ``'pose_to_frame'`` is present, the four ``*_dual*``
        entries) back into ``batch_dict``.
        """
        if self.flag == 'fusion':
            batch_dict = self.img(batch_dict)
            batch_dict = self.bev(batch_dict)
            fea_img = batch_dict['fea_img']
            # The next three entries are expected after both heads have run
            # (presumably written by BEVHead or the data loader; verify against them).
            fea_bev = batch_dict['fea_bev']
            relation = batch_dict['relation']
            fea_kpt_original = batch_dict['fea_kpt_original']
            # fea_kpl = batch_dict['fea_kpl']
            # pixel_kpt = batch_dict['pixels_kpt']
            b, n1, n2, _ = relation.shape
            # The last entry along dim 2 is handled separately (used as BEV coordinates below);
            # the first n2 entries are used as image pixel coordinates.
            n2 = n2 - 1
            # ns=torch.sum((relation[:,:,-1]>0).all(dim=2),dim=1)
            # n_least=torch.min(ns)
            # n_least=min(n_least,256)
            # relation1=[]
            # for i in range(b):
            #     idx=torch.randperm(ns[i])[:n_least].to(relation.device)
            #     relation1.append(relation[i:i+1,idx])
            # relation1=torch.cat(relation1)
            # relation=relation1
            pixel_img = relation[:, :, 0:n2].clone()
            # Swap the two coordinate components and rescale them to [-1, 1] by dividing by
            # (W - 1, H - 1), the range grid_sample expects with align_corners=True.
            grid_img = pixel_img[:, :, :, [1, 0]].float() / torch.tensor([fea_img.shape[3] - 1, fea_img.shape[2] - 1]).to(fea_img.device).float() * 2 - 1
            fea_pl_dual = F.grid_sample(fea_img, grid_img, align_corners=True, mode='bilinear', padding_mode='zeros')
            # LocalPool's 1x1 convolution has 100 input channels, so this requires n2 == 100.
            fea_pl_dual = self.localpool(fea_pl_dual).squeeze(3)
            fea_pt_dual_gen = self.cvt_bev(fea_pl_dual)
            if 'pose_to_frame' in batch_dict.keys() and hasattr(self, 'uot'):
                # Sample BEV features at the coordinates stored in the last `relation` entry.
                pixel_bev = relation[:, :, n2:n2 + 1, 0:2].clone()
                grid_bev = pixel_bev[:, :, :, [1, 0]].float() / torch.tensor([fea_bev.shape[3] - 1, fea_bev.shape[2] - 1]).to(fea_bev.device).float() * 2 - 1
                fea_pt_dual = (F.grid_sample(fea_bev, grid_bev, align_corners=True, mode='bilinear', padding_mode='zeros')).squeeze(3)
                fea_pl_dual_gen = self.cvt_img(fea_pt_dual)
                batch_dict['fea_pt_dual_gen'] = fea_pt_dual_gen
                batch_dict['fea_pl_dual_gen'] = fea_pl_dual_gen
                batch_dict['fea_pt_dual'] = fea_pt_dual
                batch_dict['fea_pl_dual'] = fea_pl_dual
            fea_kpt_original_gen = self.gen_pan(fea_pt_dual_gen)
            batch_dict['fea_kpt_original_gen'] = fea_kpt_original_gen
            # Round trip of the keypoint features: through cvt_img, then back through cvt_bev.
            fea_kpl_gen = self.cvt_img(fea_kpt_original)
            fea_kpt_gen_gen = self.cvt_bev(fea_kpl_gen)
            batch_dict['fea_kpt_gen_gen'] = fea_kpt_gen_gen
            batch_dict['fea_kpl_gen']=fea_kpl_gen
            # Stack the four variants along a new dim 2; FusionHead relies on this order
            # (slot 0 = original, slot 3 = fea_kpl_gen).
            fea_kpts = torch.cat([fea_kpt_original.unsqueeze(2), fea_kpt_original_gen.unsqueeze(2), fea_kpt_gen_gen.unsqueeze(2), fea_kpl_gen.unsqueeze(2)], dim=2)
            fea_kpt_fusion = self.att_fusion(fea_kpts)
            # NOTE(review): the key 'fea_kpt_fusion' receives the ORIGINAL keypoint features,
            # not the fused tensor computed above; the fused tensor is only used for the VLAD
            # descriptor below. Confirm that this is intentional.
            batch_dict['fea_kpt_fusion'] = fea_kpt_original
            # Disabled debug plots of the self-similarity matrices of each feature variant.
            # sim10 = cosine_similarity(fea_pt_dual.permute(0, 2, 1), fea_pt_dual.permute(0, 2, 1))[0].cpu().detach().numpy()
            # sim11 = cosine_similarity(fea_pt_dual_gen.permute(0, 2, 1), fea_pt_dual_gen.permute(0, 2, 1))[0].cpu().detach().numpy()
            # sim20 = cosine_similarity(fea_pl_dual.permute(0, 2, 1), fea_pl_dual.permute(0, 2, 1))[0].cpu().detach().numpy()
            # sim21 = cosine_similarity(fea_pl_dual_gen.permute(0, 2, 1), fea_pl_dual_gen.permute(0, 2, 1))[0].cpu().detach().numpy()
            # sim30 = cosine_similarity(fea_kpt_original.permute(0, 2, 1), fea_kpt_original.permute(0, 2, 1))[0].cpu().detach().numpy()
            # sim31 = cosine_similarity(fea_kpt_original_gen.permute(0, 2, 1), fea_kpt_original_gen.permute(0, 2, 1))[0].cpu().detach().numpy()
            # sim32 = cosine_similarity(fea_kpt_gen_gen.permute(0, 2, 1), fea_kpt_gen_gen.permute(0, 2, 1))[0].cpu().detach().numpy()
            # fig=plt.figure()
            # plt.subplot(2, 4, 1), plt.imshow(sim10), plt.title('points')
            # plt.subplot(2, 4, 5), plt.imshow(sim11), plt.title('gen points')
            # plt.subplot(2, 4, 2), plt.imshow(sim20), plt.title('pixel')
            # plt.subplot(2, 4, 6), plt.imshow(sim21), plt.title('gen pixel')
            # plt.subplot(2, 4, 3), plt.imshow(sim30), plt.title('kpt orig')
            # plt.subplot(2, 4, 7), plt.imshow(sim31), plt.title('pan kpt')
            # plt.subplot(2, 4, 4), plt.imshow(sim30), plt.title('kpt orig')
            # plt.subplot(2, 4, 8), plt.imshow(sim32), plt.title('kpt gen gen')
            # plt.show()
            if 'pose_to_frame' in batch_dict.keys() and hasattr(self, 'uot'):
                # The return value is discarded; any result must be written into batch_dict
                # by UOTHead itself (verify against its implementation).
                self.uot(batch_dict)
            # NetVLAD expects (B, C, H, W); the keypoint axis becomes H and W is 1.
            vlad_fusion = self.netvlad_fusion(fea_kpt_fusion.unsqueeze(3))
            if self.vlad=='bev':
                batch_dict['vlads']=batch_dict['vlad_bev']
            if self.vlad=='fusion':
                if 'vlad_bev' in batch_dict.keys():
                    # Convex combination of both descriptors with the learned weight sigmoid(w).
                    batch_dict['vlads']=torch.sigmoid(self.w)*vlad_fusion + (1-torch.sigmoid(self.w))*batch_dict['vlad_bev']
                else:
                    batch_dict['vlads']=vlad_fusion
        if self.flag == 'bev':
            batch_dict = self.bev(batch_dict)
            batch_dict['vlads'] = batch_dict['vlad_bev']
        if self.flag == 'img':
            batch_dict = self.img(batch_dict)
            # NOTE(review): ImgHead only writes 'vlad_img' when it has a `netvlad_img`
            # attribute, which it never assigns itself; this lookup may raise KeyError.
            batch_dict['vlads'] = batch_dict['vlad_img']
        return batch_dict
 if __name__ == '__main__':
    # Smoke check when run as a script: build a BEVHead with its default arguments.
    b=BEVHead()
--- a/netvlad.py
+++ b/netvlad.py
@@ -0,0 +1,155 @@
 import math
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 class NetVLAD(nn.Module):
    """NetVLAD aggregation of local descriptors into one L2-normalised global vector."""
    def __init__(self, fea_size=128, num_clusters=16):
        """
        Args:
            fea_size: channel width ``C`` of the incoming descriptors.
            num_clusters: number of learned cluster centres.
        """
        super().__init__()
        self.num_clusters = num_clusters
        self.conv = nn.Conv2d(fea_size, num_clusters, kernel_size=(1, 1), bias=True)
        self.centroids = nn.Parameter(torch.randn(num_clusters, fea_size))
        self.relu = nn.ReLU(inplace=True)
    def forward(self, x):
        """Aggregate ``x`` of shape ``(B, C, H, W)`` into a ``(B, num_clusters * C)`` vector."""
        # Soft cluster assignment per location: 1x1 conv -> ReLU -> softmax over clusters.
        logits = self.relu(self.conv(x))  # (B, num_clusters, H, W)
        assign = F.softmax(logits, dim=1)
        batch, channels, height, width = x.shape
        assign = assign.view(batch, self.num_clusters, -1)  # (B, num_clusters, H * W)
        # Lay the descriptors out as (B, 1, H * W, C) so they broadcast against the centres.
        descriptors = x.view(batch, channels, -1).unsqueeze(1).permute(0, 1, 3, 2)
        centres = self.centroids.unsqueeze(0).unsqueeze(2)  # (1, num_clusters, 1, C)
        # Residual to every centre, weighted by the soft assignment.
        weighted = (descriptors - centres) * assign.unsqueeze(-1)  # (B, num_clusters, H * W, C)
        per_cluster = weighted.sum(dim=-2)  # (B, num_clusters, C)
        # Intra-normalise each cluster, flatten, then normalise the whole descriptor.
        per_cluster = F.normalize(per_cluster, p=2, dim=2)
        flat = per_cluster.view(batch, -1)
        return F.normalize(flat, p=2, dim=1)
 class NetVLADLoupe(nn.Module):
    """
    NetVLAD layer with a learned output projection and optional context gating.

    Original Tensorflow implementation: https://github.com/antoine77340/LOUPE
    """
    def __init__(self, feature_size, cluster_size, output_dim,
                 gating=True, add_norm=True, is_training=True, normalization='batch'):
        """
        Args:
            feature_size: width ``C`` of each input descriptor.
            cluster_size: number of VLAD clusters.
            output_dim: width of the projected output descriptor.
            gating: append a ``GatingContext`` layer when True.
            add_norm: normalise the cluster activations instead of adding a learned bias.
            is_training: stored on the instance; not otherwise used by this class.
            normalization: ``'instance'`` (LayerNorm), ``'group'`` (GroupNorm with 8 groups)
                or anything else for BatchNorm1d.
        """
        super(NetVLADLoupe, self).__init__()
        self.feature_size = feature_size
        # output_dim=cluster_size * feature_size
        self.output_dim = output_dim
        self.is_training = is_training
        self.gating = gating
        self.add_batch_norm = add_norm
        self.cluster_size = cluster_size
        def make_norm(width):
            # Build the normalisation layer selected by `normalization`.
            if normalization == 'instance':
                return nn.LayerNorm(width)
            if normalization == 'group':
                return nn.GroupNorm(8, width)
            return nn.BatchNorm1d(width)
        scale = math.sqrt(feature_size)
        self.softmax = nn.Softmax(dim=-1)
        # The parameters are created in this exact order so that seeded initialisation
        # draws the same random values as before.
        self.cluster_weights = nn.Parameter(torch.randn(feature_size, cluster_size) / scale)
        self.cluster_weights2 = nn.Parameter(torch.randn(1, feature_size, cluster_size) / scale)
        self.hidden1_weights = nn.Parameter(torch.randn(cluster_size * feature_size, output_dim) / scale)
        if add_norm:
            self.cluster_biases = None
            self.bn1 = make_norm(cluster_size)
        else:
            self.cluster_biases = nn.Parameter(torch.randn(cluster_size) / scale)
            self.bn1 = None
        self.bn2 = make_norm(output_dim)
        if gating:
            self.context_gating = GatingContext(output_dim, add_batch_norm=add_norm, normalization=normalization)
    def forward(self, x):
        """
        x: B N C

        Returns a ``(B, output_dim)`` descriptor.
        """
        num_samples = x.shape[1]
        logits = torch.matmul(x, self.cluster_weights)
        if self.add_batch_norm:
            # Normalise over all (batch, sample) rows at once, then restore the layout.
            flat_logits = self.bn1(logits.view(-1, self.cluster_size))
            logits = flat_logits.view(-1, num_samples, self.cluster_size)
        else:
            logits = logits + self.cluster_biases
        assign = self.softmax(logits)  # (B, N, cluster_size)
        # Assignment mass per cluster times the learned centres: the term subtracted below.
        centre_term = assign.sum(-2, keepdim=True) * self.cluster_weights2
        assign_t = torch.transpose(assign, 2, 1)  # (B, cluster_size, N)
        x = x.view((-1, num_samples, self.feature_size))
        aggregated = torch.transpose(torch.matmul(assign_t, x), 2, 1).contiguous()  # (B, C, cluster_size)
        residual = aggregated - centre_term
        residual = F.normalize(residual, dim=1, p=2, eps=1e-6)
        flat = residual.view((-1, self.cluster_size * self.feature_size))
        flat = F.normalize(flat, dim=1, p=2, eps=1e-6)
        projected = self.bn2(torch.matmul(flat, self.hidden1_weights))
        if self.gating:
            projected = self.context_gating(projected)
        # vlad = vlad / vlad.norm(dim=1, keepdim=True)
        return projected
 class GatingContext(nn.Module):
    """
    Context gating: rescale every feature by a learned sigmoid gate.

    Original Tensorflow implementation: https://github.com/antoine77340/LOUPE
    """
    def __init__(self, dim, add_batch_norm=True, normalization='batch'):
        """
        Args:
            dim: feature width of the input and output.
            add_batch_norm: normalise the gate logits instead of adding a learned bias.
            normalization: ``'instance'`` (LayerNorm), ``'group'`` (GroupNorm with 8 groups)
                or anything else for BatchNorm1d.
        """
        super(GatingContext, self).__init__()
        self.dim = dim
        self.add_batch_norm = add_batch_norm
        def make_norm(width):
            # Build the normalisation layer selected by `normalization`.
            if normalization == 'instance':
                return nn.LayerNorm(width)
            if normalization == 'group':
                return nn.GroupNorm(8, width)
            return nn.BatchNorm1d(width)
        scale = math.sqrt(dim)
        self.gating_weights = nn.Parameter(torch.randn(dim, dim) / scale)
        self.sigmoid = nn.Sigmoid()
        if add_batch_norm:
            self.gating_biases = None
            self.bn1 = make_norm(dim)
        else:
            self.gating_biases = nn.Parameter(torch.randn(dim) / scale)
            self.bn1 = None
    def forward(self, x):
        """Return ``x`` multiplied element-wise by its sigmoid gate."""
        logits = torch.matmul(x, self.gating_weights)
        logits = self.bn1(logits) if self.add_batch_norm else logits + self.gating_biases
        return x * self.sigmoid(logits)
--- a/preparedataset.py
+++ b/preparedataset.py
@@ -0,0 +1,105 @@
 import os
 import numpy as np
 import torch
 import tqdm
 def k3602k(k, k360):
    """Expose KITTI-360 drives under a KITTI-odometry style directory layout.

    For every listed drive ``src`` the data is published as sequence ``src + 50`` below ``k``:
    poses are converted and saved as ``poses/<seq>.npy``, while the calibration file, the
    velodyne scans and the rectified images are symlinked and renumbered from zero.

    Args:
        k: root directory of the KITTI-odometry style tree to write into.
        k360: root directory of the KITTI-360 dataset to read from.
    """
    for src in [0, 3, 4, 5, 6, 7, 9, 10]:
        tgt = src + 50
        path_pose = k360 + '/data_poses/2013_05_28_drive_%04d_sync/cam0_to_world.txt' % src
        path_velo = k360 + '/data_3d_raw/2013_05_28_drive_%04d_sync/velodyne_points/data' % src
        path_calib = k360 + '/calibration/calib_cam_to_velo.txt'
        path_img = k360 + '/data_2d_raw/2013_05_28_drive_%04d_sync/image_00/data_rect' % src
        path_pose1 = k + '/data_odometry_poses/dataset/poses/%02d.npy' % tgt
        path_velo1 = k + '/data_odometry_velodyne/dataset/sequences/%02d/velodyne' % tgt
        path_calib1 = k + '/data_odometry_calib/dataset/sequences/%02d' % tgt
        path_img1 = k + '/data_odometry_color/dataset/sequences/%02d/image_2/' % tgt
        # The poses directory is included here: it was previously never created, so
        # np.save below failed on a fresh target tree.
        for directory in (path_velo1, path_img1, path_calib1, os.path.dirname(path_pose1)):
            os.makedirs(directory, exist_ok=True)
        if not os.path.exists(path_calib1 + '/calib.txt'):
            os.symlink(path_calib, path_calib1 + '/calib.txt')
        # The calibration is taken from the last line of the file (3x4 row-major matrix).
        with open(path_calib, 'r') as f:
            for line in f.readlines():
                data = np.array([float(x) for x in line.split()])
        cam0_to_velo = np.reshape(data, (3, 4))
        cam0_to_velo = np.vstack([cam0_to_velo, [0, 0, 0, 1]])
        cam0_to_velo = torch.tensor(cam0_to_velo)
        # Loop-invariant: invert once instead of once per pose line.
        velo_to_cam0 = cam0_to_velo.inverse()
        poses2 = []
        ids = []
        with open(path_pose, 'r') as f:
            for x in f:
                # Each line: frame id followed by the first three rows of a 4x4 matrix.
                x = x.strip().split()
                x = [float(v) for v in x]
                ids.append(int(x[0]))
                pose = torch.zeros((4, 4), dtype=torch.float64)
                pose[0, 0:4] = torch.tensor(x[1:5])
                pose[1, 0:4] = torch.tensor(x[5:9])
                pose[2, 0:4] = torch.tensor(x[9:13])
                pose[3, 3] = 1.0
                # cam0->world composed with velo->cam0 gives velo->world.
                pose = pose @ velo_to_cam0
                poses2.append(pose.float().numpy())
        pose = np.stack(poses2)
        np.save(path_pose1, pose)
        cnt = 0
        for i in tqdm.tqdm(ids, desc='%02d:' % src):
            path_velo_now = os.path.join(path_velo, '0000%06d.bin' % i)
            path_img_now = os.path.join(path_img, '0000%06d.png' % i)
            # NOTE(review): linking stops at the first frame with a missing scan or image,
            # while the saved pose array above still contains every frame id.
            if not (os.path.exists(path_velo_now) and os.path.exists(path_img_now)):
                break
            path_velo_now1 = os.path.join(path_velo1, '%06d.bin' % cnt)
            path_img_now1 = os.path.join(path_img1, '%06d.png' % cnt)
            if not os.path.exists(path_velo_now1):
                os.symlink(path_velo_now, path_velo_now1)
            if not os.path.exists(path_img_now1):
                os.symlink(path_img_now, path_img_now1)
            cnt = cnt + 1
 def todataset(kitti_root, dataset_root):
    """Build ``dataset_root/<seq>/`` folders whose entries are symlinks into ``kitti_root``.

    Each sequence folder receives ``velodyne``, ``image_2``, ``poses.txt`` (``poses.npy`` for
    sequences >= 50) and ``calib.txt`` links. Existing entries are left untouched.
    """
    for s in [0, 5, 6, 7, 8, 9, 50, 54, 55, 56, 59]:
        # Sequences numbered 50 and above store their poses as .npy instead of .txt.
        suffix = '.npy' if s >= 50 else '.txt'
        dataset_path = dataset_root + '/%02d' % s
        # (link target inside the KITTI tree, link location inside the dataset folder)
        links = (
            (kitti_root + '/data_odometry_velodyne/dataset/sequences/%02d/velodyne' % s,
             dataset_path + '/velodyne'),
            (kitti_root + '/data_odometry_color/dataset/sequences/%02d/image_2' % s,
             dataset_path + '/image_2'),
            (kitti_root + '/data_odometry_poses/dataset/poses/%02d' % s + suffix,
             dataset_path + '/poses' + suffix),
            (kitti_root + '/data_odometry_calib/dataset/sequences/%02d/calib.txt' % s,
             dataset_path + '/calib.txt'),
        )
        if not os.path.exists(dataset_path):
            os.makedirs(dataset_path)
        for target, link in links:
            if os.path.exists(link):
                continue
            os.symlink(target, link)
 if __name__ == '__main__':
    # Script entry point. The paths below are absolute and specific to one machine;
    # edit them before running elsewhere.
    k360 = '/home/coop8/chenyouyuan/FUSIONLCD/KITTI360'
    k = '/home/coop8/chenyouyuan/FUSIONLCD/KITTI'
    # Step 1: publish the KITTI-360 drives inside the KITTI tree as sequences 50+.
    k3602k(k, k360)
    kitti_root = '/home/coop8/chenyouyuan/FUSIONLCD/KITTI'
    dataset_root = '/home/coop8/chenyouyuan/FUSIONLCD/sequences'
    # Step 2: build the flat per-sequence folders of symlinks.
    todataset(kitti_root, dataset_root)
--- a/tools.py
+++ b/tools.py
@@ -0,0 +1,149 @@
 import threading
 import torch
 import os
 import time
 def farthest_point_sample(xyz, npoint):
    """Iterative farthest point sampling
    Args:
        xyz: pointcloud data_loader, [B, N, C]
        npoint: number of samples
    Returns:
        centroids: sampled pointcloud index, [B, npoint]
    """
    device = xyz.device
    B, N, C = xyz.shape
    centroids = torch.zeros(B, npoint, dtype=torch.long, device=device)
    # Squared distance from every point to its nearest already-selected point.
    distance = torch.ones(B, N, device=device) * 1e10
    # The random start index is still drawn on the CPU and then moved, so seeded runs keep
    # producing the same sequence as before.
    farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device)
    batch_indices = torch.arange(B, dtype=torch.long, device=device)
    for i in range(npoint):
        centroids[:, i] = farthest
        # Use the actual channel count C; this was hard-coded to 3 and failed for any
        # other point dimensionality despite the documented [B, N, C] input.
        centroid = xyz[batch_indices, farthest, :].view(B, 1, C)
        dist = torch.sum((xyz - centroid) ** 2, -1)
        mask = dist < distance
        distance[mask] = dist[mask]
        # The next sample is the point farthest from everything selected so far.
        farthest = torch.max(distance, -1)[1]
    return centroids
 def batch_distance(feature1,feature2,mode='cosine'):
    """Return the batched pairwise distance matrix between two feature sets.

    Args:
        feature1: ``(B, N, C)`` tensor.
        feature2: ``(B, M, C)`` tensor.
        mode: ``'cosine'`` for ``1 - cosine similarity`` or ``'euclidean'`` for the L2 distance.

    Returns:
        ``(B, N, M)`` distance tensor.

    Raises:
        ValueError: if ``mode`` is not one of the supported values.
    """
    if mode == 'cosine':
        # Transport cost matrix
        feature1 = feature1 / torch.sqrt(torch.sum(feature1 ** 2, -1, keepdim=True) + 1e-8)
        feature2 = feature2 / torch.sqrt(torch.sum(feature2 ** 2, -1, keepdim=True) + 1e-8)
        dis = 1.0 - torch.bmm(feature1, feature2.transpose(1, 2))
    elif mode == 'euclidean':
        # Centre both sets on their joint mean first: distances are unchanged, and the
        # smaller magnitudes reduce cancellation error in the expansion below.
        feature=torch.cat([feature1,feature2],dim=1)
        feature_mean=torch.mean(feature,dim=1,keepdim=True)
        feature1=feature1-feature_mean
        feature2=feature2-feature_mean
        distance_matrix = torch.sum(feature1 ** 2, -1, keepdim=True)
        distance_matrix = distance_matrix + torch.sum(feature2 ** 2, -1, keepdim=True).transpose(1, 2)
        distance_matrix = distance_matrix - 2 * torch.bmm(feature1, feature2.transpose(1, 2))  # c^2=a^2+b^2-2abcos
        # Rounding can leave tiny negative squared distances (e.g. for identical features),
        # whose square root would be NaN.
        dis = distance_matrix.clamp(min=0) ** 0.5
    else:
        # Previously an unknown mode fell through to an unbound local variable.
        raise ValueError("Unsupported mode %r, expected 'cosine' or 'euclidean'" % (mode,))
    return dis
 def nn_match(fea1, fea2, matrix='cosine'):
    """Find mutual nearest-neighbour matches between two descriptor sets.

    Args:
        fea1: ``(N, C)`` tensor or array.
        fea2: ``(M, C)`` tensor or array.
        matrix: distance metric, ``'cosine'`` or ``'euclidean'``.

    Returns:
        ``(idx_fea1, idx_fea2, distance)``: for every mutual match, the row index into
        ``fea1``, the row index into ``fea2`` and the distance between the two.

    Raises:
        ValueError: if ``matrix`` is not a supported metric.
    """
    assert len(fea1.shape) == 2 and len(fea2.shape) == 2, 'nnmatch error'
    if not isinstance(fea1, torch.Tensor):
        fea1 = torch.tensor(fea1)
    if not isinstance(fea2, torch.Tensor):
        fea2 = torch.tensor(fea2)
    if matrix == 'cosine':
        # Transport cost matrix
        fea1 = fea1 / torch.sqrt(torch.sum(fea1 ** 2, -1, keepdim=True) + 1e-8)
        fea2 = fea2 / torch.sqrt(torch.sum(fea2 ** 2, -1, keepdim=True) + 1e-8)
        dis = 1.0 - torch.mm(fea1, fea2.transpose(0, 1))
    elif matrix == 'euclidean':
        distance_matrix = torch.sum(fea1 ** 2, -1, keepdim=True)
        distance_matrix = distance_matrix + torch.sum(fea2 ** 2, -1, keepdim=True).transpose(0, 1)
        distance_matrix = distance_matrix - 2 * torch.mm(fea1, fea2.transpose(0, 1))  # c^2=a^2+b^2-2abcos
        # Clamp before the root: rounding can make the squared distance slightly negative,
        # which would turn into NaN and corrupt the argmin below.
        dis = distance_matrix.clamp(min=0) ** 0.5
    else:
        # Previously this printed a message, set dis = 0 and then crashed inside argmin.
        raise ValueError("Invalid matrix %r, expected 'cosine' or 'euclidean'" % (matrix,))
    idx0_min = torch.argmin(dis, dim=0)  # best fea1 row for every fea2 row
    idx1_min = torch.argmin(dis, dim=1)  # best fea2 row for every fea1 row
    ids1 = torch.arange(0, dis.shape[1]).to(fea1.device)
    # A fea2 row j is a mutual match when the best partner of its best partner is j itself.
    idx = idx1_min[idx0_min]
    idx_match = ids1 == idx
    idx1 = ids1[idx_match]
    idx2 = idx0_min[idx_match]
    dis_min = dis[idx2, idx1]
    return idx2, idx1, dis_min
 def path_join(*args):
    """Join path components and normalise every backslash to a forward slash.

    Args:
        *args: one or more path components; at least one is required.

    Returns:
        The joined path using ``/`` as the only separator.

    Raises:
        IndexError: if called without any component.
    """
    path = args[0]
    if len(args) > 1:
        path = os.path.join(path, *args[1:])
    # A single str.replace pass instead of rewriting the string one character at a time.
    return path.replace("\\", "/")
 def make_save_path(*args):
    """Join the components with ``path_join``, create the directory if missing, return it."""
    target = path_join(*args)
    if os.path.exists(target):
        return target
    os.makedirs(target)
    return target
 def read_cfg(data):
    """Normalise a config value to a list.

    A plain ``int`` becomes a one-element list; anything else is treated as a
    comma-separated string and split into its (string) parts.
    """
    return [data] if type(data) is int else data.split(',')
 class Timer:
    """Stopwatch that prints the total elapsed time and the time since the last update."""
    def __init__(self, name="Now"):
        self.strat = time.time()  # start timestamp (attribute name kept as spelled)
        self.cnt = 0  # number of update() calls so far
        self.end = time.time()  # timestamp of the latest update
        self.avg = 0  # mean seconds per update
        self.all = 0  # seconds between start and the latest update
        self.now = 0  # seconds between the two latest updates
        self.name = name
        stamp = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
        print('Init timer: ',stamp)
    def update(self, name=None):
        """Record a step, optionally relabel the timer, and print a progress line."""
        if name is not None:
            self.name = name
        self.cnt += 1
        self.end = time.time()
        self.avg = (self.end - self.strat) / self.cnt
        # Step duration: new total minus the total stored by the previous call.
        self.now = self.end - self.all - self.strat
        self.all = self.end - self.strat
        stamp = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
        # The precision of the per-step figure shrinks as the average step time grows.
        template = "%s | %s | using %d | each %d"
        for limit, candidate in (
            (1, "%s | %s | using %d | each %.3f"),
            (10, "%s | %s | using %d | each %.2f"),
            (100, "%s | %s | using %d | each %.1f"),
        ):
            if self.avg < limit:
                template = candidate
                break
        print(template % (stamp, self.name, self.all, self.now))
 if __name__ == '__main__':
    # Nothing to run: this module only provides helpers. The call below is disabled.
    # draw_trace()
    pass
--- a/train.py
+++ b/train.py
@@ -0,0 +1,432 @@
 import argparse
 import os
 import time
 import numpy as np
 import torch
 import torch.optim as optim
 import yaml
 import net
 import tools
 from dataset import KittiTotalLoader
 from evaluate_lcd import lcd
 from loss import TotalLoss
 test_step = 10  # interval, in epochs, between saved checkpoints / evaluation runs
 def save_checkpoint(model, optimizer, loss_total_fun, epoch, iter_train, path_result):
    """Write a checkpoint every ``test_step`` epochs.

    Nothing happens unless ``epoch + 1`` is a multiple of ``test_step`` and at
    least ``test_step``. The checkpoint stores a timestamp, the epoch and the
    state dicts of ``model`` and ``optimizer`` under
    ``<path_result>/models/checkpoint_<epoch>.pth.tar``.

    ``loss_total_fun`` and ``iter_train`` are accepted but not used here.
    """
    finished = epoch + 1
    if finished % test_step != 0 or finished < test_step:
        return
    stamp = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
    state = {'time': stamp,
             'epoch': epoch,
             'model': model.state_dict(),
             'optimizer': optimizer.state_dict()}
    savepath = tools.make_save_path(path_result, 'models')
    torch.save(state, savepath + '/checkpoint_%03d.pth.tar' % epoch)
    print(savepath + '/checkpoint_%03d.pth.tar is saved' % epoch)
 class log_result():
    """Plain-text result table whose rows are overwritten in place.

    A new file gets a header line followed by 300 empty rows; ``write``
    replaces one row by index, so repeated runs update the same table.
    """
    def __init__(self,path_result):
        """Remember ``path_result`` and create the table file if it is missing.

        Args:
            path_result: Path of the text file holding the table.
        """
        self.path=path_result
        if not os.path.exists(path_result):
            # Create the parent directory as well, so a fresh checkout without
            # the result folder does not fail on open().
            parent = os.path.dirname(path_result)
            if parent:
                os.makedirs(parent, exist_ok=True)
            with open(path_result, 'w') as file:
                file.write('Time           Sequence Epoch  AP    R100  F1    R@1   R@2   R@3   R@4   R@5')
                file.write('   R@6   R@7   R@8   R@9   R@10  R@15  R@20  R@25\n')
                file.write('\n' * 300)
    def write(self,seq,epoch,row,x):
        """Overwrite line ``row`` with a timestamp, ``seq``, ``epoch`` and metrics.

        Args:
            seq: Sequence id, formatted with ``%08d``.
            epoch: Epoch number, formatted with ``%06d``.
            row: Zero-based line index to replace (line 0 is the header).
            x: Iterable of numbers, each appended as `` %.3f``.
        """
        with open(self.path, 'r') as file:
            lines = file.readlines()
        if row >= len(lines):
            # Grow the table instead of raising IndexError when the row lies
            # beyond the pre-allocated blank lines.
            if lines and not lines[-1].endswith('\n'):
                lines[-1] += '\n'
            lines.extend(['\n'] * (row + 1 - len(lines)))
        time_now = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
        new_content = '%s %08d %06d'%(time_now,seq,epoch)
        new_content += ''.join(' %.3f' % value for value in x)
        lines[row] = new_content+'\n'
        with open(self.path, 'w') as file:
            file.writelines(lines)
 def train(model, optimizer, loss_total_fun, data, device):
    """Run one optimisation step on a single batch.

    The query and positive tensors are moved to ``device``, stacked along
    dim 0 and permuted with ``(0, 3, 1, 2)`` (presumably channels-last to
    channels-first; confirm against the dataset). ``model`` and
    ``loss_total_fun`` are both called with the batch dict; the code then reads
    ``batch_dict['loss'][0]``, so they are expected to fill it in place.

    Args:
        model: Module called as ``model(batch_dict)``.
        optimizer: Optimizer stepping the parameters of ``model``.
        loss_total_fun: Callable invoked as ``loss_total_fun(batch_dict)``.
        data: Mapping with the keys ``sequence``, ``id_query``,
            ``id_positive``, ``bev_query``, ``bev_positive``, ``pose_query``,
            ``pose_positive``, ``pose_to_frame``, ``label_score``,
            ``img_query``, ``img_positive`` and optionally ``relation``.
        device: Target ``torch.device``.

    Returns:
        The batch dict after the model and loss calls.

    Raises:
        SystemExit: If any model parameter contains NaN after the step.
    """
    model.train()
    sequences = data['sequence']
    id_query = data['id_query']
    id_positive = data['id_positive']
    batchsize = len(id_query)
    bev_query = data['bev_query'].to(device)
    bev_positive = data['bev_positive'].to(device)
    pose_query = data['pose_query'].to(device)
    pose_positive = data['pose_positive'].to(device)
    pose_to_frame = data['pose_to_frame'].to(device)
    label_score = data['label_score'].to(device)
    img_query = data['img_query'].to(device)
    img_positive = data['img_positive'].to(device)
    # Best-effort assembly: an input that cannot be stacked/permuted (or a
    # missing 'relation') falls back to the placeholder 0. Only Exception is
    # caught so that Ctrl-C and SystemExit are no longer swallowed.
    try:
        bev = torch.cat([bev_query, bev_positive], dim=0).permute(0, 3, 1, 2)
    except Exception:
        bev = 0
    try:
        img = torch.cat([img_query, img_positive], dim=0).permute(0, 3, 1, 2)
    except Exception:
        img = 0
    try:
        relation = data['relation'].to(device)
    except Exception:
        relation = 0
    batch_dict = {'bev': bev,
                  'label_score': label_score,
                  'img': img,
                  'relation': relation,
                  'id_query': id_query,
                  'sequence': sequences,
                  'id_positive': id_positive,
                  'pose_to_frame': pose_to_frame,
                  'pose_query': pose_query,
                  'pose_positive': pose_positive,
                  # query + positive samples are stacked, hence twice the ids
                  'batch_size': int(batchsize * 2)}
    model(batch_dict)
    loss_total_fun(batch_dict)
    l_total = batch_dict['loss'][0]
    optimizer.zero_grad()
    l_total.backward()
    optimizer.step()
    for p in model.parameters():
        if torch.isnan(p).any():
            print('Model NAN, ', p.shape)
            # Same effect as the site-provided exit(), without needing `site`.
            raise SystemExit
    return batch_dict
 def validate(model, loss_total_fun, data, device):
    """Evaluate model and loss on one batch without gradient tracking.

    Builds the same batch dict as the training step (query and positive
    tensors stacked along dim 0 and permuted with ``(0, 3, 1, 2)``), then
    calls ``model(batch_dict)`` and ``loss_total_fun(batch_dict)`` inside
    ``torch.no_grad()`` with the model in eval mode.

    Args:
        model: Module called as ``model(batch_dict)``.
        loss_total_fun: Callable invoked as ``loss_total_fun(batch_dict)``.
        data: Mapping with the keys ``sequence``, ``id_query``,
            ``id_positive``, ``bev_query``, ``bev_positive``, ``pose_query``,
            ``pose_positive``, ``pose_to_frame``, ``label_score``,
            ``img_query``, ``img_positive`` and optionally ``relation``.
        device: Target ``torch.device``.

    Returns:
        The batch dict after the model and loss calls.
    """
    model.eval()
    with torch.no_grad():
        sequences = data['sequence']
        id_query = data['id_query']
        id_positive = data['id_positive']
        batchsize = len(id_query)
        bev_query = data['bev_query'].to(device)
        bev_positive = data['bev_positive'].to(device)
        pose_query = data['pose_query'].to(device)
        pose_positive = data['pose_positive'].to(device)
        pose_to_frame = data['pose_to_frame'].to(device)
        label_score = data['label_score'].to(device)
        img_query = data['img_query'].to(device)
        img_positive = data['img_positive'].to(device)
        # Best-effort assembly: anything that cannot be stacked/permuted (or a
        # missing 'relation') becomes the placeholder 0. Only Exception is
        # caught so that Ctrl-C and SystemExit are no longer swallowed.
        try:
            bev = torch.cat([bev_query, bev_positive], dim=0).permute(0, 3, 1, 2)
        except Exception:
            bev = 0
        try:
            img = torch.cat([img_query, img_positive], dim=0).permute(0, 3, 1, 2)
        except Exception:
            img = 0
        try:
            relation = data['relation'].to(device)
        except Exception:
            relation = 0
        batch_dict = {'bev': bev,
                      'label_score': label_score,
                      'img': img,
                      'relation': relation,
                      'id_query': id_query,
                      'sequence': sequences,
                      'id_positive': id_positive,
                      'pose_to_frame': pose_to_frame,
                      'pose_query': pose_query,
                      'pose_positive': pose_positive,
                      # query + positive samples are stacked, hence twice the ids
                      'batch_size': int(batchsize * 2)}
        model(batch_dict)
        loss_total_fun(batch_dict)
    return batch_dict
 def test(model, data, device):
    """Run the model on the query samples of one batch (inference only).

    Only the query tensors are used; they are moved to ``device`` and
    permuted with ``(0, 3, 1, 2)``. The model is called in eval mode inside
    ``torch.no_grad()`` and is expected to add its outputs to the batch dict.

    Args:
        model: Module called as ``model(batch_dict)``.
        data: Mapping with the keys ``sequence``, ``id_query``, ``bev_query``,
            ``pose_query``, ``img_query`` and optionally ``relation``.
        device: Target ``torch.device``.

    Returns:
        The batch dict after the model call.
    """
    model.eval()
    with torch.no_grad():
        sequences = data['sequence']
        id_query = data['id_query']
        batchsize = len(id_query)
        bev_query = data['bev_query'].to(device)
        pose_query = data['pose_query'].to(device)
        img_query = data['img_query'].to(device)
        # Best-effort assembly: an input that cannot be permuted (or a missing
        # 'relation') becomes the placeholder 0. Only Exception is caught so
        # that Ctrl-C and SystemExit are no longer swallowed.
        try:
            bev = bev_query.permute(0, 3, 1, 2)
        except Exception:
            bev = 0
        try:
            img = img_query.permute(0, 3, 1, 2)
        except Exception:
            img = 0
        try:
            relation = data['relation'].to(device)
        except Exception:
            relation = 0
        batch_dict = {'bev': bev,
                      'img': img,
                      'relation': relation,
                      'id_query': id_query,
                      'sequence': sequences,
                      'pose_query': pose_query,
                      # NOTE(review): only query samples are passed here, yet
                      # the size is doubled as in train/validate - confirm the
                      # model does not rely on this value at inference time.
                      'batch_size': int(batchsize * 2)}
        model(batch_dict)
    return batch_dict
 _LOSS_TERMS = 12  # number of leading batch_dict['loss'] entries that are logged
 _LOSS_INFO_FMT = 'loss a%.3f p%.3f s%.3f m%.3f t%.3f tr%.3f_%.1f genb%.3f geni%.3f genpa%.3f genpo%.3f genkpl%.3f'
 _BANNER = '***************************************************************************************************************************************'
 # Indices of recall_at_k written to the result table: R@1..R@10, R@15, R@20, R@25.
 _RECALL_COLUMNS = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 19, 24)
 def _load_config():
    """Load config.yaml from the working directory, else from project/FUSIONLCD/."""
    path_cfg = os.path.join(os.getcwd(), "config.yaml")
    try:
        ymlfile = open(path_cfg, "r")
    except OSError:
        # Only a missing/unreadable file triggers the fallback; a YAML syntax
        # error in an existing file is reported as such.
        path_cfg = os.path.join(os.getcwd(), "project/FUSIONLCD/config.yaml")
        ymlfile = open(path_cfg, "r")
    with ymlfile:
        cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
    print('Loading config file from %s' % path_cfg)
    return cfg
 def _detached(value):
    """Return ``value`` without autograd history (non-tensors pass through)."""
    return value.detach() if torch.is_tensor(value) else value
 def _run_loss_epoch(loader, step_fn, phase, epoch, print_frequency, timer):
    """Run ``step_fn`` over ``loader`` and log the mean losses every ``step_print`` batches."""
    step_print = max(1, int(len(loader) / print_frequency))
    sums = [0] * _LOSS_TERMS
    step_now = 0
    for id_sample, data in enumerate(loader):
        losses = step_fn(data)['loss']
        # Detach before summing so the running totals do not keep every
        # batch's autograd graph alive until the next log line.
        sums = [acc + _detached(losses[k]) for k, acc in enumerate(sums)]
        step_now += 1
        if step_now == step_print:
            info = _LOSS_INFO_FMT % tuple(acc / step_print for acc in sums)
            timer.update("Epoch %03d | %s %04d/%04d | %s" %
                         (epoch, phase, id_sample, len(loader) - 1, info))
            sums = [0] * _LOSS_TERMS
            step_now = 0
 def main(args):
    """Train, validate and test the fusion model as configured in config.yaml.

    Reads the ``experiment`` section of the config, builds the model, loss,
    Adam optimizer, data loaders and a ``StepLR`` schedule, optionally resumes
    from ``cfg['last_model']`` and then loops over epochs. Depending on the
    ``train_flag`` / ``validate_flag`` / ``test_flag`` entries each epoch
    trains, and every ``test_step`` epochs validates and/or builds a
    descriptor database that is saved and scored with ``lcd``. Without
    ``train_flag`` the process exits after the first test pass.

    Args:
        args: Parsed command line; ``args.result_name`` names the result
            folder and the result table file.

    Raises:
        SystemExit: After the first test pass when ``train_flag`` is false.
    """
    cfg = _load_config()['experiment']
    for k, v in cfg.items():
        print(k, ':', v)
    path_result = os.path.join(cfg['path_result'], args.result_name)
    lres = log_result(os.path.join(os.getcwd(), 'result', args.result_name + '.txt'))
    device = torch.device("cuda" if torch.cuda.is_available() and cfg['cuda'] else "cpu")
    start_epoch = 0
    iter_train = 0  # never advanced here; only forwarded to save_checkpoint
    epochs = cfg['epochs']
    model = net.Fusion(cfg)
    print(model)
    model = model.to(device)
    loss_total_fun = TotalLoss(cfg).to(device)
    print("Model params: %.6fM" % (sum(p.numel() for p in model.parameters()) / 1e6))
    optimizer = optim.Adam(model.parameters(), lr=cfg['learning_rate'], betas=(cfg['beta1'], cfg['beta2']),
                           eps=cfg['eps'], weight_decay=cfg['weight_decay'])
    loader_train, loader_val, loader_test = KittiTotalLoader(cfg)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.99)
    t = tools.Timer()
    # Best AP / R100 / F1 seen so far, one row per test dataset.
    test_best = np.zeros([len(loader_test.dataset.datasets), 3])
    if cfg['load_model']:
        # map_location lets a checkpoint saved on GPU be resumed on a CPU-only host.
        checkpoint = torch.load(cfg['last_model'], map_location=device)
        start_epoch = checkpoint['epoch'] + 1 * cfg['train_flag']
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('loaded %s' % cfg['last_model'])
    base_frequency = 1 if cfg['train_flag'] else 1e9
    print_frequency = base_frequency
    for epoch in range(start_epoch, epochs):
        torch.cuda.empty_cache()
        # ============================== train ===============================
        if cfg['train_flag']:
            # The first trained epoch logs more often than the later ones.
            if epoch == start_epoch:
                print_frequency = min(len(loader_train), base_frequency * 10)
            else:
                print_frequency = base_frequency
            optimizer.zero_grad()
            _run_loss_epoch(loader_train,
                            lambda data: train(model, optimizer, loss_total_fun, data, device),
                            'train', epoch, print_frequency, t)
            save_checkpoint(model, optimizer, loss_total_fun, epoch, iter_train, path_result)
            scheduler.step()
        # ============================= validate =============================
        if cfg['validate_flag'] and (epoch + 1) % test_step == 0:
            _run_loss_epoch(loader_val,
                            lambda data: validate(model, loss_total_fun, data, device),
                            'validate', epoch, print_frequency, t)
        # ============================== test ================================
        if cfg['test_flag'] and (epoch + 1) % test_step == 0:
            step_print = max(1, int(len(loader_test) / print_frequency))
            step_now = 0
            vlads, kpts, poses, sequences = [], [], [], []
            feas_original, feas_fusion = [], []
            for id_sample, data in enumerate(loader_test):
                batch_dict = test(model, data, device)
                sequences.append((batch_dict['sequence']).detach().cpu())
                vlads.append(batch_dict['vlads'].detach().cpu())
                poses.append(batch_dict['pose_query'].detach().cpu())
                kpts.append(batch_dict['key_points'].detach().cpu())
                if 'fea_kpt_fusion' in batch_dict:
                    feas_fusion.append(batch_dict['fea_kpt_fusion'].detach().cpu().permute(0, 2, 1))
                feas_original.append(batch_dict['fea_kpt_original'].detach().cpu().permute(0, 2, 1))
                step_now += 1
                if step_now == step_print:
                    step_now = 0
                    t.update("Epoch %03d | test %05d/%05d" % (epoch, id_sample, len(loader_test)))
            vlads = torch.cat(vlads)
            kpts = torch.cat(kpts)
            feas_original = torch.cat(feas_original)
            # Without fused keypoint features the original ones stand in;
            # deciding on the collected list avoids relying on the loop
            # variable that leaks out of the for statement.
            feas_fusion = torch.cat(feas_fusion) if feas_fusion else feas_original
            poses = torch.cat(poses)
            sequences = torch.cat(sequences)
            database = {'vlads': vlads,
                        'key_points': kpts,
                        'fea_kpt_original': feas_original,
                        'fea_kpt_fusion': feas_fusion,
                        'fea_kpt': feas_fusion,
                        'sequences': sequences,
                        'pose_query': poses}
            savepath = tools.make_save_path(path_result, 'database')
            torch.save(database, savepath + '/database_bevp.pth.tar')
            print()
            print(_BANNER)
            print('Epoch %03d' % epoch)
            result, recall_at_k = lcd(database)
            seq = torch.unique(sequences)
            for i in range(test_best.shape[0]):
                recall_at_k1 = recall_at_k[i]
                for j in range(test_best.shape[1]):
                    test_best[i, j] = max([test_best[i, j], result[i][j]])
                print('Best, sequence %02d, AP=%.3f, R100=%.3f, F1=%.3f' % (seq[i],  test_best[i, 0], test_best[i, 1],test_best[i, 2]))
                metrics = [result[i][j] for j in range(3)]
                metrics += [recall_at_k1[k] for k in _RECALL_COLUMNS]
                lres.write(seq[i], epoch, (epoch + 1) // test_step + i * (epochs) // test_step, metrics)
            print(_BANNER)
            print()
            if not cfg['train_flag']:
                # Evaluation-only run: stop after the first test pass
                # (same effect as the site-provided exit()).
                raise SystemExit
 if __name__ == '__main__':
    # Example launch:
    # CUDA_VISIBLE_DEVICES=2 nohup python -u train.py --result_name=08280 --info=cosim >log/08280.log 2>&1 &
    # fuser /dev/nvidia*
    parser = argparse.ArgumentParser()
    parser.add_argument('--result_name', type=str, default='log', help='log name of result')
    parser.add_argument('--pro_name', type=str, default='python', help='name of process')
    parser.add_argument('--info', type=str, default='python', help='free-form run note, printed at startup')
    parser.add_argument('--gpu', type=str, default=None, help="GPU id(s), e.g. '0' or '0,1'. Use 'cpu' to force CPU.")
    args = parser.parse_args()
    # Set the visible GPUs before any CUDA call / seeding.
    if args.gpu:
        if args.gpu.lower() == 'cpu':
            # Force CPU by hiding all GPUs.
            os.environ['CUDA_VISIBLE_DEVICES'] = ''
        else:
            os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    np.random.seed(123)
    torch.manual_seed(123)
    # Seed CUDA only when a device is visible.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    print(args.info)
    # os.environ.get cannot raise here, so no try/except is needed.
    print("Using GPU device:", os.environ.get("CUDA_VISIBLE_DEVICES", ""))
    main(args)
--- a/uot.py
+++ b/uot.py
@@ -0,0 +1,150 @@
 import math
 import torch
 import torch._utils
 import torch.nn as nn
 import torch.nn.functional as F
 from torchvision.models import resnet
 from typing import Optional, Callable
 def compute_rigid_transform(points1, points2, weights):
    """Weighted SVD (Kabsch) estimate of the rigid transform points1 -> points2.

       Adapted from https://github.com/yewzijian/RPMNet/
    Args:
        points1 (torch.Tensor): (B, M, 3) coordinates of the first point cloud
        points2 (torch.Tensor): coordinates of the second point cloud; it is
            multiplied by the same per-point weights as points1, so it has to
            broadcast against (B, M, 1)
        weights (torch.Tensor): (B, M) per-correspondence weights
    Returns:
        Transform T (B, 3, 4) to get from points1 to points2, i.e. T*points1 = points2
    """
    # Normalise the weights over the point axis (1e-5 guards an all-zero row).
    w = weights[..., None]
    w = w / (torch.sum(w, dim=1, keepdim=True) + 1e-5)
    centroid_a = torch.sum(points1 * w, dim=1)
    centroid_b = torch.sum(points2 * w, dim=1)
    centered_a = points1 - centroid_a[:, None, :]
    centered_b = points2 - centroid_b[:, None, :]
    cov = centered_a.transpose(-2, -1) @ (centered_b * w)
    # Two rotation candidates: the plain one and one with the last column of V
    # negated; the candidate with positive determinant is kept to avoid
    # reflections.
    u, _, v = torch.svd(cov, some=False, compute_uv=True)
    u_t = u.transpose(-1, -2)
    rot_plain = v @ u_t
    v_flipped = v.clone()
    v_flipped[:, :, 2] *= -1
    rot_flipped = v_flipped @ u_t
    is_proper = torch.det(rot_plain)[:, None, None] > 0
    rot_mat = torch.where(is_proper, rot_plain, rot_flipped)
    assert torch.all(torch.det(rot_mat) > 0)
    # Translation maps the rotated first centroid onto the second one.
    translation = -rot_mat @ centroid_a[:, :, None] + centroid_b[:, :, None]
    return torch.cat((rot_mat, translation), dim=2)
 def sinkhorn_unbalanced(feature1, feature2, epsilon, gamma, max_iter, matrix='cosine'):
    """
    Sinkhorn algorithm for Unbalanced Optimal Transport.
    Modified from https://github.com/valeoai/FLOT/
    Args:
        feature1 (torch.Tensor):
            (B, N, C) Point-wise features for points cloud 1.
        feature2 (torch.Tensor):
            (B, M, C) Point-wise features for points cloud 2.
        epsilon (torch.Tensor):
            Entropic regularization.
        gamma (torch.Tensor):
            Mass regularization.
        max_iter (int):
            Number of iteration of the Sinkhorn algorithm. With a value <= 0
            the kernel ``exp(-C / epsilon)`` is returned unchanged.
        matrix (str):
            Transport cost: ``'cosine'`` (1 - cosine similarity) or
            ``'euclidean'`` (pairwise L2 distance).
    Returns:
        T (torch.Tensor):
            (B, N, M) Transport plan between point cloud 1 and 2.
    Raises:
        ValueError: If ``matrix`` is neither ``'cosine'`` nor ``'euclidean'``.
    """
    if matrix == 'cosine':
        # Transport cost matrix on L2-normalised features
        feature1 = feature1 / torch.sqrt(torch.sum(feature1 ** 2, -1, keepdim=True) + 1e-8)
        feature2 = feature2 / torch.sqrt(torch.sum(feature2 ** 2, -1, keepdim=True) + 1e-8)
        C = 1.0 - torch.bmm(feature1, feature2.transpose(1, 2))
    elif matrix == 'euclidean':
        # |a - b|^2 = |a|^2 + |b|^2 - 2 a.b
        sq_dist = torch.sum(feature1 ** 2, -1, keepdim=True)
        sq_dist = sq_dist + torch.sum(feature2 ** 2, -1, keepdim=True).transpose(1, 2)
        sq_dist = sq_dist - 2 * torch.bmm(feature1, feature2.transpose(1, 2))
        # Rounding can make the squared distance slightly negative (e.g. for
        # identical features); clamp so the square root never yields NaN.
        C = torch.clamp(sq_dist, min=0.0) ** 0.5
    else:
        raise ValueError("matrix must be 'cosine' or 'euclidean', got %r" % (matrix,))
    # Entropic regularisation
    K = torch.exp(-C / epsilon)  # * support
    # Early return if no iteration (a negative count is treated the same way)
    if max_iter <= 0:
        return K
    # Init. of Sinkhorn algorithm: uniform marginals and uniform start for a
    power = gamma / (gamma + epsilon + 1e-8)
    a = (torch.ones((K.shape[0], K.shape[1], 1), device=feature1.device, dtype=feature1.dtype) / K.shape[1])
    prob1 = (torch.ones((K.shape[0], K.shape[1], 1), device=feature1.device, dtype=feature1.dtype) / K.shape[1])
    prob2 = (torch.ones((K.shape[0], K.shape[2], 1), device=feature2.device, dtype=feature2.dtype) / K.shape[2])
    # Sinkhorn algorithm
    for _ in range(max_iter):
        # Update b
        KTa = torch.bmm(K.transpose(1, 2), a)
        b = torch.pow(prob2 / (KTa + 1e-8), power)
        # Update a
        Kb = torch.bmm(K, b)
        a = torch.pow(prob1 / (Kb + 1e-8), power)
    # Transportation map
    T = torch.mul(torch.mul(a, K), b.transpose(1, 2))
    return T
 class UOTHead(nn.Module):
    """Matches anchor and positive keypoints with unbalanced optimal transport.

    ``epsilon`` and ``gamma`` are learnable scalars; their exponentials are
    used as entropic and mass regularisation of the Sinkhorn solver.
    ``name`` selects which ``fea_kpt_<name>`` entry is read and is appended
    to every key written back into the batch dict.
    """
    def __init__(self, nb_iter=5,name='original'):
        super().__init__()
        self.epsilon = torch.nn.Parameter(torch.zeros(1))  # Entropic regularisation
        self.gamma = torch.nn.Parameter(torch.zeros(1))  # Mass regularisation
        self.nb_iter = nb_iter
        self.name=name
    def forward(self, batch_dict, src_coords=None, mode='pairs'):
        """Compute correspondences and the rigid transform for each pair.

        The first half of the batch is treated as anchors, the second half as
        positives. Writes ``project_kpts_<name>``, ``project_feas_<name>``,
        ``correspondences_feature_<name>`` and ``transformation_<name>`` into
        ``batch_dict`` and returns it. ``src_coords`` and ``mode`` are not
        used by this implementation.
        """
        tag = self.name
        feats = batch_dict['fea_kpt_' + tag].squeeze(-1)
        total, _channels, _num = feats.shape
        assert total % 2 == 0, "Batch size must be multiple of 2: B anchor + B positive samples"
        half = total // 2
        # Features are stored channel-first; the solver wants (B, points, C).
        feat_anchor = feats[:half].permute(0, 2, 1)
        feat_positive = feats[half:].permute(0, 2, 1)
        key_points = batch_dict['key_points']
        coords_anchor = key_points[:half, :, 0:3]
        coords_positive = key_points[half:, :, 0:3]
        plan = sinkhorn_unbalanced(
            feat_anchor,
            feat_positive,
            epsilon=torch.exp(self.epsilon) + 0.03,
            gamma=torch.exp(self.gamma),
            max_iter=self.nb_iter,
            matrix='cosine',
        )
        # Row mass of the plan: normaliser of the soft assignment and weight
        # of each anchor keypoint in the pose estimate.
        row_mass = plan.sum(-1, keepdim=True)
        project_kpts = (plan @ coords_positive) / (row_mass + 1e-8)
        project_feas = (plan @ feat_positive) / (row_mass + 1e-8)
        batch_dict['project_kpts_' + tag] = project_kpts
        batch_dict['project_feas_' + tag] = project_feas.permute(0, 2, 1)
        batch_dict['correspondences_feature_' + tag] = plan
        batch_dict['transformation_' + tag] = compute_rigid_transform(
            coords_anchor, project_kpts, row_mass.squeeze(-1))
        return batch_dict