init fusion lcd orin config

2026-03-04 20:07:57 +08:00
commit bc0498e453
42 changed files with 4750 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,4 @@
+result/
+sequences/
+KITTI/
+KITTI360/
--- a/ALIKE/LICENSE
+++ b/ALIKE/LICENSE
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2022, Zhao Xiaoming
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/ALIKE/README.md
+++ b/ALIKE/README.md
@@ -0,0 +1,117 @@
+# News: the cpp version is released [ALIKE-cpp](https://github.com/Shiaoming/ALIKE-cpp).
+
+# ALIKE: Accurate and Lightweight Keypoint Detection and Descriptor Extraction
+
+ALIKE applies a differentiable keypoint detection module to detect accurate sub-pixel keypoints. The network can run at 95 frames per second for 640 x 480 images on NVIDIA Titan X (Pascal) GPU and achieve equivalent performance with the state-of-the-arts. ALIKE benefits real-time applications in resource-limited platforms/devices. Technical details are described in [this paper](https://arxiv.org/pdf/2112.02906.pdf).
+
+> ```
+> Xiaoming Zhao, Xingming Wu, Jinyu Miao, Weihai Chen, Peter C. Y. Chen, Zhengguo Li, "ALIKE: Accurate and Lightweight Keypoint
+> Detection and Descriptor Extraction," IEEE Transactions on Multimedia, 2022.
+> ```
+
+![](./assets/alike.png)
+
+
+If you use ALIKE in an academic work, please cite:
+
+```
+@article{Zhao2022ALIKE,
+    title = {ALIKE: Accurate and Lightweight Keypoint Detection and Descriptor Extraction},
+    url = {http://arxiv.org/abs/2112.02906},
+    doi = {10.1109/TMM.2022.3155927},
+    journal = {IEEE Transactions on Multimedia},
+    author = {Zhao, Xiaoming and Wu, Xingming and Miao, Jinyu and Chen, Weihai and Chen, Peter C. Y. and Li, Zhengguo},
+    month = march,
+    year = {2022},
+}
+```
+
+
+
+## 1. Prerequisites
+
+The required packages are listed in the `requirements.txt` :
+
+```shell
+pip install -r requirements.txt
+```
+
+
+
+## 2. Models
+
+The off-the-shelf weights of four variant ALIKE models are provided in `models/` .
+
+
+
+## 3. Run demo
+
+```shell
+$ python demo.py -h
+usage: demo.py [-h] [--model {alike-t,alike-s,alike-n,alike-l}]
+               [--device DEVICE] [--top_k TOP_K] [--scores_th SCORES_TH]
+               [--n_limit N_LIMIT] [--no_display] [--no_sub_pixel]
+               input
+
+ALike Demo.
+
+positional arguments:
+  input                 Image directory or movie file or "camera0" (for
+                        webcam0).
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --model {alike-t,alike-s,alike-n,alike-l}
+                        The model configuration
+  --device DEVICE       Running device (default: cuda).
+  --top_k TOP_K         Detect top K keypoints. -1 for threshold based mode,
+                        >0 for top K mode. (default: -1)
+  --scores_th SCORES_TH
+                        Detector score threshold (default: 0.2).
+  --n_limit N_LIMIT     Maximum number of keypoints to be detected (default:
+                        5000).
+  --no_display          Do not display images to screen. Useful if running
+                        remotely (default: False).
+  --no_sub_pixel        Do not detect sub-pixel keypoints (default: False).
+```
+
+
+
+## 4. Examples
+
+### KITTI example
+```shell
+python demo.py assets/kitti 
+```
+![](./assets/kitti.gif)
+
+### TUM example
+```shell
+python demo.py assets/tum 
+```
+![](./assets/tum.gif)
+
+## 5. Efficiency and performance
+
+| Models | Parameters | GFLOPs(640x480) | MHA@3 on Hpatches | mAA(10°) on [IMW2020-test](https://www.cs.ubc.ca/research/image-matching-challenge/2021/leaderboard) (Stereo) |
+|:---:|:---:|:---:|:-----------------:|:-------------------------------------------------------------------------------------------------------------:|
+| D2-Net(MS) | 7653KB | 889.40 |      38.33%       |                                                    12.27%                                                     |
+| LF-Net(MS) | 2642KB | 24.37 |      57.78%       |                                                    23.44%                                                     |
+| SuperPoint | 1301KB | 26.11 |      70.19%       |                                                    28.97%                                                     |
+| R2D2(MS) | 484KB | 464.55 |      71.48%       |                                                    39.02%                                                     |
+| ASLFeat(MS) | 823KB | 77.58 |      73.52%       |                                                    33.65%                                                     |
+| DISK | 1092KB | 98.97 |      70.56%       |                                                    51.22%                                                     |
+| ALike-N | 318KB | 7.909 |      75.74%       |                                                    47.18%                                                     |
+| ALike-L | 653KB | 19.685 |      76.85%       |                                                    49.58%                                                     |
+
+### Evaluation on Hpatches
+
+- Download [hpatches-sequences-release](https://hpatches.github.io/) and put it into `hseq/hpatches-sequences-release`.
+- Remove the unreliable sequences as D2-Net.
+- Run the following command to evaluate the performance:
+  ```shell  
+  python hseq/eval.py
+  ```
+
+
+For more details, please refer to the [paper](https://arxiv.org/abs/2112.02906).
--- a/ALIKE/pycache/alike.cpython-38.pyc
+++ b/ALIKE/pycache/alike.cpython-38.pyc
--- a/ALIKE/pycache/alnet.cpython-38.pyc
+++ b/ALIKE/pycache/alnet.cpython-38.pyc
--- a/ALIKE/pycache/soft_detect.cpython-38.pyc
+++ b/ALIKE/pycache/soft_detect.cpython-38.pyc
--- a/ALIKE/alike.py
+++ b/ALIKE/alike.py
@@ -0,0 +1,143 @@
+import logging
+import os
+import cv2
+import torch
+from copy import deepcopy
+import torch.nn.functional as F
+from torchvision.transforms import ToTensor
+import math
+
+from ALIKE.alnet import ALNet
+from ALIKE.soft_detect import DKD
+import time
+
+configs = {
+    'alike-t': {'c1': 8, 'c2': 16, 'c3': 32, 'c4': 64, 'dim': 64, 'single_head': True, 'radius': 2,
+                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-t.pth')},
+    'alike-s': {'c1': 8, 'c2': 16, 'c3': 48, 'c4': 96, 'dim': 96, 'single_head': True, 'radius': 2,
+                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-s.pth')},
+    'alike-n': {'c1': 16, 'c2': 32, 'c3': 64, 'c4': 128, 'dim': 128, 'single_head': True, 'radius': 2,
+                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-n.pth')},
+    'alike-l': {'c1': 32, 'c2': 64, 'c3': 128, 'c4': 128, 'dim': 128, 'single_head': False, 'radius': 2,
+                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-l.pth')},
+}
+
+
+class ALike(ALNet):
+    def __init__(self,
+                 # ================================== feature encoder
+                 c1: int = 32, c2: int = 64, c3: int = 128, c4: int = 128, dim: int = 128,
+                 single_head: bool = False,
+                 # ================================== detect parameters
+                 radius: int = 2,
+                 top_k: int = 500, scores_th: float = 0.5,
+                 n_limit: int = 5000,
+                 device: str = 'cpu',
+                 model_path: str = ''
+                 ):
+        super().__init__(c1, c2, c3, c4, dim, single_head)
+        self.radius = radius
+        self.top_k = top_k
+        self.n_limit = n_limit
+        self.scores_th = scores_th
+        self.dkd = DKD(radius=self.radius, top_k=self.top_k,
+                       scores_th=self.scores_th, n_limit=self.n_limit)
+        self.device = device
+
+        if model_path != '':
+            state_dict = torch.load(model_path, self.device)
+            self.load_state_dict(state_dict)
+            self.to(self.device)
+            self.eval()
+            logging.info(f'Loaded model parameters from {model_path}')
+            logging.info(
+                f"Number of model parameters: {sum(p.numel() for p in self.parameters() if p.requires_grad) / 1e3}KB")
+
+    def extract_dense_map(self, image, ret_dict=False):
+        # ====================================================
+        # check image size, should be integer multiples of 2^5
+        # if it is not a integer multiples of 2^5, padding zeros
+        device = image.device
+        b, c, h, w = image.shape
+        h_ = math.ceil(h / 32) * 32 if h % 32 != 0 else h
+        w_ = math.ceil(w / 32) * 32 if w % 32 != 0 else w
+        if h_ != h:
+            h_padding = torch.zeros(b, c, h_ - h, w, device=device)
+            image = torch.cat([image, h_padding], dim=2)
+        if w_ != w:
+            w_padding = torch.zeros(b, c, h_, w_ - w, device=device)
+            image = torch.cat([image, w_padding], dim=3)
+        # ====================================================
+
+        scores_map, descriptor_map = super().forward(image)
+
+        # ====================================================
+        if h_ != h or w_ != w:
+            descriptor_map = descriptor_map[:, :, :h, :w]
+            scores_map = scores_map[:, :, :h, :w]  # Bx1xHxW
+        # ====================================================
+
+        # BxCxHxW
+        descriptor_map = torch.nn.functional.normalize(descriptor_map, p=2, dim=1)
+
+        if ret_dict:
+            return {'descriptor_map': descriptor_map, 'scores_map': scores_map, }
+        else:
+            return descriptor_map, scores_map
+
+    def forward(self, img, image_size_max=99999, sort=False, sub_pixel=False):
+        """
+        :param img: np.array HxWx3, RGB
+        :param image_size_max: maximum image size, otherwise, the image will be resized
+        :param sort: sort keypoints by scores
+        :param sub_pixel: whether to use sub-pixel accuracy
+        :return: a dictionary with 'keypoints', 'descriptors', 'scores', and 'time'
+        """
+        H, W, three = img.shape
+        assert three == 3, "input image shape should be [HxWx3]"
+
+        # ==================== image size constraint
+        image = deepcopy(img)
+        max_hw = max(H, W)
+        if max_hw > image_size_max:
+            ratio = float(image_size_max / max_hw)
+            image = cv2.resize(image, dsize=None, fx=ratio, fy=ratio)
+
+        # ==================== convert image to tensor
+        image = torch.from_numpy(image).to(self.device).to(torch.float32).permute(2, 0, 1)[None] / 255.0
+
+        # ==================== extract keypoints
+        start = time.time()
+
+        with torch.no_grad():
+            descriptor_map, scores_map = self.extract_dense_map(image)
+            keypoints, descriptors, scores, _ = self.dkd(scores_map, descriptor_map,
+                                                         sub_pixel=sub_pixel)
+            keypoints, descriptors, scores = keypoints[0], descriptors[0], scores[0]
+            keypoints = (keypoints + 1) / 2 * keypoints.new_tensor([[W - 1, H - 1]])
+
+        if sort:
+            indices = torch.argsort(scores, descending=True)
+            keypoints = keypoints[indices]
+            descriptors = descriptors[indices]
+            scores = scores[indices]
+
+        end = time.time()
+
+        return {'keypoints': keypoints.cpu().numpy(),
+                'descriptors': descriptors.cpu().numpy(),
+                'scores': scores.cpu().numpy(),
+                'scores_map': scores_map.cpu().numpy(),
+                'time': end - start, }
+
+
+if __name__ == '__main__':
+    import numpy as np
+    from thop import profile
+
+    net = ALike(c1=32, c2=64, c3=128, c4=128, dim=128, single_head=False)
+
+    image = np.random.random((640, 480, 3)).astype(np.float32)
+    flops, params = profile(net, inputs=(image, 9999, False), verbose=False)
+    print('{:<30}  {:<8} GFLops'.format('Computational complexity: ', flops / 1e9))
+    print('{:<30}  {:<8} KB'.format('Number of parameters: ', params / 1e3))
--- a/ALIKE/alnet.py
+++ b/ALIKE/alnet.py
@@ -0,0 +1,163 @@
+import torch
+from torch import nn
+from torchvision.models import resnet
+from typing import Optional, Callable
+
+
+class ConvBlock(nn.Module):
+    def __init__(self, in_channels, out_channels,
+                 gate: Optional[Callable[..., nn.Module]] = None,
+                 norm_layer: Optional[Callable[..., nn.Module]] = None):
+        super().__init__()
+        if gate is None:
+            self.gate = nn.ReLU(inplace=True)
+        else:
+            self.gate = gate
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        self.conv1 = resnet.conv3x3(in_channels, out_channels)
+        self.bn1 = norm_layer(out_channels)
+        self.conv2 = resnet.conv3x3(out_channels, out_channels)
+        self.bn2 = norm_layer(out_channels)
+
+    def forward(self, x):
+        x = self.gate(self.bn1(self.conv1(x)))  # B x in_channels x H x W
+        x = self.gate(self.bn2(self.conv2(x)))  # B x out_channels x H x W
+        return x
+
+
+# copied from torchvision\models\resnet.py#27->BasicBlock
+class ResBlock(nn.Module):
+    expansion: int = 1
+
+    def __init__(
+            self,
+            inplanes: int,
+            planes: int,
+            stride: int = 1,
+            downsample: Optional[nn.Module] = None,
+            groups: int = 1,
+            base_width: int = 64,
+            dilation: int = 1,
+            gate: Optional[Callable[..., nn.Module]] = None,
+            norm_layer: Optional[Callable[..., nn.Module]] = None
+    ) -> None:
+        super(ResBlock, self).__init__()
+        if gate is None:
+            self.gate = nn.ReLU(inplace=True)
+        else:
+            self.gate = gate
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        if groups != 1 or base_width != 64:
+            raise ValueError('ResBlock only supports groups=1 and base_width=64')
+        if dilation > 1:
+            raise NotImplementedError("Dilation > 1 not supported in ResBlock")
+        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+        self.conv1 = resnet.conv3x3(inplanes, planes, stride)
+        self.bn1 = norm_layer(planes)
+        self.conv2 = resnet.conv3x3(planes, planes)
+        self.bn2 = norm_layer(planes)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.gate(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+
+        out += identity
+        out = self.gate(out)
+
+        return out
+
+
+class ALNet(nn.Module):
+    def __init__(self, c1: int = 32, c2: int = 64, c3: int = 128, c4: int = 128, dim: int = 128,
+                 single_head: bool = True,
+                 ):
+        super().__init__()
+        self.feature_size = dim
+        self.gate = nn.ReLU(inplace=True)
+
+        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.pool4 = nn.MaxPool2d(kernel_size=4, stride=4)
+
+        self.block1 = ConvBlock(3, c1, self.gate, nn.BatchNorm2d)
+
+        self.block2 = ResBlock(inplanes=c1, planes=c2, stride=1,
+                               downsample=nn.Conv2d(c1, c2, 1),
+                               gate=self.gate,
+                               norm_layer=nn.BatchNorm2d)
+        self.block3 = ResBlock(inplanes=c2, planes=c3, stride=1,
+                               downsample=nn.Conv2d(c2, c3, 1),
+                               gate=self.gate,
+                               norm_layer=nn.BatchNorm2d)
+        self.block4 = ResBlock(inplanes=c3, planes=c4, stride=1,
+                               downsample=nn.Conv2d(c3, c4, 1),
+                               gate=self.gate,
+                               norm_layer=nn.BatchNorm2d)
+        # ================================== feature aggregation
+        self.conv1 = resnet.conv1x1(c1, dim // 4)
+        self.conv2 = resnet.conv1x1(c2, dim // 4)
+        self.conv3 = resnet.conv1x1(c3, dim // 4)
+        self.conv4 = resnet.conv1x1(c4, dim // 4)
+        self.upsample2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
+        self.upsample4 = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)
+        self.upsample8 = nn.Upsample(scale_factor=8, mode='bilinear', align_corners=True)
+        self.upsample32 = nn.Upsample(scale_factor=32, mode='bilinear', align_corners=True)
+
+        # ================================== detector and descriptor head
+        self.single_head = single_head
+        if not self.single_head:
+            self.convhead1 = resnet.conv1x1(dim, dim)
+        self.convhead2 = resnet.conv1x1(dim, dim + 1)
+
+    def forward(self, image):
+        # ================================== feature encoder
+        x1 = self.block1(image)  # B x c1 x H x W
+        x2 = self.pool2(x1)
+        x2 = self.block2(x2)  # B x c2 x H/2 x W/2
+        x3 = self.pool4(x2)
+        x3 = self.block3(x3)  # B x c3 x H/8 x W/8
+        x4 = self.pool4(x3)
+        x4 = self.block4(x4)  # B x dim x H/32 x W/32
+
+        # ================================== feature aggregation
+        x1 = self.gate(self.conv1(x1))  # B x dim//4 x H x W
+        x2 = self.gate(self.conv2(x2))  # B x dim//4 x H//2 x W//2
+        x3 = self.gate(self.conv3(x3))  # B x dim//4 x H//8 x W//8
+        x4 = self.gate(self.conv4(x4))  # B x dim//4 x H//32 x W//32
+        x2_up = self.upsample2(x2)  # B x dim//4 x H x W
+        x3_up = self.upsample8(x3)  # B x dim//4 x H x W
+        x4_up = self.upsample32(x4)  # B x dim//4 x H x W
+        x1234 = torch.cat([x1, x2_up, x3_up, x4_up], dim=1)
+
+        # ================================== detector and descriptor head
+        if not self.single_head:
+            x1234 = self.gate(self.convhead1(x1234))
+        x = self.convhead2(x1234)  # B x dim+1 x H x W
+
+        descriptor_map = x[:, :-1, :, :]
+        scores_map = torch.sigmoid(x[:, -1, :, :]).unsqueeze(1)
+
+        return scores_map, descriptor_map
+
+
+if __name__ == '__main__':
+    from thop import profile
+
+    net = ALNet(c1=16, c2=32, c3=64, c4=128, dim=128, single_head=True)
+
+    image = torch.randn(1, 3, 640, 480)
+    flops, params = profile(net, inputs=(image,), verbose=False)
+    print('{:<30}  {:<8} GFLops'.format('Computational complexity: ', flops / 1e9))
+    print('{:<30}  {:<8} KB'.format('Number of parameters: ', params / 1e3))
--- a/ALIKE/demo.py
+++ b/ALIKE/demo.py
@@ -0,0 +1,167 @@
+import copy
+import os
+import cv2
+import glob
+import logging
+import argparse
+import numpy as np
+from tqdm import tqdm
+from alike import ALike, configs
+
+
+class ImageLoader(object):
+    def __init__(self, filepath: str):
+        self.N = 3000
+        if filepath.startswith('camera'):
+            camera = int(filepath[6:])
+            self.cap = cv2.VideoCapture(camera)
+            if not self.cap.isOpened():
+                raise IOError(f"Can't open camera {camera}!")
+            logging.info(f'Opened camera {camera}')
+            self.mode = 'camera'
+        elif os.path.exists(filepath):
+            if os.path.isfile(filepath):
+                self.cap = cv2.VideoCapture(filepath)
+                if not self.cap.isOpened():
+                    raise IOError(f"Can't open video {filepath}!")
+                rate = self.cap.get(cv2.CAP_PROP_FPS)
+                self.N = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
+                duration = self.N / rate
+                logging.info(f'Opened video {filepath}')
+                logging.info(f'Frames: {self.N}, FPS: {rate}, Duration: {duration}s')
+                self.mode = 'video'
+            else:
+                self.images = glob.glob(os.path.join(filepath, '*.png')) + \
+                              glob.glob(os.path.join(filepath, '*.jpg')) + \
+                              glob.glob(os.path.join(filepath, '*.ppm'))
+                self.images.sort()
+                self.N = len(self.images)
+                logging.info(f'Loading {self.N} images')
+                self.mode = 'images'
+        else:
+            raise IOError('Error filepath (camerax/path of images/path of videos): ', filepath)
+
+    def __getitem__(self, item):
+        if self.mode == 'camera' or self.mode == 'video':
+            if item > self.N:
+                return None
+            ret, img = self.cap.read()
+            if not ret:
+                raise "Can't read image from camera"
+            if self.mode == 'video':
+                self.cap.set(cv2.CAP_PROP_POS_FRAMES, item)
+        elif self.mode == 'images':
+            filename = self.images[item]
+            img = cv2.imread(filename)
+            if img is None:
+                raise Exception('Error reading image %s' % filename)        
+        return img
+
+    def __len__(self):
+        return self.N
+
+
+class SimpleTracker(object):
+    def __init__(self):
+        self.pts_prev = None
+        self.desc_prev = None
+
+    def update(self, img, pts, desc):
+        N_matches = 0
+        if self.pts_prev is None:
+            self.pts_prev = pts
+            self.desc_prev = desc
+
+            out = copy.deepcopy(img)
+            for pt1 in pts:
+                p1 = (int(round(pt1[0])), int(round(pt1[1])))
+                cv2.circle(out, p1, 1, (0, 0, 255), -1, lineType=16)
+        else:
+            matches = self.mnn_mather(self.desc_prev, desc)
+            mpts1, mpts2 = self.pts_prev[matches[:, 0]], pts[matches[:, 1]]
+            N_matches = len(matches)
+
+            out = copy.deepcopy(img)
+            for pt1, pt2 in zip(mpts1, mpts2):
+                p1 = (int(round(pt1[0])), int(round(pt1[1])))
+                p2 = (int(round(pt2[0])), int(round(pt2[1])))
+                cv2.line(out, p1, p2, (0, 255, 0), lineType=16)
+                cv2.circle(out, p2, 1, (0, 0, 255), -1, lineType=16)
+
+            self.pts_prev = pts
+            self.desc_prev = desc
+
+        return out, N_matches
+
+    def mnn_mather(self, desc1, desc2):
+        sim = desc1 @ desc2.transpose()
+        sim[sim < 0.9] = 0
+        nn12 = np.argmax(sim, axis=1)
+        nn21 = np.argmax(sim, axis=0)
+        ids1 = np.arange(0, sim.shape[0])
+        mask = (ids1 == nn21[nn12])
+        matches = np.stack([ids1[mask], nn12[mask]])
+        return matches.transpose()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='ALike Demo.')
+    parser.add_argument('--input', type=str, default=r'E:\caodanyang\dataset\KITTI\odometry\data_odometry_color\dataset\sequences\00\image_2',
+                        help='Image directory or movie file or "camera0" (for webcam0).')
+    parser.add_argument('--model', choices=['alike-t', 'alike-s', 'alike-n', 'alike-l'], default="alike-t",
+                        help="The model configuration")
+    parser.add_argument('--device', type=str, default='cuda', help="Running device (default: cuda).")
+    parser.add_argument('--top_k', type=int, default=-1,
+                        help='Detect top K keypoints. -1 for threshold based mode, >0 for top K mode. (default: -1)')
+    parser.add_argument('--scores_th', type=float, default=0.2,
+                        help='Detector score threshold (default: 0.2).')
+    parser.add_argument('--n_limit', type=int, default=5000,
+                        help='Maximum number of keypoints to be detected (default: 5000).')
+    parser.add_argument('--no_display', action='store_true',
+                        help='Do not display images to screen. Useful if running remotely (default: False).')
+    parser.add_argument('--no_sub_pixel', action='store_true',
+                        help='Do not detect sub-pixel keypoints (default: False).')
+    args = parser.parse_args()
+
+    logging.basicConfig(level=logging.INFO)
+
+    image_loader = ImageLoader(args.input)
+    model = ALike(**configs[args.model],
+                  device=args.device,
+                  top_k=args.top_k,
+                  scores_th=args.scores_th,
+                  n_limit=args.n_limit)
+    tracker = SimpleTracker()
+
+    if not args.no_display:
+        logging.info("Press 'q' to stop!")
+        cv2.namedWindow(args.model)
+
+    runtime = []
+    progress_bar = tqdm(image_loader)
+    for img in progress_bar:
+        if img is None:
+            break
+        
+        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        pred = model(img_rgb, sub_pixel=not args.no_sub_pixel)
+        kpts = pred['keypoints']
+        desc = pred['descriptors']
+        runtime.append(pred['time'])
+
+        out, N_matches = tracker.update(img, kpts, desc)
+
+        ave_fps = (1. / np.stack(runtime)).mean()
+        status = f"Fps:{ave_fps:.1f}, Keypoints/Matches: {len(kpts)}/{N_matches}"
+        progress_bar.set_description(status)
+
+        if not args.no_display:
+            cv2.setWindowTitle(args.model, args.model + ': ' + status)
+            cv2.imshow(args.model, out)
+            if cv2.waitKey(1) == ord('q'):
+                break
+
+    logging.info('Finished!')
+    if not args.no_display:
+        logging.info('Press any key to exit!')
+        cv2.waitKey()
--- a/ALIKE/models/alike-l.pth
+++ b/ALIKE/models/alike-l.pth
--- a/ALIKE/models/alike-n.pth
+++ b/ALIKE/models/alike-n.pth
--- a/ALIKE/models/alike-s.pth
+++ b/ALIKE/models/alike-s.pth
--- a/ALIKE/models/alike-t.pth
+++ b/ALIKE/models/alike-t.pth
--- a/ALIKE/requirements.txt
+++ b/ALIKE/requirements.txt
@@ -0,0 +1,6 @@
+opencv-python~=4.5.1.48
+numpy~=1.19.5
+tqdm~=4.60.0
+torch~=1.8.0
+torchvision~=0.9.0
+thop~=0.0.31-2005241907
--- a/ALIKE/soft_detect.py
+++ b/ALIKE/soft_detect.py
@@ -0,0 +1,194 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+
+# coordinates system
+#  ------------------------------>  [ x: range=-1.0~1.0; w: range=0~W ]
+#  | -----------------------------
+#  | |                           |
+#  | |                           |
+#  | |                           |
+#  | |         image             |
+#  | |                           |
+#  | |                           |
+#  | |                           |
+#  | |---------------------------|
+#  v
+# [ y: range=-1.0~1.0; h: range=0~H ]
+
+def simple_nms(scores, nms_radius: int):
+    """ Fast Non-maximum suppression to remove nearby points """
+    assert (nms_radius >= 0)
+
+    def max_pool(x):
+        return torch.nn.functional.max_pool2d(
+            x, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius)
+
+    zeros = torch.zeros_like(scores)
+    max_mask = scores == max_pool(scores)
+
+    for _ in range(2):
+        supp_mask = max_pool(max_mask.float()) > 0
+        supp_scores = torch.where(supp_mask, zeros, scores)
+        new_max_mask = supp_scores == max_pool(supp_scores)
+        max_mask = max_mask | (new_max_mask & (~supp_mask))
+    return torch.where(max_mask, scores, zeros)
+
+
+def sample_descriptor(descriptor_map, kpts, bilinear_interp=False):
+    """
+    :param descriptor_map: BxCxHxW
+    :param kpts: list, len=B, each is Nx2 (keypoints) [h,w]
+    :param bilinear_interp: bool, whether to use bilinear interpolation
+    :return: descriptors: list, len=B, each is NxD
+    """
+    batch_size, channel, height, width = descriptor_map.shape
+
+    descriptors = []
+    for index in range(batch_size):
+        kptsi = kpts[index]  # Nx2,(x,y)
+
+        if bilinear_interp:
+            descriptors_ = torch.nn.functional.grid_sample(descriptor_map[index].unsqueeze(0), kptsi.view(1, 1, -1, 2),
+                                                           mode='bilinear', align_corners=True)[0, :, 0, :]  # CxN
+        else:
+            kptsi = (kptsi + 1) / 2 * kptsi.new_tensor([[width - 1, height - 1]])
+            kptsi = kptsi.long()
+            descriptors_ = descriptor_map[index, :, kptsi[:, 1], kptsi[:, 0]]  # CxN
+
+        descriptors_ = torch.nn.functional.normalize(descriptors_, p=2, dim=0)
+        descriptors.append(descriptors_.t())
+
+    return descriptors
+
+
+class DKD(nn.Module):
+    def __init__(self, radius=2, top_k=0, scores_th=0.2, n_limit=20000):
+        """
+        Args:
+            radius: soft detection radius, kernel size is (2 * radius + 1)
+            top_k: top_k > 0: return top k keypoints
+            scores_th: top_k <= 0 threshold mode:  scores_th > 0: return keypoints with scores>scores_th
+                                                   else: return keypoints with scores > scores.mean()
+            n_limit: max number of keypoint in threshold mode
+        """
+        super().__init__()
+        self.radius = radius
+        self.top_k = top_k
+        self.scores_th = scores_th
+        self.n_limit = n_limit
+        self.kernel_size = 2 * self.radius + 1
+        self.temperature = 0.1  # tuned temperature
+        self.unfold = nn.Unfold(kernel_size=self.kernel_size, padding=self.radius)
+
+        # local xy grid
+        x = torch.linspace(-self.radius, self.radius, self.kernel_size)
+        # (kernel_size*kernel_size) x 2 : (w,h)
+        self.hw_grid = torch.stack(torch.meshgrid([x, x])).view(2, -1).t()[:, [1, 0]]
+
+    def detect_keypoints(self, scores_map, sub_pixel=True):
+        b, c, h, w = scores_map.shape
+        scores_nograd = scores_map.detach()
+        # nms_scores = simple_nms(scores_nograd, self.radius)
+        nms_scores = simple_nms(scores_nograd, 2)
+
+        # remove border
+        nms_scores[:, :, :self.radius + 1, :] = 0
+        nms_scores[:, :, :, :self.radius + 1] = 0
+        nms_scores[:, :, h - self.radius:, :] = 0
+        nms_scores[:, :, :, w - self.radius:] = 0
+
+        # detect keypoints without grad
+        if self.top_k > 0:
+            topk = torch.topk(nms_scores.view(b, -1), self.top_k)
+            indices_keypoints = topk.indices  # B x top_k
+        else:
+            if self.scores_th > 0:
+                masks = nms_scores > self.scores_th
+                if masks.sum() == 0:
+                    th = scores_nograd.reshape(b, -1).mean(dim=1)  # th = self.scores_th
+                    masks = nms_scores > th.reshape(b, 1, 1, 1)
+            else:
+                th = scores_nograd.reshape(b, -1).mean(dim=1)  # th = self.scores_th
+                masks = nms_scores > th.reshape(b, 1, 1, 1)
+            masks = masks.reshape(b, -1)
+
+            indices_keypoints = []  # list, B x (any size)
+            scores_view = scores_nograd.reshape(b, -1)
+            for mask, scores in zip(masks, scores_view):
+                indices = mask.nonzero(as_tuple=False)[:, 0]
+                if len(indices) > self.n_limit:
+                    kpts_sc = scores[indices]
+                    sort_idx = kpts_sc.sort(descending=True)[1]
+                    sel_idx = sort_idx[:self.n_limit]
+                    indices = indices[sel_idx]
+                indices_keypoints.append(indices)
+
+        keypoints = []
+        scoredispersitys = []
+        kptscores = []
+        if sub_pixel:
+            # detect soft keypoints with grad backpropagation
+            patches = self.unfold(scores_map)  # B x (kernel**2) x (H*W)
+            self.hw_grid = self.hw_grid.to(patches)  # to device
+            for b_idx in range(b):
+                patch = patches[b_idx].t()  # (H*W) x (kernel**2)
+                indices_kpt = indices_keypoints[b_idx]  # one dimension vector, say its size is M
+                patch_scores = patch[indices_kpt]  # M x (kernel**2)
+
+                # max is detached to prevent undesired backprop loops in the graph
+                max_v = patch_scores.max(dim=1).values.detach()[:, None]
+                x_exp = ((patch_scores - max_v) / self.temperature).exp()  # M * (kernel**2), in [0, 1]
+
+                # \frac{ \sum{(i,j) \times \exp(x/T)} }{ \sum{\exp(x/T)} }
+                xy_residual = x_exp @ self.hw_grid / x_exp.sum(dim=1)[:, None]  # Soft-argmax, Mx2
+
+                hw_grid_dist2 = torch.norm((self.hw_grid[None, :, :] - xy_residual[:, None, :]) / self.radius,
+                                           dim=-1) ** 2
+                scoredispersity = (x_exp * hw_grid_dist2).sum(dim=1) / x_exp.sum(dim=1)
+
+                # compute result keypoints
+                keypoints_xy_nms = torch.stack([indices_kpt % w, indices_kpt // w], dim=1)  # Mx2
+                keypoints_xy = keypoints_xy_nms + xy_residual
+                keypoints_xy = keypoints_xy / keypoints_xy.new_tensor(
+                    [w - 1, h - 1]) * 2 - 1  # (w,h) -> (-1~1,-1~1)
+
+                kptscore = torch.nn.functional.grid_sample(scores_map[b_idx].unsqueeze(0),
+                                                           keypoints_xy.view(1, 1, -1, 2),
+                                                           mode='bilinear', align_corners=True)[0, 0, 0, :]  # CxN
+
+                keypoints.append(keypoints_xy)
+                scoredispersitys.append(scoredispersity)
+                kptscores.append(kptscore)
+        else:
+            for b_idx in range(b):
+                indices_kpt = indices_keypoints[b_idx]  # one dimension vector, say its size is M
+                keypoints_xy_nms = torch.stack([indices_kpt % w, indices_kpt // w], dim=1)  # Mx2
+                keypoints_xy = keypoints_xy_nms / keypoints_xy_nms.new_tensor(
+                    [w - 1, h - 1]) * 2 - 1  # (w,h) -> (-1~1,-1~1)
+                kptscore = torch.nn.functional.grid_sample(scores_map[b_idx].unsqueeze(0),
+                                                           keypoints_xy.view(1, 1, -1, 2),
+                                                           mode='bilinear', align_corners=True)[0, 0, 0, :]  # CxN
+                keypoints.append(keypoints_xy)
+                scoredispersitys.append(None)
+                kptscores.append(kptscore)
+
+        return keypoints, scoredispersitys, kptscores
+
+    def forward(self, scores_map, descriptor_map, sub_pixel=False):
+        """
+        :param scores_map:  Bx1xHxW
+        :param descriptor_map: BxCxHxW
+        :param sub_pixel: whether to use sub-pixel keypoint detection
+        :return: kpts: list[Nx2,...]; kptscores: list[N,....] normalised position: -1.0 ~ 1.0
+        """
+        keypoints, scoredispersitys, kptscores = self.detect_keypoints(scores_map,
+                                                                       sub_pixel)
+
+        descriptors = sample_descriptor(descriptor_map, keypoints, sub_pixel)
+
+        # keypoints: B M 2
+        # descriptors: B M D
+        # scoredispersitys:
+        return keypoints, descriptors, kptscores, scoredispersitys
--- a/BEVNet.py
+++ b/BEVNet.py
@@ -0,0 +1,393 @@
+import math
+import torch
+import torch._utils
+import torch.nn as nn
+from typing import Optional, Callable
+from torchvision.models import resnet
+
+
+class RIConv2d(nn.Module):
+    def __init__(self, in_channel, out_channel, kernel_size=1, stride=1, padding=0, bias=True):
+        super().__init__()
+        self.padding = padding
+        self.stride = stride
+        self.use_bias = bias
+        idx = torch.arange(kernel_size ** 2).view(-1, 1)
+        row = torch.div(idx, kernel_size, rounding_mode='floor')
+        col = torch.fmod(idx, kernel_size)
+        idx = torch.cat([row, col], dim=1)
+        dis = (idx - 0.5 * (kernel_size - 1)).norm(dim=1) + 0.5 * (kernel_size % 2 - 1)
+        dis = dis.view(kernel_size, kernel_size)
+        dis = torch.round(dis).long()
+        dis[dis > 0.5 * (kernel_size - 1)] = -1
+        self.mask = dis
+        self.number = int(torch.max(dis).item() + 1)
+        self.weight = torch.zeros([kernel_size, kernel_size, out_channel, in_channel])
+        if bias:
+            self.bias = torch.nn.Parameter(torch.rand([out_channel, ]))
+        else:
+            self.bias = None
+        self.weight1 = torch.nn.Parameter(torch.rand([self.number, out_channel, in_channel]))
+
+    def forward(self, x):
+        weight = self.weight.to(self.weight1.device)
+        for i in range(self.number):
+            mask = self.mask == i
+            weight[mask] = self.weight1[i]
+        weight = weight.permute(2, 3, 0, 1)
+        y = torch.nn.functional.conv2d(x, weight, self.bias, self.stride, self.padding)
+        return y
+
+    def __repr__(self):
+        return f"RIConv2d(in_channel={self.weight.shape[3]}, out_channel={self.weight.shape[2]}," \
+               f" kernel_size={self.weight.shape[0]}, stride={self.stride}, padding={self.padding}, bias={self.bias is not None})"
+
+
+class RIMaxpool2d(nn.Module):
+    def __init__(self, kernel_size=1, stride=1, padding=0):
+        super().__init__()
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = padding
+        idx = torch.arange(kernel_size ** 2).view(-1, 1)
+        row = torch.div(idx, kernel_size, rounding_mode='floor')
+        col = torch.fmod(idx, kernel_size)
+        idx = torch.cat([row, col], dim=1)
+        dis = (idx - 0.5 * (kernel_size - 1)).norm(dim=1) + 0.5 * (kernel_size % 2 - 1)
+        dis = dis.view(kernel_size, kernel_size)
+        dis = torch.round(dis)
+        dis[dis > 0.5 * (kernel_size - 1)] = -1
+        self.mask = dis.view(-1, ) > -1
+
+    def forward(self, x):
+        B, C, H, W = x.shape
+        h_out = math.floor((H + 2 * self.padding - (self.kernel_size - 1) - 1) / self.stride + 1)
+        w_out = math.floor((W + 2 * self.padding - (self.kernel_size - 1) - 1) / self.stride + 1)
+        unfold_x = torch.nn.functional.unfold(x, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding)
+        y = unfold_x.view(B, C, self.kernel_size * self.kernel_size, h_out, w_out)
+        y = y.permute(2, 0, 1, 3, 4)
+        y1 = y[self.mask]
+        y_max = torch.max(y1, dim=0, keepdim=False)[0]
+        return y_max
+
+    def __repr__(self):
+        return f"RIMaxpool2d(kernel_size={self.kernel_size}, stride={self.stride}, padding={self.padding})"
+
+
+class RIAvgpool2d(nn.Module):
+    def __init__(self, kernel_size=1, stride=1, padding=0):
+        super().__init__()
+        self.padding = padding
+        self.stride = stride
+        idx = torch.arange(kernel_size ** 2).view(-1, 1)
+        row = torch.div(idx, kernel_size, rounding_mode='floor')
+        col = torch.fmod(idx, kernel_size)
+        idx = torch.cat([row, col], dim=1)
+        dis = (idx - 0.5 * (kernel_size - 1)).norm(dim=1) + 0.5 * (kernel_size % 2 - 1)
+        dis = dis.view(kernel_size, kernel_size)
+        dis = torch.round(dis)
+        dis[dis > 0.5 * (kernel_size - 1)] = -1
+        mask = dis > -1
+        self.number = torch.sum(mask)
+        self.weight = torch.zeros([kernel_size, kernel_size, 1, 1])
+        self.weight[mask] = 1
+
+    def forward(self, x):
+        weight = self.weight.to(x.device)
+        weight = weight.permute(2, 3, 0, 1)
+        weight = weight.repeat(x.shape[1], 1, 1, 1)
+        sum = torch.nn.functional.conv2d(x, weight, None, self.stride, self.padding, groups=x.shape[1])
+        avg = sum / self.number
+        return avg
+
+    def __repr__(self):
+        return f"RIAvgpool2d(kernel_size={self.weight.shape[0]}, stride={self.stride}, padding={self.padding})"
+
+
+class RIConvBlock(nn.Module):
+    def __init__(self, in_channels, out_channels,
+                 gate: Optional[Callable[..., nn.Module]] = None,
+                 norm_layer: Optional[Callable[..., nn.Module]] = None):
+        super().__init__()
+        if gate is None:
+            self.gate = nn.ReLU(inplace=True)
+        else:
+            self.gate = gate
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        self.conv1 = RIConv2d(in_channel=in_channels, out_channel=out_channels, kernel_size=5, padding=2, bias=False)
+        self.bn1 = norm_layer(out_channels)
+        self.conv2 = RIConv2d(in_channel=out_channels, out_channel=out_channels, kernel_size=5, padding=2, bias=False)
+        self.bn2 = norm_layer(out_channels)
+
+    def forward(self, x):
+        x = self.gate(self.bn1(self.conv1(x)))  # B x in_channels x H x W
+        x = self.gate(self.bn2(self.conv2(x)))  # B x out_channels x H x W
+        return x
+
+class RIResBlock(nn.Module):
+    expansion: int = 1
+
+    def __init__(
+            self,
+            inplanes: int,
+            planes: int,
+            stride: int = 1,
+            downsample: Optional[nn.Module] = None,
+            groups: int = 1,
+            base_width: int = 64,
+            dilation: int = 1,
+            gate: Optional[Callable[..., nn.Module]] = None,
+            norm_layer: Optional[Callable[..., nn.Module]] = None
+    ) -> None:
+        super(RIResBlock, self).__init__()
+        if gate is None:
+            self.gate = nn.ReLU(inplace=True)
+        else:
+            self.gate = gate
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        if groups != 1 or base_width != 64:
+            raise ValueError('ResBlock only supports groups=1 and base_width=64')
+        if dilation > 1:
+            raise NotImplementedError("Dilation > 1 not supported in ResBlock")
+        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+        self.conv1 = RIConv2d(in_channel=inplanes, out_channel=planes, kernel_size=5, stride=1, padding=2, bias=False)
+        self.bn1 = norm_layer(planes)
+        self.conv2 = RIConv2d(in_channel=planes, out_channel=planes, kernel_size=5, stride=1, padding=2, bias=False)
+        self.bn2 = norm_layer(planes)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.gate(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+
+        out += identity
+        out = self.gate(out)
+
+        return out
+
+class RICNN(nn.Module):
+    def __init__(self, c1: int = 8, c2: int = 16, c3: int = 32, c4: int = 64, dim: int = 64
+                 ):
+        super().__init__()
+
+        self.gate = nn.ReLU(inplace=True)
+        self.pool2 = RIMaxpool2d(kernel_size=2, stride=2)
+        self.pool4 = RIMaxpool2d(kernel_size=5, stride=4, padding=1)
+        self.block1 = RIConvBlock(3, c1, self.gate, nn.BatchNorm2d)
+        self.block2 = RIResBlock(inplanes=c1, planes=c2, stride=1,
+                                 downsample=nn.Conv2d(c1, c2, 1),
+                                 gate=self.gate,
+                                 norm_layer=nn.BatchNorm2d)
+        self.block3 = RIResBlock(inplanes=c2, planes=c3, stride=1,
+                                 downsample=nn.Conv2d(c2, c3, 1),
+                                 gate=self.gate,
+                                 norm_layer=nn.BatchNorm2d)
+        self.block4 = RIResBlock(inplanes=c3, planes=c4, stride=1,
+                                 downsample=nn.Conv2d(c3, c4, 1),
+                                 gate=self.gate,
+                                 norm_layer=nn.BatchNorm2d)
+
+        self.conv1 = resnet.conv1x1(c1, dim // 4)
+        self.conv2 = resnet.conv1x1(c2, dim // 4)
+        self.conv3 = resnet.conv1x1(c3, dim // 4)
+        self.conv4 = resnet.conv1x1(dim, dim // 4)
+        self.upsample2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
+        self.upsample3 = nn.Upsample(scale_factor=8, mode='bilinear', align_corners=True)
+        self.upsample4 = nn.Upsample(scale_factor=32, mode='bilinear', align_corners=True)
+        self.out = nn.Conv2d(dim, dim + 1, 1)
+
+    def forward(self, image):
+        x1 = self.block1(image)
+        x2 = self.pool2(x1)
+        x2 = self.block2(x2)
+        x3 = self.pool4(x2)
+        x3 = self.block3(x3)
+        x4 = self.pool4(x3)
+        x4 = self.block4(x4)
+        x1 = self.gate(self.conv1(x1))
+        x2 = self.gate(self.conv2(x2))
+        x3 = self.gate(self.conv3(x3))
+        x4 = self.gate(self.conv4(x4))
+        x2_up = self.upsample2(x2)
+        x3_up = self.upsample3(x3)
+        x4_up = self.upsample4(x4)
+        x1234 = torch.cat([x1, x2_up, x3_up, x4_up], dim=1)
+        y = self.out(x1234)
+        descriptor_map = y[:, :-1, :, :]
+        scores_map = torch.sigmoid(y[:, -1, :, :]).unsqueeze(1)
+        return scores_map, descriptor_map
+
+    def ri2maxpool(self, pool):
+        stride = pool.stride
+        pool_new = nn.MaxPool2d(stride)
+        return pool_new
+
+    def maxpool2ri(self, pool):
+        kernel_size = stride = pool.stride
+        ds = round((math.sqrt(2) - 1) / 2 * stride - 0.25 * (stride % 2 - 1))
+        kernel_size = kernel_size + ds
+        pool_new = RIMaxpool2d(kernel_size, stride, ds)
+        return pool_new
+
+    def ri2avgpool(self, pool):
+        stride = pool.stride
+        pool_new = nn.AvgPool2d(stride)
+        return pool_new
+
+    def avgpool2ri(self, pool):
+        kernel_size = stride = pool.stride
+        if stride > 3:
+            kernel_size = kernel_size + 1
+        pool_new = RIAvgpool2d(kernel_size, stride)
+        return pool_new
+
+    def ri2conv(self, conv):
+        ri = conv
+        weight = ri.weight
+        device = ri.weight1.device
+        bias = ri.bias
+        use_bias = bias is not None
+        weight_copy = weight.clone().to(device)
+        for i in range(ri.number):
+            mask = ri.mask == i
+            weight_copy[mask] = ri.weight1[i]
+        weight_copy = weight_copy.permute(2, 3, 0, 1)
+        in_c = weight.shape[3]
+        out_c = weight.shape[2]
+        kz = weight.shape[0]
+        sd = ri.stride
+        pd = ri.padding
+        conv_new = nn.Conv2d(in_channels=in_c, out_channels=out_c, kernel_size=kz, stride=sd, padding=pd, bias=use_bias)
+        if use_bias:
+            state_dict = {'weight': weight_copy, 'bias': bias}
+        else:
+            state_dict = {'weight': weight_copy}
+        conv_new.load_state_dict(state_dict)
+        return conv_new.to(device)
+
+    def conv2ri(self, conv):
+        weight = conv.weight
+        bias = conv.bias
+        device = weight.device
+        in_c = weight.shape[1]
+        out_c = weight.shape[0]
+        kz = weight.shape[2]
+        if kz < 3:
+            return conv
+        sd = conv.stride
+        pd = conv.padding
+        idx = torch.arange(kz ** 2).view(-1, 1)
+        row = torch.div(idx, kz, rounding_mode='floor')
+        col = torch.fmod(idx, kz)
+        idx = torch.cat([row, col], dim=1)
+        dis = (idx - 0.5 * (kz - 1)).norm(dim=1) + 0.5 * (kz % 2 - 1)
+        dis = dis.view(kz, kz)
+        dis = torch.round(dis).long()
+        dis[dis > 0.5 * (kz - 1)] = -1
+        mask = dis
+        number = int(torch.max(dis).item() + 1)
+        weight1 = torch.rand([number, out_c, in_c]).to(device)
+        weight2 = weight.clone()
+        weight2 = weight2.permute(2, 3, 0, 1)
+        used_bias = bias is not None
+        for i in range(number):
+            mask1 = mask == i
+            w = weight2[mask1]
+            weight1[i] = torch.mean(w, dim=0)
+        if used_bias:
+            state_dict = {'weight1': weight1, 'bias': bias}
+        else:
+            state_dict = {'weight1': weight1}
+        conv_new = RIConv2d(in_channel=in_c, out_channel=out_c, kernel_size=kz, stride=sd, padding=pd, bias=used_bias)
+        conv_new.load_state_dict(state_dict)
+        return conv_new.to(device)
+
+    def disable_ri(self):
+        modules = self.__dict__['_modules']
+        for key, value in modules.items():
+            if isinstance(value, RIMaxpool2d):
+                setattr(self, key, self.ri2maxpool(value))
+            if isinstance(value, RIAvgpool2d):
+                setattr(self, key, self.ri2avgpool(value))
+            if isinstance(value, RIConv2d):
+                setattr(self, key, self.ri2conv(value))
+            if 'block' in key:
+                block = value
+                block_modules = block.__dict__['_modules']
+                for bkey, bvalue in block_modules.items():
+                    if isinstance(bvalue, RIMaxpool2d):
+                        setattr(block, bkey, self.ri2maxpool(bvalue))
+                    if isinstance(bvalue, RIAvgpool2d):
+                        setattr(block, bkey, self.ri2avgpool(bvalue))
+                    if isinstance(bvalue, RIConv2d):
+                        setattr(block, bkey, self.ri2conv(bvalue))
+                modules[key] = block_modules
+                setattr(self, key, block)
+
+    def enable_ri(self):
+        modules = self.__dict__['_modules']
+        for key, value in modules.items():
+            if isinstance(value, nn.MaxPool2d):
+                setattr(self, key, self.maxpool2ri(value))
+            if isinstance(value, nn.AvgPool2d):
+                setattr(self, key, self.avgpool2ri(value))
+            if isinstance(value, nn.Conv2d):
+                setattr(self, key, self.conv2ri(value))
+            if 'block' in key:
+                block = value
+                block_modules = block.__dict__['_modules']
+                for bkey, bvalue in block_modules.items():
+                    if isinstance(bvalue, nn.MaxPool2d):
+                        setattr(block, bkey, self.maxpool2ri(bvalue))
+                    if isinstance(bvalue, nn.AvgPool2d):
+                        setattr(block, bkey, self.avgpool2ri(bvalue))
+                    if isinstance(bvalue, nn.Conv2d):
+                        setattr(block, bkey, self.conv2ri(bvalue))
+                modules[key] = block_modules
+                setattr(self, key, block)
+
+
+class EncodePosition(nn.Module):
+    def __init__(self, feature_size=128):
+        super().__init__()
+        self.bins = 16
+        self.conv1 = nn.Sequential(
+            nn.Conv1d(in_channels=self.bins, out_channels=feature_size//2, kernel_size=1, stride=1, padding=0, bias=True), nn.BatchNorm1d(feature_size//2), nn.ReLU(),
+            nn.Conv1d(in_channels=feature_size//2, out_channels=feature_size//2, kernel_size=1, stride=1, padding=0, bias=True), nn.BatchNorm1d(feature_size//2), nn.ReLU(),
+            nn.Conv1d(in_channels=feature_size//2, out_channels=feature_size, kernel_size=1, stride=1, padding=0, bias=True)
+        )
+        # self.conv2=(nn.Conv1d(in_channels=256,out_channels=128,kernel_size=1))
+
+    def forward(self, x, fea):
+        b, n, c = x.shape
+        x1 = x.unsqueeze(1)
+        x2 = x.unsqueeze(2)
+        dx = x1 - x2
+        distance = dx.norm(p=2, dim=3)
+        hists = torch.zeros([b, n, self.bins]).to(x.device)
+        for i in range(b):
+            for j in range(n):
+                dis = distance[i, j]
+                hist = torch.histc(dis, bins=self.bins, min=1, max=80)
+                hists[i, j] = hist
+        hists = hists / torch.sum(hists, dim=2, keepdim=True)
+        x3 = hists.permute(0, 2, 1)
+        x4 = self.conv1(x3)
+        if hasattr(self, 'conv2'):
+            x5 = torch.cat([fea, x4], dim=1)
+            y = self.conv2(x5)
+        else:
+            y = fea + x4
+        return y
--- a/README.md
+++ b/README.md
@@ -0,0 +1,42 @@
+# FUSION
+
+## Table of Contents
+- [Paper](#paper)
+- [Overview](#overview)
+- [Prerequisites](#prerequisites)
+- [Running the Code](#running-the-code)
+- [Evaluation](#evaluation)
+## Paper
+If you find the poject helps you, you can cite our paper:
+
+Cao D, Yue H, Liu Z, et al. BEVLCD+: Real-Time and Rotation-Invariant Loop Closure Detection Based on BEV of Point Cloud[J]. IEEE Transactions on Instrumentation and Measurement, 2023.
+
+Yue H, Cao D, Liu Z, et al. Cross Fusion of Point Cloud and Learned Image for Loop Closure Detection[J]. IEEE Robotics and Automation Letters, 2024.
+
+## Overview
+We provide code for BEV mode and fusion mode, so you can easily train and test.
+
+## Prerequisites
+Before you can use this project, you'll need to do the following:
+
+1. **Download Datasets**: Download the [KITTI](https://www.cvlibs.net/datasets/kitti/eval_odometry.php) and [KITTI-360](https://www.cvlibs.net/datasets/kitti-360/download.php).
+
+2. **Prepare Dataset Structure**: Use `preparedataset.py` to construct a dataset structure that complies with the project's requirements. Make sure to update the necessary paths in the code.
+
+3. **Prepare environment**: Use the commonds on `env.txt` to create your environment. Windows and Ubuntu is OK.
+ 
+## Running the Code
+To run the code, follow these steps:
+
+1. Configure the code to run in either BEV mode or fusion mode using the settings in `config.yaml`.
+
+2. If you want to load a trained model used in the paper, ensure that you update the file path accordingly.
+
+3. Run `python train.py`
+
+
+## Evaluation
+Evaluate the saved data using the evaluation script.
+
+## Others
+If you have any questions please feel free to contact us.
--- a/pycache/BEVNet.cpython-38.pyc
+++ b/pycache/BEVNet.cpython-38.pyc
--- a/pycache/dataset.cpython-38.pyc
+++ b/pycache/dataset.cpython-38.pyc
--- a/pycache/evaluate_lcd.cpython-38.pyc
+++ b/pycache/evaluate_lcd.cpython-38.pyc
--- a/pycache/loss.cpython-38.pyc
+++ b/pycache/loss.cpython-38.pyc
--- a/pycache/net.cpython-38.pyc
+++ b/pycache/net.cpython-38.pyc
--- a/pycache/netvlad.cpython-38.pyc
+++ b/pycache/netvlad.cpython-38.pyc
--- a/pycache/tools.cpython-38.pyc
+++ b/pycache/tools.cpython-38.pyc
--- a/pycache/uot.cpython-38.pyc
+++ b/pycache/uot.cpython-38.pyc
--- a/config.yaml
+++ b/config.yaml
@@ -0,0 +1,56 @@
+'experiment' :
+
+  # 'path_dataset' : '/mnt/data/cdy/project/dataset/FUSION'
+  # 'path_result': '/mnt/data/cdy/data2/results/FUSIONLCD'
+
+  # 'path_dataset' : 'E:\work\Project\dataset\FUSION'
+  # 'path_result' : 'E:\work\Project\results\FUSIONLCD\bev2'
+  
+  'path_dataset' : '/home/adlab36/chenyouyuan/FUSIONLCD'
+  'path_result': '/home/adlab36/chenyouyuan/FUSIONLCD/result'
+  'train_flag' : 0
+  'validate_flag' : 1
+  'test_flag' : 1
+  'flag' : 'fusion' 
+  'cuda' : 1
+  # TRAINING
+  'epochs' : 200
+  'batchsize' : 6
+  'learning_rate' : 1.e-3
+  'beta1' : 0.9
+  'beta2' : 0.999
+  'eps' : 1.e-8
+  'weight_decay' : 5.e-6
+  'load_model' : 1
+  #FUSION
+  # 'last_model' : '/data4/caodanyang/results/FUSIONLCD/08310/models/checkpoint_079.pth.tar'
+  #BEV
+  # 'last_model' : '/data4/caodanyang/results/FUSIONLCD/bev_09030/models/checkpoint_066.pth.tar'
+  #BEV+EP
+  'last_model' : '/home/adlab36/chenyouyuan/FUSIONLCD/result/log/models/checkpoint_199.pth.tar'
+  #DATASET
+  'train' : 0,5,6,7,9
+  'validate' : 8,50,54,55,56,59
+  'test' : 8,50,54,55,56,59
+  'voxel_num' : 15000
+  'voxel_max_points' : 100
+  'voxel_sample' : 'top'
+#  'bev_range' : -51.2,-51.2,-2.5,51.2,51.2,1.5
+#  'bev_resolution' : 0.16
+#  'bev_range' : -64,-64,-2.5,64,64,1.5
+#  'bev_resolution' : 0.2
+  'bev_range' : -32,-32,-2.5,32,32,1.5
+  'bev_resolution' : 0.2
+
+  # NETWORK PARAMS
+  'kpts_number_bev' : 150
+  'kpts_number_img' : 150
+  'cluster_num_bev' : 16
+  'cluster_num_img' : 16
+  'cluster_num_fusion' : 16
+  'sinkhorn_iter' : 5
+  'vlad_size' : 256
+  # LOSS
+  'loop_file' :  'loop_GT_4m'
+  'trip_margin' : 0.5
+  'negetative_selsector' : 'random'
--- a/dataset.py
+++ b/dataset.py
@@ -0,0 +1,751 @@
+import glob
+import math
+import os
+import pickle
+from functools import reduce
+import matplotlib.pylab as plt
+import cv2
+import numba
+import numpy as np
+import torch
+import yaml
+from scipy.spatial.distance import cdist
+from torch.utils.data import Dataset, DataLoader, ConcatDataset
+from torch.utils.data.dataloader import default_collate
+from torch.nn.utils.rnn import pad_sequence
+
+import tools
+
+IMG_HEIGHT = 384
+IMG_WIDTH = 1152
+EGEG_PROJ = 10
+IMAGE_SCALE = 0.5
+
+def euler2mat(z, y, x):
+    Ms = []
+    if z:
+        cosz = math.cos(z)
+        sinz = math.sin(z)
+        Ms.append(np.array(
+            [[cosz, -sinz, 0],
+             [sinz, cosz, 0],
+             [0, 0, 1]]))
+    if y:
+        cosy = math.cos(y)
+        siny = math.sin(y)
+        Ms.append(np.array(
+            [[cosy, 0, siny],
+             [0, 1, 0],
+             [-siny, 0, cosy]]))
+    if x:
+        cosx = math.cos(x)
+        sinx = math.sin(x)
+        Ms.append(np.array(
+            [[1, 0, 0],
+             [0, cosx, -sinx],
+             [0, sinx, cosx]]))
+    if Ms:
+        return reduce(np.dot, Ms[::-1])
+    return np.eye(3)
+
+
+def rt_mat(rx, ry, rz, tx, ty, tz):
+    rt = np.eye(4, dtype=np.float32)
+    r = euler2mat(rz, ry, rx)
+    rt[0:3, 0:3] = r
+    rt[0:3, 3] = [tx, ty, tz]
+    return rt
+
+@numba.jit(nopython=True)
+def _points_to_voxel_reverse_kernel(points,
+                                    voxel_size,
+                                    coors_range,
+                                    num_points_per_voxel,
+                                    coor_to_voxelidx,
+                                    voxels,
+                                    coors,
+                                    max_points,
+                                    max_voxels,voxel_idx_empty,voxel_mamiz):
+    # put all computations to one loop.
+    # we shouldn't create large array in main jit code, otherwise
+    # reduce performance
+    N = points.shape[0]
+    # ndim = points.shape[1] - 1
+    ndim = 3
+    ndim_minus_1 = ndim - 1
+    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
+    # np.round(grid_size)
+    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)
+    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
+    coor = np.zeros(shape=(3,), dtype=np.int32)
+    voxel_num = 0
+    failed = False
+    for i in range(N):
+        failed = False
+        for j in range(ndim):
+            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
+            if c < 0 or c >= grid_size[j]:
+                failed = True
+                break
+            coor[ndim_minus_1 - j] = c
+        if failed:
+            continue
+        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]#0-15000
+        if voxelidx == -1:
+            voxelidx = voxel_num
+            if voxel_num >= max_voxels:
+                break
+            voxel_num += 1
+            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
+            coors[voxelidx] = coor
+        num = num_points_per_voxel[voxelidx]
+        if num < max_points:
+            voxel_idx_empty[voxelidx,num_points_per_voxel[voxelidx]]=1
+            if points[i,2]>voxel_mamiz[voxelidx,0]:
+                voxel_mamiz[voxelidx,0]=points[i,2]
+            if points[i,2]<voxel_mamiz[voxelidx,1]:
+                voxel_mamiz[voxelidx,1]=points[i,2]
+            voxels[voxelidx, num] = points[i]
+            num_points_per_voxel[voxelidx] += 1
+
+    return voxel_num
+
+
+def points_to_voxel(points,
+                    voxel_size,
+                    coors_range,
+                    max_points=35,
+                    reverse_index=True,
+                    max_voxels=20000):
+    """convert kitti points(N, >=3) to voxels. This version calculate
+    everything in one loop. now it takes only 4.2ms(complete point cloud)
+    with jit and 3.2ghz cpu.(don't calculate other features)
+    Note: this function in ubuntu seems faster than windows 10.
+
+    Args:
+        points: [N, ndim] float tensor. points[:, :3] contain xyz points and
+            points[:, 3:] contain other information such as reflectivity.
+        voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size
+        coors_range: [6] list/tuple or array, float. indicate voxel range.
+            format: xyzxyz, minmax
+        max_points: int. indicate maximum points contained in a voxel.
+        reverse_index: boolean. indicate whether return reversed coordinates.
+            if points has xyz format and reverse_index is True, output
+            coordinates will be zyx format, but points in features always
+            xyz format.
+        max_voxels: int. indicate maximum voxels this function create.
+            for second, 20000 is a good choice. you should shuffle points
+            before call this function because max_voxels may drop some points.
+
+    Returns:
+        voxels: [M, max_points, ndim] float tensor. only contain points.
+        coordinates: [M, 3] int32 tensor.
+        num_points_per_voxel: [M] int32 tensor.
+    """
+    if not isinstance(voxel_size, np.ndarray):
+        voxel_size = np.array(voxel_size, dtype=points.dtype)
+    if not isinstance(coors_range, np.ndarray):
+        coors_range = np.array(coors_range, dtype=points.dtype)
+    voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size
+    voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())
+    if reverse_index:
+        voxelmap_shape = voxelmap_shape[::-1]
+    # don't create large array in jit(nopython=True) code.
+    num_points_per_voxel = np.zeros(shape=(max_voxels,), dtype=np.int32)
+    coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)
+    voxels = np.zeros(shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype)
+    coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)
+    voxel_idx_empty = np.zeros((max_voxels,max_points),dtype=np.int32)
+    voxel_mamiz=np.zeros((max_voxels,2),dtype=points.dtype)
+    voxel_mamiz[:,0]=-99
+    voxel_mamiz[:,1]=99
+    voxel_num = _points_to_voxel_reverse_kernel(
+        points, voxel_size, coors_range, num_points_per_voxel,
+        coor_to_voxelidx, voxels, coors, max_points, max_voxels,voxel_idx_empty,voxel_mamiz)
+
+    # coors = coors[:voxel_num]
+    # voxels = voxels[:voxel_num]
+    # num_points_per_voxel = num_points_per_voxel[:voxel_num]
+    # voxels[:, :, -3:] = voxels[:, :, :3] - \
+    #     voxels[:, :, :3].sum(axis=1, keepdims=True)/num_points_per_voxel.reshape(-1, 1, 1)
+    return voxels, coors, num_points_per_voxel, coor_to_voxelidx,voxel_idx_empty,voxel_mamiz
+
+
+@numba.jit(nopython=True)
+def pixel_choose(pixel1, pixel2):
+    n = pixel1.shape[0]
+    k = pixel1.shape[1]
+    k1 = pixel2.shape[1]
+    for i in range(n):
+        idx = []
+        for j in range(k):
+            if pixel1[i, j, 0] > -1:
+                idx.append(j)
+        k2 = len(idx)
+        idx=np.asarray(idx)
+        if k2 >= k1:
+            choice = np.random.choice(idx, k1, replace=False)
+        else:
+            choice = np.random.choice(idx, k1, replace=True)
+        for j in range(k1):
+            pixel2[i,j] = pixel1[i, choice[j]]
+
+def pointcloud_encoder(pointcloud=None, cfg=None):
+    try:
+        resolution = cfg['bev_resolution']
+        pointcloud_range = [float(x) for x in tools.read_cfg(cfg['bev_range'])]
+        voxel_max_points = cfg['voxel_max_points']
+        voxel_num = cfg['voxel_num']
+        voxel_sample = cfg['voxel_sample']
+    except:
+        resolution = 0.2
+        pointcloud_range = [-40, -40, -2.5, 40, 40, 1.5]
+        voxel_max_points = 100
+        voxel_num = 15000
+        voxel_sample = 'top'
+
+    pc_filter = (pointcloud[:, 0] > pointcloud_range[0]) & (pointcloud[:, 0] < pointcloud_range[3]) \
+                & (pointcloud[:, 1] > pointcloud_range[1]) & (pointcloud[:, 1] < pointcloud_range[4]) \
+                & (pointcloud[:, 2] > pointcloud_range[2]) & (pointcloud[:, 2] < pointcloud_range[5])
+    pointcloud = pointcloud[pc_filter]
+    if voxel_sample == 'top':
+        idx = np.argsort(-pointcloud[:, 2])
+        pointcloud = pointcloud[idx]
+    else:
+        idx = np.arange(len(pointcloud))
+        np.random.shuffle(idx)
+        pointcloud = pointcloud[idx]
+    resolution_z=pointcloud_range[5]-pointcloud_range[2]
+    voxels, coors, num_points_per_voxel, coor_to_voxelidx,voxel_idx_empty,voxel_mamiz = points_to_voxel(pointcloud,
+                                                                            voxel_size=[
+                                                                                resolution, resolution,resolution_z],
+                                                                            coors_range=[
+                                                                                *pointcloud_range],
+                                                                            max_points=voxel_max_points,
+                                                                            max_voxels=voxel_num)
+
+    
+    coor_to_voxelidx = np.squeeze(coor_to_voxelidx)
+    voxel_idx_empty=voxel_idx_empty.astype(np.bool_)
+
+    voxels_center = np.sum(voxels, axis=1)
+    voxels_center[:, 0:4] = voxels_center[:, 0:4] / (num_points_per_voxel.reshape(-1, 1)+1e-8)
+    
+    max_z = voxel_mamiz[:,0]
+    min_z = voxel_mamiz[:,1]
+    max_z1 = max_z / resolution_z
+    mean_i = np.sum(voxels[:, :, 3], axis=1) / (num_points_per_voxel+1e-8)
+    density = np.log(np.clip(num_points_per_voxel, 1, None)) / np.log(cfg['voxel_max_points'])
+    
+
+    dz = max_z - min_z
+    idx_not_ground = (dz > 0.05) & (num_points_per_voxel > 1)
+    coors1 = coors[idx_not_ground, 1:3]
+    relation = 0
+    if voxels.shape[2] == 6:  # x,y,z,i,pu,pv
+        have_pixel = (voxels[:, :, 4] > 0) & (voxels[:, :, 5] > 0)  # each 3d point has a pixel
+        have_pixel = np.bitwise_or.reduce(have_pixel, axis=1)  # each cell of bev has pixels,num of pixels may less than num of 3d points
+
+        # num_voxels_pixel = np.sum(voxels_pixel, axis=1)
+        # num_voxels_pixel = np.clip(num_voxels_pixel, 1, None)
+        # voxels_center[:, 4:6] = np.int_(voxels_center[:, 4:6] / num_voxels_pixel.reshape(-1, 1))
+        feature = np.concatenate([max_z1, mean_i, density,
+                                  voxels_center[:, 0], voxels_center[:, 1], voxels_center[:, 2], voxels_center[:, 3]],
+                                 axis=0).reshape(7, -1).T
+        pixels = voxels[idx_not_ground & have_pixel, :, 4:6]
+        # pixels2 = np.zeros([pixels.shape[0], 10, 2], dtype=pixels.dtype)
+        # pixel_choose(pixels, pixels2)
+        # pixels = pixels2
+
+        coors2 = coors[idx_not_ground & have_pixel, 1:3].reshape(-1, 1, 2)
+        relation = np.hstack((pixels, coors2))
+
+    elif voxels.shape[2] == 4:  # x,y,z,i
+        feature = np.concatenate([max_z1, mean_i, density,
+                                  voxels_center[:, 0], voxels_center[:, 1], voxels_center[:, 2], voxels_center[:, 3]],
+                                 axis=0).reshape(7, -1).T
+    else:
+        print('ERROR VOXEL')
+        exit()
+    # eigvalues = voxel_svd(voxels)
+    # feature = np.hstack((feature, eigvalues))
+    bev = np.zeros([coor_to_voxelidx.shape[1], coor_to_voxelidx.shape[1], feature.shape[1]], dtype=np.float32)
+    bev[coors1[:, 0], coors1[:, 1]] = feature[idx_not_ground]
+    # bev[coors1[:, 0], coors1[:, 1]] = feature[idx_not_ground]
+    # bev_show=np.uint8(bev[:,:,0:3]*255)
+    # cv2.imshow('1',bev_show)
+    # cv2.waitKey(0)
+
+    return bev, relation
+
+
+def crop_image(img, height=IMG_HEIGHT, width=IMG_WIDTH):
+    # img: the original image, a numpy array of size H*W*C
+    # height: the target height
+    # width: the target width
+
+    # get the size of the original image
+    h, w, _ = img.shape
+
+    # calculate the padding size if necessary
+    if h < height:
+        pad_top = (height - h) // 2
+        pad_bottom = height - h - pad_top
+    else:
+        pad_top, pad_bottom = 0, 0
+    if w < width:
+        pad_left = (width - w) // 2
+        pad_right = width - w - pad_left
+    else:
+        pad_left, pad_right = 0, 0
+
+    # pad the original image with black pixels if necessary
+    img_padded = cv2.copyMakeBorder(img, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
+
+    # crop the padded image to the target size
+    h1 = (img_padded.shape[0] - height) // 2
+    h2 = h1 + height
+    w1 = (img_padded.shape[1] - width) // 2
+    w2 = w1 + width
+    img_cropped = img_padded[h1:h2, w1:w2]
+    dh = int((height - h) / 2)
+    dw = int((width - w) / 2)
+    if IMAGE_SCALE<1:
+        img_cropped = cv2.resize(img_cropped, None, None, IMAGE_SCALE, IMAGE_SCALE)
+        dh = int(dh * IMAGE_SCALE)
+        dw = int(dw * IMAGE_SCALE)
+
+    return img_cropped, dh, dw
+
+
+class KittiDataset(Dataset):
+    def __init__(self, cfg, sequence, argument=True, mode='train', flag_bev=True, flag_img=True, flag_fuse=True):
+        root_dataset = cfg['path_dataset']
+        if (flag_img == False) & (flag_bev == False) & (flag_fuse == False):
+            print('No module will be used!')
+            exit()
+        if (flag_img == False) or (flag_bev == False):
+            flag_fuse = False
+        self.flag_img = flag_img
+        self.flag_bev = flag_bev
+        self.flag_fuse = flag_fuse
+        self.cfg = cfg
+        self.sequence = sequence
+        self.mode = mode
+        scans = glob.glob(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'velodyne', "*.bin"))
+        images = glob.glob(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'image_2', "*.png"))
+        if int(sequence) >= 50:
+            calib = np.loadtxt(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'calib.txt'))
+            poses = os.path.join(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'poses.npy'))
+        else:
+            calib = np.genfromtxt(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'calib.txt'))[:, 1:]
+            poses = os.path.join(os.path.join(root_dataset, 'sequences', '%02d' % sequence, 'poses.txt'))
+        f_gt = open(os.path.join(root_dataset, 'sequences', '%02d' % sequence, cfg['loop_file'] + '.pickle'), 'rb')
+        scans.sort()
+        images.sort()
+        self.scans = scans
+        self.images = images
+        if int(sequence) >= 50:
+            cam0_to_velo = np.reshape(calib, (3, 4))
+            cam0_to_velo = np.vstack([cam0_to_velo, [0, 0, 0, 1]])
+            cam0_to_velo = np.linalg.inv(cam0_to_velo)
+            self.cam0_to_velo = cam0_to_velo.astype(np.float32)
+            k = [552.554261, 0.000000, 682.049453, 0.000000, 
+                 0.000000, 552.554261, 238.769549, 0.000000, 
+                 0.000000, 0.000000, 1.000000, 0.000000]
+            k = np.array(k).reshape([3, 4])
+            p2 = np.eye(4)
+            p2[:3] = k
+            self.p2 = p2.astype(np.float32)
+            poses2 = np.load(poses)
+        else:
+            p2 = np.reshape(calib[2], (3, 4))
+            p2 = np.vstack([p2, [0, 0, 0, 1]])
+            self.p2 = p2.astype(np.float32)
+            cam0_to_velo = np.reshape(calib[4], (3, 4))
+            cam0_to_velo = np.vstack([cam0_to_velo, [0, 0, 0, 1]])
+            self.cam0_to_velo = cam0_to_velo.astype(np.float32)
+            cam0_to_velo = torch.tensor(cam0_to_velo)
+            poses2 = []
+            with open(poses, 'r') as f:
+                for x in f:
+                    x = x.strip().split()
+                    x = [float(v) for v in x]
+                    pose = torch.zeros((4, 4), dtype=torch.float64)
+                    pose[0, 0:4] = torch.tensor(x[0:4])
+                    pose[1, 0:4] = torch.tensor(x[4:8])
+                    pose[2, 0:4] = torch.tensor(x[8:12])
+                    pose[3, 3] = 1.0
+                    pose = cam0_to_velo.inverse() @ (pose @ cam0_to_velo)  #
+                    poses2.append(pose.float().numpy())
+            poses2 = np.stack(poses2)
+        # for i in range(12):
+        #     plt.subplot(3, 4, i + 1), plt.plot(np.arange(len(poses2)), poses2[:, i // 4, i % 4])
+        # plt.show()
+        self.poses = poses2
+
+        gt = pickle.load(f_gt)
+        self.gt=gt
+        # gt_new=[]
+        # for i in range(len(gt)):
+        #     idx=gt[i]['idx']
+        #     positive_idxs=gt[i]['positive_idxs']
+        #     for j in positive_idxs:
+        #         sample={'idx':idx,'positive_idxs':[j]}
+        #         gt_new.append(sample)
+        # self.gt=gt_new
+        self.argument = argument
+
+    def __len__(self):
+        if self.mode == 'test':
+            return len(self.poses)
+        else:
+            return int(len(self.gt))
+
+    def __getitem__(self, idx):
+        if self.mode == 'test':
+            idx_query = idx
+            pose_query = self.poses[idx_query]
+            image_query, scan_query, bev_query, dw, dh, W, H, relation_query = 0, 0, 0, 0, 0, 0, 0, 0
+            if self.flag_img:
+                image_query = cv2.imread(self.images[idx_query])
+                # image_query = cv2.GaussianBlur(image_query, (15,15),0)
+                image_query, dh, dw = crop_image(image_query, IMG_HEIGHT, IMG_WIDTH)
+            if self.flag_bev:
+                print("DEBUG __len__ scans:", len(self.scans), "requested idx_query:", idx_query)
+                scan_query = np.fromfile(self.scans[idx_query], dtype=np.float32).reshape((-1, 4))
+                # idx = np.random.choice(len(scan_query), int(len(scan_query) /4), replace=False)
+                # scan_query = scan_query[idx]
+                if self.flag_bev & self.flag_img & self.flag_fuse:
+                    # mat_proj = np.matmul(self.p2, self.cam0_to_velo)
+                    mat_proj = torch.matmul(torch.from_numpy(self.p2), torch.from_numpy(self.cam0_to_velo)).numpy()
+                    pts_query = scan_query.copy()
+                    pts_query[:, 3] = 1
+                    # pts_proj_query = np.matmul(mat_proj, pts_query.T).T
+                    pts_proj_query = torch.matmul(torch.from_numpy(mat_proj), torch.from_numpy(pts_query.T)).numpy().T
+                    z = pts_proj_query[:, 2:3]
+                    pts_proj_query = pts_proj_query / z * IMAGE_SCALE 
+                    pts_proj_query[:, 0:2] = pts_proj_query[:, 0:2] + [dw, dh]
+                    H, W, _ = image_query.shape
+                    mask_query = (pts_proj_query[:, 0] >= EGEG_PROJ) & (pts_proj_query[:, 0] < W - EGEG_PROJ) & (
+                            pts_proj_query[:, 1] >= EGEG_PROJ) & (pts_proj_query[:, 1] < H - EGEG_PROJ) & (z[:, 0] >= 0)
+                    pts_proj_query[~mask_query] = -1
+                    pixel_query = pts_proj_query[:, 0:2]
+                    pixel_query = pixel_query[:, [1, 0]]
+                    scan_query = np.hstack((scan_query, pixel_query))
+                bev_query, relation_query = pointcloud_encoder(scan_query, self.cfg)
+            sample = {
+                'sequence': self.sequence,
+                'id_query': idx_query,
+                'bev_query': bev_query,
+                'img_query': image_query,
+                'pose_query': pose_query,
+                'relation_query': relation_query
+            }
+        else:
+            gt = self.gt[idx]
+            idx_query = gt['idx']
+            idx_ps = gt['positive_idxs']
+            idx_positive = np.random.choice(idx_ps)
+            pose_query = self.poses[idx_query]
+            pose_positive = self.poses[idx_positive]
+            image_query, scan_query, bev_query, image_positive, scan_query, bev_positive, dw, dh, W, H, pose_to_frame, \
+                label_score, relation_query, relation_positive, = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            if self.flag_img:
+                image_query = cv2.imread(self.images[idx_query])
+                image_query, dh, dw = crop_image(image_query)
+                image_positive = cv2.imread(self.images[idx_positive])
+                image_positive, _, _ = crop_image(image_positive)
+            if self.flag_bev:
+                scan_query = np.fromfile(self.scans[idx_query], dtype=np.float32).reshape((-1, 4))
+
+                scan_positive = np.fromfile(self.scans[idx_positive], dtype=np.float32).reshape((-1, 4))
+
+                # return {'1':np.zeros(5)}
+                # import open3d as o3d
+                # pcd1 = o3d.geometry.PointCloud()
+                # sc1 = scan_query.copy()
+                # sc1[:, 3] = 1
+                # sc1 = np.matmul(pose_query, sc1.T).T
+                # pcd1.points = o3d.utility.Vector3dVector(sc1[:, :3])
+                # pcd1.colors = o3d.utility.Vector3dVector([[0, 0, 1] for i in range(len(pcd1.points))])
+                # pcd2 = o3d.geometry.PointCloud()
+                # sc2 = scan_positive.copy()
+                # sc2[:, 3] = 1
+                # sc2 = np.matmul(pose_positive, sc2.T).T
+                # pcd2.points = o3d.utility.Vector3dVector(sc2[:, :3])
+                # pcd2.colors = o3d.utility.Vector3dVector([[0, 1, 0] for i in range(len(pcd2.points))])
+                # vis1 = o3d.visualization.Visualizer()
+                # vis1.create_window(window_name='registration', width=600, height=600)  # 创建窗口
+                # render_option: o3d.visualization.RenderOption = vis1.get_render_option()  # 设置点云渲染参数
+                # render_option.background_color = np.array([1, 1, 1])  # 设置背景色（这里为黑色）
+                # render_option.point_size = 2  # 设置渲染点的大小
+                # vis1.add_geometry(pcd1)
+                # vis1.add_geometry(pcd2)
+                # coord_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=1.0, origin=pose_query[0:3,3])
+                # vis1.add_geometry(coord_frame)
+                # vis1.run()
+
+                if self.argument:
+                    # Rt1 = np.eye(4)
+                    # idx1=np.random.random((len(scan_query),))
+                    # idx1=idx1>0.1
+                    # scan_query=scan_query[idx1]
+                    # idx2=np.random.random((len(scan_positive),))
+                    # idx2=idx2>0.1
+                    # scan_positive=scan_positive[idx2]
+                    rand = np.random.random(6) * 2 - 1
+                    Rt = rt_mat(rand[0] * 3 / 180 * np.pi,
+                                rand[1] * 3 / 180 * np.pi,
+                                rand[2] * 180 / 180 * np.pi,
+                                rand[3] * 3, rand[4] * 3, rand[5] * 0.3)
+                    Rt1 = torch.from_numpy(Rt).inverse().numpy()
+                    ints = scan_positive[:, 3].copy()
+                    scan_positive[:, 3] = 1
+                    # scan_positive = np.matmul(Rt, scan_positive.T).T
+                    scan_positive = torch.matmul(torch.from_numpy(Rt),torch.from_numpy(scan_positive.T)).numpy().T
+                    scan_positive[:, 3] = ints
+                    # pose_positive = np.matmul(pose_positive, np.linalg.inv(Rt))
+                    pose_positive=torch.matmul(torch.from_numpy(pose_positive),torch.from_numpy(Rt).inverse()).numpy()
+                else:
+                    Rt1 = np.eye(4).astype(np.float32)
+                if self.flag_fuse:
+                    # mat_proj = np.matmul(self.p2, self.cam0_to_velo)
+                    mat_proj = torch.matmul(torch.from_numpy(self.p2), torch.from_numpy(self.cam0_to_velo)).numpy()
+                    pts_query = scan_query.copy()
+                    pts_query[:, 3] = 1
+                    # pts_proj_query = np.matmul(mat_proj, pts_query.T).T
+                    pts_proj_query = torch.matmul(torch.from_numpy(mat_proj), torch.from_numpy(pts_query.T)).numpy().T
+                    z = pts_proj_query[:, 2:3]
+                    pts_proj_query = pts_proj_query / z * IMAGE_SCALE 
+                    pts_proj_query[:, 0:2] = pts_proj_query[:, 0:2] + [dw, dh]
+                    H, W, _ = image_query.shape
+                    mask_query = (pts_proj_query[:, 0] >= EGEG_PROJ) & (pts_proj_query[:, 0] < W - EGEG_PROJ) & (
+                            pts_proj_query[:, 1] >= EGEG_PROJ) & (pts_proj_query[:, 1] < H - EGEG_PROJ) & (z[:, 0] >= 0)
+                    pts_proj_query[~mask_query] = -1
+                    pixel_query = pts_proj_query[:, 0:2]
+                    pixel_query = pixel_query[:, [1, 0]]  # h,w
+                    scan_query = np.hstack((scan_query, pixel_query))
+
+                    # fig = plt.figure()
+                    # plt.subplot(2, 1, 1), plt.imshow(image_query[:, :, [2, 1, 0]])
+                    # plt.subplot(2, 1, 2), plt.imshow(image_query[:, :, [2, 1, 0]])
+                    # plt.scatter(pixel_query[mask_query, 1], pixel_query[mask_query, 0], c=z[mask_query], cmap='jet', alpha=0.5, s=1)
+                    # plt.show()
+
+                    # mat_proj1 = self.p2.dot(self.cam0_to_velo).dot(Rt1)
+                    mat_proj1 = torch.matmul(torch.matmul(torch.from_numpy(self.p2), torch.from_numpy(self.cam0_to_velo)), torch.from_numpy(Rt1)).numpy()
+                    pts_positive = scan_positive.copy()
+                    pts_positive[:, 3] = 1
+                    # pts_proj_positive = np.matmul(mat_proj1, pts_positive.T).T
+                    pts_proj_positive = torch.matmul(torch.from_numpy(mat_proj1), torch.from_numpy(pts_positive.T)).numpy().T
+                    z = pts_proj_positive[:, 2:3]
+                    pts_proj_positive = pts_proj_positive / z * IMAGE_SCALE 
+                    pts_proj_positive[:, 0:2] = pts_proj_positive[:, 0:2] + [dw, dh]
+                    mask_positive = (pts_proj_positive[:, 0] >= EGEG_PROJ) & (pts_proj_positive[:, 0] < W - EGEG_PROJ) & (
+                            pts_proj_positive[:, 1] >= 0 - EGEG_PROJ) & (pts_proj_positive[:, 1] < H - EGEG_PROJ) & (z[:, 0] >= 0)
+                    pts_proj_positive[~mask_positive] = -1
+                    pixel_positive = pts_proj_positive[:, 0:2]
+                    pixel_positive = pixel_positive[:, [1, 0]]
+                    scan_positive = np.hstack((scan_positive, pixel_positive))
+
+                # pose_to_frame = np.matmul(np.linalg.inv(pose_positive), pose_query)
+                pose_to_frame=torch.matmul(torch.from_numpy(pose_positive).inverse(),torch.from_numpy(pose_query)).numpy()
+                # scan_query1=scan_query.copy()
+                # scan_query1[:,3]=1
+                # scan_query1=np.matmul(pose_to_frame,scan_query1.T).T
+                # scan_query[:,0:3]=scan_query1[:,0:3]
+                # scan_query[:, 3] = 1
+                # scan_query = np.matmul(pose_query, scan_query.T).T
+                # scan_positive[:, 3] = 1
+                # scan_positive = np.matmul(pose_positive, scan_positive.T).T
+                # plt.subplot(1, 2, 1), plt.plot(scan_query[:, 0], scan_query[:, 1], 'b.', markersize=1),plt.axis([-60,60,-60,60])
+                # plt.subplot(1, 2, 2), plt.plot(scan_positive[:, 0], scan_positive[:, 1], 'b.', markersize=1),plt.axis([-60,60,-60,60])
+                # plt.show()
+                
+                bev_query, relation_query = pointcloud_encoder(scan_query, self.cfg)
+                bev_positive, relation_positive = pointcloud_encoder(scan_positive, self.cfg)
+                # if self.argument:
+                #     rand = np.random.randint(0, 9, [2, ])
+                #     if rand[0] > 4:
+                #         bev_query1 = np.rot90(bev_query, 2, axes=(0, 1))
+                #         bev_query = bev_query1.copy()
+                #     if rand[1] > 4:
+                #         bev_positive1 = np.rot90(bev_positive, 2, axes=(0, 1))
+                #         bev_positive = bev_positive1.copy()
+                h_bev, w_bev, _ = bev_query.shape
+                label_score = np.zeros_like(bev_positive[:, :, :2])
+                grid = np.array(np.meshgrid(np.arange(h_bev), np.arange(w_bev))).swapaxes(0, 2)
+                scan_query_sample = bev_query[:, :, 3:5]
+                mask_query = scan_query_sample != 0
+                mask_query = mask_query[:, :, 0] | mask_query[:, :, 1]
+                grid_query = grid[mask_query]
+                scan_query_sample = scan_query_sample[mask_query]
+                scan_positive_sample = bev_positive[:, :, 3:5]
+                mask_positive = scan_positive_sample != 0
+                mask_positive = mask_positive[:, :, 0] | mask_positive[:, :, 1]
+                grid_positive = grid[mask_positive]
+                scan_positive_sample = scan_positive_sample[mask_positive]
+                scan_query_sample1 = np.hstack((scan_query_sample, scan_query_sample * 0))
+                scan_query_sample1[:, 3] = 1
+                # scan_query_sample1 = np.matmul(pose_to_frame, scan_query_sample1.T).T
+                scan_query_sample1 = torch.matmul(torch.from_numpy(pose_to_frame), torch.from_numpy(scan_query_sample1.T)).numpy().T
+
+                idx1, idx2, dis = tools.nn_match(scan_query_sample1[:, 0:2], scan_positive_sample[:, 0:2], 'euclidean')
+                if len(dis) > 50:
+                    th1 = max([2, dis[min([256, int(len(dis) * 0.3)])]])
+                    idx1 = idx1[dis < th1]
+                    idx2 = idx2[dis < th1]
+                else:
+                    dis = cdist(scan_query_sample1[:, 0:2], scan_positive_sample)
+                    min1 = np.min(dis, axis=1)
+                    min2 = np.min(dis, axis=0)
+                    min11 = np.sort(min1)
+                    th1 = max([0.2, min11[min([256, int(len(min11) * 0.2)])]])
+                    min21 = np.sort(min2)
+                    th2 = max([0.2, min21[min([256, int(len(min21) * 0.2)])]])
+                    idx1 = np.arange(len(scan_query_sample))[min1 < th1]
+                    idx2 = np.arange(len(scan_positive_sample))[min2 < th2]
+
+                # points1=scan_query_sample1[:, 0:2]
+                # points2=scan_positive_sample[:, 0:2]
+                # points = np.mean(np.vstack((points1, points2)), axis=0, keepdims=True)
+                # points1 = points1 - points
+                # points2 = points2 - points
+                # af = torch.sum(torch.from_numpy(points1) ** 2, -1, keepdim=True)
+                # bf = torch.sum(torch.from_numpy(points2) ** 2, -1, keepdim=True).transpose(0, 1)
+                # cf = af + bf - 2 * torch.mm(torch.from_numpy(points1), torch.from_numpy(points2).transpose(0, 1))  # c^2=a^2+b^2-2abcos
+                # c = torch.sqrt(cf)
+                # dis = c
+                # dis1 = torch.min(dis, dim=1)[0]
+                # dis2 = torch.min(dis, dim=0)[0]
+                # idx1 = torch.where(dis1 < 0.5)[0].numpy()
+                # idx2 = torch.where(dis2 < 0.5)[0].numpy()
+                # dis1=dis1.numpy()
+                # dis2= dis2.numpy()
+
+                grid_query = grid_query[idx1]
+                grid_positive = grid_positive[idx2]
+                label_score[grid_query[:, 0], grid_query[:, 1], 0] = 1
+                label_score[grid_positive[:, 0], grid_positive[:, 1], 1] = 1
+                # fig, ax = plt.subplots(2, 2)
+                # ax[0,0].imshow(bev_query[:, :, 0:3])
+                # ax[0,1].imshow(label_score[:, :, 0])
+                # ax[1,0].imshow(bev_positive[:, :, 0:3])
+                # ax[1,1].imshow(label_score[:, :, 1])
+                # plt.savefig('1.png')
+                # plt.show()
+
+            sample = {
+                'sequence': self.sequence,
+                'id_query': idx_query,
+                'bev_query': bev_query,
+                'img_query': image_query,
+                'pose_query': pose_query,
+                'id_positive': idx_positive,
+                'bev_positive': bev_positive,
+                'img_positive': image_positive,
+                'pose_positive': pose_positive,
+                'pose_to_frame': pose_to_frame,
+                'label_score': label_score,
+                'relation_query': relation_query,
+                'relation_positive': relation_positive
+            }
+        return sample
+
+
+def collate(samples):
+    relation_query = []
+    relation_positive = []
+    samples2 = {key: default_collate([d[key] for d in samples]) for key in samples[0]
+                if key != 'relation_query' and key != 'relation_positive'}
+
+    for single_sample in samples:
+        try:
+            relation_query.append(torch.from_numpy(single_sample['relation_query']))
+        except:
+            pass
+        try:
+            relation_positive.append(torch.from_numpy(single_sample['relation_positive']))
+        except:
+            pass
+
+    relation = relation_query + relation_positive
+    if len(relation) > 0:
+        relation1 = pad_sequence(relation, batch_first=True, padding_value=-1)
+        relation1 = relation1.float()
+        samples2['relation'] = relation1
+
+    return samples2
+
+
+def KittiTotalLoader(cfg):
+    flag = cfg['flag']
+    bev = False
+    img = False
+    fuse = False
+    if flag == 'fusion':
+        bev = True
+        img = True
+        fuse = True
+    elif flag == 'img':
+        img = True
+    else:
+        bev = True
+    sequence_train = [int(x) for x in tools.read_cfg(cfg['train'])]
+    sequence_val = [int(x) for x in tools.read_cfg(cfg['validate'])]
+    sequence_test = [int(x) for x in tools.read_cfg(cfg['test'])]
+    dataset_list = []
+    for sequence in sequence_train:
+        single_dataset = KittiDataset(cfg, sequence, flag_bev=bev, flag_img=img, flag_fuse=fuse, argument=True, mode='train')
+        print('===Trainloader add: sequence %02d, %04d files, %04d frames with loop' % (sequence, len(single_dataset.poses), len(single_dataset.gt)))
+        dataset_list.append(single_dataset)
+    dataset_train = ConcatDataset(dataset_list)
+    dataset_list = []
+    for sequence in sequence_val:
+        single_dataset = KittiDataset(cfg, sequence, flag_bev=bev, flag_img=img, flag_fuse=fuse, argument=False, mode='train')
+        print('===Validationloader add: sequence %02d, %04d files, %04d frames with loop' % (sequence, len(single_dataset.poses), len(single_dataset.gt)))
+        dataset_list.append(single_dataset)
+    dataset_val = ConcatDataset(dataset_list)
+    dataset_list = []
+    for sequence in sequence_test:
+        single_dataset = KittiDataset(cfg, sequence, flag_bev=bev, flag_img=img, flag_fuse=fuse, argument=False, mode='test')
+        print('===Testloader add: sequence %02d, %04d files' % (sequence, len(single_dataset.poses)))
+        dataset_list.append(single_dataset)
+    dataset_test = ConcatDataset(dataset_list)
+    loader_train = DataLoader(dataset_train, batch_size=cfg['batchsize'], shuffle=True, num_workers=6, collate_fn=collate)
+    loader_val = DataLoader(dataset_val, batch_size=cfg['batchsize'], shuffle=True, num_workers=6, collate_fn=collate)
+    loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=6, collate_fn=collate)
+    return loader_train, loader_val, loader_test
+
+
+if __name__ == '__main__':
+    root_dir = '/media/ubuntu/Workshop/caodanyang/Project_CDY/results/mylcd'
+    # sequence = 0
+    # dataset = KittiDataset(root_dir, sequence)
+    # dataloader = DataLoader(dataset, batch_size=1, num_workers=8, shuffle=False)
+    # t = tools.Timer(name='Loading', number=len(dataloader))
+    # for data in dataloader:
+    #     t.update()
+    try:
+        with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile:
+            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
+        print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml"))
+    except:
+        with open(os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"), "r") as ymlfile:
+            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
+        print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"))
+    cfg = cfg['experiment']
+    path_dataset = cfg['path_dataset']
+    path_result = cfg['path_result']
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    train, val, test = KittiTotalLoader(cfg)
+    t = tools.Timer(name='Loading')
+    ds = []
+    for i, data in enumerate(train):
+        # ds.append(d)
+        t.update(i)
+
+    # t = tools.Timer(name='Loading', number=len(val))
+    # for i, data in enumerate(val):
+    #     t.update()
--- a/env.txt
+++ b/env.txt
@@ -0,0 +1,59 @@
+## 环境配置备忘
+conda create -n fusion_cyy python=3.8
+
+conda activate fusion_cyy
+
+conda install pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 cudatoolkit=11.3 -c pytorch -c conda-forge
+
+pip install pykitti pytorch-metric-learning pyyaml scipy scikit-image scikit-learn tqdm open3d matplotlib numba opencv-python opencv-contrib-python pandas -i https://pypi.mirrors.ustc.edu.cn/simple/
+
+conda remove -n fusion_cyy --all
+
+在.git的config下
+
+[user]
+    name = MobKBK
+    email = 202311250413@csust.edu.cn    /home/adlab8/pub_data1/Kitti360/KITTI-360
+
+tmux使用备忘：
+列表
+tmux ls
+
+tmux new -s your-session-name   wget -c https://s3.eu-central-1.amazonaws.com/avg-kitti/data_odometry_color.zip
+转后台：
+ctrl+b d
+杀死
+ctrl+d
+分离：
+tmux detach
+连接：
+tmux attach -t cyy
+切换：
+tmux switch -t your-session-name
+重命名：
+tmux rename-session -t old-session new-session
+新建窗口：
+tmux new-window -n your-window-name
+选择窗口：
+ctrl+b c: 创建一个新窗口（状态栏会显示多个窗口的信息）
+ctrl+b p: 切换到上一个窗口（按照状态栏的顺序）
+ctrl+b n: 切换到下一个窗口
+ctrl+b w: 从列表中选择窗口
+重命名窗口：
+tmux rename-window -t old_name new_name
+# 划分为上下两个窗格
+tmux split-window
+
+# 划分左右两个窗格
+tmux split-window -h
+左右划分：ctrl+b %
+上下划分：ctrl+b "
+
+ctrl+b [ 使用PageUp和PageDown可以实现上下翻页
+
+kitti位置
+/home/adlab8/pub_data1/OpenDataLab___KITTI_Odometry_2012/raw
+kitt360
+
+/home/adlab8/pub_data1/KITTI-360/
+ln -s <源文件或目录的路径> <软连接的路径>
--- a/evaluate_fm.py
+++ b/evaluate_fm.py
@@ -0,0 +1,81 @@
+import os
+import time
+import torch
+import tools
+import numpy as np
+import yaml
+from dataset import KittiTotalLoader
+from tqdm import tqdm
+
+def match_err(fea1,fea2,pose1,pose2,kpt1,kpt2):
+    pose_to_frame=torch.matmul(pose2.inverse(),pose1)
+    kpt1 = (pose_to_frame @ kpt1.permute(1, 0)).permute(1, 0)
+    kpt1[:,2]=0
+    kpt2[:,2]=0
+    idxp1,idxp2,dis_kpt=tools.nn_match(kpt1,kpt2,'euclidean')
+    idxf1,idxf2,dis_fea=tools.nn_match(fea1,fea2,'cosine')
+    kpt11=kpt1[idxf1]
+    kpt21=kpt2[idxf2]
+    dis_kpt1=(kpt11-kpt21).norm(p=2,dim=1)
+    mas=[]
+    mss=[]
+    reps=[]
+    for thr in [0.3,0.5,1,2,3]:
+        mas.append((torch.sum(dis_kpt1<=thr)/(len(idxf1)+1e-8)).item())
+        mss.append((torch.sum(dis_kpt1<=thr)/(torch.sum(dis_kpt<=thr)+1e-8)).item())
+        reps.append((torch.sum(dis_kpt<=thr)).item()/(len(fea1)+len(fea2))*2)
+    return mas,mss,reps
+
+def feature_match(loader_val=None,data=None):
+    if loader_val is None:
+        try:
+            with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile:
+                cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
+            print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml"))
+        except:
+            with open(os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"), "r") as ymlfile:
+                cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
+            print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"))
+        cfg = cfg['experiment']
+        _, loader_val, _ = KittiTotalLoader(cfg)
+    sequences=data['sequences']
+    fea_kpt=data['fea_kpt'].cuda()
+    poses=data['pose_query'].cuda()
+    kpts=data['key_points'].cuda()
+    seq=torch.unique(sequences)
+    if __name__ == '__main__':
+        flag = False
+    else:
+        flag = True
+    for i in range(len(seq)):
+        idx=sequences==seq[i]
+        fea_kpt1=fea_kpt[idx]
+        poses1=poses[idx]
+        kpts1=kpts[idx]
+        gt=loader_val.dataset.datasets[i].gt
+        ms=[]
+        for j in tqdm(range(0, len(gt)), disable=flag, ncols=60, desc='feature match'):
+            query=gt[j]['idx']
+            p_idxs=gt[j]['positive_idxs']
+            for p in p_idxs:
+                a,s,reps=match_err(fea_kpt1[query],fea_kpt1[p],poses1[query],poses1[p],kpts1[query],kpts1[p])
+                ms.append(a+s+reps)
+        ms=np.asarray(ms)
+        ms1=np.mean(ms,axis=0)
+        print('Feature matching, sequence %02d'%(seq[i]))
+        print('MA@0.3:%.3f, MA@0.5:%.3f, MA@1:%.3f, MA@2:%.3f, MA@3:%.3f' %(ms1[0], ms1[1], ms1[2], ms1[3], ms1[4]))
+        print('MS@0.3:%.3f, MS@0.5:%.3f, MS@1:%.3f, MS@2:%.3f, MS@3:%.3f' %(ms1[5], ms1[6], ms1[7], ms1[8], ms1[9]))
+        print('RP@0.3:%.3f, RP@0.5:%.3f, RP@1:%.3f, RP@2:%.3f, RP@3:%.3f' %(ms1[10], ms1[11], ms1[12], ms1[13], ms1[14]))
+        time.sleep(1)
+
+        
+
+
+
+
+if __name__ == '__main__':
+    # data = torch.load("/mnt/data2/datasets/cdy/results/FUSIONLCD/05230/database/database_149_b0.pth.tar")
+    # data = torch.load("/mnt/data2/datasets/cdy/results/BEVLCD/ricnn03202/database/database_xyz_000.pth.tar")
+    data= torch.load("/data4/caodanyang/results/FUSIONLCD/bev_07190/database/database_all.pth.tar")
+    feature_match(data=data)
+   
--- a/evaluate_lcd.py
+++ b/evaluate_lcd.py
@@ -0,0 +1,265 @@
+import os
+import time
+import matplotlib
+# set non-interactive backend for server (must be set before pyplot import)
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from skimage.measure import ransac
+from skimage.transform import EuclideanTransform
+import tools
+from tqdm import tqdm
+from sklearn.metrics import auc
+from sklearn.neighbors import KDTree
+import warnings
+
+warnings.filterwarnings("ignore")
+
+
+def recall_with_candidates(vlads, poses, sequence, recall_num=25, positive_distance=4):
+    recall_at_k = [0] * recall_num
+    num_with_loop = 0
+    if __name__ == '__main__':
+        flag = False
+    else:
+        flag = True
+    for i in tqdm(range(0, len(vlads)), disable=flag, ncols=60, desc='Recall@k'):
+        valid_idx = list(set(range(0, len(vlads))) - set(range(max(0, i - 50), min(len(vlads), i + 50))))
+        valid_idx = torch.tensor(valid_idx).to(vlads.device)
+        vlad_query = vlads[i].view(1, -1)
+        vlad_valid = vlads[valid_idx]
+        dis_valid = torch.linalg.norm((poses[i:i + 1, 0:3, 3] - poses[valid_idx, 0:3, 3]), dim=1)
+        min_dis = torch.min(dis_valid)
+        if min_dis > positive_distance:
+            continue
+        num_with_loop = num_with_loop + 1
+        # global feature to query quickly
+        dis_vlad = torch.cdist(vlad_query, vlad_valid).view(-1, )
+        dis, idx_cand = torch.topk(dis_vlad, recall_num, largest=False)
+        idx_cand = valid_idx[idx_cand]
+        for j in range(recall_num):
+            idx_cand1 = idx_cand[j]
+            dis = torch.linalg.norm((poses[i:i + 1, 0:3, 3] - poses[idx_cand1, 0:3, 3]), dim=1)
+            if dis <= positive_distance:
+                recall_at_k[j] = recall_at_k[j] + 1
+                break
+    time.sleep(1)
+    recall_at_k = np.cumsum(recall_at_k) / float(num_with_loop)
+    print('Sequence %02d, Recall@' % sequence, end='')
+    for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 19, 24]:
+        if i == (len(recall_at_k) - 1):
+            print('%d[%.3f]' % (i + 1, recall_at_k[i]))
+        else:
+            print('%d[%.3f]' % (i + 1, recall_at_k[i]), end=', ')
+    return recall_at_k
+
+
+def retrieve(vlads, feas, kpts, poses, num_cand=1, verify='ransac'):
+    loops = []
+    if __name__ == '__main__':
+        flag = False
+    else:
+        flag = True
+    ts = []
+    for i in tqdm(range(0, len(feas)), disable=flag, ncols=60, desc='retrieve loop'):
+        t0 = time.time()
+        valid_idx = list(set(range(0, len(feas))) - set(range(max(0, i - 50), min(len(feas), i + 50))))
+        valid_idx = torch.tensor(valid_idx).to(vlads.device)
+        vlad_query = vlads[i].view(1, -1)
+        vlad_valid = vlads[valid_idx]
+        # global feature to query quickly
+        dis_vlad = torch.cdist(vlad_query, vlad_valid).view(-1, )
+        dis, idx_cand = torch.topk(dis_vlad, num_cand, largest=False)
+        t_retrieve = time.time() - t0
+        idx_cand = valid_idx[idx_cand]
+        # local feature to qverify
+        fea_query = feas[i]
+        if verify == 'ransac':
+            p1 = kpts[i]
+            p1 = p1.cpu().detach().numpy()
+            min_dis = torch.tensor([9999])
+            idx_detect = idx_cand[0]
+            dis_truth = torch.tensor([9999])
+            for idx_cand1 in idx_cand:
+                fea_cand1 = feas[idx_cand1]
+                p2 = kpts[idx_cand1].cpu().detach().numpy()
+                idx1, idx2, dis = tools.nn_match(fea_query, fea_cand1, 'cosine')
+                if len(idx1) < 31:
+                    continue
+                idx1 = idx1.cpu().detach().numpy()
+                idx2 = idx2.cpu().detach().numpy()
+                try:
+                    model, inliers = ransac((p1[idx1, 0:2], p2[idx2, 0:2]), model_class=EuclideanTransform, min_samples=15, max_trials=3, residual_threshold=1)
+                    num_inlier = np.sum(inliers)
+                    # r = model.params[0:2, 0:2]
+                    dis_estimate = np.linalg.norm(model.params[0:2, 2])
+                    # rot = model.rotation
+                    if num_inlier > 30:  # ransac存在足够内点
+                        if min_dis > dis_estimate:
+                            min_dis = dis_estimate
+                            idx_detect = idx_cand1
+                            dis_truth = torch.linalg.norm((poses[i, 0:3, 3] - poses[idx_detect, 0:3, 3]))
+                except:
+                    pass
+            loops.append([i, idx_detect.item(), min_dis.item(), dis_truth.item()])
+
+        else:
+            idx_detect = idx_cand[0]
+            dis_truth = torch.linalg.norm(poses[i, 0:3, 3] - poses[idx_detect, 0:3, 3])
+            loops.append([i, idx_detect.item(), dis[0].item(), dis_truth.item()])
+        t_verify = time.time() - t0 - t_retrieve
+
+        ts.append([t_retrieve, t_verify])
+    #     if loops[-1][2] < 4 and loops[-1][1] < i:
+    #         loop1.append(loops[-1][1])
+    # x = poses[:, 0, 3]
+    # y = poses[:, 1, 3]
+    # x1 = x[loop1]
+    # y1 = y[loop1]
+    # plt.plot(x, y, 'b.', markersize=1)
+    # plt.plot(x1, y1, 'ro', markersize=2, markerfacecolor='none')
+    # plt.axis('equal')
+    # plt.show()
+    ts = np.array(ts) * 1000
+    # np.savetxt('times.txt', ts)
+    # x=np.arange(len(ts))
+    # plt.plot(x,ts[:,0],'b.')
+    # plt.plot(x,ts[:,1],'r.')
+    # plt.show()
+    loops = np.array(loops)
+    return loops
+
+
+def pr_curve(poses, loops, sequence, positive_distance=4):
+    
+    map_tree_poses = KDTree(poses[:, 0:3, 3])
+    reverse_loops = []
+    real_loop = []
+    for i in range(0,len(poses)):
+        min_range = max(0, i - 50)
+        max_range = min(i + 50, poses.shape[0])
+        current_pose = poses[i]
+        indices = map_tree_poses.query_radius(np.expand_dims(current_pose[0:3, 3], 0), positive_distance)
+        valid_idxs = list(set(indices[0]) - set(range(min_range, max_range)))
+        valid_idxs = np.array(valid_idxs)
+        if len(valid_idxs) > 0:
+            # dis = np.linalg.norm(current_pose[0:3, 3]-poses[valid_idxs,0:3,3],axis=1)
+            real_loop.append(1)
+            r0 = poses[i, :3, :3]
+            rs = poses[valid_idxs, :3, :3]
+            dr = np.linalg.inv(r0) @ rs.swapaxes(0, 2)
+            angle = np.arccos(np.clip((np.trace(dr) - 1) / 2, -1, 1))
+            angle = angle * 180 / np.pi
+            if np.min(angle) > 90:
+                reverse_loops.append(1)
+            else:
+                reverse_loops.append(0)
+        else:
+            real_loop.append(0)
+            reverse_loops.append(0)
+    reverse_loops = np.array(reverse_loops)
+    real_loop = np.array(real_loop)
+    # loops=np.hstack((loops,real_loop.reshape(-1,1)))
+    # np.savetxt('loops_bev%02d.txt'%sequence,loops,fmt='%.6f')
+    # print('sequence %d, %d frames, %d loops, %d reverse loops' % (sequence,len(real_loop), np.sum(real_loop), np.sum(reverse_loops)))
+    # # return 0
+    distances = loops[:, 3]
+    detected_loop = loops[:, 2]
+    precision2 = [1]
+    recall2 = [0]
+    for thr in np.unique(detected_loop):
+        tp = detected_loop <= thr
+        tp = tp & real_loop
+        tp = tp & (distances <= positive_distance)
+        tp = tp.sum()
+        fp = (detected_loop <= thr).sum() - tp
+        fn = (real_loop.sum()) - tp
+        if (tp + fp) > 0.:
+            precision2.append(tp / (tp + fp))
+        else:
+            precision2.append(1.)
+
+        recall2.append(tp / (tp + fn))
+    f1s = []
+    for i in range(len(recall2)):
+        f1s.append((2 * precision2[i] * recall2[i]) / (precision2[i] + recall2[i]))
+    f1 = max(f1s)
+    recall_p1 = np.max(np.array(recall2)[np.array(precision2) == 1])
+    # plt.plot(recall2, precision2, 'b-')
+    # plt.show()
+    pr = np.array(precision2 + recall2).reshape(2, -1).T
+    # np.save('fusion_pr_%02d.npy' % sequence, pr)
+    ap = auc(recall2, precision2)
+    idx=loops[:,2]<9999
+    loops1=loops[idx]
+    rp=np.sum(np.abs(loops1[:,2]-loops1[:,3])<2)/len(loops1)
+
+    print('Sequence %02d, AP %.3f, Recall@100 %.3f, F1 %.3f, RP %.3f/%d' % (sequence, ap, recall_p1, f1, rp, len(loops1)))
+    # if ap<0.1:
+    #     exit()
+
+    # --- save PR curve to file (for server usage) ---
+    try:
+        out_dir = os.path.join('/home/adlab36/chenyouyuan/FUSIONLCD', 'result', 'plots')
+        os.makedirs(out_dir, exist_ok=True)
+        plt.figure(figsize=(6, 5))
+        plt.plot(recall2, precision2, 'b-', marker='o', linewidth=2)
+        plt.xlabel('Recall')
+        plt.ylabel('Precision')
+        plt.title(f'Sequence {int(sequence):02d} PR (AP={ap:.3f})')
+        plt.grid(True, linestyle='--', alpha=0.4)
+        fname = os.path.join(out_dir, f'pr_sequence_{int(sequence):02d}.png')
+        plt.tight_layout()
+        plt.savefig(fname, dpi=150)
+        plt.close()
+    except Exception as e:
+        # 如果保存失败也不要阻塞主流程
+        print(f'Warning: failed to save PR plot for sequence {sequence}: {e}')
+
+    return ap, recall_p1, f1
+
+
+def lcd(data):
+    vlads = data['vlads'].cuda()
+    kpts = data['key_points']
+    sequences = data['sequences']
+    poses = data['pose_query'].cuda()
+    feas = data['fea_kpt'].cuda()
+    # feas = feas / torch.sqrt(torch.sum(feas ** 2, -1, keepdim=True) + 1e-8)
+    result = []
+    recall_at_ks = []
+    recall_at_k=[]
+    for s in torch.unique(sequences):
+        # if s==54:
+        #     continue
+        mask = sequences == s
+
+        vlads1 = vlads[mask]
+        feas1 = feas[mask]
+        kpts1 = kpts[mask]
+        poses1 = poses[mask]
+        poses2 = poses1.cpu().detach().numpy()
+        # recall_at_k = recall_with_candidates(vlads1, poses1, s)
+        # idx=np.arange(len(vlads1)//2)
+        # idx=np.tile(idx, 2)
+        # vlads1, feas1, kpts1, poses1 =vlads1[idx], feas1[idx], kpts1[idx], poses1[idx]
+        loops = retrieve(vlads1, feas1, kpts1, poses1, 1, 'ransac')
+        ap, recall_p1, f1 = pr_curve(poses2, loops, s, 4)
+        recall_at_ks.append(recall_at_k)
+        result.append([ap, recall_p1, f1])
+        
+    return result, recall_at_ks
+
+
+if __name__ == '__main__':
+    np.random.seed(123)
+    # data = torch.load('/data4/caodanyang/results/FUSIONLCD/07030/database/database_bev.pth.tar')
+    # lcd(data)
+    print('----------------------------------------------------------------------')
+    data= torch.load('/home/adlab36/chenyouyuan/FUSIONLCD/result/log/database/database_bevp.pth.tar')
+    lcd(data)
+    print('----------------------------------------------------------------------')
+    # data=torch.load('/data4/caodanyang/results/FUSIONLCD/07030/database/database_fusion.pth.tar')
+    # lcd(data)
+
--- a/evaluate_pose.py
+++ b/evaluate_pose.py
@@ -0,0 +1,191 @@
+import math
+import os
+import time
+import warnings
+
+import numpy as np
+import torch
+import yaml
+from skimage.measure import ransac
+from skimage.transform import EuclideanTransform
+from tqdm import tqdm
+
+import net
+import tools
+from dataset import KittiTotalLoader
+
+warnings.filterwarnings("ignore")
+
+
+def npto_XYZRPY(rotmatrix):
+    '''
+    Usa mathutils per trasformare una matrice di trasformazione omogenea in xyzrpy
+    https://docs.blender.org/api/master/mathutils.html#
+    WARNING: funziona in 32bits quando le variabili numpy sono a 64 bit
+
+    :param rotmatrix: np array
+    :return: np array with the xyzrpy
+    '''
+
+    # qui sotto corrisponde a
+    # quat2eul([ 0.997785  -0.0381564  0.0358964  0.041007 ],'XYZ')
+    roll = math.atan2(-rotmatrix[1, 2], rotmatrix[2, 2])
+    pitch = math.asin(rotmatrix[0, 2])
+    yaw = math.atan2(-rotmatrix[0, 1], rotmatrix[0, 0])
+    x = rotmatrix[:3, 3][0]
+    y = rotmatrix[:3, 3][1]
+    z = rotmatrix[:3, 3][2]
+
+    return np.array([x, y, z, roll, pitch, yaw])
+
+
+def yt_error(pose1, pose2):
+    distance = np.linalg.norm((pose1 - pose2)[0:3, 3])
+    yaw1 = npto_XYZRPY(pose1)[-1]
+    yaw2 = npto_XYZRPY(pose2)[-1]
+    yaw1 = yaw1 % (2 * np.pi)
+    yaw2 = yaw2 % (2 * np.pi)
+    dyaw = abs(yaw1 - yaw2) % (2 * np.pi)
+    angle = dyaw * 180 / np.pi
+    if angle > 180:
+        angle = 360 - angle
+    return distance, angle
+
+
+def rt_error(pose1, pose2):
+    r0 = pose1[:3, :3]
+    r1 = pose2[:3, :3]
+    dr = np.linalg.inv(r0) @ r1
+    angle = np.arccos(np.clip((np.trace(dr) - 1) / 2, -1, 1)) * 180 / np.pi
+    distance = np.sqrt(np.sum(np.square(pose1[:3, -1] - pose2[:3, -1])))
+    return distance, angle
+
+
+def pose_err(vlads=None, dataset=None, positive_distance=4., kpts=None, feas=None, model=None, num_cand=10):
+    poses = dataset.poses
+    error_ransac = []
+    error_uot = []
+    gt = dataset.gt
+    pairs = []
+    num_reverse = 0
+    for i in range(len(gt)):
+        sample = gt[i]
+        idx_query = sample['idx']
+        positive_idxs = sample['positive_idxs']
+        for j in range(len(positive_idxs)):
+            idx_positive = positive_idxs[j]
+            if idx_query < idx_positive:
+                pairs.append([idx_query, idx_positive])
+                _, r = yt_error(poses[idx_query], poses[idx_positive])
+                if r > 90:
+                    num_reverse = num_reverse + 1
+    # print('Total %d frames, %d pair of loops, %d[%.3f] reverse'%(len(poses),len(pairs),num_reverse,num_reverse/len(pairs)))
+    cnt_ransca = 0
+    pairs = np.asarray(pairs)
+    idx = np.argsort(pairs[:, 1])
+    pairs = pairs[idx]
+    times_uot=[]
+    times_ransac=[]
+    for i in tqdm(range(len(pairs)),ncols=60):
+        fea_query = feas[pairs[i][0]]
+        p1 = kpts[pairs[i][0]].cpu().detach().numpy()
+        fea_cand = feas[pairs[i][1]]
+        p2 = kpts[pairs[i][1]].cpu().detach().numpy()
+        pose_to_frame = np.matmul(np.linalg.inv(poses[pairs[i][1]]), poses[pairs[i][0]])
+
+        st_ransac=time.time()
+        idx1, idx2, dis = tools.nn_match(fea_query, fea_cand, 'cosine')
+        ransac_flag=False
+        if len(idx1) >= 20:
+            idx1 = idx1.cpu().detach().numpy()
+            idx2 = idx2.cpu().detach().numpy()
+            try:
+                result_ransac, inliers = ransac((p1[idx1, 0:3], p2[idx2, 0:3]), model_class=EuclideanTransform, min_samples=15, max_trials=3, residual_threshold=1.7)
+                num_inlier = np.sum(inliers)
+                if num_inlier > 30:  # ransac存在一致的点且无缩
+                    cnt_ransca = cnt_ransca + 1
+                    error_ransac.append(yt_error(pose_to_frame, result_ransac.params))
+                    ransac_flag=True
+            except:
+                pass
+        # if not ransac_flag:
+        #     try:
+        #         result_ransac, inliers = ransac((p1[idx1, 0:3], p2[idx2, 0:3]), model_class=EuclideanTransform,min_samples=5,max_trials=10,residual_threshold=5)
+        #         num_inlier = np.sum(inliers)
+        #         cnt_ransca = cnt_ransca + 1
+        #         error_ransac.append(yt_error(pose_to_frame, result_ransac.params))
+        #     except:
+        #         pass
+        times_ransac.append(time.time()-st_ransac)
+
+        fea1 = feas[pairs[i], :, :].to(model.epsilon.device).permute(0, 2, 1)
+        kpts1 = kpts[pairs[i], :, :].to(model.epsilon.device)
+        bd = {'fea_kpt': fea1, 'key_points': kpts1}
+        st_uot=time.time()
+        bd = model(bd)
+        times_uot.append(time.time()-st_uot)
+        pose_estimate1 = bd['transformation'].squeeze(0).cpu().detach().numpy()
+        pose_estimate1 = np.vstack((pose_estimate1, [0, 0, 0, 1]))
+        error_uot.append(yt_error(pose_to_frame, pose_estimate1))
+
+    ransac_rate = cnt_ransca / len(pairs)
+    error_ransac = np.asarray(error_ransac)
+    error_uot = np.asarray(error_uot)
+    # np.save('error_ransac_%02d.npy' % dataset.sequence, error_ransac)
+    # np.save('error_uot_%02d.npy' % dataset.sequence, error_uot)
+    et_ransac = np.mean(error_ransac[:, 0])
+    er_ransac = np.mean(error_ransac[:, 1])
+    et_uot = np.mean(error_uot[:, 0])
+    er_uot = np.mean(error_uot[:, 1])
+    print('uot time: ',np.array(times_uot).mean())
+    print('ransac time: ',np.array(times_ransac).mean())
+    return et_ransac, er_ransac, et_uot, er_uot, ransac_rate
+
+
+def estimate_pose(database):
+    try:
+        with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile:
+            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
+        print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml"))
+    except:
+        with open(os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"), "r") as ymlfile:
+            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
+        print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"))
+    cfg = cfg['experiment']
+    path_result = cfg['path_result']
+    _, _, loader_test = KittiTotalLoader(cfg)
+    model = net.Fusion(cfg)
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = model.to(device)
+    checkpoint = torch.load(tools.path_join(path_result, 'models', cfg['last_model']))
+    model.load_state_dict(checkpoint['model'])
+    uot = model.uot
+    # uot = model.bev.uot
+
+    vlads = database['vlads']
+    key_points = database['key_points']
+    fea_kpt = database['fea_kpt']
+    sequences = database['sequences']
+    print()
+    print('****************************************************************************')
+    end = 0
+    for i in range(len(loader_test.dataset.datasets)):
+        start = end
+        end = start + len(loader_test.dataset.datasets[i])
+        # end=0
+        # start=0
+        et_ransac, er_ransac, et_uot, er_uot, rate = pose_err(vlads=vlads[start:end], dataset=loader_test.dataset.datasets[i],
+                                                              kpts=key_points[start:end], feas=fea_kpt[start:end], model=uot)
+        print('Sequence %02d' % (torch.unique(sequences)[i]))
+        print('ransac rate:%.4f, translation error:%.4f[m], rototion error:%.4f[deg]' % (rate, float(et_ransac), float(er_ransac)))
+        print('uot translation error:%.4f[m], uot rototion error:%.4f[deg],' % (float(et_uot), float(er_uot)))
+    print('****************************************************************************')
+
+
+if __name__ == '__main__':
+    # CUDA_VISIBLE_DEVICES=5 python evaluate_pose.py
+    # CUDA_VISIBLE_DEVICES=2 nohup python -u evaluate_pose.py >03090.log 2>&1 &
+    # fuser /dev/nvidia*
+    database = torch.load('/data4/caodanyang/results/FUSIONLCD/07030/database/database_fusion.pth.tar')
+    estimate_pose(database)
--- a/loop_gt.py
+++ b/loop_gt.py
@@ -0,0 +1,130 @@
+import argparse
+import torch
+from torch.utils.data import Dataset
+import pykitti
+import os
+from sklearn.neighbors import KDTree
+import pickle
+import numpy as np
+skip_frame=50
+
+class KITTILoader3DPosesOnlyLoopPositives(Dataset):
+
+    def __init__(self, dir, sequence, poses, positive_range=5., negative_range=25., hard_range=None):
+        super(KITTILoader3DPosesOnlyLoopPositives, self).__init__()
+
+        self.positive_range = positive_range
+        self.negative_range = negative_range
+        self.hard_range = hard_range
+        self.dir = dir
+        self.sequence = sequence
+
+        if int(sequence) > 21:
+            self.poses = np.load(poses)
+        else:
+            calib = np.genfromtxt(os.path.join(dir, 'sequences', sequence, 'calib.txt'))[:, 1:]
+            T_cam_velo = np.reshape(calib[4], (3, 4))
+            T_cam_velo = np.vstack([T_cam_velo, [0, 0, 0, 1]])
+            poses2 = []
+            with open(poses, 'r') as f:
+                for x in f:
+                    x = x.strip().split()
+                    x = [float(v) for v in x]
+                    pose = np.zeros((4, 4))
+                    pose[0, 0:4] = np.array(x[0:4])
+                    pose[1, 0:4] = np.array(x[4:8])
+                    pose[2, 0:4] = np.array(x[8:12])
+                    pose[3, 3] = 1.0
+                    pose = np.linalg.inv(T_cam_velo) @ (pose @ T_cam_velo)
+                    poses2.append(pose)
+            self.poses = np.stack(poses2)
+        self.kdtree = KDTree(self.poses[:, :3, 3])
+
+    def __len__(self):
+        return len(self.poses)
+
+    def __getitem__(self, idx):
+
+        x = self.poses[idx, 0, 3]
+        y = self.poses[idx, 1, 3]
+        z = self.poses[idx, 2, 3]
+        r0 = self.poses[idx, :3, :3]
+        rs = self.poses[:, :3, :3]
+        dr = np.linalg.inv(r0) @ rs.swapaxes(0, 2)
+        angle = np.arccos(np.clip((np.trace(dr) - 1) / 2, -1, 1))
+        angle = angle * 180 / np.pi
+
+        idx_angle = np.where(angle < 99999)[0]
+        anchor_pose = torch.tensor([x, y, z])
+
+        indices = self.kdtree.query_radius(anchor_pose.unsqueeze(0).numpy(), self.positive_range, sort_results=True, return_distance=True)
+        indices = [indices[0][0], indices[1][0]]
+        min_range = max(0, idx - skip_frame)
+        max_range = min(idx + skip_frame, len(self.poses))
+        positive_idxs = list(set(indices[0]) & set(idx_angle) - set(range(min_range, max_range)))
+        loop_angle = angle[positive_idxs]
+        reverse = 0
+        if len(loop_angle) > 0:
+            reverse=np.sum(loop_angle>90)
+            if min(loop_angle) > 90:
+                reverse = -1*reverse
+        positive_idxs.sort()
+        num_loop = len(positive_idxs)
+
+        indices = self.kdtree.query_radius(anchor_pose.unsqueeze(0).numpy(), self.negative_range)
+        indices = set(indices[0])
+        negative_idxs = set(range(len(self.poses))) - indices
+        negative_idxs = list(negative_idxs)
+        negative_idxs.sort()
+
+        hard_idxs = None
+        if self.hard_range is not None:
+            inner_indices = self.kdtree.query_radius(anchor_pose.unsqueeze(0).numpy(), self.hard_range[0])
+            outer_indices = self.kdtree.query_radius(anchor_pose.unsqueeze(0).numpy(), self.hard_range[1])
+            hard_idxs = set(outer_indices[0]) - set(inner_indices[0])
+            pass
+
+        return num_loop, positive_idxs, negative_idxs, hard_idxs, reverse
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--root_folder', default='/home/adlab36/chenyouyuan/FUSIONLCD',
+                        help='dataset directory')
+    args = parser.parse_args()
+    base_dir = args.root_folder
+    for sequence in ['00', '05', '06', '07', '08', '09', '50', '54', '55', '56', '59']:#'00', '05', '06', '07', '08', '09', '50', '54', '55', '56', '59', '120205','130405'
+        if int(sequence) < 50:
+            poses_file = base_dir + "/sequences/" + sequence + "/poses.txt"
+        elif int(sequence)<100:
+            poses_file = base_dir + "/sequences/" + sequence + "/poses.npy"
+        else:
+            pass
+            
+        
+        dataset = KITTILoader3DPosesOnlyLoopPositives(base_dir, sequence, poses_file, 4, 15, [8, 15])
+        lc_gt = []
+        lc_gt_file = os.path.join(base_dir, 'sequences', sequence, 'loop_GT_4m.pickle')
+        loop_pairs = []
+        loop_files = []
+        for i in range(len(dataset)):
+            sample, pos, neg, hard, reverse = dataset[i]
+            if sample > 0.:
+                loop_files.append([i, reverse])
+                sample_dict = {}
+                sample_dict['idx'] = i
+                sample_dict['positive_idxs'] = pos
+                for p in pos:
+                    if i < p:
+                        loop_pairs.append([i, p])
+                # sample_dict['negative_idxs'] = neg
+                # sample_dict['hard_idxs'] = hard
+                lc_gt.append(sample_dict)
+        loop_files = np.array(loop_files)
+        num_reverse_file = int(np.sum(loop_files[:, 1]<0))
+        num_reverse_pairs = int(np.sum(np.abs(loop_files[:, 1])))/2
+        with open(lc_gt_file, 'wb') as f:
+            pickle.dump(lc_gt, f)
+        print('Sequence %02d done,%05d files, %05d files with loop, %05d[%.4f] files only has reverse loop, %05d loop pairs, %05d[%.4f] reverse loop' %
+              (int(sequence), len(dataset), len(loop_files), num_reverse_file, num_reverse_file / len(loop_files), len(loop_pairs),num_reverse_pairs,num_reverse_pairs/len(loop_pairs)))
--- a/loss.py
+++ b/loss.py
@@ -0,0 +1,340 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from pytorch_metric_learning import distances
+
+
+def tr_loss(batch_dict,key):
+    loss1 = (batch_dict[key][:, 0:3, 3] -
+             batch_dict['pose_to_frame'][:, 0:3, 3]).norm(dim=1).mean()
+    loss2 = (torch.acos(torch.clip(batch_dict[key][:, 0, 0].view(-1, 1), -1, 1)) -
+             torch.acos(torch.clip(batch_dict['pose_to_frame'][:, 0, 0].view(-1, 1), -1, 1))).norm(dim=1).mean() / 3.1415 * 180
+    return loss1, loss2
+
+
+def gen_points_loss(batch_dict):
+    key_points_gen = batch_dict['key_points_gen']
+    key_points = batch_dict['key_points']
+    key_points_gen1 = torch.cat((key_points_gen, key_points_gen * 0), dim=2)
+    key_points_gen1[:, :, 3] = 1
+    # pose_query=batch_dict['pose_query']
+    # pose_positive=batch_dict['pose_positive']
+    # poses=torch.cat((pose_query,pose_positive),dim=0)
+    # key_points_gen2=torch.bmm(poses,key_points_gen1.permute(0,2,1)).permute(0,2,1)
+    # key_points2=torch.bmm(poses,key_points.permute(0,2,1)).permute(0,2,1)
+    # loss_gpo=(key_points_gen2[:,:,:2]-key_points2[:,:,:2]).norm(p=1,dim=2).mean()
+    pose_to_frame = batch_dict['pose_to_frame']
+    B = pose_to_frame.shape[0]
+    src_pts = key_points[:B]
+    tgt_pts = key_points[B:]
+    src_pts_gen = key_points_gen1[:B]
+    tgt_pts_gen = key_points_gen1[B:]
+    srcs = torch.cat((src_pts, src_pts_gen), dim=0)
+    tgts = torch.cat((tgt_pts_gen, tgt_pts), dim=0)
+    pose_to_frame1 = torch.cat((pose_to_frame, pose_to_frame), dim=0)
+    srcs1 = torch.bmm(pose_to_frame1, srcs.permute(0, 2, 1)).permute(0, 2, 1)
+    loss = torch.mean(torch.abs(srcs1[:, :, :2] - tgts[:, :, :2]))
+    return loss
+
+
+def rand_dis(x, y):
+    assert len(x.shape)==2 and len(y.shape)==2,'x and y must be 2 dim'
+    N, N = x.size()
+    ids=torch.arange(N).to(x.device)
+    idx = ids.view(1, N).repeat(N, 1)
+    mask = ~(idx == idx.transpose(0, 1))
+    idx1 = idx[mask].view(N, N - 1)
+    random_indices = torch.randint(N - 1, size=(N,)).to(x.device)
+    rand_idx = torch.gather(idx1, 1, random_indices.view(-1, 1))
+    rand_idx1 = torch.cat([ids.view(N, 1), rand_idx], dim=1)
+    diag = ids.view(N, 1).repeat(1, 2)
+    x1 = x[rand_idx1[:, 0], rand_idx1[:, 1]]
+    x2 = x1*0
+    x3 = torch.cat([x1.view(N, 1), x2.view(N, 1)], dim=1)
+    y1 = y[rand_idx1[:, 0], rand_idx1[:, 1]]
+    y2 = y[diag[:, 0], diag[:, 1]]
+    y3 = torch.cat([y1.view(N, 1), y2.view(N, 1)], dim=1)
+    dis=torch.abs(x3-y3).mean()+F.relu(0.2-torch.abs(y1)).mean()
+    return dis
+
+
+def gen_feature_loss(batch_dict):
+    #BCN
+    fea_pt_dual_gen = batch_dict['fea_pt_dual_gen']
+    fea_pl_dual_gen = batch_dict['fea_pl_dual_gen']
+    fea_kpt_original_gen = batch_dict['fea_kpt_original_gen']
+    # fea_kpt_gen_gen=batch_dict['fea_kpt_gen_gen']
+    fea_pt_dual = batch_dict['fea_pt_dual']
+    fea_pl_dual = batch_dict['fea_pl_dual']
+    fea_kpt_original = batch_dict['fea_kpt_original']
+    # fea_pt_dual = batch_dict['fea_pt_dual'].detach()
+    # fea_pl_dual = batch_dict['fea_pl_dual'].detach()
+    # fea_kpt_original = batch_dict['fea_kpt_original'].detach()
+
+    b = fea_pl_dual.shape[0]
+    loss0 = 0
+    loss1 = 0
+    loss2 = 0
+    loss3 = 0
+    relation = batch_dict['relation']
+    nums=0
+    for i in range(b):
+        cnt = torch.sum((relation[i, :, -1, 0] > 0) & (relation[i, :, -1, 1] > 0))
+        nums+=cnt
+        fea_pt_dual1 = fea_pt_dual[i, :, :cnt]  # 匹配点云特征，CN
+        fea_pt_dual_gen1 = fea_pt_dual_gen[i, :, :cnt]  # 匹配点云特征，生成于图像
+        fea_pl_dual1 = fea_pl_dual[i, :, :cnt]  # 匹配图像特征
+        fea_pl_dual_gen1 = fea_pl_dual_gen[i, :, :cnt]  # 匹配图像特征，生成于点云
+
+        # loss0 = loss0 + torch.abs(fea_pt_dual1 - fea_pt_dual_gen1).mean()
+        loss0 = loss0 + (1 - F.cosine_similarity(fea_pt_dual1,fea_pt_dual_gen1,dim=0)).mean()
+        # loss0 = loss0 + F.mse_loss(fea_pt_dual1, fea_pt_dual_gen1)
+        # loss0 = loss0 + ((fea_pt_dual1 - fea_pt_dual_gen1).norm(p=2, dim=0)).mean()
+        # sims00=tools.batch_distance(fea_pt_dual1.unsqueeze(0).permute(0,2,1),fea_pt_dual1.unsqueeze(0).permute(0,2,1),'cosine')
+        # sims01=tools.batch_distance(fea_pt_dual_gen1.unsqueeze(0).permute(0,2,1),fea_pt_dual_gen1.unsqueeze(0).permute(0,2,1),'cosine')
+        # loss0 = loss0 + torch.abs(sims00-sims01).mean()
+        
+        # loss1 = loss1 + torch.abs(fea_pl_dual1 - fea_pl_dual_gen1).mean()
+        loss1 = loss1 + (1 - F.cosine_similarity(fea_pl_dual1,fea_pl_dual_gen1,dim=0)).mean()
+        # loss1 = loss1 + F.mse_loss(fea_pl_dual1, fea_pl_dual_gen1)
+        # loss1 = loss1 + ((fea_pl_dual1 - fea_pl_dual_gen1).norm(p=2, dim=0)).mean()
+        # sims10=tools.batch_distance(fea_pl_dual1.unsqueeze(0).permute(0,2,1),fea_pl_dual1.unsqueeze(0).permute(0,2,1),'cosine')
+        # sims11=tools.batch_distance(fea_pl_dual_gen1.unsqueeze(0).permute(0,2,1),fea_pl_dual_gen1.unsqueeze(0).permute(0,2,1),'cosine')
+        # loss1 = loss1 + torch.abs(sims10-sims11).mean()
+
+        #全景特征生成模块损失计算
+        # loss2 = loss2 + torch.abs(fea_kpt_original[i] - fea_kpt_original_gen[i]).mean()
+        loss2= loss2 + (1-F.cosine_similarity(fea_kpt_original[i], fea_kpt_original_gen[i],dim=0)).mean()
+        # loss2 = loss2 +  F.mse_loss(fea_kpt_original[i], fea_kpt_original_gen[i])
+        # loss2 = loss2 + ((fea_kpt_original[i] - fea_kpt_original_gen[i]).norm(p=2, dim=0)).mean()
+        # sims20=tools.batch_distance(fea_kpt_original[i:i+1].permute(0,2,1),fea_kpt_original[i:i+1].permute(0,2,1),'cosine')
+        # sims21=tools.batch_distance(fea_kpt_original_gen[i:i+1].permute(0,2,1),fea_kpt_original_gen[i:i+1].permute(0,2,1),'cosine')
+        # loss2 = loss2 + torch.abs(sims20-sims21).mean()
+    loss0 = loss0 / b
+    loss1 = loss1 / b
+    loss2 = loss2 / b
+    return loss0, loss1, loss2, loss3
+
+
+def sinkhorn_matches_loss(batch_dict,key):
+    project_kpts = batch_dict[key]  # calculated from corrspondence of kpts
+    src_coords = batch_dict['key_points']
+    pose_to_frame = batch_dict['pose_to_frame']
+    src_coords = src_coords.clone().view(batch_dict['batch_size'], -1, 4)
+    B, N_POINT, _ = src_coords.shape
+    B = B // 2
+    src_coords = src_coords[:B, :, [0, 1, 2, 3]]
+    src_coords[:, :, -1] = 1.
+    gt_dst_coords = torch.bmm(pose_to_frame, src_coords.permute(0, 2, 1))  # True project kpts
+    gt_dst_coords = gt_dst_coords.permute(0, 2, 1)[:, :, :3]
+    loss = (gt_dst_coords - project_kpts).norm(dim=2).mean()
+    return loss
+
+
+
+def score_loss(batch_dict):
+    score = batch_dict['score_bev']
+    label_score = batch_dict['label_score']
+    label_score = torch.cat([label_score[:, :, :, 0], label_score[:, :, :, 1]], dim=0)
+    mask1 = score > 1e-8
+    # mask2 = label_score > 1e-8
+    # mask = mask1 | mask2
+    score = score[mask1]
+    label_score = label_score[mask1]
+    loss = nn.functional.mse_loss(score, label_score)
+
+    return loss
+
+
+def pose_loss(batch_dict,key):
+    src_coords = batch_dict['key_points']
+    src_coords = src_coords.clone().view(batch_dict['batch_size'], -1, 4)
+    delta_pose = batch_dict['pose_to_frame']
+    B, N_POINT, _ = src_coords.shape
+    B = B // 2
+    src_coords = src_coords[:B]
+    gt_dst_coords = torch.bmm(delta_pose, src_coords.permute(0, 2, 1)).float()
+    gt_dst_coords = gt_dst_coords.permute(0, 2, 1)[:, :, :3]
+    
+    transformation = batch_dict[key]
+    pred_dst_coords = torch.bmm(transformation, src_coords.permute(0, 2, 1))
+    pred_dst_coords = pred_dst_coords.permute(0, 2, 1)[:, :, :3]
+    loss = torch.mean(torch.abs(pred_dst_coords - gt_dst_coords))
+    return loss
+
+
+def get_all_triplets(dist_mat, pos_mask, neg_mask, is_inverted=False, margin=0.5, different_embedding=False):
+    if not different_embedding:
+        pos_mask = torch.triu(pos_mask, 1)
+    triplets = pos_mask.unsqueeze(2) * neg_mask.unsqueeze(1)
+    return torch.where(triplets)
+
+
+def hardest_negative_selector(dist_mat, pos_mask, neg_mask, is_inverted, margin=0.5, different_embedding=False):
+    if not different_embedding:
+        pos_mask = torch.triu(pos_mask, 1)
+    a, p = torch.where(pos_mask)
+    if neg_mask.sum() == 0:
+        return a, p, None
+    if is_inverted:
+        dist_neg = dist_mat * neg_mask
+        n = torch.max(dist_neg, dim=1)
+    else:
+        dist_neg = dist_mat.clone()
+        dist_neg[~neg_mask] = dist_neg.max() + 1.
+        _, n = torch.min(dist_neg, dim=1)
+    n = n[a]
+    return a, p, n
+
+
+def random_negative_selector(dist_mat, pos_mask, neg_mask, is_inverted, margin=0.5, different_embedding=False):
+    if not different_embedding:
+        pos_mask = torch.triu(pos_mask, 1)
+    a, p = torch.where(pos_mask)
+    selected_negs = []
+    for i in range(a.shape[0]):
+        possible_negs = torch.where(neg_mask[a[i]])[0]
+        if len(possible_negs) == 0:
+            return a, p, None
+
+        dist_neg = dist_mat[a[i], possible_negs]
+        if is_inverted:
+            curr_loss = -dist_mat[a[i], p[i]] + dist_neg + margin
+        else:
+            curr_loss = dist_mat[a[i], p[i]] - dist_neg + margin
+
+        if len(possible_negs[curr_loss > 0]) > 0:
+            possible_negs = possible_negs[curr_loss > 0]
+        random_neg = np.random.choice(possible_negs.cpu().numpy())
+        selected_negs.append(random_neg)
+    n = torch.tensor(selected_negs, dtype=a.dtype, device=a.device)
+    return a, p, n
+
+
+def semihard_negative_selector(dist_mat, pos_mask, neg_mask, is_inverted, margin=0.5, different_embedding=False):
+    if not different_embedding:
+        pos_mask = torch.triu(pos_mask, 1)
+    a, p = torch.where(pos_mask)
+    selected_negs = []
+    for i in range(a.shape[0]):
+        possible_negs = torch.where(neg_mask[a[i]])[0]
+        if len(possible_negs) == 0:
+            return a, p, None
+
+        dist_neg = dist_mat[a[i], possible_negs]
+        if is_inverted:
+            curr_loss = -dist_mat[a[i], p[i]] + dist_neg + margin
+        else:
+            curr_loss = dist_mat[a[i], p[i]] - dist_neg + margin
+
+        semihard_idxs = (curr_loss > 0) & (curr_loss < margin)
+        if len(possible_negs[semihard_idxs]) > 0:
+            possible_negs = possible_negs[semihard_idxs]
+        random_neg = np.random.choice(possible_negs.cpu().numpy())
+        selected_negs.append(random_neg)
+    n = torch.tensor(selected_negs, dtype=a.dtype, device=a.device)
+    return a, p, n
+
+
+class TripletLoss(nn.Module):
+    def __init__(self, margin: float, triplet_selector, distance: distances.BaseDistance):
+        super(TripletLoss, self).__init__()
+        self.margin = margin
+        self.triplet_selector = triplet_selector
+        self.distance = distance
+
+    def forward(self, embeddings, pos_mask, neg_mask, other_embeddings=None):
+        if other_embeddings is None:
+            other_embeddings = embeddings
+        dist_mat = self.distance(embeddings, other_embeddings)
+        triplets = self.triplet_selector(
+            dist_mat, pos_mask, neg_mask, self.distance.is_inverted)
+        distance_positive = dist_mat[triplets[0], triplets[1]]
+        if triplets[-1] is None:
+            if self.distance.is_inverted:
+                return F.relu(1 - distance_positive).mean()
+            else:
+                return F.relu(distance_positive).mean()
+        distance_negative = dist_mat[triplets[0], triplets[2]]
+        curr_margin = self.distance.margin(
+            distance_positive, distance_negative)
+        loss = F.relu(curr_margin + self.margin)
+        return loss.mean()
+
+
+def _pairwise_distance(x, squared=False, eps=1e-16):
+    # Compute the 2D matrix of distances between all the embeddings.
+
+    cor_mat = torch.matmul(x, x.t())
+    norm_mat = cor_mat.diag()
+    distances = norm_mat.unsqueeze(1) - 2 * cor_mat + norm_mat.unsqueeze(0)
+    distances = F.relu(distances)
+
+    if not squared:
+        mask = torch.eq(distances, 0.0).float()
+        distances = distances + mask * eps
+        distances = torch.sqrt(distances)
+        distances = distances * (1.0 - mask)
+
+    return distances
+
+
+class TotalLoss(nn.Module):
+    def __init__(self, cfg):
+        super(TotalLoss, self).__init__()
+        if 'hardest' == cfg['negetative_selsector']:
+            neg_selector = hardest_negative_selector
+        elif 'semihard' == cfg['negetative_selsector']:
+            neg_selector = semihard_negative_selector
+        else:
+            neg_selector = random_negative_selector
+        self.trip_fun = TripletLoss(margin=cfg['trip_margin'], triplet_selector=neg_selector, distance=distances.LpDistance())
+        self.negetative_distcance = 50
+
+    def forward(self, batch_dict):
+        l_pose=l_score=l_match=l_tra=l_rot=l_gb=l_gi=l_gpa=l_gpo=l_kpl = 0
+        if 'key_points' in batch_dict.keys():
+            l_score = score_loss(batch_dict)
+        l_match1,l_pose1,l_match2,l_pose2,l_tra1,l_rot1,l_tra2,l_rot2=0,0,0,0,0,0,0,0
+        
+        if 'transformation_original' in batch_dict.keys():
+            l_match1 = sinkhorn_matches_loss(batch_dict,'project_kpts_original')
+            l_tra1, l_rot1 = tr_loss(batch_dict,'transformation_original')
+            l_pose1 = pose_loss(batch_dict,'transformation_original')
+        if  'transformation_fusion' in batch_dict.keys():
+            l_match2 = sinkhorn_matches_loss(batch_dict,'project_kpts_fusion')
+            l_tra2, l_rot2 = tr_loss(batch_dict,'transformation_fusion')
+            l_pose2 = pose_loss(batch_dict,'transformation_fusion')
+        cnt=1
+        if min(l_rot1,l_rot2)>0:
+            cnt=2
+        l_match=(l_match1+l_match2)/cnt
+        l_pose=(l_pose1+l_pose2)/cnt
+        l_tra=(l_tra1+l_tra2)/cnt
+        l_rot=(l_rot1+l_rot2)/cnt
+        if ('fea_pt_dual_gen' in batch_dict.keys()) or ('fea_pl_dual_gen' in batch_dict.keys()):
+            l_gb, l_gi, l_gpa,l_kpl = gen_feature_loss(batch_dict)
+        if 'key_points_gen' in batch_dict.keys():
+            l_gpo = gen_points_loss(batch_dict)
+        if 'sequence' in batch_dict:
+            neg_mask = batch_dict['sequence'].view(1, -1) != batch_dict['sequence'].view(-1, 1)
+        else:
+            neg_mask = torch.zeros((batch_dict['pose_query'].shape[0] * 2, batch_dict['pose_query'].shape[0] * 2), dtype=torch.bool)
+        pair_dist = _pairwise_distance(batch_dict['pose_query'][:, 0:3, 3])
+        neg_mask = ((pair_dist > self.negetative_distcance) | neg_mask.to(pair_dist.device))
+        neg_mask = neg_mask.repeat(2, 2)
+        batch_size = batch_dict['batch_size']
+        pos_mask = torch.zeros((batch_size, batch_size), device=neg_mask.device)
+
+        for i in range(batch_size // 2):
+            pos_mask[i, i + batch_size // 2] = 1
+            pos_mask[i + batch_size // 2, i] = 1
+        l_triplet = self.trip_fun(batch_dict['vlads'], pos_mask, neg_mask)
+        l_total = l_score + l_pose + 0.05 * l_match + l_triplet + (l_gb + l_gi + l_gpa + l_kpl)
+        loss = [l_total, l_pose, l_score, l_match, l_triplet, l_tra, l_rot, l_gb, l_gi, l_gpa, l_gpo,l_kpl]
+        for i in range(len(loss)):
+            if loss[i]==0:
+                loss[i]=loss[0]*0
+        batch_dict['loss']=loss
+        return batch_dict
--- a/models/checkpoint_bev.pth.tar
+++ b/models/checkpoint_bev.pth.tar
--- a/models/checkpoint_bevp.pth.tar
+++ b/models/checkpoint_bevp.pth.tar
--- a/models/checkpoint_fusion.pth.tar
+++ b/models/checkpoint_fusion.pth.tar
--- a/net.py
+++ b/net.py
@@ -0,0 +1,628 @@
+import cv2
+import math
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+import torch._utils
+import torch.nn as nn
+import torch.nn.functional as F
+from uot import UOTHead
+from netvlad import NetVLAD, NetVLADLoupe
+from ALIKE.alike import configs
+from ALIKE.alnet import ALNet
+from BEVNet import RICNN, EncodePosition, RIAvgpool2d, RIMaxpool2d
+import tools
+
+
+def simple_nms(scores, nms_radius=2, itertation=2, mode='1'):
+    """ Fast Non-maximum suppression to remove nearby points """
+    assert (nms_radius >= 0)
+    if mode == 'ri':
+        max_pool = RIMaxpool2d(kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius)
+    else:
+        max_pool = nn.MaxPool2d(kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius)
+
+    zeros = torch.zeros_like(scores)
+    max_mask = scores == max_pool(scores)
+
+    for _ in range(itertation):
+        supp_mask = max_pool(max_mask.float()) > 0
+        supp_scores = torch.where(supp_mask, zeros, scores)
+        new_max_mask = supp_scores == max_pool(supp_scores)
+        max_mask = max_mask | (new_max_mask & (~supp_mask))
+    return torch.where(max_mask, scores, zeros)
+
+
+class BEVHead(nn.Module):
+    def __init__(self, alnet='alike-n', iter=5, num_kpt=100, cluster_num=16, vlad_size=256):
+        super(BEVHead, self).__init__()
+        cfg = configs[alnet]
+        self.feature_extractor = ALNet(c1=cfg['c1'], c2=cfg['c2'], c3=cfg['c3'], c4=cfg['c4'], dim=cfg['dim'],
+                                       single_head=cfg['single_head'])
+        self.feature_size = int(self.feature_extractor.feature_size)
+        self.select = 'maxpool'
+        self.num_kpt = num_kpt
+        self.ep = EncodePosition(feature_size=self.feature_size)
+        self.uot = UOTHead(nb_iter=iter,name='original')
+        self.netvlad_bev = NetVLAD(self.feature_size, cluster_num)
+        # state_dict=torch.load('/data4/caodanyang/results/FUSIONLCD/bev_07250/models/checkpoint_049.pth.tar', map_location='cpu')['model']
+        # state_dict_new={}
+        # for k,v in state_dict.items():
+        #     state_dict_new[k[4:]]=v
+        # self.load_state_dict(state_dict_new)
+        # for param in self.parameters():
+        #     param.requires_grad = False
+
+    def forward(self, batch_dict):
+        assert type(batch_dict) is dict, 'Input should be a dict'
+        bev = batch_dict['bev']
+        guider = (bev[:, 2:3] > 0).float()
+        b, c, h_bev, w_bev = bev.shape
+        x = bev[:, 0:3, :, :]
+        points = bev[:, 3:7, :, :]  # xyzi
+        points[:, 2] = 0
+        points[:, 3] = 1
+
+        score_bev, feature_bev = self.feature_extractor(x)
+        score_bev = score_bev * guider
+
+        if self.select == 'avgpool':
+            avgpool = RIAvgpool2d(kernel_size=5, stride=4, padding=1)
+            grid = np.array(np.meshgrid(np.arange(h_bev), np.arange(w_bev))).swapaxes(0, 2)
+            grid = torch.from_numpy(grid).to(x.device).permute(2, 0, 1).unsqueeze(0).repeat(b, 1, 1, 1)
+            score_bev_avg = avgpool(score_bev)
+            grid_avg = avgpool(grid.float() * score_bev) / (score_bev_avg + 1e-8)
+            grid_avg = torch.round(grid_avg).long().permute(0, 2, 3, 1)
+            points_avg = avgpool(score_bev * points) / (score_bev_avg + 1e-8)
+            feature_bev_avg = avgpool(feature_bev * score_bev) / (score_bev_avg + 1e-8)
+            score_bev = score_bev.view(b, h_bev, w_bev)
+            score_bev_avg = score_bev_avg.squeeze(1)
+            kpts = []
+            feas_kpt = []
+            pixels_kpt = []
+            # cnt=0
+            for i in range(b):
+                uv = list(torch.where(score_bev_avg[i] > 0))
+                num_kpt = int(self.num_kpt)
+                if num_kpt == 0:
+                    print('NO BEV key point')
+                    exit()
+                while len(uv[0]) < num_kpt:
+                    uv[0] = torch.cat([uv[0], uv[0][:(num_kpt - len(uv[0]))]])
+                    uv[1] = torch.cat([uv[1], uv[1][:(num_kpt - len(uv[1]))]])
+                score_bev0 = score_bev_avg[i, uv[0], uv[1]]
+                score_bev1, idx = torch.topk(score_bev0, k=self.num_kpt)
+                # cnt=max(cnt,len(uv[0]))
+                # idx=torch.arange(len(uv[0])).to(x.device)
+                pc = points_avg[i, :, uv[0], uv[1]].permute(1, 0)
+                # pc = torch.cat([pc, pc * 0], dim=1)
+                kpt = pc[idx]
+                fea_kpt = feature_bev_avg[i, :, uv[0][idx], uv[1][idx]]
+                pixel_kpt = grid_avg[i, uv[0][idx], uv[1][idx]]
+                pixels_kpt.append(pixel_kpt)
+                kpts.append(kpt.unsqueeze(0))
+                feas_kpt.append(fea_kpt.unsqueeze(0))
+        else:
+            score_bev_max = simple_nms(score_bev, nms_radius=3)
+            score_bev = score_bev.view(b, h_bev, w_bev)
+            score_bev_max = score_bev_max.view(b, h_bev, w_bev)
+            kpts = []
+            feas_kpt = []
+            pixels_kpt = []
+            for i in range(b):
+                uv = list(torch.where((score_bev[i] == score_bev_max[i]) & (score_bev[i] > 0)))
+                num_kpt = int(self.num_kpt)
+                if num_kpt == 0:
+                    print('NO BEV key point')
+                    exit()
+                while len(uv[0]) < num_kpt:
+                    uv[0] = torch.cat([uv[0], uv[0][:(num_kpt - len(uv[0]))]])
+                    uv[1] = torch.cat([uv[1], uv[1][:(num_kpt - len(uv[1]))]])
+                score_bev0 = score_bev[i, uv[0], uv[1]]
+                # sc0 = score_bev0.cpu().detach().numpy()
+                score_bev1, idx = torch.topk(score_bev0, k=self.num_kpt)
+                pc = points[i, :, uv[0], uv[1]].permute(1, 0)
+                # pc = torch.cat([pc, pc * 0], dim=1)
+                kpt = pc[idx]
+                fea_kpt = feature_bev[i, :, uv[0][idx], uv[1][idx]]
+                pixel_kpt = torch.cat([uv[0][idx], uv[1][idx]]).view(2, -1).T
+                pixels_kpt.append(pixel_kpt.unsqueeze(0))
+                kpts.append(kpt.unsqueeze(0))
+                feas_kpt.append(fea_kpt.unsqueeze(0))
+
+        # kpts1=torch.zeros((b,cnt,kpt.shape[1])).to(x.device)
+        # feas_kpt1=torch.zeros((b,fea_kpt.shape[0],cnt)).to(x.device)
+        # for i in range(b):
+        #     kpts1[i,:kpts[i].shape[1]]=kpts[i].squeeze(0)
+        #     feas_kpt1[i,:,:feas_kpt[i].shape[2]]=feas_kpt[i].squeeze(0)
+
+        kpts = torch.cat(kpts)
+        feas_kpt = torch.cat(feas_kpt)
+        pixels_kpt = torch.cat(pixels_kpt)
+        if hasattr(self, 'ep'):
+            feas_kpt = self.ep(kpts, feas_kpt)
+        batch_dict['pixels_kpt'] = pixels_kpt
+        batch_dict['score_bev'] = score_bev
+        batch_dict['fea_kpt_original'] = feas_kpt
+        batch_dict['fea_bev'] = feature_bev
+        batch_dict['key_points'] = kpts
+        if hasattr(self, 'netvlad_bev'):
+            try:
+                vlad_bev = self.netvlad_bev(feas_kpt.transpose(1, 2).contiguous())
+            except:
+                vlad_bev = self.netvlad_bev(feas_kpt.unsqueeze(3))
+            batch_dict['vlad_bev'] = vlad_bev
+        if ('pose_to_frame' in batch_dict.keys()) and (hasattr(self, 'uot')):
+            self.uot(batch_dict)
+        ####################################    show bev and kpt     ############################################
+        if 0:
+            for i in range(b):
+                bevshow = x[i].permute(1, 2, 0).cpu().detach().numpy()
+                bevshow = np.ascontiguousarray(bevshow[:, :, 0:3] * 255, dtype=np.uint8)
+                bevshow1 = bevshow.copy()
+                bevshow1[:, 1] = [255, 255, 255]
+                for j in range(kpt.shape[0]):
+                    center = (int(uv[1][idx[j]].cpu().detach().numpy()), int(uv[0][idx[j]].cpu().detach().numpy()))
+                    cv2.circle(bevshow1, center, 2, (0, 0, 255), -1, cv2.LINE_AA)
+                bevshow2 = np.hstack((bevshow, bevshow1))
+                # cv2.namedWindow('2', cv2.WINDOW_NORMAL)
+                # cv2.imshow('2', bevshow2)
+                # cv2.waitKey(0)
+                fig = plt.figure()
+                plt.imshow(bevshow2)
+                plt.show()
+        #########################################################################################################
+
+        ####################################        show match       ############################################
+        if 0:
+            for i in range(b // 2):
+                kpt1 = kpts[i]
+                pose_to_frame = batch_dict['pose_to_frame'][i]
+                # pose_to_frame = batch_dict['transformation'][i]
+                # pose_to_frame = torch.cat((pose_to_frame, torch.tensor([0, 0, 0, 1]).view(1, 4).to(pose_to_frame.device)))
+                kpt1 = (pose_to_frame @ kpt1.permute(1, 0)).permute(1, 0)
+                kpt2 = kpts[i + b // 2]
+                bev1 = batch_dict['bev'][i][0:3].permute(1, 2, 0)
+                bev1 = np.ascontiguousarray(bev1.cpu().detach().numpy() * 255, dtype=np.uint8)
+                bev2 = batch_dict['bev'][i + b // 2][0:3].permute(1, 2, 0)
+                bev2 = np.ascontiguousarray(bev2.cpu().detach().numpy() * 255, dtype=np.uint8)
+                pixel1 = pixels_kpt[i].cpu().detach().numpy()
+                pixel2 = pixels_kpt[i + b // 2].cpu().detach().numpy()
+                fea1 = feas_kpt[i].permute(1, 0).cpu().detach().numpy()
+                fea2 = feas_kpt[i + b // 2].permute(1, 0).cpu().detach().numpy()
+                idx1, idx2, dis = tools.nn_match(fea1, fea2, 'cosine')
+                # idx11, idx21, dis1 = tools.nn_match(kpt1, kpt2, 'euclidean')
+                # idx1 = idx1[dis < 0.1]
+                # idx2 = idx2[dis < 0.1]
+                h, w, _ = bev1.shape
+                img = np.hstack((bev1, bev2))
+                img[:, w] = [255, 255, 255]
+                tp = 0
+                img1 = img.copy()
+                for j in range(len(pixel1)):
+                    center1 = (int(pixel1[j, 1]), int(pixel1[j, 0]))
+                    center2 = (int(pixel2[j, 1]) + w, int(pixel2[j, 0]))
+                    cv2.circle(img, center1, 2, (155, 155, 155), -1, cv2.LINE_AA)
+                    cv2.circle(img, center2, 2, (155, 155, 155), -1, cv2.LINE_AA)
+                for j in range(len(idx1)):
+                    center1 = (int(pixel1[idx1[j], 1]), int(pixel1[idx1[j], 0]))
+                    center2 = (int(pixel2[idx2[j], 1]) + w, int(pixel2[idx2[j], 0]))
+                    dis_kpt = (kpt1[idx1[j]] - kpt2[idx2[j]]).norm(p=2)
+                    if dis_kpt < 2:
+                        tp = tp + 1
+                        cv2.line(img, center1, center2, (0, 166, 0), 1, cv2.LINE_AA)
+                    else:
+                        cv2.line(img, center1, center2, (0, 0, 188), 1, cv2.LINE_AA)
+                    cv2.circle(img, center1, 2, (255, 255, 255), -1, cv2.LINE_AA)
+                    cv2.circle(img, center2, 2, (255, 255, 255), -1, cv2.LINE_AA)
+                # print(np.arccos(pose_to_frame.cpu().detach().numpy()[0, 0]) / np.pi * 180, (tp / len(idx1)))
+                img2 = np.vstack((img1, img))
+                img2[h, :] = [255, 255, 255]
+                cv2.namedWindow('bev match %.3f,%.1fdeg' % (tp / len(idx1), np.arccos(pose_to_frame.cpu().detach().numpy()[0, 0]) / np.pi * 180))
+                cv2.imshow('bev match %.3f,%.1fdeg' % (tp / len(idx1), np.arccos(pose_to_frame.cpu().detach().numpy()[0, 0]) / np.pi * 180), img2)
+                cv2.waitKey(0)
+        #####################################################################################################
+
+        ############################################  ICP  ##################################################
+        if 0:
+            import open3d as o3d
+            for i in range(b // 2):
+                pose_to_frame = batch_dict['pose_to_frame'][i].cpu().detach().numpy()
+                print('angle', np.arccos(pose_to_frame[0, 0]) / 3.14 * 180)
+                transformation = batch_dict['transformation'][i].cpu().detach().numpy()
+                transformation = np.vstack((transformation, [0, 0, 0, 1]))
+                scan1 = batch_dict['scan_query'][i].cpu().detach().numpy()
+                scan2 = batch_dict['scan_positive'][i].cpu().detach().numpy()
+
+                pcd1 = o3d.geometry.PointCloud()
+                pcd1.points = o3d.utility.Vector3dVector(scan1[:, :3])
+                pcd1.colors = o3d.utility.Vector3dVector([[0, 0, 1] for i in range(len(pcd1.points))])
+
+                pcd11 = o3d.geometry.PointCloud()
+                pcd11.points = o3d.utility.Vector3dVector(scan1[:, :3])
+                pcd11.colors = o3d.utility.Vector3dVector([[0, 0, 1] for i in range(len(pcd1.points))])
+
+                pcd2 = o3d.geometry.PointCloud()
+                pcd2.points = o3d.utility.Vector3dVector(scan2[:, :3])
+                pcd2.colors = o3d.utility.Vector3dVector([[0, 1, 0] for i in range(len(pcd2.points))])
+
+                icp_config = o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration=200, relative_fitness=1e-6,
+                                                                               relative_rmse=1e-6)
+                trans_init = transformation
+                threshold = 2
+                estimation_method = o3d.pipelines.registration.TransformationEstimationPointToPoint()
+                registration_result = o3d.pipelines.registration.registration_icp(pcd1, pcd2, threshold, trans_init,
+                                                                                  estimation_method, icp_config)
+
+                # 将待配准点云应用变换
+                pcd1.transform(registration_result.transformation)
+
+                vis1 = o3d.visualization.Visualizer()
+                vis1.create_window(window_name='registration', width=600, height=600)  # 创建窗口
+                render_option: o3d.visualization.RenderOption = vis1.get_render_option()  # 设置点云渲染参数
+                render_option.background_color = np.array([1, 1, 1])  # 设置背景色（这里为黑色）
+                render_option.point_size = 2  # 设置渲染点的大小
+                vis1.add_geometry(pcd11)
+                vis1.run()
+
+                vis2 = o3d.visualization.Visualizer()
+                vis2.create_window(window_name='registration', width=600, height=600)  # 创建窗口
+                render_option: o3d.visualization.RenderOption = vis2.get_render_option()  # 设置点云渲染参数
+                render_option.background_color = np.array([1, 1, 1])  # 设置背景色（这里为黑色）
+                render_option.point_size = 2  # 设置渲染点的大小
+                vis2.add_geometry(pcd2)
+                vis2.run()
+
+                vis = o3d.visualization.Visualizer()
+                vis.create_window(window_name='registration', width=600, height=600)  # 创建窗口
+                render_option: o3d.visualization.RenderOption = vis.get_render_option()  # 设置点云渲染参数
+                render_option.background_color = np.array([1, 1, 1])  # 设置背景色（这里为黑色）
+                render_option.point_size = 2  # 设置渲染点的大小
+                vis.add_geometry(pcd1)
+                vis.add_geometry(pcd2)
+                vis.run()
+        #######################################################################################################
+
+        return batch_dict
+
+
+class ImgHead(nn.Module):
+    def __init__(self, alnet='alike-n', num_kpt=150, cluster_num=0,vlad_size=256):
+        super(ImgHead, self).__init__()
+        cfg = configs[alnet]
+        self.feature_extractor = ALNet(c1=cfg['c1'], c2=cfg['c2'], c3=cfg['c3'], c4=cfg['c4'], dim=cfg['dim'],
+                                       single_head=cfg['single_head'])
+        self.feature_size = int(self.feature_extractor.feature_size)
+        # try:
+        #     model_path = cfg['model_path']
+        # except:
+        #     model_path = ''
+        # if model_path != '':
+        #     state_dict = torch.load(model_path)
+        #     self.feature_extractor.load_state_dict(state_dict)
+            # for param in self.feature_extractor.parameters():
+            #     param.requires_grad = False
+        if num_kpt>0:
+            self.num_kpt = num_kpt
+
+    def forward(self, batch_dict):
+        x = batch_dict['img'][:, 0:3].float() / 255.0
+        # x=x[:,:,:,384:768,]
+        # pixels = batch_dict['img'][:, 3:5]
+        b, c, h, w = x.shape
+        pixel_features = []
+        kpts = []
+        scores = []
+        score_img, feature_img = self.feature_extractor(x)
+        # feature_img=feature_img*0
+        if hasattr(self,'num_kpt') :
+            score_img = simple_nms(score_img, 2, 2)
+            s_thr = 0.1
+            for i in range(b):
+                score_global1 = score_img[i, 0]
+                values, indices = torch.topk(score_global1.view(-1), k=self.num_kpt, dim=0, largest=True)
+                if torch.max(values) < s_thr:
+                    print('0 pixel')
+                    exit()
+                num_low_value = torch.sum(values < s_thr)
+                if num_low_value > 0:
+                    indices1 = indices.clone()
+                    indices1[(self.num_kpt - num_low_value):] = indices[:num_low_value]
+                    indices = indices1
+                row = torch.div(indices, score_global1.shape[1], rounding_mode='trunc')
+                col = indices % score_global1.shape[1]
+                pixel_feature = feature_img[i:i + 1, :, row, col]
+                pixel_features.append(pixel_feature)
+                kpts.append(torch.cat([row.view(1, -1, 1), col.view(1, -1, 1)], dim=2))
+                scores.append(values.view(1, -1))
+            pixel_features = torch.cat(pixel_features)
+            kpts = torch.cat(kpts)
+            scores = torch.cat(scores)
+        ####################################        show match       ############################################
+        if 0:
+            for i in range(b // 2):
+                img1 = batch_dict['img'][i][0:3].permute(1, 2, 0)
+                img1 = np.ascontiguousarray(img1.cpu().detach().numpy(), dtype=np.uint8)
+                img2 = batch_dict['img'][i + b // 2][0:3].permute(1, 2, 0)
+                img2 = np.ascontiguousarray(img2.cpu().detach().numpy(), dtype=np.uint8)
+                pixel1 = kpts[i].cpu().detach().numpy()
+                pixel2 = kpts[i + b // 2].cpu().detach().numpy()
+                fea1 = pixel_features[i].permute(1, 0).cpu().detach().numpy()
+                fea2 = pixel_features[i + b // 2].permute(1, 0).cpu().detach().numpy()
+                idx1, idx2, dis = tools.nn_match(fea1, fea2, 'euclidean')
+                idx1 = idx1[dis < 10]
+                idx2 = idx2[dis < 10]
+                h, w, _ = img1.shape
+                img = np.vstack((img1, img2))
+                img[h, :] = [255, 255, 255]
+                for i in range(len(idx1)):
+                    center1 = (int(pixel1[idx1[i], 1]), int(pixel1[idx1[i], 0]))
+                    center2 = (int(pixel2[idx2[i], 1]), int(pixel2[idx2[i], 0] + h))
+                    cv2.line(img, center1, center2, (0, 188, 0), 1, cv2.LINE_AA)
+                    cv2.circle(img, center1, 2, (0, 0, 255), -1, cv2.LINE_AA)
+                    cv2.circle(img, center2, 2, (0, 0, 255), -1, cv2.LINE_AA)
+                fig = plt.figure()
+                plt.imshow(img[:, :, [2, 1, 0]])
+                plt.show()
+                # cv2.namedWindow('img match')
+                # cv2.imshow('img match', img)
+                # cv2.waitKey(0)
+        
+        #########################################################################################################
+        batch_dict['key_pixels'] = kpts
+        batch_dict['fea_kpl'] = pixel_features
+        batch_dict['fea_img'] = feature_img
+        batch_dict['score_img'] = score_img
+        if hasattr(self, 'netvlad_img'):
+            vlad = self.netvlad_img(pixel_features.transpose(1, 2).contiguous())
+            batch_dict['vlad_img'] = vlad
+
+        return batch_dict
+
+class LocalPool(nn.Module):
+    def __init__(self, in_c):
+        super().__init__()
+        self.conv1 = nn.Conv2d(100, 10, 1, 1, 0, bias=True)
+        self.mp=nn.MaxPool2d((1, 10))
+
+    def forward(self, x):
+        b, c, n, k = x.shape #k=100
+        x1 = x.permute(0, 3, 2, 1)  # b,k,n,c
+        x2=self.conv1(x1)
+        x3=x2.permute(0,3,2,1)
+        x4=self.mp(x3)
+        return x4  # bcn1
+
+class TransformerEncoder(nn.Module):
+    def __init__(self, in_c=128, num_heads=4, dropout=0.1, num_layers=2):
+        super().__init__()
+        encoder_layer = nn.TransformerEncoderLayer(d_model=in_c, nhead=num_heads, dropout=dropout, batch_first=True)
+        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
+
+    def forward(self, x):
+        y = self.encoder(x)
+        return y
+
+
+
+class Attention(nn.Module):
+    def __init__(self, d_model):
+        super(Attention, self).__init__()
+        self.d_model = d_model
+        self.w_q = nn.Linear(d_model, d_model)
+        self.w_k = nn.Linear(d_model, d_model)
+        self.w_v = nn.Linear(d_model, d_model)
+        self.fnn = nn.Linear(d_model, d_model)
+        # self.dp=nn.Dropout(0.1)
+
+    def forward(self, q, k=None, v=None):
+        proj_q = self.w_q(q)  # BNC
+        proj_k = self.w_k(k)
+        proj_w = self.w_v(v)
+        # proj_q=self.dp(proj_q)
+        # proj_k=self.dp(proj_k)
+        # proj_w=self.dp(proj_w)
+        weights = nn.functional.softmax(torch.matmul(proj_q, proj_k.transpose(-2, -1)) / (self.d_model ** 0.5), dim=-1)
+        attn_output = torch.matmul(weights, proj_w).contiguous()
+        output = self.fnn(attn_output)
+        return output, weights
+
+
+class Generator(nn.Module):
+    def __init__(self, in_c=128, num=150):
+        super().__init__()
+        self.mha = Attention(in_c)
+        self.conv1 = nn.Sequential(
+            nn.ConvTranspose1d(in_c, in_c, kernel_size=3, stride=3, padding=0),
+            nn.AdaptiveMaxPool1d(num)
+        )
+
+    def forward(self, x):
+        b, c, n = x.shape
+        # x=x.detach()
+        x1 = x.permute(0, 2, 1)  # BNC
+        x2, _ = self.mha(x1, x1, x1)
+        x2 = x2.permute(0, 2, 1)
+        x3 = self.conv1(x2)
+        return x3
+
+
+class Converter(nn.Module):
+    def __init__(self, in_c=128):
+        super().__init__()
+        self.mha = Attention(in_c)
+        self.conv1 = nn.Sequential(
+            nn.Conv1d(in_c, in_c, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c), nn.ReLU(),
+            nn.Conv1d(in_c, in_c // 4, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c // 4), nn.ReLU(),
+            nn.Conv1d(in_c // 4, in_c // 8, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c // 8), nn.ReLU(),
+            nn.Conv1d(in_c // 8, in_c // 4, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c // 4), nn.ReLU(),
+            nn.Conv1d(in_c // 4, in_c, kernel_size=1, stride=1, padding=0),# nn.BatchNorm1d(in_c), nn.ReLU(),
+            nn.Conv1d(in_c, in_c, kernel_size=1, stride=1, padding=0)
+        )
+        self.conv2 = nn.Conv1d(in_c * 2, in_c, 1, 1, 0, bias=False)
+
+    def forward(self, x):
+        # return x
+        b, c, n = x.shape
+        # x=x.detach()
+        mask = (x == 0).all(dim=1)
+        x1 = x.permute(0, 2, 1)  # BNC
+        x2, _ = self.mha(x1, x1, x1)
+        x2 = x2.permute(0, 2, 1)
+        x3 = self.conv1(x)
+        x4=torch.cat([x2,x3],dim=1)
+        x5=self.conv2(x4)
+        x5 = x5.masked_fill(mask.unsqueeze(1), 0)
+        return x5
+
+class FusionHead(nn.Module):
+    def __init__(self, in_c=128):
+        super().__init__()
+        self.mha1 = Attention(in_c)
+        self.mha2 = Attention(in_c)
+        self.conv1 = nn.Conv1d(in_c * 2, in_c, 1)
+
+    def forward(self, x):
+        fea_kpt = x[:, :, 0]
+        fea_kpl_gen = x[:, :, 3]
+        B, C, K, N = x.shape
+        x1 = x[:, :, :3]  # BC3N
+        x2 = x1.permute(0, 3, 2, 1).contiguous()#BN3C
+        x3 = x2.view(B * N, 3, C)
+        x4, _ = self.mha1(x3, x3, x3)
+        x5 = torch.max(x4, dim=1)[0]#B*N 3 C
+        x6=x5.view(B,N,C)
+        x7, _ = self.mha2(x6, fea_kpl_gen.permute(0, 2, 1), fea_kpl_gen.permute(0, 2, 1))
+        x7 = x7.permute(0, 2, 1)
+        x8 = torch.cat([fea_kpt, x7] ,dim=1)
+        x9 = self.conv1(x8)
+        return x9
+
+
+def cosine_similarity(feature1, feature2):
+    # BNC
+    feature1 = feature1 / torch.sqrt(torch.sum(feature1 ** 2, -1, keepdim=True) + 1e-8)
+    feature2 = feature2 / torch.sqrt(torch.sum(feature2 ** 2, -1, keepdim=True) + 1e-8)
+    C = torch.bmm(feature1, feature2.transpose(1, 2))
+    # distance_matrix = torch.sum(feature1 ** 2, -1, keepdim=True)
+    # distance_matrix = distance_matrix + torch.sum(feature2 ** 2, -1, keepdim=True).transpose(1, 2)
+    # distance_matrix = distance_matrix - 2 * torch.bmm(feature1, feature2.transpose(1, 2))  # c^2=a^2+b^2-2abcos
+    # C = distance_matrix ** 0.5
+    return C
+
+
+class Fusion(nn.Module):
+    def __init__(self, cfg):
+        super().__init__()
+        flag = cfg['flag']
+        self.flag = flag
+        if flag == 'fusion':
+            self.img = ImgHead(alnet='alike-n', num_kpt=cfg['kpts_number_img'],
+                               cluster_num=cfg['cluster_num_img'], vlad_size=cfg['vlad_size'])
+            self.bev = BEVHead(alnet='alike-n', iter=cfg['sinkhorn_iter'],
+                               num_kpt=cfg['kpts_number_bev'], cluster_num=cfg['cluster_num_bev'], vlad_size=cfg['vlad_size'])
+            assert self.img.feature_size == self.bev.feature_size, 'img feature and image feature should be the same size'
+            feature_size = self.img.feature_size
+            self.localpool = LocalPool(feature_size)
+            self.cvt_img = Converter(feature_size)
+            self.cvt_bev = Converter(feature_size)
+            self.gen_pan = Generator(feature_size, cfg['kpts_number_bev'])
+            self.att_fusion = FusionHead(feature_size)
+            # self.netvlad_fusion = NetVLADLoupe(feature_size, cfg['cluster_num_fusion'], cfg['vlad_size'])
+            self.netvlad_fusion = NetVLAD(feature_size, cfg['cluster_num_fusion'])
+            self.uot = UOTHead(nb_iter=cfg['sinkhorn_iter'],name='fusion')
+            self.vlad='fusion'
+            self.w= torch.nn.Parameter(torch.zeros(1))
+
+        if flag == 'bev':
+            self.bev = BEVHead(alnet='alike-n',iter=cfg['sinkhorn_iter'], num_kpt=cfg['kpts_number_bev'], cluster_num=cfg['cluster_num_bev'], vlad_size=256)
+        if flag == 'img':
+            self.img = ImgHead(alnet='alike-n', num_kpt=cfg['kpts_number_img'], cluster_num=cfg['cluster_num_img'], vlad_size=cfg['vlad_size'])
+
+    def forward(self, batch_dict):
+        if self.flag == 'fusion':
+            batch_dict = self.img(batch_dict)
+            batch_dict = self.bev(batch_dict)
+            fea_img = batch_dict['fea_img']
+            fea_bev = batch_dict['fea_bev']
+            relation = batch_dict['relation']
+            fea_kpt_original = batch_dict['fea_kpt_original']
+            # fea_kpl = batch_dict['fea_kpl']
+            # pixel_kpt = batch_dict['pixels_kpt']
+            b, n1, n2, _ = relation.shape
+            n2 = n2 - 1
+            # ns=torch.sum((relation[:,:,-1]>0).all(dim=2),dim=1)
+            # n_least=torch.min(ns)
+            # n_least=min(n_least,256)
+            # relation1=[]
+            # for i in range(b):
+            #     idx=torch.randperm(ns[i])[:n_least].to(relation.device)
+            #     relation1.append(relation[i:i+1,idx])
+            # relation1=torch.cat(relation1)
+            # relation=relation1  
+            pixel_img = relation[:, :, 0:n2].clone()
+            grid_img = pixel_img[:, :, :, [1, 0]].float() / torch.tensor([fea_img.shape[3] - 1, fea_img.shape[2] - 1]).to(fea_img.device).float() * 2 - 1
+            fea_pl_dual = F.grid_sample(fea_img, grid_img, align_corners=True, mode='bilinear', padding_mode='zeros')
+            fea_pl_dual = self.localpool(fea_pl_dual).squeeze(3)
+            fea_pt_dual_gen = self.cvt_bev(fea_pl_dual)
+
+            if 'pose_to_frame' in batch_dict.keys() and hasattr(self, 'uot'):
+                pixel_bev = relation[:, :, n2:n2 + 1, 0:2].clone()
+                grid_bev = pixel_bev[:, :, :, [1, 0]].float() / torch.tensor([fea_bev.shape[3] - 1, fea_bev.shape[2] - 1]).to(fea_bev.device).float() * 2 - 1
+                fea_pt_dual = (F.grid_sample(fea_bev, grid_bev, align_corners=True, mode='bilinear', padding_mode='zeros')).squeeze(3)
+                fea_pl_dual_gen = self.cvt_img(fea_pt_dual)
+                batch_dict['fea_pt_dual_gen'] = fea_pt_dual_gen
+                batch_dict['fea_pl_dual_gen'] = fea_pl_dual_gen
+                batch_dict['fea_pt_dual'] = fea_pt_dual
+                batch_dict['fea_pl_dual'] = fea_pl_dual
+
+            fea_kpt_original_gen = self.gen_pan(fea_pt_dual_gen)
+            batch_dict['fea_kpt_original_gen'] = fea_kpt_original_gen
+            fea_kpl_gen = self.cvt_img(fea_kpt_original)
+            fea_kpt_gen_gen = self.cvt_bev(fea_kpl_gen)
+            batch_dict['fea_kpt_gen_gen'] = fea_kpt_gen_gen
+            batch_dict['fea_kpl_gen']=fea_kpl_gen
+            fea_kpts = torch.cat([fea_kpt_original.unsqueeze(2), fea_kpt_original_gen.unsqueeze(2), fea_kpt_gen_gen.unsqueeze(2), fea_kpl_gen.unsqueeze(2)], dim=2)
+            fea_kpt_fusion = self.att_fusion(fea_kpts)
+            batch_dict['fea_kpt_fusion'] = fea_kpt_original
+
+            # sim10 = cosine_similarity(fea_pt_dual.permute(0, 2, 1), fea_pt_dual.permute(0, 2, 1))[0].cpu().detach().numpy()
+            # sim11 = cosine_similarity(fea_pt_dual_gen.permute(0, 2, 1), fea_pt_dual_gen.permute(0, 2, 1))[0].cpu().detach().numpy()
+            # sim20 = cosine_similarity(fea_pl_dual.permute(0, 2, 1), fea_pl_dual.permute(0, 2, 1))[0].cpu().detach().numpy()
+            # sim21 = cosine_similarity(fea_pl_dual_gen.permute(0, 2, 1), fea_pl_dual_gen.permute(0, 2, 1))[0].cpu().detach().numpy()
+            # sim30 = cosine_similarity(fea_kpt_original.permute(0, 2, 1), fea_kpt_original.permute(0, 2, 1))[0].cpu().detach().numpy()
+            # sim31 = cosine_similarity(fea_kpt_original_gen.permute(0, 2, 1), fea_kpt_original_gen.permute(0, 2, 1))[0].cpu().detach().numpy()
+            # sim32 = cosine_similarity(fea_kpt_gen_gen.permute(0, 2, 1), fea_kpt_gen_gen.permute(0, 2, 1))[0].cpu().detach().numpy()
+            # fig=plt.figure()
+            # plt.subplot(2, 4, 1), plt.imshow(sim10), plt.title('points')
+            # plt.subplot(2, 4, 5), plt.imshow(sim11), plt.title('gen points')
+            # plt.subplot(2, 4, 2), plt.imshow(sim20), plt.title('pixel')
+            # plt.subplot(2, 4, 6), plt.imshow(sim21), plt.title('gen pixel')
+            # plt.subplot(2, 4, 3), plt.imshow(sim30), plt.title('kpt orig')
+            # plt.subplot(2, 4, 7), plt.imshow(sim31), plt.title('pan kpt')
+            # plt.subplot(2, 4, 4), plt.imshow(sim30), plt.title('kpt orig')
+            # plt.subplot(2, 4, 8), plt.imshow(sim32), plt.title('kpt gen gen')
+            # plt.show()
+
+            if 'pose_to_frame' in batch_dict.keys() and hasattr(self, 'uot'):
+                self.uot(batch_dict)
+
+            vlad_fusion = self.netvlad_fusion(fea_kpt_fusion.unsqueeze(3))
+            if self.vlad=='bev':
+                batch_dict['vlads']=batch_dict['vlad_bev']
+            if self.vlad=='fusion':
+                if 'vlad_bev' in batch_dict.keys():
+                    batch_dict['vlads']=torch.sigmoid(self.w)*vlad_fusion + (1-torch.sigmoid(self.w))*batch_dict['vlad_bev']
+                else:
+                    batch_dict['vlads']=vlad_fusion
+        if self.flag == 'bev':
+            batch_dict = self.bev(batch_dict)
+            batch_dict['vlads'] = batch_dict['vlad_bev']
+        if self.flag == 'img':
+            batch_dict = self.img(batch_dict)
+            batch_dict['vlads'] = batch_dict['vlad_img']
+        return batch_dict
+
+
+if __name__ == '__main__':
+    b=BEVHead()
--- a/netvlad.py
+++ b/netvlad.py
@@ -0,0 +1,155 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class NetVLAD(nn.Module):
+    def __init__(self, fea_size=128, num_clusters=16):
+        super(NetVLAD, self).__init__()
+        self.num_clusters = num_clusters
+        self.conv = nn.Conv2d(fea_size, num_clusters, kernel_size=(1, 1), bias=True)
+        self.centroids = nn.Parameter(torch.randn(num_clusters, fea_size))
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        """
+        x: B, C, H, W,W=1
+        """
+        soft_assign = self.conv(x)  # (B, num_clusters, H, W)
+        soft_assign = self.relu(soft_assign)
+        soft_assign = torch.nn.functional.softmax(soft_assign, dim=1)  # (B, num_clusters, H, W)
+
+        # reshape for broadcasting
+        B, C, H, W = x.shape
+        soft_assign = soft_assign.view(B, self.num_clusters, -1)  # (B, num_clusters, H * W)
+        x_flatten = x.view(B, C, -1)  # (B, C, H * W)
+
+        # compute residuals
+        x_flatten1 = x_flatten.unsqueeze(1).permute(0, 1, 3, 2)  # (B, 1, H*W, C)
+        centroids = self.centroids.unsqueeze(0).unsqueeze(2)  # (1, num_cluster, 1, C)
+        residual = x_flatten1 - centroids  # (B, num_clusters, H * W, C)
+
+        residual *= soft_assign.unsqueeze(-1)  # (B, num_clusters, H * W, C)
+
+        # sum residuals and assign
+        vlad = residual.sum(dim=-2)  # (B, num_clusters, C)
+        vlad = nn.functional.normalize(vlad, p=2, dim=2)  # (B, num_clusters, C)
+
+        vlad = vlad.view(B, -1)
+        vlad = nn.functional.normalize(vlad, p=2, dim=1)  # (B, num_clusters * C)
+        return vlad
+
+
+class NetVLADLoupe(nn.Module):
+    """
+    Original Tensorflow implementation: https://github.com/antoine77340/LOUPE
+    """
+
+    def __init__(self, feature_size, cluster_size, output_dim,
+                 gating=True, add_norm=True, is_training=True, normalization='batch'):
+        super(NetVLADLoupe, self).__init__()
+        self.feature_size = feature_size
+        # output_dim=cluster_size * feature_size
+        self.output_dim = output_dim
+        self.is_training = is_training
+        self.gating = gating
+        self.add_batch_norm = add_norm
+        self.cluster_size = cluster_size
+        if normalization == 'instance':
+            norm = lambda x: nn.LayerNorm(x)
+        elif normalization == 'group':
+            norm = lambda x: nn.GroupNorm(8, x)
+        else:
+            norm = lambda x: nn.BatchNorm1d(x)
+        self.softmax = nn.Softmax(dim=-1)
+        self.cluster_weights = nn.Parameter(torch.randn(feature_size, cluster_size) * 1 / math.sqrt(feature_size))
+        self.cluster_weights2 = nn.Parameter(torch.randn(1, feature_size, cluster_size) * 1 / math.sqrt(feature_size))
+        self.hidden1_weights = nn.Parameter(torch.randn(cluster_size * feature_size, output_dim) * 1 / math.sqrt(feature_size))
+        if add_norm:
+            self.cluster_biases = None
+            self.bn1 = norm(cluster_size)
+        else:
+            self.cluster_biases = nn.Parameter(torch.randn(cluster_size) * 1 / math.sqrt(feature_size))
+            self.bn1 = None
+
+        self.bn2 = norm(output_dim)
+
+        if gating:
+            self.context_gating = GatingContext(output_dim, add_batch_norm=add_norm, normalization=normalization)
+
+    def forward(self, x):
+        """
+        x: B N C
+        """
+        max_samples = x.shape[1]
+        activation = torch.matmul(x, self.cluster_weights)
+        if self.add_batch_norm:
+            activation = activation.view(-1, self.cluster_size)
+            activation = self.bn1(activation)
+            activation = activation.view(-1, max_samples, self.cluster_size)
+        else:
+            activation = activation + self.cluster_biases
+        activation = self.softmax(activation)
+
+        a_sum = activation.sum(-2, keepdim=True)
+        a = a_sum * self.cluster_weights2
+
+        activation = torch.transpose(activation, 2, 1)
+        x = x.view((-1, max_samples, self.feature_size))
+        vlad = torch.matmul(activation, x)
+        vlad = torch.transpose(vlad, 2, 1).contiguous()
+        vlad0 = vlad - a
+
+        vlad1 = F.normalize(vlad0, dim=1, p=2, eps=1e-6)
+        vlad2 = vlad1.view((-1, self.cluster_size * self.feature_size))
+        vlad = F.normalize(vlad2, dim=1, p=2, eps=1e-6)
+
+        vlad = torch.matmul(vlad, self.hidden1_weights)
+
+        vlad = self.bn2(vlad)
+
+        if self.gating:
+            vlad = self.context_gating(vlad)
+        # vlad = vlad / vlad.norm(dim=1, keepdim=True)
+        return vlad
+
+
+class GatingContext(nn.Module):
+    """
+    Original Tensorflow implementation: https://github.com/antoine77340/LOUPE
+    """
+
+    def __init__(self, dim, add_batch_norm=True, normalization='batch'):
+        super(GatingContext, self).__init__()
+        self.dim = dim
+        self.add_batch_norm = add_batch_norm
+        if normalization == 'instance':
+            norm = lambda x: nn.LayerNorm(x)
+        elif normalization == 'group':
+            norm = lambda x: nn.GroupNorm(8, x)
+        else:
+            norm = lambda x: nn.BatchNorm1d(x)
+        self.gating_weights = nn.Parameter(torch.randn(dim, dim) * 1 / math.sqrt(dim))
+        self.sigmoid = nn.Sigmoid()
+
+        if add_batch_norm:
+            self.gating_biases = None
+            self.bn1 = norm(dim)
+        else:
+            self.gating_biases = nn.Parameter(torch.randn(dim) * 1 / math.sqrt(dim))
+            self.bn1 = None
+
+    def forward(self, x):
+        gates = torch.matmul(x, self.gating_weights)
+
+        if self.add_batch_norm:
+            gates = self.bn1(gates)
+        else:
+            gates = gates + self.gating_biases
+
+        gates = self.sigmoid(gates)
+
+        activation = x * gates
+
+        return activation
--- a/preparedataset.py
+++ b/preparedataset.py
@@ -0,0 +1,105 @@
+import os
+
+import numpy as np
+import torch
+import tqdm
+
+
+def k3602k(k, k360):
+    for src in [0, 3, 4, 5, 6, 7, 9, 10]:
+        tgt = src + 50
+        path_pose = k360 + '/data_poses/2013_05_28_drive_%04d_sync/cam0_to_world.txt' % src
+        path_velo = k360 + '/data_3d_raw/2013_05_28_drive_%04d_sync/velodyne_points/data' % src
+        path_calib = k360 + '/calibration/calib_cam_to_velo.txt'
+        path_img = k360 + '/data_2d_raw/2013_05_28_drive_%04d_sync/image_00/data_rect' % src
+
+        path_pose1 = k + '/data_odometry_poses/dataset/poses/%02d.npy' % tgt
+        path_velo1 = k + '/data_odometry_velodyne/dataset/sequences/%02d/velodyne' % tgt
+        path_calib1 = k + '/data_odometry_calib/dataset/sequences/%02d' % tgt
+        path_img1 = k + '/data_odometry_color/dataset/sequences/%02d/image_2/' % tgt
+
+        if not os.path.exists(path_velo1):
+            os.makedirs(path_velo1)
+        if not os.path.exists(path_img1):
+            os.makedirs(path_img1)
+        if not os.path.exists(path_calib1):
+            os.makedirs(path_calib1)
+
+        if not os.path.exists(path_calib1 + '/calib.txt'):
+            os.symlink(path_calib, path_calib1 + '/calib.txt')
+
+        with open(path_calib, 'r') as f:
+            for line in f.readlines():
+                data = np.array([float(x) for x in line.split()])
+        cam0_to_velo = np.reshape(data, (3, 4))
+        cam0_to_velo = np.vstack([cam0_to_velo, [0, 0, 0, 1]])
+        cam0_to_velo = torch.tensor(cam0_to_velo)
+        poses2 = []
+        ids = []
+        with open(path_pose, 'r') as f:
+            for x in f:
+                x = x.strip().split()
+                x = [float(v) for v in x]
+                ids.append(int(x[0]))
+                pose = torch.zeros((4, 4), dtype=torch.float64)
+                pose[0, 0:4] = torch.tensor(x[1:5])
+                pose[1, 0:4] = torch.tensor(x[5:9])
+                pose[2, 0:4] = torch.tensor(x[9:13])
+                pose[3, 3] = 1.0
+                pose = pose @ cam0_to_velo.inverse()
+                poses2.append(pose.float().numpy())
+        pose = np.stack(poses2)
+        np.save(path_pose1, pose)
+
+        cnt = 0
+        for i in tqdm.tqdm(ids, desc='%02d:' % src):
+            path_velo_now = os.path.join(path_velo, '0000%06d.bin' % i)
+            path_img_now = os.path.join(path_img, '0000%06d.png' % i)
+            if os.path.exists(path_velo_now) and os.path.exists(path_img_now):
+                pass
+            else:
+                break
+            path_velo_now1 = os.path.join(path_velo1, '%06d.bin' % cnt)
+            path_img_now1 = os.path.join(path_img1, '%06d.png' % cnt)
+            if not os.path.exists(path_velo_now1):
+                os.symlink(path_velo_now, path_velo_now1)
+            if not os.path.exists(path_img_now1):
+                os.symlink(path_img_now, path_img_now1)
+            cnt = cnt + 1
+
+
+def todataset(kitti_root, dataset_root):
+    sequences = [0, 5, 6, 7, 8, 9, 50, 54, 55, 56, 59]
+    for s in sequences:
+        if s >= 50:
+            suffix = '.npy'
+        else:
+            suffix = '.txt'
+        kitti_velo_dir = kitti_root + '/data_odometry_velodyne/dataset/sequences/%02d/velodyne' % s
+        kitti_img_dir = kitti_root + '/data_odometry_color/dataset/sequences/%02d/image_2' % s
+        kitti_pose_dir = kitti_root + '/data_odometry_poses/dataset/poses/%02d' % s + suffix
+        kitti_calib_dir = kitti_root + '/data_odometry_calib/dataset/sequences/%02d/calib.txt' % s
+        dataset_path = dataset_root + '/%02d' % s
+        if not os.path.exists(dataset_path):
+            os.makedirs(dataset_path)
+        dataset_velo_dir = dataset_path + '/velodyne'
+        dataset_img_dir = dataset_path + '/image_2'
+        dataset_pose_dir = dataset_path + '/poses' + suffix
+        dataset_calib_dir = dataset_path + '/calib.txt'
+        if not os.path.exists(dataset_velo_dir):
+            os.symlink(kitti_velo_dir, dataset_velo_dir)
+        if not os.path.exists(dataset_img_dir):
+            os.symlink(kitti_img_dir, dataset_img_dir)
+        if not os.path.exists(dataset_pose_dir):
+            os.symlink(kitti_pose_dir, dataset_pose_dir)
+        if not os.path.exists(dataset_calib_dir):
+            os.symlink(kitti_calib_dir, dataset_calib_dir)
+
+
+if __name__ == '__main__':
+    k360 = '/home/coop8/chenyouyuan/FUSIONLCD/KITTI360'
+    k = '/home/coop8/chenyouyuan/FUSIONLCD/KITTI'
+    k3602k(k, k360)
+    kitti_root = '/home/coop8/chenyouyuan/FUSIONLCD/KITTI'
+    dataset_root = '/home/coop8/chenyouyuan/FUSIONLCD/sequences'
+    todataset(kitti_root, dataset_root)
--- a/tools.py
+++ b/tools.py
@@ -0,0 +1,149 @@
+import threading
+import torch
+import os
+import time
+
+
+def farthest_point_sample(xyz, npoint):
+    """Iterative farthest point sampling
+
+    Args:
+        xyz: pointcloud data_loader, [B, N, C]
+        npoint: number of samples
+    Returns:
+        centroids: sampled pointcloud index, [B, npoint]
+    """
+    device = xyz.device
+    B, N, C = xyz.shape
+    centroids = torch.zeros(B, npoint, dtype=torch.long).to(device)
+    distance = torch.ones(B, N).to(device) * 1e10
+    farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device)
+    batch_indices = torch.arange(B, dtype=torch.long).to(device)
+    for i in range(npoint):
+        centroids[:, i] = farthest
+        centroid = xyz[batch_indices, farthest, :].view(B, 1, 3)
+        dist = torch.sum((xyz - centroid) ** 2, -1)
+        mask = dist < distance
+        distance[mask] = dist[mask]
+        farthest = torch.max(distance, -1)[1]
+    return centroids
+
+
+def batch_distance(feature1,feature2,mode='cosine'):
+    if mode == 'cosine':
+        # Transport cost matrix
+        feature1 = feature1 / torch.sqrt(torch.sum(feature1 ** 2, -1, keepdim=True) + 1e-8)
+        feature2 = feature2 / torch.sqrt(torch.sum(feature2 ** 2, -1, keepdim=True) + 1e-8)
+        dis = 1.0 - torch.bmm(feature1, feature2.transpose(1, 2))
+    elif mode == 'euclidean':
+        feature=torch.cat([feature1,feature2],dim=1)
+        feature_mean=torch.mean(feature,dim=1,keepdim=True)
+        feature1=feature1-feature_mean
+        feature2=feature2-feature_mean
+        distance_matrix = torch.sum(feature1 ** 2, -1, keepdim=True)
+        distance_matrix = distance_matrix + torch.sum(feature2 ** 2, -1, keepdim=True).transpose(1, 2)
+        distance_matrix = distance_matrix - 2 * torch.bmm(feature1, feature2.transpose(1, 2))  # c^2=a^2+b^2-2abcos
+        distance_matrix = distance_matrix ** 0.5
+        dis = distance_matrix
+    return dis
+
+def nn_match(fea1, fea2, matrix='cosine'):
+    assert len(fea1.shape) == 2 and len(fea2.shape) == 2, 'nnmatch error'
+    if not isinstance(fea1, torch.Tensor):
+        fea1 = torch.tensor(fea1)
+    if not isinstance(fea2, torch.Tensor):
+        fea2 = torch.tensor(fea2)
+    if matrix == 'cosine':
+        # Transport cost matrix
+        fea1 = fea1 / torch.sqrt(torch.sum(fea1 ** 2, -1, keepdim=True) + 1e-8)
+        fea2 = fea2 / torch.sqrt(torch.sum(fea2 ** 2, -1, keepdim=True) + 1e-8)
+        dis = 1.0 - torch.mm(fea1, fea2.transpose(0, 1))
+    elif matrix == 'euclidean':
+        distance_matrix = torch.sum(fea1 ** 2, -1, keepdim=True)
+        distance_matrix = distance_matrix + torch.sum(fea2 ** 2, -1, keepdim=True).transpose(0, 1)
+        distance_matrix = distance_matrix - 2 * torch.mm(fea1, fea2.transpose(0, 1))  # c^2=a^2+b^2-2abcos
+        dis = distance_matrix ** 0.5
+    else:
+        dis = 0
+        print('Invalid matrix')
+    idx0_min = torch.argmin(dis, dim=0)
+    idx1_min = torch.argmin(dis, dim=1)
+    ids1 = torch.arange(0, dis.shape[1]).to(fea1.device)
+    idx = idx1_min[idx0_min]
+    idx_match = ids1 == idx
+    idx1 = ids1[idx_match]
+    idx2 = idx0_min[idx_match]
+    dis_min = dis[idx2, idx1]
+
+    return idx2, idx1, dis_min
+
+
+def path_join(*args):
+    names = list(args)
+    path = names[0]
+    for i in range(len(names) - 1):
+        path = os.path.join(path, names[i + 1])
+    path = list(path)
+    while "\\" in path:
+        idx = path.index("\\")
+        path[idx] = "/"
+    path = ''.join(path)
+    return path
+
+
+def make_save_path(*args):
+    path = path_join(*args)
+    if not os.path.exists(path):
+        os.makedirs(path)
+    return path
+
+
+
+def read_cfg(data):
+    if type(data) is int:
+        result = [data]
+    else:
+        result = data.split(',')
+    return result
+
+
+class Timer:
+    """A module to record the program running time"""
+
+    def __init__(self, name="Now"):
+        self.strat = time.time()
+        self.cnt = 0
+        self.end = time.time()
+        self.avg = 0
+        self.all = 0
+        self.now = 0
+        self.name = name
+        time_now = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
+        print('Init timer: ',time_now)
+
+    def update(self, name=None):
+        if name is not None:
+            self.name = name
+        self.cnt = self.cnt + 1
+        self.end = time.time()
+        self.avg = (self.end - self.strat) / self.cnt
+        self.now = self.end - self.all - self.strat
+        self.all = self.end - self.strat
+        time_now = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
+        if self.avg<1:
+            print("%s | %s | using %d | each %.3f" %
+                (time_now, self.name, self.all, self.now))
+        elif self.avg<10:
+            print("%s | %s | using %d | each %.2f" %
+                (time_now, self.name, self.all, self.now))
+        elif self.avg<100:
+            print("%s | %s | using %d | each %.1f" %
+                (time_now, self.name, self.all, self.now))
+        else:
+            print("%s | %s | using %d | each %d" %
+                (time_now, self.name, self.all, self.now))
+
+
+if __name__ == '__main__':
+    # draw_trace()
+    pass
--- a/train.py
+++ b/train.py
@@ -0,0 +1,432 @@
+import argparse
+import os
+import time
+
+import numpy as np
+import torch
+import torch.optim as optim
+import yaml
+
+import net
+import tools
+from dataset import KittiTotalLoader
+from evaluate_lcd import lcd
+from loss import TotalLoss
+
+test_step = 10 # 保存测试点的步长
+
+def save_checkpoint(model, optimizer, loss_total_fun, epoch, iter_train, path_result):
+    if (epoch + 1) % test_step == 0 and epoch+1>=test_step:
+
+        time_now = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
+        checkpoint = {'time': time_now,
+                      'epoch': epoch,
+                      'model': model.state_dict(),
+                      'optimizer': optimizer.state_dict()}
+        savepath = tools.make_save_path(path_result, 'models')
+        torch.save(checkpoint, savepath + '/checkpoint_%03d.pth.tar' % epoch)
+        print(savepath + '/checkpoint_%03d.pth.tar is saved' % epoch)
+
+class log_result():
+    def __init__(self,path_result):
+        self.path=path_result
+        if not os.path.exists(path_result):
+            with open(path_result, 'w') as file:
+                file.write('Time           Sequence Epoch  AP    R100  F1    R@1   R@2   R@3   R@4   R@5')
+                file.write('   R@6   R@7   R@8   R@9   R@10  R@15  R@20  R@25\n')
+                for i in range(300):
+                    file.write('\n')
+    def write(self,seq,epoch,row,x):
+        with open(self.path, 'r') as file:
+            lines = file.readlines()
+        time_now = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
+        new_content='%s %08d %06d'%(time_now,seq,epoch)
+        for x1 in x:
+            new_content=new_content + ' %.3f'%x1
+        
+        lines[row] = new_content+'\n'
+        with open(self.path, 'w') as file:
+            file.writelines(lines)
+
+def train(model, optimizer, loss_total_fun, data, device):
+    model.train()
+    sequences = data['sequence']
+    id_query = data['id_query']
+    id_positive = data['id_positive']
+    batchsize = len(id_query)
+
+    bev_query = data['bev_query'].to(device)
+    bev_positive = data['bev_positive'].to(device)
+
+    pose_query = data['pose_query'].to(device)
+    pose_positive = data['pose_positive'].to(device)
+
+    pose_to_frame = data['pose_to_frame'].to(device)
+    label_score = data['label_score'].to(device)
+
+    img_query = data['img_query'].to(device)
+    img_positive = data['img_positive'].to(device)
+    try:
+        bev = torch.cat([bev_query, bev_positive], dim=0)
+        bev = bev.permute(0, 3, 1, 2)
+    except:
+        bev = 0
+    try:
+        img = torch.cat([img_query, img_positive], dim=0)
+        img = img.permute(0, 3, 1, 2)
+    except:
+        img = 0
+    try:
+        relation = data['relation'].to(device)
+    except:
+        relation = 0
+
+    batch_dict = {'bev': bev,
+                  'label_score': label_score,
+                  'img': img,
+                  'relation': relation,
+                  'id_query': id_query,
+                  'sequence': sequences,
+                  'id_positive': id_positive,
+                  'pose_to_frame': pose_to_frame,
+                  'pose_query': pose_query,
+                  'pose_positive': pose_positive,
+                  'batch_size': int(batchsize * 2)}
+
+    model(batch_dict)
+
+    loss_total_fun(batch_dict)
+    l_total = batch_dict['loss'][0]
+
+    optimizer.zero_grad()
+    l_total.backward()
+    optimizer.step()
+    for p in model.parameters():
+        if torch.isnan(p).any():
+            print('Model NAN, ', p.shape)
+            exit()
+    return batch_dict
+
+
+def validate(model, loss_total_fun, data, device):
+    model.eval()
+    with torch.no_grad():
+        sequences = data['sequence']
+        id_query = data['id_query']
+        id_positive = data['id_positive']
+        batchsize = len(id_query)
+
+        bev_query = data['bev_query'].to(device)
+        bev_positive = data['bev_positive'].to(device)
+
+        pose_query = data['pose_query'].to(device)
+        pose_positive = data['pose_positive'].to(device)
+
+        pose_to_frame = data['pose_to_frame'].to(device)
+        label_score = data['label_score'].to(device)
+
+        img_query = data['img_query'].to(device)
+        img_positive = data['img_positive'].to(device)
+        try:
+            bev = torch.cat([bev_query, bev_positive], dim=0)
+            bev = bev.permute(0, 3, 1, 2)
+        except:
+            bev = 0
+        try:
+            img = torch.cat([img_query, img_positive], dim=0)
+            img = img.permute(0, 3, 1, 2)
+        except:
+            img = 0
+        try:
+            relation = data['relation'].to(device)
+        except:
+            relation = 0
+
+        batch_dict = {'bev': bev,
+                      'label_score': label_score,
+                      'img': img,
+                      'relation': relation,
+                      'id_query': id_query,
+                      'sequence': sequences,
+                      'id_positive': id_positive,
+                      'pose_to_frame': pose_to_frame,
+                      'pose_query': pose_query,
+                      'pose_positive': pose_positive,
+                      'batch_size': int(batchsize * 2)}
+        model(batch_dict)
+        loss_total_fun(batch_dict)
+
+    return batch_dict
+
+
+def test(model, data, device):
+    model.eval()
+    with torch.no_grad():
+        sequences = data['sequence']
+        id_query = data['id_query']
+        batchsize = len(id_query)
+        bev_query = data['bev_query'].to(device)
+        pose_query = data['pose_query'].to(device)
+
+        img_query = data['img_query'].to(device)
+        try:
+            bev = bev_query
+            bev = bev.permute(0, 3, 1, 2)
+        except:
+            bev = 0
+        try:
+            img = img_query
+            img = img.permute(0, 3, 1, 2)
+        except:
+            img = 0
+        try:
+            relation = data['relation'].to(device)
+        except:
+            relation = 0
+
+        batch_dict = {'bev': bev,
+                      'img': img,
+                      'relation': relation,
+                      'id_query': id_query,
+                      'sequence': sequences,
+                      'pose_query': pose_query,
+                      'batch_size': int(batchsize * 2)}
+
+        model(batch_dict)
+
+    return batch_dict
+
+
+def main(args):
+    try:
+        with open(os.path.join(os.getcwd(), "config.yaml"), "r") as ymlfile:
+            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
+        print('Loading config file from %s' % os.path.join(os.getcwd(), "config.yaml"))
+    except:
+        with open(os.path.join(os.getcwd(), "project/FUSIONLCD/config.yaml"), "r") as ymlfile:
+            cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)
+        print('Loading config file from %s' % os.path.join(os.getcwd(), "project/BevNvLcd/config.yaml"))
+    cfg = cfg['experiment']
+    for k, v in cfg.items():
+        print(k, ':', v)
+    path_result = os.path.join(cfg['path_result'],args.result_name)
+    lres=log_result(os.path.join(os.getcwd(),'result',args.result_name+'.txt'))
+    device = torch.device("cuda" if torch.cuda.is_available() and cfg['cuda'] else "cpu")
+    start_epoch = 0
+    iter_train = 0
+    epochs = cfg['epochs']
+    model = net.Fusion(cfg)
+    print(model)
+    model = model.to(device)
+    loss_total_fun = TotalLoss(cfg).to(device)
+    print("Model params: %.6fM" % (sum(p.numel() for p in model.parameters()) / 1e6))
+    optimizer = optim.Adam(model.parameters(), lr=cfg['learning_rate'], betas=(cfg['beta1'], cfg['beta2']), eps=cfg['eps'], weight_decay=cfg['weight_decay'])
+    # optimizer = optim.Adam([{'params': model.bev.parameters(), 'lr': 0.0002},
+    #                         {'params': model.img.parameters(), 'lr': 0.0001},
+    #                         {'params': model.vlad_fusion_layer.parameters(), 'lr': 0.0001}],
+    #                        betas=(cfg['beta1'], cfg['beta2']), eps=cfg['eps'], weight_decay=cfg['weight_decay'])
+    # print(optimizer)
+    loader_train, loader_val, loader_test = KittiTotalLoader(cfg)
+    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1, 3, 5, 10, 50, 100], gamma=0.5, last_epoch=start_epoch - 1)
+    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, 0.99)
+    # scheduler = warmup(optimizer, 5, 1e-6, cfg['learning_rate'])
+    # writer = SummaryWriter(tools.make_save_path(path_result, 'tensorboard_log'))
+    t = tools.Timer()
+    test_best = np.zeros([len(loader_test.dataset.datasets), 3])
+    if cfg['load_model']:
+        checkpoint = torch.load((cfg['last_model']))
+        start_epoch = checkpoint['epoch'] + 1 * cfg['train_flag']
+        state_dict_saved = checkpoint['model']
+        model.load_state_dict(state_dict_saved)
+        optimizer.load_state_dict(checkpoint['optimizer'])
+        print('loaded %s' % cfg['last_model'])
+    if not cfg['train_flag']:
+        print_frequency = 1e9
+    else:
+        print_frequency = 1
+    for epoch in range(start_epoch, epochs):
+        torch.cuda.empty_cache()
+        '''
+        ============================== train ===============================
+        '''
+        if cfg['train_flag']:
+            if epoch - start_epoch == 0:
+                pf = print_frequency
+                print_frequency = min(len(loader_train), print_frequency * 10)
+            else:
+                print_frequency = pf
+            l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10,l11 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            step_print = max(1, int(len(loader_train) / print_frequency))
+            step_now = 0
+            optimizer.zero_grad()
+            for id_sample, data in enumerate(loader_train):
+                batch_dict = train(model, optimizer, loss_total_fun, data, device)
+
+                # if (id_sample+1)%4==0 or (id_sample+1)==len(loader_train):
+                #     optimizer.step()
+                #     optimizer.zero_grad()
+                if step_now < step_print:
+                    step_now = step_now + 1
+                    l0 = l0 + batch_dict['loss'][0]
+                    l1 = l1 + batch_dict['loss'][1]
+                    l2 = l2 + batch_dict['loss'][2]
+                    l3 = l3 + batch_dict['loss'][3]
+                    l4 = l4 + batch_dict['loss'][4]
+                    l5 = l5 + batch_dict['loss'][5]
+                    l6 = l6 + batch_dict['loss'][6]
+                    l7 = l7 + batch_dict['loss'][7]
+                    l8 = l8 + batch_dict['loss'][8]
+                    l9 = l9 + batch_dict['loss'][9]
+                    l10 = l10 + batch_dict['loss'][10]
+                    l11 = l11 + batch_dict['loss'][11]
+                if step_now == step_print:
+                    step_now = 0
+                    info = 'loss a%.3f p%.3f s%.3f m%.3f t%.3f tr%.3f_%.1f genb%.3f geni%.3f genpa%.3f genpo%.3f genkpl%.3f' % (
+                        l0 / step_print, l1 / step_print, l2 / step_print, l3 / step_print,
+                        l4 / step_print, l5 / step_print, l6 / step_print, l7 / step_print,
+                        l8 / step_print, l9 / step_print, l10 / step_print, l11 / step_print)
+                    t.update("Epoch %03d | train %04d/%04d | %s" %
+                             (epoch, id_sample, len(loader_train) - 1, info))
+                    l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            save_checkpoint(model, optimizer, loss_total_fun, epoch, iter_train, path_result)
+            scheduler.step()
+        '''
+        ============================= validate =============================
+        '''
+        if cfg['validate_flag'] and (epoch + 1) % test_step == 0:
+            l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10,l11 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+            step_print = max(1, int(len(loader_val) / print_frequency))
+            step_now = 0
+            for id_sample, data in enumerate(loader_val):
+                batch_dict = validate(model, loss_total_fun, data, device)
+                if step_now < step_print:
+                    step_now = step_now + 1
+                    l0 = l0 + batch_dict['loss'][0]
+                    l1 = l1 + batch_dict['loss'][1]
+                    l2 = l2 + batch_dict['loss'][2]
+                    l3 = l3 + batch_dict['loss'][3]
+                    l4 = l4 + batch_dict['loss'][4]
+                    l5 = l5 + batch_dict['loss'][5]
+                    l6 = l6 + batch_dict['loss'][6]
+                    l7 = l7 + batch_dict['loss'][7]
+                    l8 = l8 + batch_dict['loss'][8]
+                    l9 = l9 + batch_dict['loss'][9]
+                    l10 = l10 + batch_dict['loss'][10]
+                    l11 = l11 + batch_dict['loss'][11]
+                if step_now == step_print:
+                    step_now = 0
+                    info = 'loss a%.3f p%.3f s%.3f m%.3f t%.3f tr%.3f_%.1f genb%.3f geni%.3f genpa%.3f genpo%.3f genkpl%.3f' % (
+                        l0 / step_print, l1 / step_print, l2 / step_print, l3 / step_print,
+                        l4 / step_print, l5 / step_print, l6 / step_print, l7 / step_print,
+                        l8 / step_print, l9 / step_print, l10 / step_print, l11 / step_print)
+                    t.update("Epoch %03d | validate %04d/%04d | %s" %
+                             (epoch, id_sample, len(loader_val) - 1, info))
+                    l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10,l11 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+        '''
+        ============================== test ================================
+        '''
+        if cfg['test_flag'] and (epoch + 1) % test_step == 0:
+            step_print = max(1, int(len(loader_test) / print_frequency))
+            step_now = 0
+            vlads = []
+            kpts = []
+            feas_original = []
+            feas_fusion = []
+            sequences = []
+            poses = []
+            for id_sample, data in enumerate(loader_test):
+                batch_dict = test(model, data, device)
+                # save_figure(batch_dict, epoch, path_result, cfg)
+                sequences.append((batch_dict['sequence']).detach().cpu())
+                vlads.append(batch_dict['vlads'].detach().cpu())
+                poses.append(batch_dict['pose_query'].detach().cpu())
+                kpts.append(batch_dict['key_points'].detach().cpu())
+                if 'fea_kpt_fusion' in batch_dict.keys():
+                    feas_fusion.append(batch_dict['fea_kpt_fusion'].detach().cpu().permute(0, 2, 1))
+                feas_original.append(batch_dict['fea_kpt_original'].detach().cpu().permute(0, 2, 1))
+
+                if step_now < step_print:
+                    step_now = step_now + 1
+                if step_now == step_print:
+                    step_now = 0
+                    t.update("Epoch %03d | test %05d/%05d" % (epoch, id_sample, len(loader_test)))
+            vlads = torch.cat(vlads)
+            kpts = torch.cat(kpts)
+            feas_original = torch.cat(feas_original)
+            if 'fea_kpt_fusion' in batch_dict.keys():
+                feas_fusion = torch.cat(feas_fusion)
+            else:
+                feas_fusion=feas_original
+
+            poses = torch.cat(poses)
+            sequences = torch.cat(sequences)
+
+            database = {'vlads': vlads,
+                        'key_points': kpts,
+                        'fea_kpt_original': feas_original,
+                        'fea_kpt_fusion': feas_fusion,
+                        'fea_kpt': feas_fusion,
+                        'sequences': sequences,
+                        'pose_query': poses}
+            savepath = tools.make_save_path(path_result, 'database')
+
+            torch.save(database, savepath + '/database_bevp.pth.tar')
+            # print('save ' + savepath + '/database_%03d.pth.tar' % epoch)
+            # exit()
+            # database = torch.load('/data4/caodanyang/results/FUSIONLCD/07250/database/database_159.pth.tar')
+            print()
+            print('***************************************************************************************************************************************')
+            print('Epoch %03d' % epoch)
+            # feature_match(loader_val,database)
+            result,recall_at_k = lcd(database)
+            seq = torch.unique(sequences)
+            for i in range((test_best.shape[0])):
+                recall_at_k1=recall_at_k[i]
+                for j in range((test_best.shape[1])):
+                    test_best[i, j] = max([test_best[i, j], result[i][j]])
+                print('Best, sequence %02d, AP=%.3f, R100=%.3f, F1=%.3f' % (seq[i],  test_best[i, 0], test_best[i, 1],test_best[i, 2]))
+                lres.write(seq[i],epoch,(epoch+1)//test_step+i*(epochs)//test_step,
+                           [result[i][0],result[i][1],result[i][2],recall_at_k1[0],recall_at_k1[1],recall_at_k1[2],recall_at_k1[3],recall_at_k1[4],
+                            recall_at_k1[5],recall_at_k1[6],recall_at_k1[7],recall_at_k1[8],recall_at_k1[9],recall_at_k1[14],recall_at_k1[19],recall_at_k1[24]
+                            ])
+            #     print('Sequence %02d, AP=%.3f[%.3f], R100=%.3f[%.3f], F1=%.3f[%.3f], Recall@1[%.3f] 2[%.3f] 5[%.3f] 10[%.3f] 15[%.3f] 25[%.3f]' %
+            #           (sequences[i], result[i][0], test_best[i, 0], result[i][1], test_best[i, 1], result[i][2], test_best[i, 2],
+            #            recall_at_k1[0],recall_at_k1[1],recall_at_k1[4],recall_at_k1[9],recall_at_k1[14],recall_at_k1[24]))
+            print('***************************************************************************************************************************************')
+            print()
+            if cfg['train_flag']:
+                pass
+            else:
+                exit()
+            # exit()
+
+if __name__ == '__main__':
+    # CUDA_VISIBLE_DEVICES=2 nohup python -u train.py --result_name=08280 --info=cosim >log/08280.log 2>&1 &
+    # fuser /dev/nvidia*
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--result_name', type=str, default='log', help='log name of result')
+    parser.add_argument('--pro_name', type=str, default='python', help='name of process')
+    parser.add_argument('--info', type=str, default='python', help='name of process')
+    parser.add_argument('--gpu', type=str, default=None, help="GPU id(s), e.g. '0' or '0,1'. Use 'cpu' to force CPU.")
+    args = parser.parse_args()
+
+    # set visible GPUs before any CUDA call / seed
+    if args.gpu:
+        if args.gpu.lower() == 'cpu':
+            # force CPU by hiding GPUs
+            os.environ['CUDA_VISIBLE_DEVICES'] = ''
+        else:
+            os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
+
+    np.random.seed(123)
+    torch.manual_seed(123)
+    # only call cuda seed if CUDA visible
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(123)
+
+    print(args.info)
+    try:
+        print("Using GPU device:", os.environ.get("CUDA_VISIBLE_DEVICES", ""))
+    except:
+        pass
+    main(args)
--- a/uot.py
+++ b/uot.py
@@ -0,0 +1,150 @@
+import math
+import torch
+import torch._utils
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision.models import resnet
+from typing import Optional, Callable
+
+
+def compute_rigid_transform(points1, points2, weights):
+    """Compute rigid transforms between two point clouds via weighted SVD.
+       Adapted from https://github.com/yewzijian/RPMNet/
+    Args:
+        points1 (torch.Tensor): (B, M, 3) coordinates of the first point cloud
+        points2 (torch.Tensor): (B, N, 3) coordinates of the second point cloud
+        weights (torch.Tensor): (B, M)
+    Returns:
+        Transform T (B, 3, 4) to get from points1 to points2, i.e. T*points1 = points2
+    """
+
+    weights_normalized = weights[..., None] / (torch.sum(weights[..., None], dim=1, keepdim=True) + 1e-5)
+    centroid_a = torch.sum(points1 * weights_normalized, dim=1)
+    centroid_b = torch.sum(points2 * weights_normalized, dim=1)
+    a_centered = points1 - centroid_a[:, None, :]
+    b_centered = points2 - centroid_b[:, None, :]
+    cov = a_centered.transpose(-2, -1) @ (b_centered * weights_normalized)
+
+    # Compute rotation using Kabsch algorithm. Will compute two copies with +/-V[:,:3]
+    # and choose based on determinant to avoid flips
+    u, s, v = torch.svd(cov, some=False, compute_uv=True)
+    rot_mat_pos = v @ u.transpose(-1, -2)
+    v_neg = v.clone()
+    v_neg[:, :, 2] *= -1
+    rot_mat_neg = v_neg @ u.transpose(-1, -2)
+    rot_mat = torch.where(torch.det(rot_mat_pos)[:, None, None] > 0, rot_mat_pos, rot_mat_neg)
+    assert torch.all(torch.det(rot_mat) > 0)
+
+    # Compute translation (uncenter centroid)
+    translation = -rot_mat @ centroid_a[:, :, None] + centroid_b[:, :, None]
+
+    transform = torch.cat((rot_mat, translation), dim=2)
+    return transform
+
+
+def sinkhorn_unbalanced(feature1, feature2, epsilon, gamma, max_iter, matrix='cosine'):
+    """
+    Sinkhorn algorithm for Unbalanced Optimal Transport.
+    Modified from https://github.com/valeoai/FLOT/
+    Args:
+        feature1 (torch.Tensor):
+            (B, N, C) Point-wise features for points cloud 1.
+        feature2 (torch.Tensor):
+            (B, M, C) Point-wise features for points cloud 2.
+        epsilon (torch.Tensor):
+            Entropic regularization.
+        gamma (torch.Tensor):
+            Mass regularization.
+        max_iter (int):
+            Number of iteration of the Sinkhorn algorithm.
+    Returns:
+        T (torch.Tensor):
+            (B, N, M) Transport plan between point cloud 1 and 2.
+    """
+    if matrix == 'cosine':
+        # Transport cost matrix
+        feature1 = feature1 / torch.sqrt(torch.sum(feature1 ** 2, -1, keepdim=True) + 1e-8)
+        feature2 = feature2 / torch.sqrt(torch.sum(feature2 ** 2, -1, keepdim=True) + 1e-8)
+        C = 1.0 - torch.bmm(feature1, feature2.transpose(1, 2))
+    elif matrix == 'euclidean':
+        distance_matrix = torch.sum(feature1 ** 2, -1, keepdim=True)
+        distance_matrix = distance_matrix + torch.sum(feature2 ** 2, -1, keepdim=True).transpose(1, 2)
+        distance_matrix = distance_matrix - 2 * torch.bmm(feature1, feature2.transpose(1, 2))  # c^2=a^2+b^2-2abcos
+        distance_matrix = distance_matrix ** 0.5
+        # d_max, _ = torch.max(distance_matrix, dim=2, keepdim=True)
+        C = distance_matrix
+
+    # Entropic regularisation
+    K = torch.exp(-C / epsilon)  # * support
+
+    # Early return if no iteration
+    if max_iter == 0:
+        return K
+
+    # Init. of Sinkhorn algorithm
+    power = gamma / (gamma + epsilon + 1e-8)
+    a = (torch.ones((K.shape[0], K.shape[1], 1), device=feature1.device, dtype=feature1.dtype) / K.shape[1])
+    prob1 = (torch.ones((K.shape[0], K.shape[1], 1), device=feature1.device, dtype=feature1.dtype) / K.shape[1])
+    prob2 = (torch.ones((K.shape[0], K.shape[2], 1), device=feature2.device, dtype=feature2.dtype) / K.shape[2])
+
+    # Sinkhorn algorithm
+    for _ in range(max_iter):
+        # Update b
+        KTa = torch.bmm(K.transpose(1, 2), a)
+        b = torch.pow(prob2 / (KTa + 1e-8), power)
+        # Update a
+        Kb = torch.bmm(K, b)
+        a = torch.pow(prob1 / (Kb + 1e-8), power)
+
+    # Transportation map
+    T = torch.mul(torch.mul(a, K), b.transpose(1, 2))
+    return T
+
+
+class UOTHead(nn.Module):
+
+    def __init__(self, nb_iter=5,name='original'):
+        super().__init__()
+        self.epsilon = torch.nn.Parameter(torch.zeros(1))  # Entropic regularisation
+        self.gamma = torch.nn.Parameter(torch.zeros(1))  # Mass regularisation
+        self.nb_iter = nb_iter
+        self.name=name
+
+    def forward(self, batch_dict, src_coords=None, mode='pairs'):
+
+        feats = batch_dict['fea_kpt_'+self.name].squeeze(-1)
+
+        B, C, NUM = feats.shape
+
+        assert B % 2 == 0, "Batch size must be multiple of 2: B anchor + B positive samples"
+        B = B // 2
+        feat1 = feats[:B]
+        feat2 = feats[B:]
+
+        coords = batch_dict['key_points']
+        coords1 = coords[:B, :, 0:3]
+        coords2 = coords[B:, :, 0:3]
+
+        correspondences_feature = sinkhorn_unbalanced(
+            feat1.permute(0, 2, 1),
+            feat2.permute(0, 2, 1),
+            epsilon=torch.exp(self.epsilon) + 0.03,
+            gamma=torch.exp(self.gamma),
+            max_iter=self.nb_iter,
+            matrix='cosine',
+        )
+
+        feature_corr_sum = correspondences_feature.sum(-1, keepdim=True)
+        project_kpts = (correspondences_feature @ coords2) / (feature_corr_sum + 1e-8)
+        project_feas = (correspondences_feature @ feat2.permute(0, 2, 1)) / (feature_corr_sum + 1e-8)
+
+        batch_dict['project_kpts_'+self.name] = project_kpts
+        batch_dict['project_feas_'+self.name] = project_feas.permute(0, 2, 1)
+        # batch_dict['project_coord_kpts'] = project_coord_kpts
+        batch_dict['correspondences_feature_'+self.name] = correspondences_feature
+        # batch_dict['correspondences_coord'] = correspondences_coord
+
+        transformation = compute_rigid_transform(coords1, project_kpts, feature_corr_sum.squeeze(-1))
+        batch_dict['transformation_'+self.name] = transformation
+
+        return batch_dict