From 78298e56f172dac6a44ee0660d1a33887085f2c3 Mon Sep 17 00:00:00 2001
From: cyy_mac <cyy@mac.com>
Date: Sat, 9 May 2026 17:03:40 +0800
Subject: [PATCH] =?UTF-8?q?=E7=BD=91=E7=BB=9C=E6=B5=8B=E8=AF=95=E5=92=8C?=
 =?UTF-8?q?=E5=AD=A6=E4=B9=A0demo?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 network_learning/01_alnet_demo.py            | 260 ++++++++++
 network_learning/02_ricnn_demo.py            | 425 +++++++++++++++
 network_learning/03_converter_demo.py        | 230 +++++++++
 network_learning/04_generator_fusion_demo.py | 304 +++++++++++
 network_learning/05_netvlad_demo.py          | 308 +++++++++++
 network_learning/06_uot_demo.py              | 356 +++++++++++++
 network_learning/08_full_pipeline_demo.py    | 516 +++++++++++++++++++
 network_learning/LEARNING_GUIDE.md           | 419 +++++++++++++++
 network_learning/README.md                   |  50 ++
 9 files changed, 2868 insertions(+)
 create mode 100644 network_learning/01_alnet_demo.py
 create mode 100644 network_learning/02_ricnn_demo.py
 create mode 100644 network_learning/03_converter_demo.py
 create mode 100644 network_learning/04_generator_fusion_demo.py
 create mode 100644 network_learning/05_netvlad_demo.py
 create mode 100644 network_learning/06_uot_demo.py
 create mode 100644 network_learning/08_full_pipeline_demo.py
 create mode 100644 network_learning/LEARNING_GUIDE.md
 create mode 100644 network_learning/README.md

diff --git a/network_learning/01_alnet_demo.py b/network_learning/01_alnet_demo.py
new file mode 100644
index 0000000..5a283d3
--- /dev/null
+++ b/network_learning/01_alnet_demo.py
@@ -0,0 +1,260 @@
+"""
+ALNet 网络结构可视化 Demo
+===========================
+ALNet 是图像分支的特征提取网络，基于 ALIKE 架构。
+输入：图像 (B, 3, 192, 576)
+输出：score_map (B, 1, 192, 576) + descriptor_map (B, 128, 192, 576)
+
+网络由以下部分组成：
+  block1: ConvBlock(3→16)          - 保持分辨率
+  pool2:  MaxPool2d(2)             - 下采样 2x
+  block2: ResBlock(16→32)          - 残差块
+  pool4:  MaxPool2d(4)             - 下采样 4x
+  block3: ResBlock(32→64)          - 残差块
+  pool4:  MaxPool2d(4)             - 下采样 4x
+  block4: ResBlock(64→128)         - 残差块
+  特征聚合: 4层concat + 上采样      - 多尺度融合
+  输出头: Conv1x1(128→129)         - score + descriptor
+"""
+
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')  # 非交互后端，适合服务器
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from ALIKE.alnet import ALNet, ConvBlock, ResBlock
+
+# ============================================================
+# 配置
+# ============================================================
+OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# 使用 alike-n 配置（论文中使用）
+CFG = {'c1': 16, 'c2': 32, 'c3': 64, 'c4': 128, 'dim': 128, 'single_head': True}
+
+
+def visualize_tensor(tensor, title, save_name, cmap='viridis', n_channels=8):
+    """Save a 2x4 grid showing up to 8 channels of a (C, H, W) or (B, C, H, W) tensor under OUTPUT_DIR."""
+    if tensor.dim() == 4:
+        tensor = tensor[0]  # keep only the first batch element
+    C, H, W = tensor.shape
+    n_show = max(0, min(n_channels, C, 8))  # the grid below has exactly 2x4 = 8 cells; never index past it
+
+    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
+    fig.suptitle(title, fontsize=14, fontweight='bold')
+
+    for i in range(n_show):
+        ax = axes[i // 4, i % 4]
+        im = ax.imshow(tensor[i].detach().cpu().numpy(), cmap=cmap)
+        ax.set_title(f'Channel {i}')
+        ax.axis('off')
+        plt.colorbar(im, ax=ax, fraction=0.046)
+
+    for i in range(n_show, 8):  # hide the cells that did not receive a channel
+        axes[i // 4, i % 4].axis('off')
+
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, save_name)
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_score_map(score_map, title, save_name):
+    """Save a two-panel figure for a score map: a 'hot' heat map and a 50-bin histogram of its values."""
+    if score_map.dim() == 4:
+        score_map = score_map[0, 0]
+    elif score_map.dim() == 3:
+        score_map = score_map[0]
+
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    fig.suptitle(title, fontsize=14, fontweight='bold')
+
+    im0 = axes[0].imshow(score_map.detach().cpu().numpy(), cmap='hot')
+    axes[0].set_title('Score Map (热力图)')
+    axes[0].axis('off')
+    plt.colorbar(im0, ax=axes[0])
+
+    # Histogram over all score values (flattened)
+    axes[1].hist(score_map.detach().cpu().numpy().flatten(), bins=50, color='steelblue', edgecolor='white')
+    axes[1].set_title('Score 分布直方图')
+    axes[1].set_xlabel('Score Value')
+    axes[1].set_ylabel('Frequency')
+
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, save_name)
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_intermediate_features(model, input_tensor):
+    """Run the model's sub-modules one by one on input_tensor, printing each shape and saving a figure per stage."""
+    print('\n' + '=' * 60)
+    print('ALNet 中间特征逐层可视化')
+    print('=' * 60)
+
+    x = input_tensor
+    print(f'输入: {x.shape}')
+
+    # Stage 1: block1 applied directly to the input
+    x1 = model.block1(x)
+    print(f'block1 (ConvBlock 3→16): {x1.shape}')
+    visualize_tensor(x1, 'Block1: ConvBlock 输出 (16通道)', 'alnet_block1.png')
+
+    # Stage 2: pool2 followed by block2
+    x2 = model.pool2(x1)
+    x2 = model.block2(x2)
+    print(f'pool2 + block2 (ResBlock 16→32): {x2.shape}')
+    visualize_tensor(x2, 'Block2: ResBlock 输出 (32通道) [1/2分辨率]', 'alnet_block2.png')
+
+    # Stage 3: pool4 followed by block3
+    x3 = model.pool4(x2)
+    x3 = model.block3(x3)
+    print(f'pool4 + block3 (ResBlock 32→64): {x3.shape}')
+    visualize_tensor(x3, 'Block3: ResBlock 输出 (64通道) [1/8分辨率]', 'alnet_block3.png')
+
+    # Stage 4: the same pool4 module again, followed by block4
+    x4 = model.pool4(x3)
+    x4 = model.block4(x4)
+    print(f'pool4 + block4 (ResBlock 64→128): {x4.shape}')
+    visualize_tensor(x4, 'Block4: ResBlock 输出 (128通道) [1/32分辨率]', 'alnet_block4.png')
+
+    # Feature aggregation: per-stage conv + gate, then upsample stages 2-4 before concatenation
+    f1 = model.gate(model.conv1(x1))  # presumably dim//4 channels per stage — confirm against ALNet
+    f2 = model.gate(model.conv2(x2))
+    f3 = model.gate(model.conv3(x3))
+    f4 = model.gate(model.conv4(x4))
+
+    f2_up = model.upsample2(f2)
+    f3_up = model.upsample8(f3)
+    f4_up = model.upsample32(f4)
+
+    print(f'特征聚合: f1={f1.shape}, f2_up={f2_up.shape}, f3_up={f3_up.shape}, f4_up={f4_up.shape}')
+
+    fused = torch.cat([f1, f2_up, f3_up, f4_up], dim=1)
+    print(f'多尺度拼接后: {fused.shape}')
+    visualize_tensor(fused, '多尺度特征拼接 (128通道)', 'alnet_fused_features.png', n_channels=8)
+
+    # Output head: the last channel (after sigmoid) is the score, the remaining channels are the descriptor
+    output = model.convhead2(fused)
+    score_map = torch.sigmoid(output[:, -1:, :, :])
+    descriptor_map = output[:, :-1, :, :]
+
+    print(f'Score Map: {score_map.shape}')
+    print(f'Descriptor Map: {descriptor_map.shape}')
+
+    visualize_score_map(score_map, 'ALNet 最终输出 Score Map', 'alnet_final_score.png')
+    visualize_tensor(descriptor_map, 'ALNet 最终输出 Descriptor Map (128通道)', 'alnet_final_descriptor.png')
+
+
+def visualize_receptive_field():
+    """Save a heat map of |d score(centre pixel) / d input| as an estimate of the effective receptive field."""
+    print('\n--- 感受野分析 ---')
+    model = ALNet(**CFG)
+    model.eval()
+
+    input_tensor = torch.randn(1, 3, 192, 576, requires_grad=True)
+    score_map, _ = model(input_tensor)
+
+    # Back-propagate from the single centre pixel of the score map
+    h, w = score_map.shape[2], score_map.shape[3]
+    score_map[0, 0, h // 2, w // 2].backward()
+
+    grad = input_tensor.grad.abs().sum(dim=1)[0]
+    fig, ax = plt.subplots(figsize=(12, 4))
+    im = ax.imshow(grad.detach().cpu().numpy(), cmap='hot')
+    ax.set_title('ALNet 有效感受野 (梯度幅度)', fontsize=14)
+    ax.axis('off')
+    plt.colorbar(im, ax=ax)
+    path = os.path.join(OUTPUT_DIR, 'alnet_receptive_field.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def analyze_parameters():
+    """Print total, trainable and per-child-module parameter counts of a freshly built ALNet(**CFG)."""
+    print('\n--- 参数量分析 ---')
+    model = ALNet(**CFG)
+    total = sum(p.numel() for p in model.parameters())
+    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
+
+    print(f'总参数量: {total:,} ({total / 1e6:.2f}M)')
+    print(f'可训练参数: {trainable:,} ({trainable / 1e6:.2f}M)')
+
+    # Per-module breakdown over the model's direct children
+    for name, module in model.named_children():
+        params = sum(p.numel() for p in module.parameters())
+        print(f'  {name:20s}: {params:>10,} params ({params / 1e3:.1f}K)')
+
+
+def main():  # Script entry point: parameter report, one forward pass, per-stage figures, receptive field, summary
+    print('=' * 60)
+    print('ALNet (图像特征提取网络) 结构与特征可视化')
+    print('=' * 60)
+
+    analyze_parameters()
+
+    # Build the model in eval mode
+    model = ALNet(**CFG)
+    model.eval()
+
+    # Random stand-in input with the size of a cropped KITTI image (192, 576)
+    input_tensor = torch.randn(1, 3, 192, 576)
+
+    # Forward pass without autograd
+    with torch.no_grad():
+        score_map, descriptor_map = model(input_tensor)
+
+    print(f'\n输入尺寸: {input_tensor.shape}')
+    print(f'Score Map 输出: {score_map.shape} (范围: [{score_map.min():.3f}, {score_map.max():.3f}])')
+    print(f'Descriptor Map 输出: {descriptor_map.shape}')
+
+    # Stage-by-stage feature figures (this call runs outside the no_grad block above)
+    visualize_intermediate_features(model, input_tensor)
+
+    # Receptive-field analysis (builds its own model and input)
+    visualize_receptive_field()
+
+    # Plain-text summary of the network structure
+    print('\n' + '=' * 60)
+    print('网络结构总结:')
+    print('=' * 60)
+    print("""
+    ALNet (alike-n config):
+    ┌──────────────────────────────────────────────────────┐
+    │ 输入: (B, 3, 192, 576)                               │
+    │   ↓                                                  │
+    │ block1: ConvBlock(3→16)  → (B, 16, 192, 576)        │
+    │   ↓ MaxPool2d(2)                                     │
+    │ block2: ResBlock(16→32)  → (B, 32, 96, 288)         │
+    │   ↓ MaxPool2d(4)                                     │
+    │ block3: ResBlock(32→64)  → (B, 64, 24, 72)          │
+    │   ↓ MaxPool2d(4)                                     │
+    │ block4: ResBlock(64→128) → (B, 128, 6, 18)           │
+    │   ↓                                                  │
+    │ 特征聚合: 4尺度1×1conv + 上采样 + concat → (B,128,192,576) │
+    │   ↓ Conv1x1(128→129)                                  │
+    │ 输出: score(B,1,192,576) + desc(B,128,192,576)       │
+    └──────────────────────────────────────────────────────┘
+
+    block1/2/3/4 各阶段的作用：
+    - block1: 浅层特征（边缘、角点等低级特征）
+    - block2: 中层特征（纹理、局部形状）
+    - block3: 高层特征（语义信息、物体部件）
+    - block4: 最抽象特征（全局上下文）
+    - 多尺度融合: 结合各层信息，兼顾定位精度和语义鲁棒性
+    """)
+
+    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/network_learning/02_ricnn_demo.py b/network_learning/02_ricnn_demo.py
new file mode 100644
index 0000000..c5612f5
--- /dev/null
+++ b/network_learning/02_ricnn_demo.py
@@ -0,0 +1,425 @@
+"""
+RICNN 旋转不变CNN网络结构可视化 Demo
+=======================================
+RICNN 是点云BEV分支的特征提取网络，核心创新是"旋转不变性"。
+与标准CNN不同，RICNN的卷积核根据像素到中心的欧氏距离分组，
+使得旋转后的特征保持一致。
+
+输入：BEV图像 (B, 3, 320, 320)
+输出：score_map (B, 1, 320, 320) + descriptor_map (B, 128, 320, 320)
+
+关键组件:
+  RIConv2d:   旋转不变卷积（按距离分组共享权重）
+  RIMaxpool2d: 旋转不变最大池化（只对圆形邻域取max）
+  RIAvgpool2d: 旋转不变平均池化（只对圆形邻域取avg）
+  RIResBlock: 旋转不变残差块
+"""
+
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from BEVNet import RICNN, RIConv2d, RIMaxpool2d, RIAvgpool2d, EncodePosition
+
+OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+
+def visualize_tensor(tensor, title, save_name, cmap='viridis', n_channels=8):
+    """Save a 2x4 grid showing up to 8 channels of a (C, H, W) or (B, C, H, W) tensor under OUTPUT_DIR."""
+    if tensor.dim() == 4:
+        tensor = tensor[0]  # keep only the first batch element
+    C, H, W = tensor.shape
+    n_show = max(0, min(n_channels, C, 8))  # the grid below has exactly 2x4 = 8 cells; never index past it
+
+    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
+    fig.suptitle(title, fontsize=14, fontweight='bold')
+    for i in range(n_show):
+        ax = axes[i // 4, i % 4]
+        im = ax.imshow(tensor[i].detach().cpu().numpy(), cmap=cmap)
+        ax.set_title(f'Channel {i}')
+        ax.axis('off')
+        plt.colorbar(im, ax=ax, fraction=0.046)
+    for i in range(n_show, 8):  # hide the cells that did not receive a channel
+        axes[i // 4, i % 4].axis('off')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, save_name)
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_score_map(score_map, title, save_name):
+    """Save a two-panel figure for a score map: a 'hot' heat map and a 50-bin histogram of its values."""
+    if score_map.dim() == 4:
+        score_map = score_map[0, 0]
+    elif score_map.dim() == 3:
+        score_map = score_map[0]
+
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    fig.suptitle(title, fontsize=14, fontweight='bold')
+    im0 = axes[0].imshow(score_map.detach().cpu().numpy(), cmap='hot')
+    axes[0].set_title('Score Map')
+    axes[0].axis('off')
+    plt.colorbar(im0, ax=axes[0])
+    axes[1].hist(score_map.detach().cpu().numpy().flatten(), bins=50, color='steelblue', edgecolor='white')
+    axes[1].set_title('Score 分布')
+    axes[1].set_xlabel('Score')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, save_name)
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_ri_conv_kernel():
+    """Draw, for 3x3 / 5x5 / 7x7 kernels, the rounded distance-to-centre group index of every kernel cell."""
+    print('\n--- RIConv2d 卷积核分组可视化 ---')
+
+    fig, axes = plt.subplots(1, 3, figsize=(16, 5))
+
+    for idx, kz in enumerate([3, 5, 7]):
+        # Distance mask computed locally (NOTE(review): presumably mirrors BEVNet.RIConv2d — confirm)
+        coords = torch.arange(kz ** 2).view(-1, 1)
+        row = torch.div(coords, kz, rounding_mode='floor')
+        col = torch.fmod(coords, kz)
+        coords = torch.cat([row, col], dim=1)
+        dis = (coords - 0.5 * (kz - 1)).norm(dim=1) + 0.5 * (kz % 2 - 1)
+        dis = dis.view(kz, kz)
+        dis = torch.round(dis).long()
+        dis[dis > 0.5 * (kz - 1)] = -1
+
+        ax = axes[idx]
+        im = ax.imshow(dis.numpy(), cmap='tab10')
+        ax.set_title(f'Kernel {kz}x{kz}\nDistance Groups: {dis.max().item() + 1}')
+        # Write each cell's group index on top of it (red for the excluded -1 cells)
+        for i in range(kz):
+            for j in range(kz):
+                val = dis[i, j].item()
+                color = 'white' if val >= 0 else 'red'
+                ax.text(j, i, str(val), ha='center', va='center', fontsize=8, color=color)
+        ax.axis('off')
+
+    plt.suptitle('RIConv2d: 按到中心距离分组的卷积核权重', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'ricnn_kernel_groups.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_ri_pooling():
+    """Save a 2x2 figure: square vs. circular 5x5 pooling masks, an explanatory text panel, and a bar chart."""
+    print('\n--- 旋转不变池化区域可视化 ---')
+
+    fig, axes = plt.subplots(2, 2, figsize=(10, 10))
+
+    # Circular valid region for kernel_size=5 (NOTE(review): presumably mirrors BEVNet.RIMaxpool2d — confirm)
+    kz = 5
+    coords = torch.arange(kz ** 2).view(-1, 1)
+    row = torch.div(coords, kz, rounding_mode='floor')
+    col = torch.fmod(coords, kz)
+    coords = torch.cat([row, col], dim=1)
+    dis = (coords - 0.5 * (kz - 1)).norm(dim=1) + 0.5 * (kz % 2 - 1)
+    dis = dis.view(kz, kz)
+    dis = torch.round(dis)
+    dis[dis > 0.5 * (kz - 1)] = -1
+    mask_ri = (dis > -1).numpy().astype(float)
+
+    # Valid region of a standard max pool: the full square
+    mask_std = np.ones((kz, kz))
+
+    ax = axes[0, 0]
+    ax.imshow(mask_std, cmap='Blues')
+    ax.set_title(f'标准 MaxPool {kz}x{kz}\n有效区域: {mask_std.sum():.0f} 个像素', fontsize=12)
+    for i in range(kz):
+        for j in range(kz):
+            ax.text(j, i, '✓', ha='center', va='center', fontsize=10)
+    ax.axis('off')
+
+    ax = axes[0, 1]
+    ax.imshow(mask_ri, cmap='Oranges')
+    ax.set_title(f'RI MaxPool {kz}x{kz}\n有效区域: {mask_ri.sum():.0f} 个像素 (圆形)', fontsize=12)
+    for i in range(kz):
+        for j in range(kz):
+            text = '✓' if mask_ri[i, j] else '✗'
+            color = 'white' if mask_ri[i, j] else 'red'
+            ax.text(j, i, text, ha='center', va='center', fontsize=10, color=color)
+    ax.axis('off')
+
+    # Text-only panel describing the rotation-invariance idea (no features are computed here)
+    ax = axes[1, 0]
+    ax.set_title('旋转不变性原理', fontsize=12)
+    ax.text(0.5, 0.7, '标准CNN:', transform=ax.transAxes, fontsize=11, ha='center',
+            bbox=dict(boxstyle='round', facecolor='lightblue'))
+    ax.text(0.5, 0.5, '旋转图像 → 特征也旋转 → 不匹配', transform=ax.transAxes, fontsize=10, ha='center')
+    ax.text(0.5, 0.3, 'RICNN:', transform=ax.transAxes, fontsize=11, ha='center',
+            bbox=dict(boxstyle='round', facecolor='lightgreen'))
+    ax.text(0.5, 0.1, '旋转图像 → 特征不变 → 可以匹配', transform=ax.transAxes, fontsize=10, ha='center')
+    ax.axis('off')
+
+    ax = axes[1, 1]
+    ax.set_title('RI vs 标准 CNN 对比', fontsize=12)
+    categories = ['旋转鲁棒性', '计算效率', '平移不变性', '尺度不变性']
+    ri_scores = [0.9, 0.7, 0.8, 0.5]  # hard-coded illustrative values, not measured by this script
+    std_scores = [0.3, 1.0, 0.8, 0.5]  # hard-coded illustrative values, not measured by this script
+    x = np.arange(len(categories))
+    width = 0.35
+    ax.bar(x - width / 2, ri_scores, width, label='RICNN', color='orange', alpha=0.8)
+    ax.bar(x + width / 2, std_scores, width, label='标准CNN', color='blue', alpha=0.8)
+    ax.set_xticks(x)
+    ax.set_xticklabels(categories, fontsize=9)
+    ax.set_ylim(0, 1.2)
+    ax.legend()
+    ax.set_ylabel('能力评分')
+
+    plt.suptitle('RICNN 旋转不变池化详解', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'ricnn_pooling_visualization.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def test_rotation_invariance():
+    """Compare RICNN descriptors of a BEV image and its 90°/180° rotations; return the two cosine similarities."""
+    print('\n--- 旋转不变性测试 ---')
+
+    model = RICNN()
+    model.eval()
+
+    # Synthetic BEV test image with clearly visible structure
+    bev = torch.zeros(1, 3, 320, 320)
+    # One rectangle per channel
+    bev[0, 0, 100:120, 150:170] = 1.0
+    bev[0, 1, 140:160, 100:140] = 0.8
+    bev[0, 2, 150:170, 160:200] = 0.6
+
+    with torch.no_grad():
+        score_orig, desc_orig = model(bev)
+
+        # Rotate the input by 90 degrees
+        bev_rot90 = torch.rot90(bev, k=1, dims=[2, 3])
+        score_rot90, desc_rot90 = model(bev_rot90)
+        # Rotate the output back so pixel positions line up with the original
+        desc_rot90_back = torch.rot90(desc_rot90, k=-1, dims=[2, 3])
+
+        # Rotate the input by 180 degrees, then rotate the output back
+        bev_rot180 = torch.rot90(bev, k=2, dims=[2, 3])
+        score_rot180, desc_rot180 = model(bev_rot180)
+        desc_rot180_back = torch.rot90(desc_rot180, k=-2, dims=[2, 3])
+
+    # One cosine similarity over the whole flattened descriptor map per rotation
+    cos_sim_90 = torch.nn.functional.cosine_similarity(
+        desc_orig.flatten(), desc_rot90_back.flatten(), dim=0)
+    cos_sim_180 = torch.nn.functional.cosine_similarity(
+        desc_orig.flatten(), desc_rot180_back.flatten(), dim=0)
+
+    print(f'原始 vs 旋转90°后特征 余弦相似度: {cos_sim_90.item():.4f}')
+    print(f'原始 vs 旋转180°后特征 余弦相似度: {cos_sim_180.item():.4f}')
+    print(f'(越接近1.0说明旋转不变性越好)')
+
+    # Figure: inputs and score maps on the top row, descriptor channel 0 comparison on the bottom row
+    fig, axes = plt.subplots(2, 4, figsize=(18, 8))
+
+    axes[0, 0].imshow(bev[0].permute(1, 2, 0).numpy())
+    axes[0, 0].set_title('原始BEV')
+    axes[0, 1].imshow(bev_rot90[0].permute(1, 2, 0).numpy())
+    axes[0, 1].set_title('旋转90°')
+    axes[0, 2].imshow(score_orig[0, 0].numpy(), cmap='hot')
+    axes[0, 2].set_title('原始Score')
+    axes[0, 3].imshow(score_rot90[0, 0].numpy(), cmap='hot')
+    axes[0, 3].set_title('旋转90° Score')
+
+    axes[1, 0].imshow(desc_orig[0, 0].numpy(), cmap='viridis')
+    axes[1, 0].set_title(f'原始Desc ch0')
+    axes[1, 1].imshow(desc_rot90_back[0, 0].numpy(), cmap='viridis')
+    axes[1, 1].set_title(f'旋回后Desc ch0\n相似度:{cos_sim_90.item():.3f}')
+    axes[1, 2].imshow((desc_orig[0, 0] - desc_rot90_back[0, 0]).abs().numpy(), cmap='Reds')
+    axes[1, 2].set_title('差异热图 ch0')
+    axes[1, 3].axis('off')
+
+    for ax in axes.flatten():  # switch off only the axes that hold no image or collection
+        if ax.collections or ax.images:
+            continue
+        ax.axis('off')
+
+    plt.suptitle('RICNN 旋转不变性测试', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'ricnn_rotation_invariance.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+    return cos_sim_90.item(), cos_sim_180.item()
+
+
+def visualize_ricnn_intermediate():
+    """Feed a synthetic patterned BEV image through RICNN's four stages and save one figure per stage."""
+    print('\n--- RICNN 中间特征可视化 ---')
+
+    model = RICNN()
+    model.eval()
+
+    # Structured (non-random) input built from a [-1, 1] coordinate grid
+    x = torch.linspace(-1, 1, 320)
+    y = torch.linspace(-1, 1, 320)
+    grid_y, grid_x = torch.meshgrid(y, x, indexing='ij')
+    r = torch.sqrt(grid_x ** 2 + grid_y ** 2)
+
+    bev = torch.zeros(1, 3, 320, 320)
+    bev[0, 0] = (torch.sin(grid_x * 10) * torch.cos(grid_y * 10) + 1) / 2
+    bev[0, 1] = (torch.cos(r * 5) + 1) / 2
+    bev[0, 2] = (r < 0.5).float()
+
+    # Stage-by-stage forward pass (pool4 is applied before both block3 and block4)
+    with torch.no_grad():
+        x1 = model.block1(bev)
+        x2 = model.pool2(x1)
+        x2 = model.block2(x2)
+        x3 = model.pool4(x2)
+        x3 = model.block3(x3)
+        x4 = model.pool4(x3)
+        x4 = model.block4(x4)
+
+    print(f'输入BEV: {bev.shape}')
+    print(f'block1 (RIConvBlock 3→16): {x1.shape}')
+    print(f'pool2+block2 (RIResBlock 16→32): {x2.shape}')
+    print(f'pool4+block3 (RIResBlock 32→64): {x3.shape}')
+    print(f'pool4+block4 (RIResBlock 64→128): {x4.shape}')
+
+    visualize_tensor(x1, 'RICNN Block1 输出 (16通道)', 'ricnn_block1.png')
+    visualize_tensor(x2, 'RICNN Block2 输出 (32通道)', 'ricnn_block2.png')
+    visualize_tensor(x3, 'RICNN Block3 输出 (64通道)', 'ricnn_block3.png')
+    visualize_tensor(x4, 'RICNN Block4 输出 (128通道)', 'ricnn_block4.png')
+
+
+def visualize_position_encoding():
+    """Run EncodePosition on random keypoints/features and plot a pairwise-distance matrix and one histogram."""
+    print('\n--- EncodePosition 位置编码可视化 ---')
+
+    ep = EncodePosition(feature_size=128)
+    ep.eval()
+
+    # 150 simulated BEV keypoints per batch item, shape (B, 150, 4) — columns [x, y, z, intensity]
+    kpts = torch.randn(2, 150, 4)
+    kpts[:, :, :2] = kpts[:, :, :2] * 30  # scale x, y by 30 (std of 30, not a hard ±30 m bound)
+    kpts[:, :, 2] = 0  # z = 0 (BEV plane)
+    kpts[:, :, 3] = 1  # intensity = 1
+
+    # Simulated features, shape (B, 128, 150)
+    fea = torch.randn(2, 128, 150)
+
+    with torch.no_grad():
+        fea_encoded = ep(kpts, fea)
+
+    print(f'关键点输入: {kpts.shape}')
+    print(f'原始特征: {fea.shape}')
+    print(f'位置编码后特征: {fea_encoded.shape}')
+
+    # Pairwise distances for batch item 0; the norm spans all 4 columns, but z and intensity are constant here
+    x1 = kpts[0].unsqueeze(1)  # (150, 1, 4)
+    x2 = kpts[0].unsqueeze(0)  # (1, 150, 4)
+    dx = x1 - x2
+    distance = dx.norm(p=2, dim=2)  # (150, 150)
+
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    im0 = axes[0].imshow(distance.numpy(), cmap='plasma')
+    axes[0].set_title('关键点间距离矩阵 (150x150)')
+    axes[0].set_xlabel('Keypoint j')
+    axes[0].set_ylabel('Keypoint i')
+    plt.colorbar(im0, ax=axes[0])
+
+    # Example histogram for the first keypoint (NOTE(review): bins/min/max presumably match EncodePosition — confirm)
+    hist = torch.histc(distance[0], bins=16, min=1, max=80)
+    axes[1].bar(range(16), hist.numpy(), color='steelblue')
+    axes[1].set_title('距离直方图 (16 bins, 1-80m)\n用于位置编码')
+    axes[1].set_xlabel('Distance Bin')
+    axes[1].set_ylabel('Count')
+
+    plt.suptitle('EncodePosition 位置编码模块', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'ricnn_position_encoding.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def analyze_parameters():
+    """Print the total and per-child-module parameter counts of a freshly built RICNN()."""
+    print('\n--- 参数量分析 ---')
+    model = RICNN()
+    total = sum(p.numel() for p in model.parameters())
+    print(f'总参数量: {total:,} ({total / 1e6:.2f}M)')
+    for name, module in model.named_children():
+        params = sum(p.numel() for p in module.parameters())
+        print(f'  {name:20s}: {params:>10,} params ({params / 1e3:.1f}K)')
+
+
+def main():  # Script entry point: parameter report, then the five visualisations below, then a text summary
+    print('=' * 60)
+    print('RICNN (旋转不变CNN) 网络结构与特征可视化')
+    print('=' * 60)
+
+    analyze_parameters()
+
+    # 1. Kernel distance-group figure
+    visualize_ri_conv_kernel()
+
+    # 2. Pooling-region figure
+    visualize_ri_pooling()
+
+    # 3. Intermediate feature figures
+    visualize_ricnn_intermediate()
+
+    # 4. Rotation test (its returned similarities are not used here)
+    test_rotation_invariance()
+
+    # 5. Position-encoding figure
+    visualize_position_encoding()
+
+    print('\n' + '=' * 60)
+    print('网络结构总结:')
+    print('=' * 60)
+    print("""
+    RICNN (Rotation-Invariant CNN):
+    ┌──────────────────────────────────────────────────────┐
+    │ 输入: BEV图像 (B, 3, 320, 320)                       │
+    │   ↓                                                  │
+    │ block1: RIConvBlock(3→16)  → (B, 16, 320, 320)      │
+    │   ↓ RIMaxpool2d(2)                                   │
+    │ block2: RIResBlock(16→32)  → (B, 32, 160, 160)      │
+    │   ↓ RIMaxpool2d(5, s=4)                              │
+    │ block3: RIResBlock(32→64)  → (B, 64, 40, 40)        │
+    │   ↓ RIMaxpool2d(5, s=4)                              │
+    │ block4: RIResBlock(64→128) → (B, 128, 10, 10)        │
+    │   ↓                                                  │
+    │ 多尺度特征聚合 (1x1conv + 上采样 + concat)             │
+    │   → (B, 128, 320, 320)                               │
+    │   ↓ Conv1x1(128→129)                                  │
+    │ 输出: score(B,1,320,320) + desc(B,128,320,320)       │
+    └──────────────────────────────────────────────────────┘
+
+    旋转不变性的实现:
+    - RIConv2d: 根据kernel位置到中心的欧氏距离分组
+      同距离的位置共享权重 → 旋转后权重不变
+    - RIMaxpool2d: 只在圆形邻域内取max（忽略角点）
+    - RIAvgpool2d: 只在圆形邻域内取mean
+
+    EncodePosition (位置编码):
+    - 输入: 150个关键点的3D坐标
+    - 计算150×150距离矩阵 → 直方图(16 bins) → MLP
+    - 残差加到特征上，增强空间感知能力
+    """)
+
+    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/network_learning/03_converter_demo.py b/network_learning/03_converter_demo.py
new file mode 100644
index 0000000..b51ccec
--- /dev/null
+++ b/network_learning/03_converter_demo.py
@@ -0,0 +1,230 @@
+"""
+Converter 跨模态特征转换器 Demo
+================================
+Converter 是跨模态融合的核心组件，负责在不同模态之间转换特征：
+  - cvt_bev: 图像特征 → BEV空间特征
+  - cvt_img: BEV特征 → 图像空间特征
+
+结构:
+  Self-Attention (MHA) + Conv1d瓶颈残差块
+  输入: (B, 128, N)  N个特征点
+  输出: (B, 128, N)  转换后的特征
+
+作用: 使两个模态的特征在同一个空间中对齐，便于后续匹配和融合
+"""
+
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from net import Converter
+
+OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+
+def visualize_feature_similarity(fea_before, fea_after, title, save_name):
+    """Save a 2x3 figure of point-to-point cosine-similarity matrices and value histograms before/after conversion."""
+    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
+
+    # Cosine similarity between points before conversion (normalised along dim 1, batch item 0 only)
+    fea_before_norm = fea_before / (fea_before.norm(dim=1, keepdim=True) + 1e-8)
+    sim_before = (fea_before_norm[0].T @ fea_before_norm[0]).detach().numpy()
+
+    im0 = axes[0, 0].imshow(sim_before, cmap='RdYlBu_r', vmin=-1, vmax=1)
+    axes[0, 0].set_title('转换前 特征相似度矩阵')
+    axes[0, 0].set_xlabel('Point j'); axes[0, 0].set_ylabel('Point i')
+    plt.colorbar(im0, ax=axes[0, 0])
+
+    # Cosine similarity between points after conversion
+    fea_after_norm = fea_after / (fea_after.norm(dim=1, keepdim=True) + 1e-8)
+    sim_after = (fea_after_norm[0].T @ fea_after_norm[0]).detach().numpy()
+
+    im1 = axes[0, 1].imshow(sim_after, cmap='RdYlBu_r', vmin=-1, vmax=1)
+    axes[0, 1].set_title('转换后 特征相似度矩阵')
+    axes[0, 1].set_xlabel('Point j'); axes[0, 1].set_ylabel('Point i')
+    plt.colorbar(im1, ax=axes[0, 1])
+
+    # Absolute difference between the two similarity matrices
+    im2 = axes[0, 2].imshow(np.abs(sim_after - sim_before), cmap='YlOrRd')
+    axes[0, 2].set_title('相似度变化 |差值|')
+    axes[0, 2].set_xlabel('Point j'); axes[0, 2].set_ylabel('Point i')
+    plt.colorbar(im2, ax=axes[0, 2])
+
+    # Distribution of raw feature values before conversion
+    vals_before = fea_before[0].detach().numpy().flatten()
+    axes[1, 0].hist(vals_before, bins=50, color='steelblue', edgecolor='white', alpha=0.7)
+    axes[1, 0].set_title('转换前 特征值分布')
+    axes[1, 0].set_xlabel('Feature Value')
+
+    # Distribution of raw feature values after conversion
+    vals_after = fea_after[0].detach().numpy().flatten()
+    axes[1, 1].hist(vals_after, bins=50, color='coral', edgecolor='white', alpha=0.7)
+    axes[1, 1].set_title('转换后 特征值分布')
+    axes[1, 1].set_xlabel('Feature Value')
+
+    # Both distributions overlaid for comparison
+    axes[1, 2].hist(vals_before, bins=50, color='steelblue', edgecolor='white',
+                    alpha=0.5, label='Before')
+    axes[1, 2].hist(vals_after, bins=50, color='coral', edgecolor='white',
+                    alpha=0.5, label='After')
+    axes[1, 2].set_title('分布对比')
+    axes[1, 2].legend()
+
+    plt.suptitle(title, fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, save_name)
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_attention(converter, fea_input):
+    """Plot hand-computed attention weights of the first (up to 6) query points of a (B, C, N) input."""
+    _, _, n = fea_input.shape
+    x1 = fea_input.permute(0, 2, 1)  # B, N, C
+
+    # Single-head softmax(QK^T / sqrt(d_model)); NOTE(review): the real mha may split heads — confirm
+    with torch.no_grad():
+        q = converter.mha.w_q(x1)
+        k = converter.mha.w_k(x1)
+        weights = torch.nn.functional.softmax(
+            torch.matmul(q, k.transpose(-2, -1)) / (converter.mha.d_model ** 0.5),
+            dim=-1
+        )
+
+    # One bar chart per query point, limited to the first 50 keys
+    n_show = 6
+    n = min(n, weights.shape[1])
+
+    fig, axes = plt.subplots(2, 3, figsize=(16, 10))
+    for idx in range(min(n_show, n)):
+        ax = axes[idx // 3, idx % 3]
+        ax.bar(range(min(n, 50)), weights[0, idx, :min(n, 50)].detach().numpy(),
+               color='steelblue', width=1.0)
+        ax.set_title(f'Query Point {idx} 的 Attention')
+        ax.set_xlabel('Key Point')
+        ax.set_ylabel('Weight')
+        ax.axhline(y=1.0 / n, color='red', linestyle='--', alpha=0.5, label=f'平均={1/n:.3f}')
+        ax.legend(fontsize=8)
+
+    for idx in range(min(n_show, n), 6):  # blank the panels that got no query (loop was always empty before)
+        axes[idx // 3, idx % 3].axis('off')
+
+    plt.suptitle('Converter Self-Attention 权重分析', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'converter_attention.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def test_cross_modal_convert():
+    """Push random 'image' and 'BEV' features through two Converters and save similarity/attention figures."""
+    print('\n--- 跨模态转换测试 ---')
+
+    torch.manual_seed(42)  # seed before building the converters so their random weights are reproducible too
+    converter_bev = Converter(in_c=128).eval()  # eval mode, consistent with the other demos' inference runs
+    converter_img = Converter(in_c=128).eval()
+
+    # Simulated features for the two modalities
+    # Image-space features (stand-in for N points sampled from an image feature map)
+    fea_img_space = torch.randn(2, 128, 100)  # B=2, C=128, N=100
+
+    # BEV-space features
+    fea_bev_space = torch.randn(2, 128, 100)
+
+    with torch.no_grad():
+        # Image -> BEV: convert image-space features with cvt_bev's stand-in
+        fea_to_bev = converter_bev(fea_img_space)
+
+        # BEV -> image: convert BEV-space features with cvt_img's stand-in
+        fea_to_img = converter_img(fea_bev_space)
+
+    print(f'图像空间特征输入: {fea_img_space.shape}')
+    print(f'→ cvt_bev 转换后: {fea_to_bev.shape}')
+    print(f'BEV空间特征输入: {fea_bev_space.shape}')
+    print(f'→ cvt_img 转换后: {fea_to_img.shape}')
+
+    # Before/after figures for each direction
+    visualize_feature_similarity(
+        fea_img_space, fea_to_bev,
+        'cvt_bev: 图像特征 → BEV空间',
+        'converter_img_to_bev.png'
+    )
+
+    visualize_feature_similarity(
+        fea_bev_space, fea_to_img,
+        'cvt_img: BEV特征 → 图像空间',
+        'converter_bev_to_img.png'
+    )
+
+    # Attention figure for the image -> BEV converter
+    visualize_attention(converter_bev, fea_img_space)
+
+
+def analyze_architecture():
+    """Print total and per-child parameter counts of Converter(in_c=128), then a static structure diagram."""
+    print('\n--- Converter 架构分析 ---')
+
+    converter = Converter(in_c=128)
+    total = sum(p.numel() for p in converter.parameters())
+    print(f'总参数量: {total:,} ({total / 1e3:.1f}K)')
+
+    for name, module in converter.named_children():
+        params = sum(p.numel() for p in module.parameters())
+        print(f'  {name:15s}: {params:>10,} params')
+
+    # Hand-written description (NOTE(review): not derived from the model — verify against net.Converter)
+    print("""
+    Converter 内部结构:
+
+    ┌──────────────────────────────────────────┐
+    │         输入 x: (B, 128, N)               │
+    │           │                               │
+    │     ┌─────┴─────┐                         │
+    │     │ 路径1: MHA │  路径2: Conv1d瓶颈块     │
+    │     │ Self-Attn  │  Conv1d(128→32→128)     │
+    │     │  x → x2    │  x → x3                │
+    │     └─────┬─────┘                         │
+    │           │                               │
+    │     concat([x2, x3]) → Conv1d(256→128)    │
+    │           │                               │
+    │     输出: (B, 128, N)                      │
+    └──────────────────────────────────────────┘
+
+    MHA (多头自注意力):
+    - d_model=128, num_heads=4
+    - Q,K,V → 点积attention → FFN
+    - 捕捉特征点之间的全局关系
+
+    Conv1d瓶颈块:
+    - 128→32→16→32→128→128 (bottleneck)
+    - 逐点卷积，提取通道间的非线性关系
+
+    两条路径互补:
+    - MHA: 全局上下文建模
+    - Conv1d: 局部特征变换
+    - 残差连接 + concat融合
+    """)
+
+
+def main():  # Script entry point: architecture report, then the cross-modal conversion demo
+    print('=' * 60)
+    print('Converter (跨模态特征转换器) 结构与功能可视化')
+    print('=' * 60)
+
+    analyze_architecture()
+    test_cross_modal_convert()
+
+    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/network_learning/04_generator_fusion_demo.py b/network_learning/04_generator_fusion_demo.py
new file mode 100644
index 0000000..722e655
--- /dev/null
+++ b/network_learning/04_generator_fusion_demo.py
@@ -0,0 +1,304 @@
+"""
+Generator & FusionHead 全景生成器与融合头 Demo
+==============================================
+Generator: 从变长图像特征生成固定数量的全景特征
+  Self-Attention → ConvTranspose1d(k3,s3) → AdaptiveMaxPool1d(150)
+  输入: (B, 128, N) N可变
+  输出: (B, 128, 150) 固定150个
+
+FusionHead: 融合多来源特征
+  对 [original, gen, gen_gen, kpl_gen] 四个特征
+  → pair-wise Self-Attention → max聚合 → Cross-Attention → 输出
+  输入: (B, 128, 150, 4)
+  输出: (B, 128, 150) 融合后特征
+"""
+
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from net import Generator, FusionHead, Attention
+
+OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')  # figures are written to ./output beside this script
+os.makedirs(OUTPUT_DIR, exist_ok=True)  # executed at import time; no error if the directory already exists
+
+
+def test_generator():
+    """Run Generator on a random variable-length input and plot input vs. output feature statistics."""
+    print('\n--- Generator 全景特征生成器 ---')
+
+    generator = Generator(in_c=128, num=150)
+    generator.eval()
+
+    # Simulated input with B=2, C=128, N=200 (N is the length that is meant to vary)
+    torch.manual_seed(42)
+    x = torch.randn(2, 128, 200)
+
+    with torch.no_grad():
+        output = generator(x)
+
+    print(f'输入: {x.shape} (变长，N=200)')
+    print(f'输出: {output.shape} (固定，K=150)')
+
+    # Visualize input and output features on a 2x3 grid
+    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
+
+    # Cosine-similarity matrix of the input features (first 50 points of sample 0)
+    x_norm = x[0] / (x[0].norm(dim=0, keepdim=True) + 1e-8)
+    sim_in = (x_norm.T[:50] @ x_norm[:, :50]).detach().numpy()
+    im0 = axes[0, 0].imshow(sim_in, cmap='RdYlBu_r', vmin=-1, vmax=1)
+    axes[0, 0].set_title('输入特征相似度 (前50点)')
+    plt.colorbar(im0, ax=axes[0, 0])
+
+    # Cosine-similarity matrix of the output features (all points of sample 0)
+    out_norm = output[0] / (output[0].norm(dim=0, keepdim=True) + 1e-8)
+    sim_out = (out_norm.T @ out_norm).detach().numpy()
+    im1 = axes[0, 1].imshow(sim_out, cmap='RdYlBu_r', vmin=-1, vmax=1)
+    axes[0, 1].set_title('输出特征相似度 (150点)')
+    plt.colorbar(im1, ax=axes[0, 1])
+
+    # Heat map of the raw input features (first 30 points of sample 0)
+    im2 = axes[0, 2].imshow(x[0, :, :30].detach().numpy(), cmap='viridis', aspect='auto')
+    axes[0, 2].set_title('输入特征 (30点)')
+    axes[0, 2].set_xlabel('Point Index'); axes[0, 2].set_ylabel('Channel')
+    plt.colorbar(im2, ax=axes[0, 2])
+
+    # Heat map of the output features (first 30 points of sample 0)
+    im3 = axes[1, 0].imshow(output[0, :, :30].detach().numpy(), cmap='viridis', aspect='auto')
+    axes[1, 0].set_title('输出特征 (30点)')
+    axes[1, 0].set_xlabel('Point Index'); axes[1, 0].set_ylabel('Channel')
+    plt.colorbar(im3, ax=axes[1, 0])
+
+    # Text-only panel describing the ConvTranspose1d + AdaptiveMaxPool1d length change (static labels, not measured)
+    axes[1, 1].set_title('Generator 内部变换', fontsize=12)
+    axes[1, 1].text(0.5, 0.8, 'ConvTranspose1d(k3,s3)', transform=axes[1, 1].transAxes,
+                    ha='center', fontsize=11, bbox=dict(boxstyle='round', facecolor='lightblue'))
+    axes[1, 1].text(0.5, 0.6, f'200 → 200*3 = 600', transform=axes[1, 1].transAxes,
+                    ha='center', fontsize=10)
+    axes[1, 1].text(0.5, 0.4, 'AdaptiveMaxPool1d(150)', transform=axes[1, 1].transAxes,
+                    ha='center', fontsize=11, bbox=dict(boxstyle='round', facecolor='lightgreen'))
+    axes[1, 1].text(0.5, 0.2, f'600 → 150', transform=axes[1, 1].transAxes,
+                    ha='center', fontsize=10)
+    axes[1, 1].axis('off')
+
+    # Overlaid histograms of input vs. output feature values (sample 0)
+    axes[1, 2].hist(x[0].detach().numpy().flatten(), bins=50, alpha=0.5,
+                    label='Input', color='steelblue')
+    axes[1, 2].hist(output[0].detach().numpy().flatten(), bins=50, alpha=0.5,
+                    label='Output', color='coral')
+    axes[1, 2].set_title('特征值分布对比')
+    axes[1, 2].legend()
+
+    plt.suptitle('Generator: 变长特征→固定大小特征', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'generator_demo.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+    # Feed several input lengths through the same generator and print the resulting shapes
+    print('\nGenerator 对不同输入长度的适应:')
+    for n in [50, 100, 200, 500]:
+        x_test = torch.randn(1, 128, n)
+        with torch.no_grad():
+            out = generator(x_test)
+        print(f'  N={n:4d} → 输出形状 {out.shape}')
+
+
+def test_fusion_head():
+    """Fuse four correlated synthetic feature sources with FusionHead and plot similarity-to-original maps."""
+    print('\n--- FusionHead 融合头 ---')
+
+    fusion_head = FusionHead(in_c=128)
+    fusion_head.eval()
+
+    # Simulate four feature sources:
+    #   [0]: fea_kpt_original     - original BEV keypoint features
+    #   [1]: fea_kpt_original_gen - BEV features produced by the Generator
+    #   [2]: fea_kpt_gen_gen      - output of the dual-path converter
+    #   [3]: fea_kpl_gen          - BEV features mapped into image space
+    B, C, K = 2, 128, 150
+    torch.manual_seed(42)
+
+    # Derive every source from one base tensor so they are correlated but not identical
+    base = torch.randn(B, C, K)
+    fea_original = base
+    fea_gen = base + 0.3 * torch.randn(B, C, K)
+    fea_gen_gen = fea_gen + 0.2 * torch.randn(B, C, K)
+    fea_kpl_gen = base + 0.5 * torch.randn(B, C, K)
+
+    fea_kpts = torch.stack([fea_original, fea_gen, fea_gen_gen, fea_kpl_gen], dim=3)  # source axis last: (B, C, K, 4)
+    print(f'输入: {fea_kpts.shape} [B, C, K, 4来源]')
+
+    with torch.no_grad():
+        fea_fused = fusion_head(fea_kpts)
+
+    print(f'输出: {fea_fused.shape} [B, C, K] 融合特征')
+
+    # Visualization: one K x K cosine-similarity map per source (vs. the original), plus one for the fused output
+    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
+
+    names = ['Original (BEV原始)', 'Generated (全景生成)',
+             'Gen_Gen (双路径)', 'KPL_Gen (图像空间)']
+
+    for idx in range(4):
+        ax = axes[idx // 2, idx % 2]
+        sim = torch.nn.functional.cosine_similarity(
+            fea_kpts[0, :, :, 0].T.unsqueeze(-1),  # (K, C, 1)
+            fea_kpts[0, :, :, idx].unsqueeze(0),   # (1, C, K); broadcasts with the line above to (K, C, K)
+            dim=1
+        )
+        im = ax.imshow(sim.detach().numpy(), cmap='RdYlBu_r', vmin=-1, vmax=1)
+        ax.set_title(f'{names[idx]}\nvs Original 相似度')
+        ax.set_xlabel('Point'); ax.set_ylabel('Point')
+        plt.colorbar(im, ax=ax)
+
+    # Fused features vs. original features (assumes fusion_head returns (B, C, K), as the print above labels it)
+    ax = axes[1, 2]
+    sim_fused = torch.nn.functional.cosine_similarity(
+        fea_original[0].T.unsqueeze(-1),  # (K, C, 1)
+        fea_fused[0].unsqueeze(0),        # (1, C, K); reduced over C below, giving a (K, K) map
+        dim=1
+    )
+    im = ax.imshow(sim_fused.detach().numpy(), cmap='RdYlBu_r', vmin=-1, vmax=1)
+    ax.set_title('Fused vs Original 相似度')
+    ax.set_xlabel('Point'); ax.set_ylabel('Point')
+    plt.colorbar(im, ax=ax)
+
+    plt.suptitle('FusionHead: 多来源特征融合分析', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'fusion_head_demo.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_attention_detail():
+    """Run a standalone Attention module as self-attention on random data and plot one weight matrix."""
+    print('\n--- FusionHead Attention 详细分析 ---')
+
+    att = Attention(d_model=128)
+    att.eval()
+
+    # Simulate self-attention over 3 feature slots per row
+    B, N_pair, C = 2, 3, 128
+    torch.manual_seed(42)
+    x = torch.randn(B * 2, N_pair, C)  # stands in for (batch * samples) rows, each holding 3 features
+
+    with torch.no_grad():
+        output, weights = att(x, x, x)
+
+    print(f'Self-Attention 输入: {x.shape}')
+    print(f'输出: {output.shape}')
+    print(f'Attention权重: {weights.shape} (B, 3, 3)')
+
+    # Plot the attention weights of the first row (annotated heat map)
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+
+    weights_np = weights[0].detach().numpy()
+    im0 = axes[0].imshow(weights_np, cmap='YlOrRd', vmin=0, vmax=1)
+    axes[0].set_title('Self-Attention 权重 (3对特征)')
+    axes[0].set_xticks(range(3))
+    axes[0].set_xticklabels(['Original', 'Generated', 'Gen_Gen'])
+    axes[0].set_yticks(range(3))
+    axes[0].set_yticklabels(['Original', 'Generated', 'Gen_Gen'])
+
+    for i in range(3):
+        for j in range(3):
+            axes[0].text(j, i, f'{weights_np[i, j]:.3f}', ha='center', va='center',
+                         fontsize=12, color='white' if weights_np[i, j] > 0.5 else 'black')
+    plt.colorbar(im0, ax=axes[0])
+
+    # Text-only panel listing the FusionHead attention steps (static description, not computed here)
+    axes[1].set_title('FusionHead Attention 流程', fontsize=12)
+    steps = [
+        '1. 拼接4种特征 [original, gen, gen_gen, kpl_gen]',
+        '2. 取前3种 [original, gen, gen_gen]',
+        '3. 对每个样本的3对特征做Self-Attention',
+        '4. max聚合 → 每样本1个特征',
+        '5. Cross-Attention with kpl_gen (图像空间特征)',
+        '6. concat(original, cross_out) → Conv1d → 输出'
+    ]
+    for i, step in enumerate(steps):
+        axes[1].text(0.1, 0.9 - i * 0.15, step, transform=axes[1].transAxes,
+                     fontsize=10, family='monospace',
+                     bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.7))
+    axes[1].axis('off')
+
+    plt.suptitle('FusionHead Attention 机制详解', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'fusion_attention_detail.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def analyze_parameters():
+    """Print total and per-child parameter counts for Generator and FusionHead."""
+    print('\n--- 参数量分析 ---')
+
+    # Insertion order of the dict keeps Generator first, both for construction and for printing.
+    modules = {'Generator': Generator(in_c=128, num=150), 'FusionHead': FusionHead(in_c=128)}
+
+    for name, model in modules.items():
+        total = sum(w.numel() for w in model.parameters())
+        print(f'\n{name}: {total:,} params ({total / 1e3:.1f}K)')
+        for n, m in model.named_children():
+            p = sum(w.numel() for w in m.parameters())
+            print(f'  {n:15s}: {p:>10,} params')
+
+
+def main():
+    """Run the Generator/FusionHead demos in order, then print a text summary of both modules."""
+    rule = '=' * 60
+    print(rule, 'Generator & FusionHead 结构与功能可视化', rule, sep='\n')
+    demo_steps = (
+        analyze_parameters,
+        test_generator,
+        test_fusion_head,
+        visualize_attention_detail,
+    )
+    for step in demo_steps:
+        step()
+    print('\n' + rule, '结构总结:', rule, sep='\n')
+    print("""
+    Generator (全景特征生成器):
+    ┌──────────────────────────────────────────────┐
+    │ 输入: (B, 128, N)  N可变                      │
+    │   ↓ Self-Attention (MHA)                     │
+    │ x2: (B, 128, N)  全局上下文特征                │
+    │   ↓ ConvTranspose1d(k3,s3)                    │
+    │ x3: (B, 128, N*3)  上采样扩展                  │
+    │   ↓ AdaptiveMaxPool1d(150)                    │
+    │ 输出: (B, 128, 150)  固定K个全景特征            │
+    └──────────────────────────────────────────────┘
+    作用: 将BEV中可变数量的匹配点特征压缩为固定150个，
+         与BEV关键点数量对齐
+
+    FusionHead (跨模态融合头):
+    ┌──────────────────────────────────────────────┐
+    │ 输入: (B, 128, 150, 4)                        │
+    │   [original, gen, gen_gen, kpl_gen]           │
+    │   ↓                                           │
+    │ 对前3对 (B*N, 3, C):                           │
+    │   Self-Attn → max(dim=1) → (B*N, C)           │
+    │   ↓ reshape → (B, N, C)                       │
+    │ Cross-Attention with kpl_gen                  │
+    │   ↓                                           │
+    │ concat(original, cross_out) → Conv1d(256→128) │
+    │ 输出: (B, 128, 150) 融合特征                   │
+    └──────────────────────────────────────────────┘
+    作用: 整合多来源特征，增强融合表示
+    """)
+
+    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/network_learning/05_netvlad_demo.py b/network_learning/05_netvlad_demo.py
new file mode 100644
index 0000000..efa0e27
--- /dev/null
+++ b/network_learning/05_netvlad_demo.py
@@ -0,0 +1,308 @@
+"""
+NetVLAD 全局描述子 Demo
+=======================
+NetVLAD (Vector of Locally Aggregated Descriptors) 将局部特征聚合为全局描述子。
+
+原理:
+1. Soft Assignment: 每个局部特征软分配到K个聚类中心
+2. Residual: 计算特征与聚类中心的残差
+3. Aggregation: 加权求和残差
+4. Normalization: 逐聚类L2归一化 + 全局L2归一化
+
+论文中使用 cluster_num=16, feature_size=128
+输出: 16 × 128 = 2048 维全局描述子
+"""
+
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from netvlad import NetVLAD, NetVLADLoupe
+
+OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')  # figures are written to ./output beside this script
+os.makedirs(OUTPUT_DIR, exist_ok=True)  # executed at import time; no error if the directory already exists
+
+
+def test_netvlad_basic():
+    """Smoke-test NetVLAD: push one random batch through it and report shapes and output norms."""
+    print('\n--- NetVLAD 基本功能测试 ---')
+
+    netvlad = NetVLAD(fea_size=128, num_clusters=16)  # built before seeding; only the input is tied to seed 42
+    netvlad.eval()
+
+    torch.manual_seed(42)
+    features = torch.randn(2, 128, 150, 1)  # laid out as (B=2, C=128, K=150, W=1)
+    with torch.no_grad():
+        vlad = netvlad(features)
+    report = (
+        f'输入特征: {features.shape}  [B, C, K, W]',
+        f'VLAD输出: {vlad.shape}  [B, cluster_num × C = 2048]',
+        f'VLAD L2 norm: {vlad.norm(dim=1)}',  # expected to be all ones if the output is L2-normalized
+    )
+    print(*report, sep='\n')
+
+
+def visualize_soft_assignment():
+    """Recompute NetVLAD's soft assignment for one random input and plot six views of it."""
+    print('\n--- 软分配可视化 ---')
+
+    netvlad = NetVLAD(fea_size=128, num_clusters=16)
+    netvlad.eval()
+
+    torch.manual_seed(42)
+    features = torch.randn(1, 128, 150, 1)
+
+    # Rebuild the intermediate result by hand from the module's own layers: conv -> relu -> softmax over dim 1
+    with torch.no_grad():
+        x = features
+        soft_assign = netvlad.conv(x)
+        soft_assign = netvlad.relu(soft_assign)
+        soft_assign = torch.nn.functional.softmax(soft_assign, dim=1)
+
+    # soft_assign is expected to be (B, 16, 150, 1); drop the batch and trailing axes for plotting
+    assign_np = soft_assign[0, :, :, 0].numpy()  # (16, 150)
+
+    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
+
+    # Soft-assignment matrix (clusters x points)
+    im0 = axes[0, 0].imshow(assign_np, cmap='YlOrRd', aspect='auto')
+    axes[0, 0].set_title('软分配矩阵 (16 clusters × 150 points)')
+    axes[0, 0].set_xlabel('Point Index')
+    axes[0, 0].set_ylabel('Cluster')
+    plt.colorbar(im0, ax=axes[0, 0])
+
+    # Total weight received by each cluster; the dashed line is the uniform share 150 / 16
+    cluster_weight = assign_np.sum(axis=1)
+    axes[0, 1].bar(range(16), cluster_weight, color='steelblue')
+    axes[0, 1].axhline(y=150 / 16, color='red', linestyle='--',
+                       label=f'平均={150 / 16:.1f}')
+    axes[0, 1].set_title('每个聚类的总权重')
+    axes[0, 1].set_xlabel('Cluster')
+    axes[0, 1].legend()
+
+    # Hard (argmax) cluster of every point, shown as a histogram
+    max_cluster = assign_np.argmax(axis=0)
+    axes[0, 2].hist(max_cluster, bins=16, color='coral', edgecolor='white')
+    axes[0, 2].set_title('每个点被分配到哪个聚类 (argmax)')
+    axes[0, 2].set_xlabel('Cluster')
+    axes[0, 2].set_ylabel('点数')
+
+    # Per-point assignment entropy (1e-8 keeps log finite when a weight is exactly 0)
+    entropy = -(assign_np * np.log(assign_np + 1e-8)).sum(axis=0)
+    axes[1, 0].bar(range(150), entropy, color='steelblue', width=1.0)
+    axes[1, 0].set_title('每个点的分配熵\n(高=模糊分配, 低=确定分配)')
+    axes[1, 0].set_xlabel('Point Index')
+    axes[1, 0].set_ylabel('Entropy')
+
+    # Assignment weights of the first 3 clusters across all points
+    for i in range(3):
+        axes[1, 1].plot(assign_np[i], alpha=0.7, label=f'Cluster {i}')
+    axes[1, 1].set_title('前3个聚类的分配权重')
+    axes[1, 1].set_xlabel('Point Index')
+    axes[1, 1].set_ylabel('Weight')
+    axes[1, 1].legend(fontsize=8)
+
+    # Cluster centroids in 2-D: PCA scores, i.e. mean-centered centroids projected on the top-2 components
+    centroids = netvlad.centroids.detach().numpy()  # (16, 128)
+    # Center once and reuse it for both the SVD and the projection, so the plotted values are true PCA scores
+    centered = centroids - centroids.mean(axis=0)
+    centroids_2d = centered @ np.linalg.svd(centered, full_matrices=False)[2][:2].T
+
+    axes[1, 2].scatter(centroids_2d[:, 0], centroids_2d[:, 1], c=range(16),
+                       cmap='tab20', s=200, edgecolors='black')
+    for i in range(16):
+        axes[1, 2].annotate(str(i), (centroids_2d[i, 0], centroids_2d[i, 1]),
+                            fontsize=10, ha='center', va='center')
+    axes[1, 2].set_title('聚类中心 PCA 2D 可视化')
+    axes[1, 2].set_xlabel('PC1'); axes[1, 2].set_ylabel('PC2')
+
+    plt.suptitle('NetVLAD 软分配机制', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'netvlad_soft_assignment.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_vlad_structure():
+    """Compare VLAD vectors of two random 'scenes' (and a noisy copy of the first) and plot A, B and |A - B|."""
+    print('\n--- VLAD向量结构可视化 ---')
+
+    netvlad = NetVLAD(fea_size=128, num_clusters=16)
+    netvlad.eval()
+
+    # Two independent feature sets -> expected to give different VLAD vectors
+    torch.manual_seed(42)
+    fea1 = torch.randn(1, 128, 150, 1)  # scene A
+    fea2 = torch.randn(1, 128, 150, 1)  # scene B (a second, independent draw from the same seeded RNG stream)
+
+    with torch.no_grad():
+        vlad1 = netvlad(fea1)[0]  # (2048,)
+        vlad2 = netvlad(fea2)[0]
+
+    # Scene A plus small noise -> its VLAD is expected to stay close to vlad1
+    fea1_noisy = fea1 + 0.1 * torch.randn(1, 128, 150, 1)
+    with torch.no_grad():
+        vlad1_noisy = netvlad(fea1_noisy)[0]
+
+    sim_same = torch.nn.functional.cosine_similarity(vlad1, vlad1_noisy, dim=0)
+    sim_diff = torch.nn.functional.cosine_similarity(vlad1, vlad2, dim=0)
+
+    print(f'同场景(加噪声) VLAD相似度: {sim_same.item():.4f}')
+    print(f'不同场景 VLAD相似度: {sim_diff.item():.4f}')
+    print(f'区分度 (同-异): {sim_same.item() - sim_diff.item():.4f}')
+
+    fig, axes = plt.subplots(1, 3, figsize=(18, 5))
+
+    # Show each VLAD vector as a 16 x 128 image (one row per cluster)
+    vlad1_2d = vlad1.view(16, 128).numpy()
+    vlad2_2d = vlad2.view(16, 128).numpy()
+
+    im0 = axes[0].imshow(vlad1_2d, cmap='RdBu_r', aspect='auto')
+    axes[0].set_title('VLAD场景A (16×128)')
+    axes[0].set_xlabel('Feature Dim'); axes[0].set_ylabel('Cluster')
+    plt.colorbar(im0, ax=axes[0])
+
+    im1 = axes[1].imshow(vlad2_2d, cmap='RdBu_r', aspect='auto')
+    axes[1].set_title('VLAD场景B (16×128)')
+    axes[1].set_xlabel('Feature Dim'); axes[1].set_ylabel('Cluster')
+    plt.colorbar(im1, ax=axes[1])
+
+    im2 = axes[2].imshow(np.abs(vlad1_2d - vlad2_2d), cmap='YlOrRd', aspect='auto')
+    axes[2].set_title(f'|差异|  (cos_sim={sim_diff.item():.3f})')  # panel is A vs B, so report the A-vs-B similarity
+    axes[2].set_xlabel('Feature Dim'); axes[2].set_ylabel('Cluster')
+    plt.colorbar(im2, ax=axes[2])
+
+    plt.suptitle('NetVLAD 全局描述子结构', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'netvlad_vlad_structure.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def compare_netvlad_variants():
+    """Run NetVLAD and NetVLADLoupe on random inputs, print sizes/shapes, and save a text-only comparison figure."""
+    print('\n--- NetVLAD vs NetVLADLoupe 对比 ---')
+
+    # .eval() returns the module itself; inference mode matches the other demos and keeps norm layers deterministic
+    netvlad = NetVLAD(fea_size=128, num_clusters=16).eval()
+    netvlad_loupe = NetVLADLoupe(feature_size=128, cluster_size=16, output_dim=256).eval()
+
+    torch.manual_seed(42)
+    x = torch.randn(2, 128, 150, 1)  # NetVLAD input (B,C,H,W)
+    x_loupe = torch.randn(2, 150, 128)  # NetVLADLoupe input (B,N,C)
+
+    with torch.no_grad():
+        v1 = netvlad(x)
+        v2 = netvlad_loupe(x_loupe)
+
+    print(f'NetVLAD:        {sum(p.numel() for p in netvlad.parameters()):,} params')
+    print(f'  输入: {list(x.shape)} → 输出: {list(v1.shape)}')
+    print(f'NetVLADLoupe:   {sum(p.numel() for p in netvlad_loupe.parameters()):,} params')
+    print(f'  输入: {list(x_loupe.shape)} → 输出: {list(v2.shape)}')
+
+    # Schematic figure: two text-only panels (the step lists below are static descriptions)
+    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
+
+    # NetVLAD pipeline
+    axes[0].set_title('NetVLAD (论文使用)', fontsize=13, fontweight='bold')
+    steps_vlad = [
+        '输入: (B, 128, 150, 1)',
+        '↓ Conv2d(128→16) + Softmax',
+        '软分配: (B, 16, 150, 1)',
+        '↓ 残差 = x - centroids',
+        '残差: (B, 16, 150, 128)',
+        '↓ sum(软分配 × 残差)',
+        'VLAD: (B, 16, 128)',
+        '↓ L2归一化 (per cluster)',
+        '↓ flatten + L2归一化',
+        '输出: (B, 2048)'
+    ]
+    for i, s in enumerate(steps_vlad):
+        axes[0].text(0.1, 0.95 - i * 0.09, s, transform=axes[0].transAxes,
+                     fontsize=10, family='monospace')
+    axes[0].axis('off')
+
+    # NetVLADLoupe pipeline
+    axes[1].set_title('NetVLADLoupe', fontsize=13, fontweight='bold')
+    steps_loupe = [
+        '输入: (B, N, 128)',
+        '↓ x @ cluster_weights',
+        '↓ Softmax + BatchNorm',
+        '软分配: (B, N, 16)',
+        '↓ activation @ x',
+        '↓ 减去中心校正项 a',
+        '↓ L2归一化',
+        '↓ MLP: 2048 → 256',
+        '↓ Context Gating',
+        '输出: (B, 256)'
+    ]
+    for i, s in enumerate(steps_loupe):
+        axes[1].text(0.1, 0.95 - i * 0.09, s, transform=axes[1].transAxes,
+                     fontsize=10, family='monospace')
+    axes[1].axis('off')
+
+    plt.suptitle('NetVLAD 两种变体对比', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'netvlad_variants.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def main():
+    """Run the NetVLAD demos in order, then print a text summary of the VLAD computation."""
+    rule = '=' * 60
+    print(rule, 'NetVLAD 全局描述子 结构与功能可视化', rule, sep='\n')
+    demo_steps = (
+        test_netvlad_basic,
+        visualize_soft_assignment,
+        visualize_vlad_structure,
+        compare_netvlad_variants,
+    )
+    for step in demo_steps:
+        step()
+    print('\n' + rule, '结构总结:', rule, sep='\n')
+    print("""
+    NetVLAD (全局描述子聚合):
+
+    论文中使用:
+    - cluster_num: 16
+    - feature_size: 128
+    - 输出: 2048维全局描述子
+
+    VLAD计算步骤:
+    1. Soft Assignment: soft_assign = Softmax(Conv2d(128→16)(x))
+       每个局部特征被软分配到16个聚类中心
+
+    2. Residual: residual = x - centroids
+       计算特征与每个聚类中心的残差
+
+    3. VLAD Core: vlad = Σ(soft_assign × residual) / Σsoft_assign
+       按聚类聚合加权残差
+
+    4. Normalization:
+       - 逐聚类 L2 norm
+       - flatten
+       - 全局 L2 norm
+
+    最终VLAD融合:
+    vlads = sigmoid(w) × vlad_fusion + (1-sigmoid(w)) × vlad_bev
+    其中 w 是可学习参数
+
+    VLAD vs 平均池化:
+    - 平均池化: 丢失空间分布信息
+    - VLAD: 通过聚类保留了"哪些类型的特征在哪里出现"的信息
+    """)
+
+    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/network_learning/06_uot_demo.py b/network_learning/06_uot_demo.py
new file mode 100644
index 0000000..7948163
--- /dev/null
+++ b/network_learning/06_uot_demo.py
@@ -0,0 +1,356 @@
+"""
+UOT (Unbalanced Optimal Transport) 位姿估计 Demo
+=================================================
+UOTHead 使用 Sinkhorn 非平衡最优传输进行特征匹配和位姿估计。
+
+流程:
+1. Cosine Cost Matrix: C = 1 - cosine_sim(feat1, feat2)
+2. Sinkhorn Unbalanced OT: 迭代求解运输计划 T
+3. Point Projection: project_kpts = T @ kpts2 / sum(T)
+4. Weighted SVD: 从匹配点对估计刚体变换 R|t
+
+关键参数:
+- epsilon: 熵正则化（控制运输计划的平滑度）
+- gamma: 质量正则化（允许部分匹配）
+- sinkhorn_iter: 5次迭代
+"""
+
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from uot import UOTHead, sinkhorn_unbalanced, compute_rigid_transform
+
+OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')  # figures are written to ./output beside this script
+os.makedirs(OUTPUT_DIR, exist_ok=True)  # executed at import time; no error if the directory already exists
+
+
+def visualize_cost_matrix():
+    """Build a cosine cost matrix between two synthetic feature sets and plot it with a cost histogram."""
+    print('\n--- 代价矩阵 (Cost Matrix) ---')
+
+    torch.manual_seed(42)
+    # Simulated features for 150 keypoints each of a query and a positive frame
+    feat1 = torch.randn(2, 150, 128)  # query
+    feat2 = torch.randn(2, 150, 128)  # positive
+
+    # Make part of the features similar (imitating a real loop-closure pair):
+    # the first 100 points of feat2 become noisy copies of the same-index points of feat1
+    feat2[:, :100] = feat1[:, :100] + 0.1 * torch.randn(2, 100, 128)
+
+    # Cosine cost matrix: L2-normalize along the feature axis, then C = 1 - <f1, f2>
+    feat1_norm = feat1 / (feat1.norm(dim=2, keepdim=True) + 1e-8)
+    feat2_norm = feat2 / (feat2.norm(dim=2, keepdim=True) + 1e-8)
+    C = 1.0 - torch.bmm(feat1_norm, feat2_norm.transpose(1, 2))
+
+    C_np = C[0].numpy()
+
+    fig, axes = plt.subplots(1, 3, figsize=(18, 5))
+
+    im0 = axes[0].imshow(C_np, cmap='YlOrRd')
+    axes[0].set_title('Cost Matrix C = 1 - cos_sim')
+    axes[0].set_xlabel('Positive Point j')
+    axes[0].set_ylabel('Query Point i')
+    plt.colorbar(im0, ax=axes[0])
+
+    # Zoom on the first 30 x 30 entries (inside the range that was given correspondences)
+    im1 = axes[1].imshow(C_np[:30, :30], cmap='YlOrRd')
+    axes[1].set_title('Cost Matrix (前30×30)\n有模拟对应关系')
+    axes[1].set_xlabel('Positive Point j')
+    axes[1].set_ylabel('Query Point i')
+    plt.colorbar(im1, ax=axes[1])
+
+    # Diagonal vs. off-diagonal costs. NOTE(review): the diagonal also holds indices 100-149, which were NOT paired above
+    diag_cost = np.diag(C_np)
+    off_diag = C_np[~np.eye(150, dtype=bool)]
+
+    axes[2].hist(diag_cost, bins=30, alpha=0.6, label=f'对角线(匹配点)\nmean={diag_cost.mean():.3f}',
+                 color='green')
+    axes[2].hist(off_diag, bins=30, alpha=0.6, label=f'非对角线\nmean={off_diag.mean():.3f}',
+                 color='gray')
+    axes[2].set_title('Cost分布: 匹配 vs 非匹配')
+    axes[2].set_xlabel('Cost')
+    axes[2].legend(fontsize=8)
+
+    plt.suptitle('UOT 代价矩阵分析', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'uot_cost_matrix.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_sinkhorn():
+    """Hand-roll a few unbalanced-Sinkhorn updates on synthetic features and plot the plan after each one."""
+    print('\n--- Sinkhorn 迭代过程 ---')
+
+    torch.manual_seed(42)
+    # Build features with an obvious one-to-one correspondence
+    B, N, C = 1, 50, 128
+    feat1 = torch.randn(B, N, C)
+    feat1_norm = feat1 / (feat1.norm(dim=2, keepdim=True) + 1e-8)
+
+    # feat2 is a perturbed copy of feat1
+    feat2 = feat1 + 0.15 * torch.randn(B, N, C)
+    feat2_norm = feat2 / (feat2.norm(dim=2, keepdim=True) + 1e-8)
+
+    C = 1.0 - torch.bmm(feat1_norm, feat2_norm.transpose(1, 2))  # NOTE: rebinds C (was the channel count) to the cost matrix
+
+    epsilon = torch.tensor([0.05])
+    gamma = torch.tensor([1.0])
+
+    # Step through the Sinkhorn iterations manually so each intermediate plan can be drawn
+    K = torch.exp(-C / epsilon)
+    max_iter = 5
+    power = gamma / (gamma + epsilon + 1e-8)
+
+    a = torch.ones((B, N, 1)) / N
+    prob1 = torch.ones((B, N, 1)) / N
+    prob2 = torch.ones((B, N, 1)) / N
+
+    fig, axes = plt.subplots(2, 4, figsize=(18, 9))
+
+    # Panel 0: the kernel K before any scaling
+    K_np = K[0].numpy()
+    im0 = axes[0, 0].imshow(K_np, cmap='YlOrRd')
+    axes[0, 0].set_title('K (exp(-C/ε))\n迭代0')
+    axes[0, 0].set_xlabel('Positive'); axes[0, 0].set_ylabel('Query')
+    plt.colorbar(im0, ax=axes[0, 0])
+
+    for iteration in range(1, min(max_iter + 1, 7)):  # at most 6 plans are drawn
+        # Update b
+        KTa = torch.bmm(K.transpose(1, 2), a)
+        b = torch.pow(prob2 / (KTa + 1e-8), power)
+        # Update a
+        Kb = torch.bmm(K, b)
+        a = torch.pow(prob1 / (Kb + 1e-8), power)
+
+        T = torch.mul(torch.mul(a, K), b.transpose(1, 2))
+        T_np = T[0].numpy()
+
+        ax = axes[(iteration) // 4, (iteration) % 4]
+        im = ax.imshow(T_np, cmap='YlOrRd')
+        ax.set_title(f'Transport Plan T\n迭代{iteration}')
+        ax.set_xlabel('Positive'); ax.set_ylabel('Query')
+        plt.colorbar(im, ax=ax)
+
+    # Hide the panels that were not used
+    for i in range(max_iter + 1, 8):
+        axes[i // 4, i % 4].axis('off')
+
+    plt.suptitle('Sinkhorn 非平衡最优传输迭代过程', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'uot_sinkhorn_iterations.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def test_rigid_transform():
+    """Recover a known rotation/translation from noisy point pairs with compute_rigid_transform and plot the fit."""
+    print('\n--- Weighted SVD 刚体变换估计 ---')
+
+    torch.manual_seed(42)
+    B, N = 2, 150
+
+    # Ground-truth transform: rotation about the z axis plus a translation, repeated for every batch item
+    angle = torch.tensor(0.5)  # radians, ~28.6 degrees
+    R_true = torch.tensor([
+        [torch.cos(angle), -torch.sin(angle), 0],
+        [torch.sin(angle), torch.cos(angle), 0],
+        [0, 0, 1]
+    ]).unsqueeze(0).repeat(B, 1, 1)
+    t_true = torch.tensor([2.0, -1.0, 0.1]).unsqueeze(0).unsqueeze(-1).repeat(B, 1, 1)
+
+    # Query point cloud
+    pts1 = torch.randn(B, N, 3) * 20
+
+    # Positive point cloud = R * query + t + noise
+    pts2 = R_true @ pts1.transpose(1, 2) + t_true
+    pts2 = pts2.transpose(1, 2) + 0.3 * torch.randn(B, N, 3)
+
+    # Simulated transport weights: full weight for the first 80 points, low weight for the remaining 70
+    weights = torch.ones(B, N)
+    weights[:, 80:] = 0.1  # down-weight the last 70 points
+
+    # Estimate the transform
+    transform = compute_rigid_transform(pts1, pts2, weights)
+
+    # Split the estimate; presumably the layout is [R | t] in the first three rows - confirm against uot.py
+    R_est = transform[:, :3, :3]
+    t_est = transform[:, :3, 3]
+
+    # Rotation error: angle of R_est @ R_true^T, obtained from its trace and converted to degrees
+    R_err = R_est @ R_true.transpose(1, 2)
+    trace = torch.diagonal(R_err, dim1=1, dim2=2).sum(dim=1)
+    angle_err = torch.acos(torch.clamp((trace - 1) / 2, -1, 1)) * 180 / np.pi
+
+    # Translation error: Euclidean distance between estimated and true translation
+    t_err = (t_est - t_true.squeeze(-1)).norm(dim=1)
+
+    print(f'旋转误差: {angle_err[0].item():.2f}°')
+    print(f'平移误差: {t_err[0].item():.3f}m')
+
+    # Visualization (batch item 0 only)
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+
+    # Project the 3-D points onto the XY plane
+    pts1_2d = pts1[0, :, :2].numpy()
+    pts2_2d = pts2[0, :, :2].numpy()
+
+    # Query points after applying the estimated transform
+    pts1_transformed = (R_est[0] @ pts1[0].T + t_est[0].unsqueeze(-1)).T[:, :2].numpy()
+
+    axes[0].scatter(pts1_2d[:, 0], pts1_2d[:, 1], c='blue', s=10, alpha=0.6, label='Query')
+    axes[0].scatter(pts2_2d[:, 0], pts2_2d[:, 1], c='red', s=10, alpha=0.6, label='Positive')
+    for i in range(min(20, N)):
+        if weights[0, i] > 0.5:
+            axes[0].plot([pts1_2d[i, 0], pts2_2d[i, 0]],
+                         [pts1_2d[i, 1], pts2_2d[i, 1]],
+                         'gray', alpha=0.3, linewidth=0.5)
+    axes[0].set_title('匹配点对 (蓝色→红色)')
+    axes[0].set_xlabel('X (m)'); axes[0].set_ylabel('Y (m)')
+    axes[0].legend(fontsize=8)
+    axes[0].set_aspect('equal')
+
+    # After the transform: the moved query points should overlay the positive points
+    axes[1].scatter(pts1_transformed[:, 0], pts1_transformed[:, 1],
+                    c='blue', s=10, alpha=0.6, label='Query (变换后)')
+    axes[1].scatter(pts2_2d[:, 0], pts2_2d[:, 1],
+                    c='red', s=10, alpha=0.6, label='Positive (目标)')
+    axes[1].set_title(f'变换后对比\n旋转误差:{angle_err[0].item():.2f}° 平移误差:{t_err[0].item():.3f}m')
+    axes[1].set_xlabel('X (m)'); axes[1].set_ylabel('Y (m)')
+    axes[1].legend(fontsize=8)
+    axes[1].set_aspect('equal')
+
+    plt.suptitle('Weighted SVD 刚体变换估计', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'uot_rigid_transform.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_epsilon_gamma():
+    """Plot the sinkhorn_unbalanced transport plan on a grid of (gamma, epsilon) settings, then print notes."""
+    print('\n--- epsilon/gamma 参数分析 ---')
+
+    torch.manual_seed(42)
+    N = 50
+    feat1 = torch.randn(1, N, 128)
+    feat1_norm = feat1 / (feat1.norm(dim=2, keepdim=True) + 1e-8)
+    feat2 = feat1 + 0.2 * torch.randn(1, N, 128)  # perturbed copy of feat1
+    feat2_norm = feat2 / (feat2.norm(dim=2, keepdim=True) + 1e-8)
+
+    epsilons = [0.01, 0.05, 0.1, 0.5]
+    gammas = [0.1, 1.0, 10.0]
+
+    fig, axes = plt.subplots(len(gammas), len(epsilons), figsize=(16, 12))  # rows: gamma, columns: epsilon
+
+    for gi, gamma in enumerate(gammas):
+        for ei, eps in enumerate(epsilons):
+            epsilon = torch.tensor([eps])  # wrapped as 1-element tensors before being passed on
+            gam = torch.tensor([gamma])
+            T = sinkhorn_unbalanced(
+                feat1_norm, feat2_norm,
+                epsilon=epsilon, gamma=gam,
+                max_iter=5, matrix='cosine'
+            )
+            ax = axes[gi, ei]
+            im = ax.imshow(T[0].numpy(), cmap='YlOrRd')
+            ax.set_title(f'ε={eps}, γ={gamma}')
+            ax.set_xlabel('Positive'); ax.set_ylabel('Query')
+            plt.colorbar(im, ax=ax)
+
+    plt.suptitle('epsilon (熵正则) 和 gamma (质量正则) 对 Transport Plan 的影响',
+                 fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'uot_epsilon_gamma.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+    print("""
+    参数解释:
+    - epsilon (ε): 熵正则化强度
+      - 小ε → Transport Plan更稀疏（hard matching）
+      - 大ε → Transport Plan更平滑（soft matching）
+    - gamma (γ): 质量正则化强度
+      - 小γ → 允许部分匹配（质量可增减）
+      - 大γ → 要求质量守恒（所有点必须匹配）
+    """)
+
+
+def analyze_parameters():
+    """Report UOTHead's total parameter count, then print every named parameter as a scalar."""
+    print('\n--- 参数量分析 ---')
+    uot = UOTHead(nb_iter=5, name='original')
+    total = sum(weight.numel() for weight in uot.parameters())
+    print(f'总参数量: {total} (仅 epsilon, gamma 两个可学习标量)')
+    for name, param in uot.named_parameters():  # .item() only works on single-element tensors
+        print(f'  {name}: {param.data.item():.4f}')
+
+
+def main():
+    """Run the UOT demos in order, then print a text summary of the UOTHead pipeline."""
+    rule = '=' * 60
+    print(rule, 'UOT (Unbalanced Optimal Transport) 位姿估计可视化', rule, sep='\n')
+    demo_steps = (
+        analyze_parameters,
+        visualize_cost_matrix,
+        visualize_sinkhorn,
+        test_rigid_transform,
+        visualize_epsilon_gamma,
+    )
+    for step in demo_steps:
+        step()
+    print('\n' + rule, '结构总结:', rule, sep='\n')
+    print("""
+    UOTHead (非平衡最优传输位姿估计):
+
+    ┌──────────────────────────────────────────────────────┐
+    │ 输入: feat1(B,150,128), feat2(B,150,128)              │
+    │       kpts1(B,150,3),  kpts2(B,150,3)                │
+    │                                                      │
+    │ 1. Cost Matrix: C = 1 - cosine_sim(feat1, feat2)     │
+    │    → (B, 150, 150)                                   │
+    │                                                      │
+    │ 2. Sinkhorn Unbalanced OT (迭代5次):                  │
+    │    K = exp(-C / epsilon)                             │
+    │    for i in range(5):                                │
+    │        b = (prob2 / Kᵀa)^(γ/(γ+ε))                   │
+    │        a = (prob1 / Kb)^(γ/(γ+ε))                    │
+    │    T = a ⊙ K ⊙ bᵀ                                    │
+    │    → (B, 150, 150) 运输计划                           │
+    │                                                      │
+    │ 3. 投影: project_kpts = T @ kpts2 / ΣT               │
+    │    → (B, 150, 3)  query匹配点在positive空间的投影坐标   │
+    │                                                      │
+    │ 4. Weighted SVD 刚体变换:                             │
+    │    - 加权中心化                                       │
+    │    - SVD分解协方差                                    │
+    │    - 输出 R(3×3), t(3×1)                             │
+    │    → transformation: (B, 3, 4)                       │
+    └──────────────────────────────────────────────────────┘
+
+    为什么用Unbalanced OT（非平衡最优传输）？
+    - 标准OT要求两个点集大小相同且质量守恒
+    - 实际场景：部分关键点在另一帧中可能被遮挡
+    - Unbalanced OT允许部分匹配，更鲁棒
+
+    两个可学习参数:
+    - epsilon (ε): 熵正则化，exp(ε)+0.03
+    - gamma (γ): 质量正则化，exp(γ)
+    """)
+
+    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/network_learning/08_full_pipeline_demo.py b/network_learning/08_full_pipeline_demo.py
new file mode 100644
index 0000000..6e77211
--- /dev/null
+++ b/network_learning/08_full_pipeline_demo.py
@@ -0,0 +1,516 @@
+"""
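+Full pipeline demo: end-to-end network structure visualization
+==============================================================
+Integrates all sub-networks and shows the complete input-to-output data flow.
+
+Run modes (omit --mode, or pass --mode all, to run all three in sequence):
+  python 08_full_pipeline_demo.py --mode bev    # BEV branch only
+  python 08_full_pipeline_demo.py --mode img    # image branch only
+  python 08_full_pipeline_demo.py --mode fusion # full fusion mode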
+"""
+
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import os
+import argparse
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from net import Fusion, BEVHead, ImgHead, FusionHead
+from BEVNet import RICNN
+from ALIKE.alnet import ALNet
+from netvlad import NetVLAD
+from uot import UOTHead
+
+OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')  # 'output' folder next to this script
+os.makedirs(OUTPUT_DIR, exist_ok=True)  # created at import time; no error if it already exists
+
+
+def create_dummy_batch_dict(mode: str = 'fusion') -> dict:
+    """Build a random dummy batch_dict for ``mode`` ('bev', 'img' or 'fusion'); other values yield only 'batch_size'."""
+    B = 2  # number of pairs; per-sample tensors hold 2 * B entries (presumably B queries + B positives — TODO confirm)
+    batch_dict = {
+        'batch_size': 2 * B,
+    }
+
+    if mode in ('fusion', 'bev'):
+        batch_dict['bev'] = torch.randn(2 * B, 7, 320, 320)
+        batch_dict['bev'][:, :3] = torch.sigmoid(batch_dict['bev'][:, :3])  # squash channels 0-2 into (0, 1); original note: "visible channels"
+        batch_dict['bev'][:, 2:3] = (batch_dict['bev'][:, 2:3] > 0.3).float()  # binarize channel 2 at 0.3; original note: "guider mask"
+
+    if mode in ('fusion', 'img'):
+        batch_dict['img'] = torch.randint(0, 256, (2 * B, 5, 192, 576)).float()
+
+    if mode == 'fusion':
+        # Dummy relation tensor of shape (2 * B, max_len, K, 2); rows past n_valid stay zero.
+        max_len, K = 200, 11  # along K: slots 0..K-2 get image pixels (<576, <192), slot K-1 gets a BEV cell (<320, <320)
+        batch_dict['relation'] = torch.zeros(2 * B, max_len, K, 2, dtype=torch.long)
+        for i in range(2 * B):
+            n_valid = 150
+            batch_dict['relation'][i, :n_valid, :K - 1, 0] = torch.randint(0, 576, (n_valid, K - 1))
+            batch_dict['relation'][i, :n_valid, :K - 1, 1] = torch.randint(0, 192, (n_valid, K - 1))
+            batch_dict['relation'][i, :n_valid, K - 1, 0] = torch.randint(0, 320, (n_valid,))
+            batch_dict['relation'][i, :n_valid, K - 1, 1] = torch.randint(0, 320, (n_valid,))
+
+        # pose_to_frame: 4x4 pose with a 0.3 rad in-plane rotation and translation (2, -1); original note: needed for training.
+        angle = 0.3
+        pose = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)
+        pose[:, 0, 0] = torch.cos(torch.tensor(angle))
+        pose[:, 0, 1] = -torch.sin(torch.tensor(angle))
+        pose[:, 1, 0] = torch.sin(torch.tensor(angle))
+        pose[:, 1, 1] = torch.cos(torch.tensor(angle))
+        pose[:, 0, 3] = 2.0
+        pose[:, 1, 3] = -1.0
+        batch_dict['pose_to_frame'] = pose.clone()
+
+        batch_dict['pose_query'] = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)
+        batch_dict['pose_positive'] = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)
+
+        batch_dict['label_score'] = torch.zeros(B, 320, 320, 2)
+        batch_dict['id_query'] = torch.arange(B)
+        batch_dict['id_positive'] = torch.arange(B)
+        batch_dict['sequence'] = torch.zeros(B, dtype=torch.long)
+
+    return batch_dict
+
+
+def run_bev_only() -> None:
+    """Run the BEV-only (point-cloud branch) demo on dummy data and save a 2x4 summary figure."""
+    print('\n' + '=' * 60)
+    print('模式: BEV Only (仅点云分支)')
+    print('=' * 60)
+
+    cfg = {
+        'flag': 'bev',
+        'kpts_number_bev': 150,
+        'kpts_number_img': 150,
+        'cluster_num_bev': 16,
+        'cluster_num_img': 16,
+        'cluster_num_fusion': 16,
+        'sinkhorn_iter': 5,
+        'vlad_size': 256,
+    }
+
+    model = Fusion(cfg)
+    model.eval()
+    total_params = sum(p.numel() for p in model.parameters())
+    print(f'模型参数量: {total_params:,} ({total_params / 1e6:.2f}M)')
+
+    batch_dict = create_dummy_batch_dict('bev')
+
+    with torch.no_grad():
+        output = model(batch_dict)
+
+    print('\n输出:')
+    for k, v in output.items():
+        if isinstance(v, torch.Tensor):
+            print(f'  {k:30s}: {list(v.shape)}')
+        else:
+            print(f'  {k:30s}: {v}')
+
+    # Visualize the BEV-branch data flow on a 2x4 grid of panels.
+    fig, axes = plt.subplots(2, 4, figsize=(18, 9))
+
+    # BEV input: the first three channels of sample 0 rendered as an RGB image.
+    if 'bev' in batch_dict:  # the pixels are read from batch_dict, so that is the container to guard on
+        bev_in = batch_dict['bev'][0, :3].permute(1, 2, 0).numpy()
+        axes[0, 0].imshow(bev_in)
+        axes[0, 0].set_title('BEV输入 (3通道)')
+        axes[0, 0].axis('off')
+
+    # Score map of sample 0.
+    if 'score_bev' in output:
+        axes[0, 1].imshow(output['score_bev'][0].squeeze().numpy(), cmap='hot')  # squeeze drops a singleton channel axis, if any, so imshow gets 2-D data
+        axes[0, 1].set_title('BEV Score Map')
+        axes[0, 1].axis('off')
+
+    # Keypoint locations overlaid on the BEV input.
+    if 'key_points' in output and 'pixels_kpt' in output:
+        bev_show = batch_dict['bev'][0, :3].permute(1, 2, 0).numpy()
+        axes[0, 2].imshow(bev_show)
+        kpt = output['pixels_kpt'][0].numpy()
+        axes[0, 2].scatter(kpt[:, 1], kpt[:, 0], c='red', s=5, alpha=0.8)  # NOTE(review): assumes pixels_kpt is (row, col) — TODO confirm
+        axes[0, 2].set_title(f'BEV Top-{len(kpt)} 关键点')
+        axes[0, 2].axis('off')
+
+    # Descriptor map (channel 0 of sample 0).
+    if 'fea_bev' in output:
+        axes[0, 3].imshow(output['fea_bev'][0, 0].numpy(), cmap='viridis')
+        axes[0, 3].set_title('BEV Descriptor ch0')
+        axes[0, 3].axis('off')
+
+    # Keypoint-feature similarity.
+    if 'fea_kpt_original' in output:
+        fea = output['fea_kpt_original']
+        # First half of the batch is compared against the second half (treated as query vs. positive).
+        B = fea.shape[0] // 2
+        sim = torch.nn.functional.cosine_similarity(
+            fea[:B].permute(0, 2, 1).unsqueeze(-1),
+            fea[B:].permute(0, 2, 1).unsqueeze(-2),
+            dim=1  # NOTE(review): gives a keypoint-by-keypoint matrix only if fea is (2B, N, C) — TODO confirm layout
+        )[0]
+        im = axes[1, 0].imshow(sim.numpy(), cmap='RdYlBu_r', vmin=-1, vmax=1)
+        axes[1, 0].set_title('Query-Positive 特征相似度')
+        axes[1, 0].set_xlabel('Positive'); axes[1, 0].set_ylabel('Query')
+        plt.colorbar(im, ax=axes[1, 0])
+
+    # Global VLAD descriptor of sample 0, viewed as a 16 x 128 grid.
+    if 'vlads' in output:
+        vlad = output['vlads'][0].view(16, 128).numpy()
+        im = axes[1, 1].imshow(vlad, cmap='RdBu_r', aspect='auto')
+        axes[1, 1].set_title('VLAD描述子 (16×128)')
+        axes[1, 1].set_xlabel('Feature Dim'); axes[1, 1].set_ylabel('Cluster')
+        plt.colorbar(im, ax=axes[1, 1])
+
+    # Text panel listing the data-flow steps.
+    axes[1, 2].set_title('BEV分支数据流')
+    flow = [
+        'bev (7,320,320)',
+        '→ x = bev[:3] (可视BEV)',
+        '→ points = bev[3:7] (坐标)',
+        '→ RICNN前向',
+        '→ score_bev (1,320,320)',
+        '→ fea_bev (128,320,320)',
+        '→ NMS + Top-K(150)',
+        '→ key_points (150,4)',
+        '→ fea_kpt (128,150)',
+        '→ EncodePosition',
+        '→ NetVLAD → vlad_bev (2048)',
+    ]
+    for i, f in enumerate(flow):
+        axes[1, 2].text(0.1, 0.95 - i * 0.1, f, transform=axes[1, 2].transAxes,
+                        fontsize=9, family='monospace')
+    axes[1, 2].axis('off')
+
+    # Pie chart of parameter counts per direct child of the BEV feature extractor.
+    axes[1, 3].set_title('BEV分支参数分布')
+    modules = dict(model.bev.feature_extractor.named_children())
+    sizes = []
+    labels = []
+    for name, mod in modules.items():
+        p = sum(pm.numel() for pm in mod.parameters())
+        if p > 0:
+            sizes.append(p)
+            labels.append(f'{name}\n({p/1e3:.0f}K)')
+    axes[1, 3].pie(sizes, labels=labels, autopct='%1.1f%%', textprops={'fontsize': 8})
+
+    plt.suptitle('BEV Only 模式: 点云分支可视化', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'full_pipeline_bev.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'[保存] {path}')
+
+
+def run_img_only() -> None:
+    """Run the image-only branch demo on dummy data and save a 2x4 summary figure."""
+    print('\n' + '=' * 60)
+    print('模式: Image Only (仅图像分支)')
+    print('=' * 60)
+
+    cfg = {
+        'flag': 'img',
+        'kpts_number_bev': 150,
+        'kpts_number_img': 150,
+        'cluster_num_bev': 16,
+        'cluster_num_img': 16,
+        'cluster_num_fusion': 16,
+        'sinkhorn_iter': 5,
+        'vlad_size': 256,
+    }
+
+    model = Fusion(cfg)
+    model.eval()
+    total_params = sum(p.numel() for p in model.parameters())
+    print(f'模型参数量: {total_params:,} ({total_params / 1e6:.2f}M)')
+
+    batch_dict = create_dummy_batch_dict('img')
+
+    with torch.no_grad():
+        output = model(batch_dict)
+
+    print('\n输出:')
+    for k, v in output.items():
+        if isinstance(v, torch.Tensor):
+            print(f'  {k:30s}: {list(v.shape)}')
+        else:
+            print(f'  {k:30s}: {v}')
+
+    # Visualization: 2x4 grid of panels.
+    fig, axes = plt.subplots(2, 4, figsize=(18, 9))
+
+    # Input image: first three channels of sample 0, cast to uint8 for display.
+    img_in = batch_dict['img'][0, :3].permute(1, 2, 0).numpy().astype(np.uint8)
+    axes[0, 0].imshow(img_in)
+    axes[0, 0].set_title('图像输入 (192×576)')
+    axes[0, 0].axis('off')
+
+    # Score map (sample 0, channel 0).
+    if 'score_img' in output:
+        axes[0, 1].imshow(output['score_img'][0, 0].numpy(), cmap='hot')
+        axes[0, 1].set_title('图像 Score Map')
+        axes[0, 1].axis('off')
+
+    # Keypoints overlaid on the input image.
+    if 'key_pixels' in output:
+        axes[0, 2].imshow(img_in)
+        kpt = output['key_pixels'][0].numpy()
+        axes[0, 2].scatter(kpt[:, 1], kpt[:, 0], c='red', s=5, alpha=0.8)  # NOTE(review): assumes key_pixels is (row, col) — TODO confirm
+        axes[0, 2].set_title(f'Top-{len(kpt)} 关键点')
+        axes[0, 2].axis('off')
+
+    # Descriptor map (sample 0, channel 0).
+    if 'fea_img' in output:
+        axes[0, 3].imshow(output['fea_img'][0, 0].numpy(), cmap='viridis')
+        axes[0, 3].set_title('图像 Descriptor ch0')
+        axes[0, 3].axis('off')
+
+    # Keypoint-feature similarity: first half of the batch vs. second half.
+    if 'fea_kpl' in output:
+        fea = output['fea_kpl']
+        B = fea.shape[0] // 2
+        sim = torch.nn.functional.cosine_similarity(
+            fea[:B].permute(0, 2, 1).unsqueeze(-1),
+            fea[B:].permute(0, 2, 1).unsqueeze(-2),
+            dim=1  # NOTE(review): gives a keypoint-by-keypoint matrix only if fea is (2B, N, C) — TODO confirm layout
+        )[0]
+        im = axes[1, 0].imshow(sim.numpy(), cmap='RdYlBu_r', vmin=-1, vmax=1)
+        axes[1, 0].set_title('Query-Positive 特征相似度')
+        plt.colorbar(im, ax=axes[1, 0])
+
+    # Text panel listing the data-flow steps.
+    axes[1, 1].set_title('图像分支数据流')
+    flow = [
+        'img (5,192,576)',
+        '→ x = img[:3]/255',
+        '→ ALNet前向',
+        '→ score_img (1,192,576)',
+        '→ fea_img (128,192,576)',
+        '→ NMS(2) + Top-K(150)',
+        '→ key_pixels (150,2)',
+        '→ fea_kpl (128,150)',
+    ]
+    for i, f in enumerate(flow):
+        axes[1, 1].text(0.1, 0.95 - i * 0.11, f, transform=axes[1, 1].transAxes,
+                        fontsize=9, family='monospace')
+    axes[1, 1].axis('off')
+
+    # Pie chart of parameter counts per direct child of the image feature extractor.
+    axes[1, 2].set_title('图像分支参数分布')
+    modules = dict(model.img.feature_extractor.named_children())
+    sizes = []
+    labels = []
+    for name, mod in modules.items():
+        p = sum(pm.numel() for pm in mod.parameters())
+        if p > 0:
+            sizes.append(p)
+            labels.append(f'{name}\n({p/1e3:.0f}K)')
+    axes[1, 2].pie(sizes, labels=labels, autopct='%1.1f%%', textprops={'fontsize': 8})
+
+    axes[1, 3].axis('off')
+
+    plt.suptitle('Image Only 模式: 图像分支可视化', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'full_pipeline_img.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'[保存] {path}')
+
+
+def run_fusion() -> None:
+    """Run the full cross-modal fusion demo on dummy data and save a 3x4 summary figure."""
+    print('\n' + '=' * 60)
+    print('模式: Fusion (完整融合)')
+    print('=' * 60)
+
+    cfg = {
+        'flag': 'fusion',
+        'kpts_number_bev': 150,
+        'kpts_number_img': 150,
+        'cluster_num_bev': 16,
+        'cluster_num_img': 16,
+        'cluster_num_fusion': 16,
+        'sinkhorn_iter': 5,
+        'vlad_size': 256,
+    }
+
+    model = Fusion(cfg)
+    model.eval()
+    total_params = sum(p.numel() for p in model.parameters())
+    print(f'模型参数量: {total_params:,} ({total_params / 1e6:.2f}M)')
+
+    batch_dict = create_dummy_batch_dict('fusion')
+
+    with torch.no_grad():
+        output = model(batch_dict)
+
+    print('\n输出:')
+    for k, v in output.items():
+        if isinstance(v, torch.Tensor):
+            print(f'  {k:30s}: {list(v.shape)}')
+        else:
+            print(f'  {k:30s}: {v}')
+
+    # Visualize the fusion data flow on a 3x4 grid of panels.
+    fig, axes = plt.subplots(3, 4, figsize=(22, 15))
+
+    # BEV input: first three channels of sample 0 rendered as an RGB image.
+    bev_in = batch_dict['bev'][0, :3].permute(1, 2, 0).numpy()
+    axes[0, 0].imshow(bev_in)
+    axes[0, 0].set_title('BEV 输入 (320×320)')
+    axes[0, 0].axis('off')
+
+    # Image input: first three channels of sample 0, cast to uint8 for display.
+    img_in = batch_dict['img'][0, :3].permute(1, 2, 0).numpy().astype(np.uint8)
+    axes[0, 1].imshow(img_in)
+    axes[0, 1].set_title('图像输入 (192×576)')
+    axes[0, 1].axis('off')
+
+    # Score maps of both branches (sample 0).
+    if 'score_bev' in output:
+        axes[0, 2].imshow(output['score_bev'][0].squeeze().numpy(), cmap='hot')  # squeeze drops a singleton channel axis, if any
+        axes[0, 2].set_title('BEV Score')
+        axes[0, 2].axis('off')
+    if 'score_img' in output:
+        axes[0, 3].imshow(output['score_img'][0, 0].numpy(), cmap='hot')
+        axes[0, 3].set_title('Image Score')
+        axes[0, 3].axis('off')
+
+    # Similarity before vs. after fusion (first half of the batch vs. second half).
+    if 'fea_kpt_original' in output and 'fea_kpt_fusion' in output:
+        fea_orig = output['fea_kpt_original']
+        fea_fusion = output['fea_kpt_fusion']
+        B = fea_orig.shape[0] // 2
+
+        sim_orig = torch.nn.functional.cosine_similarity(
+            fea_orig[:B].permute(0, 2, 1).unsqueeze(-1),
+            fea_orig[B:].permute(0, 2, 1).unsqueeze(-2),
+            dim=1  # NOTE(review): gives a keypoint-by-keypoint matrix only if fea is (2B, N, C) — TODO confirm layout
+        )[0].numpy()
+
+        sim_fusion = torch.nn.functional.cosine_similarity(
+            fea_fusion[:B].permute(0, 2, 1).unsqueeze(-1),
+            fea_fusion[B:].permute(0, 2, 1).unsqueeze(-2),
+            dim=1  # NOTE(review): same layout assumption as sim_orig above
+        )[0].numpy()
+
+        im1 = axes[1, 0].imshow(sim_orig, cmap='RdYlBu_r', vmin=-1, vmax=1)
+        axes[1, 0].set_title('原始特征 相似度 (150×150)')
+        plt.colorbar(im1, ax=axes[1, 0])
+
+        im2 = axes[1, 1].imshow(sim_fusion, cmap='RdYlBu_r', vmin=-1, vmax=1)
+        axes[1, 1].set_title('融合特征 相似度 (150×150)')
+        plt.colorbar(im2, ax=axes[1, 1])
+
+        im3 = axes[1, 2].imshow(np.abs(sim_orig - sim_fusion), cmap='YlOrRd')  # keep the handle for this panel's own colorbar
+        axes[1, 2].set_title('相似度变化 |差异|')
+        plt.colorbar(im3, ax=axes[1, 2])  # was im2, which showed the fusion panel's scale and colormap
+
+    # Global VLAD descriptor of sample 0, viewed as a 16 x 128 grid.
+    if 'vlads' in output:
+        vlad = output['vlads'][0].view(16, 128).numpy()
+        im = axes[1, 3].imshow(vlad, cmap='RdBu_r', aspect='auto')
+        axes[1, 3].set_title('VLAD 融合 (16×128)')
+        plt.colorbar(im, ax=axes[1, 3])
+
+    # Text panel: overall architecture as three boxes of equal width.
+    axes[2, 0].set_title('完整架构')
+    arch = [
+        '┌─ BEVHead ─────────────┐',
+        '│ RICNN + EncodePos     │',
+        '│ → fea_kpt_original    │',
+        '│ → vlad_bev            │',
+        '└───────────────────────┘',
+        '┌─ ImgHead ─────────────┐',
+        '│ ALNet + NMS           │',
+        '│ → fea_kpl             │',
+        '│ → fea_img             │',
+        '└───────────────────────┘',
+        '┌─ FusionHead ──────────┐',
+        '│ LocalPool + Converter │',
+        '│ Generator + FusionHead│',
+        '│ → fea_kpt_fusion      │',
+        '└───────────────────────┘',
+        '  VLAD = w·vlad_fusion + (1-w)·vlad_bev'
+    ]
+    for i, a in enumerate(arch):
+        axes[2, 0].text(0.05, 0.98 - i * 0.075, a, transform=axes[2, 0].transAxes,
+                        fontsize=7.5, family='monospace')
+    axes[2, 0].axis('off')
+
+    # Horizontal bar chart: parameter count per direct child module of the model.
+    axes[2, 1].set_title('各模块参数量')
+    module_names = []
+    module_params = []
+    for name, mod in model.named_children():
+        p = sum(pm.numel() for pm in mod.parameters())
+        if p > 0:
+            module_names.append(name)
+            module_params.append(p)
+    colors = plt.cm.Set3(np.linspace(0, 1, len(module_names)))
+    axes[2, 1].barh(range(len(module_names)), module_params, color=colors)
+    axes[2, 1].set_yticks(range(len(module_names)))
+    axes[2, 1].set_yticklabels(module_names, fontsize=8)
+    for i, p in enumerate(module_params):
+        axes[2, 1].text(p, i, f' {p/1e3:.0f}K', va='center', fontsize=8)
+
+    # Text panel: data-flow summary.
+    axes[2, 2].set_title('融合模式数据流')
+    flow = [
+        'img, bev, relation 输入',
+        '├─ ImgHead → ALNet',
+        '│   ├─ score_img',
+        '│   ├─ fea_img (密集描述子)',
+        '│   └─ fea_kpl (关键点)',
+        '├─ BEVHead → RICNN',
+        '│   ├─ score_bev',
+        '│   ├─ fea_bev (密集描述子)',
+        '│   ├─ fea_kpt_original',
+        '│   └─ vlad_bev',
+        '└─ FusionHead',
+        '    ├─ GridSample → fea_pl_dual, fea_pt_dual',
+        '    ├─ Converters → 跨模态转换',
+        '    ├─ Generator → 全景特征',
+        '    ├─ FusionHead → 融合特征',
+        '    └─ NetVLAD → vlad_fusion',
+        '最终: vlads = w·vlad_fusion + (1-w)·vlad_bev',
+        '     UOT: → transformation (位姿)',
+    ]
+    for i, f in enumerate(flow):
+        axes[2, 2].text(0.05, 0.98 - i * 0.06, f, transform=axes[2, 2].transAxes,
+                        fontsize=7.5, family='monospace')
+    axes[2, 2].axis('off')
+
+    axes[2, 3].axis('off')
+
+    plt.suptitle('Fusion 模式: 完整跨模态融合可视化', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'full_pipeline_fusion.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'[保存] {path}')
+
+
+def main():
+    """Parse ``--mode`` and run the selected demo; ``all`` runs bev, img, fusion in that order."""
+    parser = argparse.ArgumentParser(description='全流水线可视化')
+    parser.add_argument('--mode', type=str, default='all',
+                        choices=['all', 'bev', 'img', 'fusion'],
+                        help='运行模式')
+    selected = parser.parse_args().mode
+
+    # Dict insertion order fixes the execution order used for ``all``.
+    runners = {'bev': run_bev_only, 'img': run_img_only, 'fusion': run_fusion}
+    for mode_name, runner in runners.items():
+        if selected in ('all', mode_name):
+            runner()
+
+    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/network_learning/LEARNING_GUIDE.md b/network_learning/LEARNING_GUIDE.md
new file mode 100644
index 0000000..af68400
--- /dev/null
+++ b/network_learning/LEARNING_GUIDE.md
@@ -0,0 +1,419 @@
+# 网络结构学习指南
+
+> 论文：[Cross Fusion of Point Cloud and Learned Image for Loop Closure Detection](../Cross_Fusion_of_Point_Cloud_and_Learned_Image_for_Loop_Closure_Detection.pdf)
+
+---
+
+## 目录
+
+1. [项目总览](#1-项目总览)
+2. [网络结构全景图](#2-网络结构全景图)
+3. [ALNet — 图像特征提取器](#3-alnet--图像特征提取器)
+4. [RICNN — 旋转不变CNN](#4-ricnn--旋转不变cnn)
+5. [EncodePosition — 位置编码](#5-encodeposition--位置编码)
+6. [Converter — 跨模态特征转换器](#6-converter--跨模态特征转换器)
+7. [Generator & FusionHead — 特征生成与融合](#7-generator--fusionhead--特征生成与融合)
+8. [LocalPool — 局部特征聚合](#8-localpool--局部特征聚合)
+9. [NetVLAD — 全局描述子聚合](#9-netvlad--全局描述子聚合)
+10. [UOTHead — 最优传输位姿估计](#10-uothead--最优传输位姿估计)
+11. [完整数据流](#11-完整数据流)
+12. [学习路线建议](#12-学习路线建议)
+
+---
+
+## 1. 项目总览
+
+本项目实现**点云-图像跨模态融合的闭环检测**系统，共包含 **9 个网络结构**：
+
+| # | 网络 | 源文件 | 作用 |
+|---|------|------|------|
+| 1 | **ALNet** | `ALIKE/alnet.py` | 图像特征提取（关键点+描述子） |
+| 2 | **RICNN** | `BEVNet.py` | BEV点云特征提取（旋转不变） |
+| 3 | **EncodePosition** | `BEVNet.py` | 关键点空间位置编码 |
+| 4 | **Converter** | `net.py` | 跨模态特征空间转换 |
+| 5 | **Generator** | `net.py` | 变长特征→固定大小 |
+| 6 | **FusionHead** | `net.py` | 多来源特征Attention融合 |
+| 7 | **LocalPool** | `net.py` | 多像素特征→单体素聚合 |
+| 8 | **NetVLAD** | `netvlad.py` | 局部特征→全局描述子 |
+| 9 | **UOTHead** | `uot.py` | 最优传输→位姿估计 |
+
+### 运行模式
+
+| flag | 含义 | 包含模块 |
+|------|------|---------|
+| `bev` | 仅点云 | 2, 3, 8 |
+| `img` | 仅图像 | 1 |
+| `fusion` | 完整融合 | 全部 1-9 |
+
+### 关键维度
+
+| 参数 | 值 |
+|------|-----|
+| BEV图尺寸 (H×W) | 320×320 |
+| BEV输入通道 | 7 (max_z, intensity, density, cx, cy, cz, ci) |
+| 图像尺寸 (H×W) | 192×576 |
+| 关键点数量 (BEV/Img) | 150 |
+| 特征维度 | 128 |
+| VLAD聚类数 | 16 |
+| VLAD输出维度 | 2048 (=16×128) |
+
+---
+
+## 2. 网络结构全景图
+
+```
+                        ┌─────────────────────────┐
+                        │   输入 img + bev + relation│
+                        └──────┬──────────┬───────┘
+                               │          │
+                    ┌──────────┘          └──────────┐
+                    ▼                                ▼
+            ┌──────────────┐               ┌──────────────┐
+            │   ImgHead    │               │   BEVHead    │
+            │   (ALNet)    │               │   (RICNN)    │
+            │              │               │              │
+            │ score_img    │               │ score_bev    │
+            │ fea_img      │               │ fea_bev      │
+            │ fea_kpl      │               │ fea_kpt_orig │
+            │ key_pixels   │               │ key_points   │
+            └──────┬───────┘               │ vlad_bev     │
+                   │                       └──────┬───────┘
+                   │                              │
+                   │    ┌─────────────────────────┘
+                   │    │
+                   ▼    ▼
+            ┌─────────────────────────────────────┐
+            │           FusionHead                │
+            │                                     │
+            │  LocalPool → Converter(cvt_bev)     │
+            │  GridSample → Converter(cvt_img)    │
+            │  Generator → FusionHead(Attention)  │
+            │                                     │
+            │  fea_kpt_fusion  (B, 128, 150)      │
+            └─────────────┬───────────────────────┘
+                          │
+                  ┌───────▼────────┐
+                  │    NetVLAD     │
+                  │  vlad_fusion   │
+                  └───────┬────────┘
+                          │
+                  ┌───────▼────────┐
+                  │  VLAD 融合      │
+                  │  w*fusion +    │
+                  │  (1-w)*bev     │
+                  └───────┬────────┘
+                          │
+                  ┌───────▼────────┐
+                  │   UOTHead      │
+                  │ (仅训练时)      │
+                  │→ transformation│
+                  └────────────────┘
+```
+
+---
+
+## 3. ALNet — 图像特征提取器
+
+**源码**: `ALIKE/alnet.py` | **Demo**: `python 01_alnet_demo.py`
+
+### 结构
+
+```
+输入: (B, 3, 192, 576)
+  ↓
+block1: ConvBlock(3→16)           → (B, 16, 192, 576)
+  ↓ MaxPool2d(2)
+block2: ResBlock(16→32)           → (B, 32, 96, 288)
+  ↓ MaxPool2d(4)
+block3: ResBlock(32→64)           → (B, 64, 24, 72)
+  ↓ MaxPool2d(4)
+block4: ResBlock(64→128)          → (B, 128, 6, 18)
+  ↓
+特征聚合: 4尺度concat + 上采样    → (B, 128, 192, 576)
+  ↓ Conv1x1(128→129)
+输出: score(B,1,192,576) + desc(B,128,192,576)
+```
+
+### 设计要点
+
+- **多尺度特征聚合**: 4阶段特征通过1x1conv压缩后上采样拼接，兼顾浅层定位精度和深层语义
+- **共享检测+描述**: 单一骨干同时输出关键点得分和密集描述子
+- **配置(alike-n)**: c1=16, c2=32, c3=64, c4=128, dim=128
+- **关键点选择**: NMS (radius=2, 2轮) + Top-K=150
+
+### 各阶段含义
+
+| 阶段 | 分辨率 | 学习内容 |
+|------|-------|---------|
+| block1 | 原始 | 边缘、角点等低级特征 |
+| block2 | 1/2 | 纹理、局部形状 |
+| block3 | 1/8 | 物体部件、语义信息 |
+| block4 | 1/32 | 全局上下文、场景级信息 |
+
+---
+
+## 4. RICNN — 旋转不变CNN
+
+**源码**: `BEVNet.py` | **Demo**: `python 02_ricnn_demo.py`
+
+### 结构
+
+```
+输入: BEV图像 (B, 3, 320, 320)
+  ↓
+block1: RIConvBlock(3→16)         → (B, 16, 320, 320)
+  ↓ RIMaxpool2d(2)
+block2: RIResBlock(16→32)         → (B, 32, 160, 160)
+  ↓ RIMaxpool2d(5, stride=4)
+block3: RIResBlock(32→64)         → (B, 64, 40, 40)
+  ↓ RIMaxpool2d(5, stride=4)
+block4: RIResBlock(64→128)        → (B, 128, 10, 10)
+  ↓
+特征聚合 (同ALNet)                 → (B, 128, 320, 320)
+  ↓ Conv1x1(128→129)
+输出: score(B,1,320,320) + desc(B,128,320,320)
+```
+
+### 旋转不变性原理（核心创新）
+
+BEV图像中车辆旋转时点云投影会旋转。RICNN通过以下机制保持特征不变：
+
+**RIConv2d**: 根据kernel位置到中心的**欧氏距离**分组，同距离共享权重
+
+```
+标准 5×5 kernel (25个独立权重):       RI kernel (3组共享权重):
+[ 0  1  2  3  4]                      [0 1 1 1 0]
+[ 5  6  7  8  9]                      [1 2 2 2 1]
+[10 11 12 13 14]                      [1 2 3 2 1]  ← 3组: dis=0,1,2
+[15 16 17 18 19]                      [1 2 2 2 1]
+[20 21 22 23 24]                      [0 1 1 1 0]
+```
+
+**RIMaxpool2d / RIAvgpool2d**: 只取圆形区域内像素，排除对角线角点（旋转不一致）
+
+**推理优化**: `disable_ri()` 可将RI层转为标准CNN层
+
+---
+
+## 5. EncodePosition — 位置编码
+
+**源码**: `BEVNet.py` | **Demo**: 包含在 `02_ricnn_demo.py`
+
+```
+输入: kpts (B, 150, 4), fea (B, 128, 150)
+  ↓
+1. 计算150×150关键点欧氏距离矩阵
+2. 距离直方图 (16 bins, range=[1,80]m)
+3. 直方图归一化
+4. MLP: 16→64→64→128
+5. fea_out = fea + MLP(hist)  (残差连接)
+```
+
+将关键点间空间关系编码到特征中，帮助网络理解"哪些关键点在物理空间中相邻"。
+
+---
+
+## 6. Converter — 跨模态特征转换器
+
+**源码**: `net.py` | **Demo**: `python 03_converter_demo.py`
+
+```
+输入: x (B, 128, N)  N个特征点
+  ├─ 路径1: Self-Attention(MHA) → x2 (B, 128, N)
+  ├─ 路径2: Conv1d瓶颈(128→32→16→32→128) → x3 (B, 128, N)
+  └─ concat([x2,x3]) → Conv1d(256→128) → 输出 (B, 128, N)
+```
+
+### 两种使用
+
+| 转换器 | 输入 → 输出 | 含义 |
+|--------|------------|------|
+| `cvt_bev` | 图像特征 → BEV空间 | 让图像特征"理解"BEV几何 |
+| `cvt_img` | BEV特征 → 图像空间 | 让BEV特征"理解"图像语义 |
+
+双路径设计：MHA捕获全局关系，Conv1d做逐点变换，互补增强。
+
+---
+
+## 7. Generator & FusionHead — 特征生成与融合
+
+**源码**: `net.py` | **Demo**: `python 04_generator_fusion_demo.py`
+
+### Generator (全景特征生成器)
+
+```
+输入: (B, 128, N)  N可变
+  ↓ Self-Attention
+  ↓ ConvTranspose1d(k3, s3) → 上采样扩展
+  ↓ AdaptiveMaxPool1d(150)
+输出: (B, 128, 150)  固定K=150
+```
+
+将可变数量的匹配点特征先经转置卷积扩展、再自适应最大池化为固定150个，与BEV关键点对齐。
+
+### FusionHead (跨模态融合头)
+
+```
+输入: (B, 128, 150, 4)  ← [original, gen, gen_gen, kpl_gen]
+  ↓
+Step 1: 对前3对做 Self-Attn → max聚合
+Step 2: Cross-Attn with kpl_gen (图像空间特征)
+  ↓
+concat(original, cross_out) → Conv1d(256→128)
+输出: (B, 128, 150) 融合特征
+```
+
+4种特征来源：
+
+| 特征 | 来源 | 空间 |
+|------|------|------|
+| `original` | RICNN直接提取 | BEV |
+| `gen` | Generator从图像特征生成 | 图像→BEV |
+| `gen_gen` | cvt_bev(cvt_img(original)) | BEV→图像→BEV循环 |
+| `kpl_gen` | cvt_img(original) | BEV→图像残留 |
+
+---
+
+## 8. LocalPool — 局部特征聚合
+
+**源码**: `net.py` | 轻量级模块，无独立demo
+
+```
+输入: (B, 128, N, K)  N个体素，每体素K个像素(K≤100)
+  ↓ Conv2d(100→10, k=1) + MaxPool2d((1,10))
+输出: (B, 128, N, 1) → squeeze → (B, 128, N)
+```
+
+一个BEV体素对应图像上多个像素，需聚合为单个体素特征：1x1 Conv降维 + MaxPool取最显著响应。
+
+---
+
+## 9. NetVLAD — 全局描述子聚合
+
+**源码**: `netvlad.py` | **Demo**: `python 05_netvlad_demo.py`
+
+```
+输入: (B, 128, 150, 1)
+  ↓
+1. Soft Assignment: Softmax(Conv2d(128→16)(x)) → (B, 16, 150, 1)
+2. Residual: x - centroids[16,128] → (B, 16, 150, 128)
+3. VLAD Core: Σ(soft_assign × residual) → (B, 16, 128)
+4. 归一化: per-cluster L2 → flatten → global L2
+输出: (B, 2048)
+```
+
+### 为什么用VLAD
+
+| 方法 | 问题 |
+|------|------|
+| 平均池化 | 丢失空间分布信息 |
+| VLAD | 通过聚类保留"哪些类型特征在哪里"的结构信息 |
+
+### VLAD融合
+
+```python
+vlads = sigmoid(w) * vlad_fusion + (1 - sigmoid(w)) * vlad_bev
+```
+
+---
+
+## 10. UOTHead — 最优传输位姿估计
+
+**源码**: `uot.py` | **Demo**: `python 06_uot_demo.py`
+
+```
+输入: feat1,feat2 (B,150,128), kpts1,kpts2 (B,150,3)
+  ↓
+1. Cost Matrix: C = 1 - cosine_sim(feat1, feat2)    → (B, 150, 150)
+2. Sinkhorn Unbalanced OT (5 iterations):
+   K = exp(-C/ε)  where ε = exp(ε_raw)+0.03
+   a, b 交替更新，γ 控制质量正则
+   T = diag(a)·K·diag(b)                            → (B, 150, 150)
+3. 投影: project_kpts = T @ kpts2 / ΣT              → (B, 150, 3)
+4. Weighted SVD → R, t                              → transformation (B, 3, 4)
+```
+
+### 两个可学习参数
+
+| 参数 | 含义 | 效果 |
+|------|------|------|
+| ε | 熵正则化 | 大→平滑匹配, 小→稀疏匹配 |
+| γ | 质量正则化 | 大→质量守恒, 小→允许不匹配 |
+
+非平衡OT允许部分点不匹配，对有遮挡的真实场景更鲁棒。
+
+---
+
+## 11. 完整数据流
+
+### 训练时
+
+```
+img → ALNet → fea_img, fea_kpl
+bev → RICNN → fea_bev, fea_kpt_original, vlad_bev
+
+relation → grid_sample(fea_img) → fea_pl_dual
+         → LocalPool → cvt_bev → fea_pt_dual_gen
+         → Generator → fea_kpt_original_gen
+
+         grid_sample(fea_bev) → fea_pt_dual       (训练时)
+         → cvt_img → fea_pl_dual_gen               (训练时)
+
+fea_kpt_original → cvt_img → fea_kpl_gen
+                → cvt_bev → fea_kpt_gen_gen
+
+FusionHead([original, gen, gen_gen, kpl_gen]) → fea_kpt_fusion
+         → NetVLAD → vlad_fusion
+
+vlads = sigmoid(w)*vlad_fusion + (1-sigmoid(w))*vlad_bev
+
+UOTHead(fea_kpt_original) → transformation_original
+UOTHead(fea_kpt_fusion)   → transformation_fusion
+```
+
+### 推理时简化
+
+不执行 UOTHead 和 BEV→图像采样（无 `pose_to_frame`），只输出 `vlads` + 局部特征。
+
+---
+
+## 12. 学习路线建议
+
+### 入门（约2-3小时）
+
+```bash
+# 1. 全流程概览
+python 08_full_pipeline_demo.py --mode all
+
+# 2. 独立分支
+python 01_alnet_demo.py    # 图像分支
+python 02_ricnn_demo.py    # BEV分支（含旋转不变性测试 + 位置编码）
+
+# 3. 融合机制
+python 03_converter_demo.py           # 跨模态转换
+python 04_generator_fusion_demo.py    # 特征生成 + 融合
+
+# 4. 全局描述子与位姿
+python 05_netvlad_demo.py   # VLAD聚合
+python 06_uot_demo.py       # 最优传输位姿估计
+```
+
+### 深入
+
+1. 阅读论文 Section 3 (Methodology)
+2. 对照代码看每个模块的 `forward` 函数
+3. 修改demo中的参数（关键点数量、VLAD聚类数），观察变化
+4. 加载真实checkpoint运行推理
+
+### 运行环境
+
+```bash
+conda activate fusion_cyy
+cd network_learning
+```
+
+所有可视化图像输出在 `network_learning/output/` 目录。
+
+---
+
+*基于代码版本: commit c3d268f*
diff --git a/network_learning/README.md b/network_learning/README.md
new file mode 100644
index 0000000..b2fba14
--- /dev/null
+++ b/network_learning/README.md
@@ -0,0 +1,50 @@
+# Network Learning — 网络结构可视化学习
+
+论文《Cross Fusion of Point Cloud and Learned Image for Loop Closure Detection》中所有网络结构的可视化 Demo。
+
+## 快速开始
+
+```bash
+conda activate fusion_cyy
+cd network_learning
+
+# 依次运行各网络demo
+python 01_alnet_demo.py              # ALNet — 图像特征提取器
+python 02_ricnn_demo.py              # RICNN — 旋转不变CNN + 位置编码
+python 03_converter_demo.py          # Converter — 跨模态特征转换器
+python 04_generator_fusion_demo.py   # Generator + FusionHead
+python 05_netvlad_demo.py            # NetVLAD — 全局描述子聚合
+python 06_uot_demo.py                # UOT — 最优传输位姿估计
+
+# 或一次性看完整流水线
+python 08_full_pipeline_demo.py --mode all
+```
+
+所有图像输出到 `output/` 目录。
+
+## 文件说明
+
+| 文件 | 内容 |
+|------|------|
+| `01_alnet_demo.py` | ALNet中间特征、感受野、参数量分析 |
+| `02_ricnn_demo.py` | RICNN卷积核分组、池化区域、旋转不变性测试、位置编码 |
+| `03_converter_demo.py` | 跨模态转换前后特征相似度、Attention权重 |
+| `04_generator_fusion_demo.py` | Generator变长→定长、FusionHead多来源融合 |
+| `05_netvlad_demo.py` | 软分配过程、VLAD结构、NetVLAD变体对比 |
+| `06_uot_demo.py` | 代价矩阵、Sinkhorn迭代、刚体变换、参数影响 |
+| `08_full_pipeline_demo.py` | BEV/Img/Fusion三种模式端到端可视化 |
+| `LEARNING_GUIDE.md` | 完整学习文档（9个网络结构详解） |
+
+## 网络结构一览
+
+| # | 网络 | 文件 | Demo |
+|---|------|------|------|
+| 1 | ALNet | `ALIKE/alnet.py` | `01_alnet_demo.py` |
+| 2 | RICNN | `BEVNet.py` | `02_ricnn_demo.py` |
+| 3 | EncodePosition | `BEVNet.py` | `02_ricnn_demo.py` |
+| 4 | Converter | `net.py` | `03_converter_demo.py` |
+| 5 | Generator | `net.py` | `04_generator_fusion_demo.py` |
+| 6 | FusionHead | `net.py` | `04_generator_fusion_demo.py` |
+| 7 | LocalPool | `net.py` | （轻量级，见文档） |
+| 8 | NetVLAD | `netvlad.py` | `05_netvlad_demo.py` |
+| 9 | UOTHead | `uot.py` | `06_uot_demo.py` |