网络测试和学习demo

2026-05-09 17:03:40 +08:00
parent edbe8fdbf9
commit 78298e56f1
9 changed files with 2868 additions and 0 deletions
--- a/network_learning/04_generator_fusion_demo.py
+++ b/network_learning/04_generator_fusion_demo.py
@@ -0,0 +1,304 @@
+"""
+Generator & FusionHead 全景生成器与融合头 Demo
+==============================================
+Generator: 从变长图像特征生成固定数量的全景特征
+  Self-Attention → ConvTranspose1d(k3,s3) → AdaptiveMaxPool1d(150)
+  输入: (B, 128, N) N可变
+  输出: (B, 128, 150) 固定150个
+
+FusionHead: 融合多来源特征
+  对 [original, gen, gen_gen, kpl_gen] 四个特征
+  → pair-wise Self-Attention → max聚合 → Cross-Attention → 输出
+  输入: (B, 128, 150, 4)
+  输出: (B, 128, 150) 融合后特征
+"""
+
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from net import Generator, FusionHead, Attention
+
+# Figures are written to an ``output`` folder next to this script.  The script
+# path is made absolute first (as the sys.path setup above already does) so the
+# folder location never depends on the current working directory.
+OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'output')
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+
+def test_generator():
+    """Run ``Generator`` on a variable-length input and plot what it produces.
+
+    Builds ``Generator(in_c=128, num=150)`` in eval mode, feeds it a seeded
+    random ``(2, 128, 200)`` tensor, prints the input/output shapes and saves
+    a 2x3 diagnostic figure to ``OUTPUT_DIR/generator_demo.png``.  Finally the
+    same generator is run on several other input lengths and the resulting
+    output shapes are printed.
+
+    Every size quoted in a printed message or figure label is derived from the
+    constants below, so the text cannot drift from the tensors actually used.
+    """
+    print('\n--- Generator 全景特征生成器 ---')
+
+    batch, channels = 2, 128
+    n_in, n_out = 200, 150    # input length / requested number of features
+    n_sim, n_heat = 50, 30    # points shown in similarity / heat-map panels
+    # Length quoted in the diagram for the ConvTranspose1d(k3,s3) stage.
+    # NOTE(review): Generator is defined elsewhere; the x3 factor assumes
+    # default padding - confirm against its implementation.
+    n_up = n_in * 3
+
+    generator = Generator(in_c=channels, num=n_out)
+    generator.eval()
+
+    # Fixed seed so the saved figure is reproducible.
+    torch.manual_seed(42)
+    x = torch.randn(batch, channels, n_in)
+
+    with torch.no_grad():
+        output = generator(x)
+
+    print(f'输入: {x.shape} (变长，N={n_in})')
+    print(f'输出: {output.shape} (固定，K={n_out})')
+
+    # NOTE(review): titles contain CJK text; Matplotlib's default font may
+    # lack these glyphs - confirm the font configuration where this runs.
+    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
+
+    # Cosine similarity between the first n_sim input points: scale every
+    # point (column) to unit length, then take all pairwise dot products.
+    x_norm = x[0] / (x[0].norm(dim=0, keepdim=True) + 1e-8)
+    sim_in = (x_norm.T[:n_sim] @ x_norm[:, :n_sim]).detach().numpy()
+    im0 = axes[0, 0].imshow(sim_in, cmap='RdYlBu_r', vmin=-1, vmax=1)
+    axes[0, 0].set_title(f'输入特征相似度 (前{n_sim}点)')
+    plt.colorbar(im0, ax=axes[0, 0])
+
+    # Same similarity measure over all generated output points.
+    out_norm = output[0] / (output[0].norm(dim=0, keepdim=True) + 1e-8)
+    sim_out = (out_norm.T @ out_norm).detach().numpy()
+    im1 = axes[0, 1].imshow(sim_out, cmap='RdYlBu_r', vmin=-1, vmax=1)
+    axes[0, 1].set_title(f'输出特征相似度 ({n_out}点)')
+    plt.colorbar(im1, ax=axes[0, 1])
+
+    # Raw input values (channels x first n_heat points).
+    im2 = axes[0, 2].imshow(x[0, :, :n_heat].detach().numpy(),
+                            cmap='viridis', aspect='auto')
+    axes[0, 2].set_title(f'输入特征 ({n_heat}点)')
+    axes[0, 2].set_xlabel('Point Index')
+    axes[0, 2].set_ylabel('Channel')
+    plt.colorbar(im2, ax=axes[0, 2])
+
+    # Raw output values (channels x first n_heat points).
+    im3 = axes[1, 0].imshow(output[0, :, :n_heat].detach().numpy(),
+                            cmap='viridis', aspect='auto')
+    axes[1, 0].set_title(f'输出特征 ({n_heat}点)')
+    axes[1, 0].set_xlabel('Point Index')
+    axes[1, 0].set_ylabel('Channel')
+    plt.colorbar(im3, ax=axes[1, 0])
+
+    # Text-only panel describing the length changes inside the generator.
+    info = axes[1, 1]
+    info.set_title('Generator 内部变换', fontsize=12)
+    info.text(0.5, 0.8, 'ConvTranspose1d(k3,s3)', transform=info.transAxes,
+              ha='center', fontsize=11,
+              bbox=dict(boxstyle='round', facecolor='lightblue'))
+    info.text(0.5, 0.6, f'{n_in} → {n_in}*3 = {n_up}', transform=info.transAxes,
+              ha='center', fontsize=10)
+    info.text(0.5, 0.4, f'AdaptiveMaxPool1d({n_out})', transform=info.transAxes,
+              ha='center', fontsize=11,
+              bbox=dict(boxstyle='round', facecolor='lightgreen'))
+    info.text(0.5, 0.2, f'{n_up} → {n_out}', transform=info.transAxes,
+              ha='center', fontsize=10)
+    info.axis('off')
+
+    # Overlaid histograms of all input and output values of the first sample.
+    axes[1, 2].hist(x[0].detach().numpy().flatten(), bins=50, alpha=0.5,
+                    label='Input', color='steelblue')
+    axes[1, 2].hist(output[0].detach().numpy().flatten(), bins=50, alpha=0.5,
+                    label='Output', color='coral')
+    axes[1, 2].set_title('特征值分布对比')
+    axes[1, 2].legend()
+
+    plt.suptitle('Generator: 变长特征→固定大小特征', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'generator_demo.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close(fig)
+    print(f'  [保存] {path}')
+
+    # Run the same generator on several other input lengths.
+    print('\nGenerator 对不同输入长度的适应:')
+    for n in (50, 100, 200, 500):
+        x_test = torch.randn(1, channels, n)
+        with torch.no_grad():
+            out = generator(x_test)
+        print(f'  N={n:4d} → 输出形状 {out.shape}')
+
+
+def test_fusion_head():
+    """Run ``FusionHead`` on four correlated synthetic sources and plot them.
+
+    Four ``(B, C, K)`` feature tensors are built from one shared random base
+    plus independent noise, stacked on a new trailing axis into a
+    ``(B, C, K, 4)`` tensor and passed through ``FusionHead(in_c=128)`` in eval
+    mode.  For the first batch entry the point-by-point cosine similarity of
+    every source against the original, and of the fused output against the
+    original, is drawn and saved to ``OUTPUT_DIR/fusion_head_demo.png``.
+    """
+    print('\n--- FusionHead 融合头 ---')
+
+    def pairwise_cosine(a, b):
+        """Return the (Ka, Kb) cosine similarity between columns of (C, K) tensors."""
+        # (Ka, 1, C) against (1, Kb, C) broadcasts to (Ka, Kb, C); reducing the
+        # channel axis leaves one similarity value per pair of points.
+        return torch.nn.functional.cosine_similarity(
+            a.T.unsqueeze(1), b.T.unsqueeze(0), dim=-1)
+
+    fusion_head = FusionHead(in_c=128)
+    fusion_head.eval()
+
+    # The four simulated sources, in stacking order:
+    #   [0] fea_kpt_original     - original BEV keypoint features
+    #   [1] fea_kpt_original_gen - BEV features produced by the Generator
+    #   [2] fea_kpt_gen_gen      - output of the dual-path converter
+    #   [3] fea_kpl_gen          - BEV -> image-space features
+    B, C, K = 2, 128, 150
+    torch.manual_seed(42)
+
+    # Shared base plus independent noise: correlated but not identical sources.
+    base = torch.randn(B, C, K)
+    fea_original = base
+    fea_gen = base + 0.3 * torch.randn(B, C, K)
+    fea_gen_gen = fea_gen + 0.2 * torch.randn(B, C, K)
+    fea_kpl_gen = base + 0.5 * torch.randn(B, C, K)
+
+    # Stack on a NEW LAST axis so the result is (B, C, K, 4); stacking at
+    # dim=2 would give (B, C, 4, K) and break the ``[..., idx]`` indexing below.
+    fea_kpts = torch.stack(
+        [fea_original, fea_gen, fea_gen_gen, fea_kpl_gen], dim=-1)
+    print(f'输入: {fea_kpts.shape} [B, C, K, 4来源]')
+
+    with torch.no_grad():
+        fea_fused = fusion_head(fea_kpts)
+
+    print(f'输出: {fea_fused.shape} [B, C, K] 融合特征')
+
+    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
+
+    names = ['Original (BEV原始)', 'Generated (全景生成)',
+             'Gen_Gen (双路径)', 'KPL_Gen (图像空间)']
+
+    # Sources occupy the left 2x2 sub-grid; each is compared with source 0.
+    for idx, name in enumerate(names):
+        ax = axes[idx // 2, idx % 2]
+        sim = pairwise_cosine(fea_kpts[0, :, :, 0], fea_kpts[0, :, :, idx])
+        im = ax.imshow(sim.detach().numpy(), cmap='RdYlBu_r', vmin=-1, vmax=1)
+        ax.set_title(f'{name}\nvs Original 相似度')
+        ax.set_xlabel('Point')
+        ax.set_ylabel('Point')
+        plt.colorbar(im, ax=ax)
+
+    # The top-right panel has no content; hide its empty frame.
+    axes[0, 2].axis('off')
+
+    # Fused output compared with the original features.
+    ax = axes[1, 2]
+    sim_fused = pairwise_cosine(fea_original[0], fea_fused[0])
+    im = ax.imshow(sim_fused.detach().numpy(), cmap='RdYlBu_r', vmin=-1, vmax=1)
+    ax.set_title('Fused vs Original 相似度')
+    ax.set_xlabel('Point')
+    ax.set_ylabel('Point')
+    plt.colorbar(im, ax=ax)
+
+    plt.suptitle('FusionHead: 多来源特征融合分析', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'fusion_head_demo.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close(fig)
+    print(f'  [保存] {path}')
+
+
+def visualize_attention_detail():
+    """Plot the weights of one ``Attention`` call next to a FusionHead flow list.
+
+    A fresh ``Attention(d_model=128)`` in eval mode is applied as
+    self-attention to a seeded random ``(4, 3, 128)`` tensor.  The weight
+    matrix of the first batch entry is drawn as an annotated heat map beside a
+    textual list of the FusionHead processing steps; the figure is saved to
+    ``OUTPUT_DIR/fusion_attention_detail.png``.
+    """
+    print('\n--- FusionHead Attention 详细分析 ---')
+
+    attention = Attention(d_model=128)
+    attention.eval()
+
+    batch, n_sources, channels = 2, 3, 128
+    torch.manual_seed(42)
+    # Leading dimension is batch * 2, standing in for "batch x samples".
+    tokens = torch.randn(batch * 2, n_sources, channels)
+
+    with torch.no_grad():
+        output, weights = attention(tokens, tokens, tokens)
+
+    print(f'Self-Attention 输入: {tokens.shape}')
+    print(f'输出: {output.shape}')
+    print(f'Attention权重: {weights.shape} (B, 3, 3)')
+
+    fig, (ax_heat, ax_flow) = plt.subplots(1, 2, figsize=(14, 5))
+
+    # Left panel: attention weights of the first batch entry.
+    source_labels = ['Original', 'Generated', 'Gen_Gen']
+    weights_np = weights[0].detach().numpy()
+    heat = ax_heat.imshow(weights_np, cmap='YlOrRd', vmin=0, vmax=1)
+    ax_heat.set_title('Self-Attention 权重 (3对特征)')
+    ax_heat.set_xticks(range(3))
+    ax_heat.set_xticklabels(source_labels)
+    ax_heat.set_yticks(range(3))
+    ax_heat.set_yticklabels(source_labels)
+
+    # Write each weight into its cell; white ink on the darker (> 0.5) cells.
+    for row, col in np.ndindex(3, 3):
+        value = weights_np[row, col]
+        if value > 0.5:
+            ink = 'white'
+        else:
+            ink = 'black'
+        ax_heat.text(col, row, f'{value:.3f}', ha='center', va='center',
+                     fontsize=12, color=ink)
+    plt.colorbar(heat, ax=ax_heat)
+
+    # Right panel: the FusionHead pipeline as a vertical list of text boxes.
+    ax_flow.set_title('FusionHead Attention 流程', fontsize=12)
+    flow_steps = (
+        '1. 拼接4种特征 [original, gen, gen_gen, kpl_gen]',
+        '2. 取前3种 [original, gen, gen_gen]',
+        '3. 对每个样本的3对特征做Self-Attention',
+        '4. max聚合 → 每样本1个特征',
+        '5. Cross-Attention with kpl_gen (图像空间特征)',
+        '6. concat(original, cross_out) → Conv1d → 输出',
+    )
+    box_style = dict(boxstyle='round', facecolor='lightyellow', alpha=0.7)
+    for i, caption in enumerate(flow_steps):
+        ax_flow.text(0.1, 0.9 - i * 0.15, caption,
+                     transform=ax_flow.transAxes,
+                     fontsize=10, family='monospace', bbox=box_style)
+    ax_flow.axis('off')
+
+    plt.suptitle('FusionHead Attention 机制详解', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'fusion_attention_detail.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def analyze_parameters():
+    """Print parameter counts for ``Generator`` and ``FusionHead``.
+
+    For each freshly constructed model the total number of parameter elements
+    is printed, followed by one line per direct child module with that child's
+    own parameter count.
+    """
+    print('\n--- 参数量分析 ---')
+
+    def count_params(module):
+        """Return the number of scalar elements across a module's parameters."""
+        return sum(tensor.numel() for tensor in module.parameters())
+
+    models = {
+        'Generator': Generator(in_c=128, num=150),
+        'FusionHead': FusionHead(in_c=128),
+    }
+
+    for label, network in models.items():
+        n_total = count_params(network)
+        print(f'\n{label}: {n_total:,} params ({n_total / 1e3:.1f}K)')
+        for child_name, child in network.named_children():
+            n_child = count_params(child)
+            print(f'  {child_name:15s}: {n_child:>10,} params')
+
+
+def main():
+    """Run every demo step in order and print a structural summary.
+
+    Order: parameter analysis, Generator demo, FusionHead demo, attention
+    detail plot.  Afterwards an ASCII diagram of both modules and the output
+    directory are printed.
+    """
+    rule = '=' * 60
+
+    print(rule)
+    print('Generator & FusionHead 结构与功能可视化')
+    print(rule)
+
+    demo_steps = (
+        analyze_parameters,
+        test_generator,
+        test_fusion_head,
+        visualize_attention_detail,
+    )
+    for run_step in demo_steps:
+        run_step()
+
+    print(f'\n{rule}')
+    print('结构总结:')
+    print(rule)
+
+    # Printed verbatim; the box drawing relies on the exact spacing below.
+    summary = """
+    Generator (全景特征生成器):
+    ┌──────────────────────────────────────────────┐
+    │ 输入: (B, 128, N)  N可变                      │
+    │   ↓ Self-Attention (MHA)                     │
+    │ x2: (B, 128, N)  全局上下文特征                │
+    │   ↓ ConvTranspose1d(k3,s3)                    │
+    │ x3: (B, 128, N*3)  上采样扩展                  │
+    │   ↓ AdaptiveMaxPool1d(150)                    │
+    │ 输出: (B, 128, 150)  固定K个全景特征            │
+    └──────────────────────────────────────────────┘
+    作用: 将BEV中可变数量的匹配点特征压缩为固定150个，
+         与BEV关键点数量对齐
+
+    FusionHead (跨模态融合头):
+    ┌──────────────────────────────────────────────┐
+    │ 输入: (B, 128, 150, 4)                        │
+    │   [original, gen, gen_gen, kpl_gen]           │
+    │   ↓                                           │
+    │ 对前3对 (B*N, 3, C):                           │
+    │   Self-Attn → max(dim=1) → (B*N, C)           │
+    │   ↓ reshape → (B, N, C)                       │
+    │ Cross-Attention with kpl_gen                  │
+    │   ↓                                           │
+    │ concat(original, cross_out) → Conv1d(256→128) │
+    │ 输出: (B, 128, 150) 融合特征                   │
+    └──────────────────────────────────────────────┘
+    作用: 整合多来源特征，增强融合表示
+    """
+    print(summary)
+
+    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')
+
+
+# Run the full demo only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()