网络测试和学习demo

2026-05-09 17:03:40 +08:00
parent edbe8fdbf9
commit 78298e56f1
9 changed files with 2868 additions and 0 deletions
--- a/network_learning/05_netvlad_demo.py
+++ b/network_learning/05_netvlad_demo.py
@@ -0,0 +1,308 @@
+"""
+NetVLAD 全局描述子 Demo
+=======================
+NetVLAD (Vector of Locally Aggregated Descriptors) 将局部特征聚合为全局描述子。
+
+原理:
+1. Soft Assignment: 每个局部特征软分配到K个聚类中心
+2. Residual: 计算特征与聚类中心的残差
+3. Aggregation: 加权求和残差
+4. Normalization: 逐聚类L2归一化 + 全局L2归一化
+
+论文中使用 cluster_num=16, feature_size=128
+输出: 16 × 128 = 2048 维全局描述子
+"""
+
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from netvlad import NetVLAD, NetVLADLoupe
+
+OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')  # 'output' directory beside this script
+os.makedirs(OUTPUT_DIR, exist_ok=True)  # created when the module loads; exist_ok makes reruns a no-op
+
+
+def test_netvlad_basic():
+    """Run one NetVLAD forward pass on random input and print the shapes and per-row L2 norms."""
+    print('\n--- NetVLAD 基本功能测试 ---')
+
+    # The model is built before the seed is set, so seed 42 pins only the random input.
+    model = NetVLAD(fea_size=128, num_clusters=16)
+    model.eval()
+
+    torch.manual_seed(42)
+    input_shape = (2, 128, 150, 1)  # printed below as [B, C, K, W]
+    local_feats = torch.randn(*input_shape)
+    with torch.no_grad():
+        descriptor = model(local_feats)
+
+    print(f'输入特征: {local_feats.shape}  [B, C, K, W]')
+    print(f'VLAD输出: {descriptor.shape}  [B, cluster_num × C = 2048]')
+    print(f'VLAD L2 norm: {descriptor.norm(dim=1)}')  # presumably ~1 per row if the output is L2-normalised (not asserted)
+
+
+def visualize_soft_assignment():
+    """Plot how NetVLAD soft-assigns one random input to its clusters (2x3 figure).
+
+    Panels: assignment heat-map, per-cluster weight, argmax histogram, per-point entropy,
+    first-three-cluster weights, centroid PCA. Saves the PNG under OUTPUT_DIR.
+    """
+    print('\n--- 软分配可视化 ---')
+
+    netvlad = NetVLAD(fea_size=128, num_clusters=16)
+    netvlad.eval()
+
+    torch.manual_seed(42)
+    features = torch.randn(1, 128, 150, 1)
+
+    # Recompute the assignment by hand: conv -> relu -> softmax over dim 1.
+    # NOTE(review): assumed to mirror NetVLAD.forward — confirm against netvlad.py.
+    with torch.no_grad():
+        soft_assign = torch.nn.functional.softmax(netvlad.relu(netvlad.conv(features)), dim=1)
+
+    assign_np = soft_assign[0, :, :, 0].numpy()  # rows = clusters, columns = points
+    n_clusters, n_points = assign_np.shape
+
+    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
+
+    # Soft-assignment matrix.
+    im0 = axes[0, 0].imshow(assign_np, cmap='YlOrRd', aspect='auto')
+    axes[0, 0].set_title('软分配矩阵 (16 clusters × 150 points)')
+    axes[0, 0].set_xlabel('Point Index')
+    axes[0, 0].set_ylabel('Cluster')
+    plt.colorbar(im0, ax=axes[0, 0])
+
+    # Total weight per cluster; the dashed line marks a perfectly uniform split.
+    uniform = n_points / n_clusters
+    axes[0, 1].bar(range(n_clusters), assign_np.sum(axis=1), color='steelblue')
+    axes[0, 1].axhline(y=uniform, color='red', linestyle='--', label=f'平均={uniform:.1f}')
+    axes[0, 1].set_title('每个聚类的总权重')
+    axes[0, 1].set_xlabel('Cluster')
+    axes[0, 1].legend()
+
+    # Hard (argmax) assignment. One bin per cluster id, centred on the integer:
+    # a bare bins=16 spans only [min, max] of the data and drifts off the ids.
+    bin_edges = np.arange(n_clusters + 1) - 0.5
+    axes[0, 2].hist(assign_np.argmax(axis=0), bins=bin_edges, color='coral', edgecolor='white')
+    axes[0, 2].set_title('每个点被分配到哪个聚类 (argmax)')
+    axes[0, 2].set_xlabel('Cluster')
+    axes[0, 2].set_ylabel('点数')
+
+    # Per-point entropy of the assignment (high = ambiguous); 1e-8 guards log(0).
+    entropy = -(assign_np * np.log(assign_np + 1e-8)).sum(axis=0)
+    axes[1, 0].bar(range(n_points), entropy, color='steelblue', width=1.0)
+    axes[1, 0].set_title('每个点的分配熵\n(高=模糊分配, 低=确定分配)')
+    axes[1, 0].set_xlabel('Point Index')
+    axes[1, 0].set_ylabel('Entropy')
+
+    # Weight curves of the first three clusters.
+    for i in range(3):
+        axes[1, 1].plot(assign_np[i], alpha=0.7, label=f'Cluster {i}')
+    axes[1, 1].set_title('前3个聚类的分配权重')
+    axes[1, 1].set_xlabel('Point Index')
+    axes[1, 1].set_ylabel('Weight')
+    axes[1, 1].legend(fontsize=8)
+
+    # PCA of the centroids via SVD (rows of Vt are the principal directions).
+    # Project the *centred* matrix, i.e. the same data the SVD was computed on.
+    centroids = netvlad.centroids.detach().numpy()
+    centered = centroids - centroids.mean(axis=0)
+    _, _, Vt = np.linalg.svd(centered, full_matrices=False)
+    pts = centered @ Vt[:2].T
+    axes[1, 2].scatter(pts[:, 0], pts[:, 1], c=range(len(pts)), cmap='tab20', s=200, edgecolors='black')
+    for i, (px, py) in enumerate(pts):
+        axes[1, 2].annotate(str(i), (px, py), fontsize=10, ha='center', va='center')
+    axes[1, 2].set_title('聚类中心 PCA 2D 可视化')
+    axes[1, 2].set_xlabel('PC1'); axes[1, 2].set_ylabel('PC2')
+
+    plt.suptitle('NetVLAD 软分配机制', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'netvlad_soft_assignment.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def visualize_vlad_structure():
+    """Compare VLAD descriptors of two random inputs and of one input plus small noise.
+
+    Prints the cosine similarities, then saves a three-panel figure under OUTPUT_DIR.
+    """
+    print('\n--- VLAD向量结构可视化 ---')
+
+    netvlad = NetVLAD(fea_size=128, num_clusters=16)
+    netvlad.eval()
+
+    # Two successive draws from the same seeded generator stand in for two different scenes.
+    torch.manual_seed(42)
+    fea1 = torch.randn(1, 128, 150, 1)  # scene A
+    fea2 = torch.randn(1, 128, 150, 1)  # scene B (the next draw, not a different seed)
+
+    with torch.no_grad():
+        vlad1 = netvlad(fea1)[0]
+        vlad2 = netvlad(fea2)[0]
+        # Scene A plus small noise imitates a revisit; its descriptor should stay close to A's.
+        vlad1_noisy = netvlad(fea1 + 0.1 * torch.randn(1, 128, 150, 1))[0]
+
+    sim_same = torch.nn.functional.cosine_similarity(vlad1, vlad1_noisy, dim=0)
+    sim_diff = torch.nn.functional.cosine_similarity(vlad1, vlad2, dim=0)
+
+    print(f'同场景(加噪声) VLAD相似度: {sim_same.item():.4f}')
+    print(f'不同场景 VLAD相似度: {sim_diff.item():.4f}')
+    print(f'区分度 (同-异): {sim_same.item() - sim_diff.item():.4f}')
+
+    fig, axes = plt.subplots(1, 3, figsize=(18, 5))
+    # View each flat descriptor as a 16 x 128 (cluster, feature) grid for display.
+    vlad1_2d = vlad1.view(16, 128).numpy()
+    vlad2_2d = vlad2.view(16, 128).numpy()
+
+    im0 = axes[0].imshow(vlad1_2d, cmap='RdBu_r', aspect='auto')
+    axes[0].set_title('VLAD场景A (16×128)')
+    axes[0].set_xlabel('Feature Dim'); axes[0].set_ylabel('Cluster')
+    plt.colorbar(im0, ax=axes[0])
+
+    im1 = axes[1].imshow(vlad2_2d, cmap='RdBu_r', aspect='auto')
+    axes[1].set_title('VLAD场景B (16×128)')
+    axes[1].set_xlabel('Feature Dim'); axes[1].set_ylabel('Cluster')
+    plt.colorbar(im1, ax=axes[1])
+
+    im2 = axes[2].imshow(np.abs(vlad1_2d - vlad2_2d), cmap='YlOrRd', aspect='auto')
+    # This panel contrasts scene A with scene B, so it reports the A-vs-B similarity.
+    axes[2].set_title(f'|差异|  (cos_sim={sim_diff.item():.3f})')
+    axes[2].set_xlabel('Feature Dim'); axes[2].set_ylabel('Cluster')
+    plt.colorbar(im2, ax=axes[2])
+
+    plt.suptitle('NetVLAD 全局描述子结构', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'netvlad_vlad_structure.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def compare_netvlad_variants():
+    """Run NetVLAD and NetVLADLoupe on random input, print sizes, and draw both pipelines.
+
+    Saves the two-panel text diagram as 'netvlad_variants.png' under OUTPUT_DIR.
+    """
+    print('\n--- NetVLAD vs NetVLADLoupe 对比 ---')
+
+    netvlad = NetVLAD(fea_size=128, num_clusters=16)
+    netvlad_loupe = NetVLADLoupe(feature_size=128, cluster_size=16, output_dim=256)
+    # Inference mode, like the other demos: the Loupe pipeline listed below includes BatchNorm,
+    # which in training mode would use batch statistics and update its running stats.
+    netvlad.eval()
+    netvlad_loupe.eval()
+
+    torch.manual_seed(42)
+    x = torch.randn(2, 128, 150, 1)  # NetVLAD input, [B, C, K, W] as printed by test_netvlad_basic
+    x_loupe = torch.randn(2, 150, 128)  # NetVLADLoupe input, presumably (B, N, C) — confirm in netvlad.py
+
+    with torch.no_grad():
+        v1 = netvlad(x)
+        v2 = netvlad_loupe(x_loupe)
+
+    print(f'NetVLAD:        {sum(p.numel() for p in netvlad.parameters()):,} params')
+    print(f'  输入: {list(x.shape)} → 输出: {list(v1.shape)}')
+    print(f'NetVLADLoupe:   {sum(p.numel() for p in netvlad_loupe.parameters()):,} params')
+    print(f'  输入: {list(x_loupe.shape)} → 输出: {list(v2.shape)}')
+
+    # Pipeline steps of each variant, rendered below as plain monospace text (one panel each).
+    steps_vlad = [
+        '输入: (B, 128, 150, 1)',
+        '↓ Conv2d(128→16) + Softmax',
+        '软分配: (B, 16, 150, 1)',
+        '↓ 残差 = x - centroids',
+        '残差: (B, 16, 150, 128)',
+        '↓ sum(软分配 × 残差)',
+        'VLAD: (B, 16, 128)',
+        '↓ L2归一化 (per cluster)',
+        '↓ flatten + L2归一化',
+        '输出: (B, 2048)'
+    ]
+    steps_loupe = [
+        '输入: (B, N, 128)',
+        '↓ x @ cluster_weights',
+        '↓ Softmax + BatchNorm',
+        '软分配: (B, N, 16)',
+        '↓ activation @ x',
+        '↓ 减去中心校正项 a',
+        '↓ L2归一化',
+        '↓ MLP: 2048 → 256',
+        '↓ Context Gating',
+        '输出: (B, 256)'
+    ]
+
+    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
+    panels = (('NetVLAD (论文使用)', steps_vlad), ('NetVLADLoupe', steps_loupe))
+    for ax, (title, steps) in zip(axes, panels):
+        ax.set_title(title, fontsize=13, fontweight='bold')
+        for i, s in enumerate(steps):
+            ax.text(0.1, 0.95 - i * 0.09, s, transform=ax.transAxes, fontsize=10, family='monospace')
+        ax.axis('off')
+
+    plt.suptitle('NetVLAD 两种变体对比', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    path = os.path.join(OUTPUT_DIR, 'netvlad_variants.png')
+    plt.savefig(path, dpi=150, bbox_inches='tight')
+    plt.close()
+    print(f'  [保存] {path}')
+
+
+def main():
+    """Run every demo in this file, then print a summary and the output directory.
+
+    The demos run in a fixed order; each prints to stdout and three of them save a PNG.
+    """
+    rule = '=' * 60
+    print(rule, 'NetVLAD 全局描述子 结构与功能可视化', rule, sep='\n')
+
+    for demo in (test_netvlad_basic, visualize_soft_assignment,
+                 visualize_vlad_structure, compare_netvlad_variants):
+        demo()
+
+    print('\n' + rule, '结构总结:', rule, sep='\n')
+    print("""
+    NetVLAD (全局描述子聚合):
+
+    论文中使用:
+    - cluster_num: 16
+    - feature_size: 128
+    - 输出: 2048维全局描述子
+
+    VLAD计算步骤:
+    1. Soft Assignment: soft_assign = Softmax(Conv2d(128→16)(x))
+       每个局部特征被软分配到16个聚类中心
+
+    2. Residual: residual = x - centroids
+       计算特征与每个聚类中心的残差
+
+    3. VLAD Core: vlad = Σ(soft_assign × residual) / Σsoft_assign
+       按聚类聚合加权残差
+
+    4. Normalization:
+       - 逐聚类 L2 norm
+       - flatten
+       - 全局 L2 norm
+
+    最终VLAD融合:
+    vlads = sigmoid(w) × vlad_fusion + (1-sigmoid(w)) × vlad_bev
+    其中 w 是可学习参数
+
+    VLAD vs 平均池化:
+    - 平均池化: 丢失空间分布信息
+    - VLAD: 通过聚类保留了"哪些类型的特征在哪里出现"的信息
+    """)
+
+    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')
+
+
+if __name__ == '__main__':  # run the demos only when executed as a script, not on import
+    main()