Files
fusion_LCD/network_learning/05_netvlad_demo.py
2026-05-09 17:03:40 +08:00

309 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
NetVLAD 全局描述子 Demo
=======================
NetVLAD (Vector of Locally Aggregated Descriptors) 将局部特征聚合为全局描述子。
原理:
1. Soft Assignment: 每个局部特征软分配到K个聚类中心
2. Residual: 计算特征与聚类中心的残差
3. Aggregation: 加权求和残差
4. Normalization: 逐聚类L2归一化 + 全局L2归一化
论文中使用 cluster_num=16, feature_size=128
输出: 16 × 128 = 2048 维全局描述子
"""
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from netvlad import NetVLAD, NetVLADLoupe
# Every figure produced by this script is written into an 'output' folder that
# sits next to this file. The folder is created as soon as the module is
# loaded; exist_ok=True keeps repeated runs from failing when it already exists.
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')
os.makedirs(OUTPUT_DIR, exist_ok=True)
def test_netvlad_basic():
    """Smoke-test NetVLAD: run one forward pass and print the resulting shapes.

    A NetVLAD(fea_size=128, num_clusters=16) model is switched to eval mode and
    fed a seeded random tensor of shape (2, 128, 150, 1). The input shape, the
    output shape and the per-sample L2 norm of the output are printed.
    Nothing is returned.
    """
    print('\n--- NetVLAD 基本功能测试 ---')

    # The model is built before seeding, so only the input tensor is
    # controlled by the fixed seed below.
    model = NetVLAD(fea_size=128, num_clusters=16)
    model.eval()

    # Input layout: (B=2, C=128, K=150, W=1)
    input_shape = (2, 128, 150, 1)
    torch.manual_seed(42)
    local_features = torch.randn(*input_shape)

    with torch.no_grad():
        descriptor = model(local_features)

    # The norms are expected to be all ones if the descriptor is L2-normalised.
    report = (
        f'输入特征: {local_features.shape} [B, C, K, W]',
        f'VLAD输出: {descriptor.shape} [B, cluster_num × C = 2048]',
        f'VLAD L2 norm: {descriptor.norm(dim=1)}',
    )
    for line in report:
        print(line)
def visualize_soft_assignment():
    """Render a six-panel figure describing NetVLAD's soft assignment.

    A fresh NetVLAD(fea_size=128, num_clusters=16) in eval mode is fed one
    seeded random feature map of shape (1, 128, 150, 1). The soft-assignment
    weights are recomputed here from the model's ``conv`` and ``relu``
    attributes followed by a softmax over dim 1, then plotted as:

    * the assignment matrix (clusters x points),
    * the total weight received by each cluster,
    * a histogram of each point's argmax cluster,
    * the per-point assignment entropy,
    * the weight curves of the first three clusters,
    * a 2-D PCA projection of ``netvlad.centroids``.

    The figure is saved as ``netvlad_soft_assignment.png`` inside OUTPUT_DIR.
    Nothing is returned.

    NOTE(review): the conv -> relu -> softmax sequence is assumed to mirror
    what NetVLAD.forward does internally; that class lives in another module,
    so confirm it there.
    """
    print('\n--- 软分配可视化 ---')
    netvlad = NetVLAD(fea_size=128, num_clusters=16)
    netvlad.eval()
    torch.manual_seed(42)
    features = torch.randn(1, 128, 150, 1)

    # Recompute the intermediate soft-assignment weights by hand.
    with torch.no_grad():
        soft_assign = netvlad.conv(features)
        soft_assign = netvlad.relu(soft_assign)
        soft_assign = torch.nn.functional.softmax(soft_assign, dim=1)

    # Drop the batch and trailing width axes -> (clusters, points).
    assign_np = soft_assign[0, :, :, 0].numpy()
    # Read the sizes from the data so every panel stays consistent with it.
    num_clusters, num_points = assign_np.shape

    fig, axes = plt.subplots(2, 3, figsize=(18, 10))

    # Panel 1: soft-assignment matrix.
    im0 = axes[0, 0].imshow(assign_np, cmap='YlOrRd', aspect='auto')
    axes[0, 0].set_title(
        f'软分配矩阵 ({num_clusters} clusters × {num_points} points)')
    axes[0, 0].set_xlabel('Point Index')
    axes[0, 0].set_ylabel('Cluster')
    plt.colorbar(im0, ax=axes[0, 0])

    # Panel 2: total weight per cluster. Each point's weights sum to 1 after
    # the softmax, so the mean total per cluster is num_points / num_clusters.
    cluster_weight = assign_np.sum(axis=1)
    mean_weight = num_points / num_clusters
    axes[0, 1].bar(range(num_clusters), cluster_weight, color='steelblue')
    axes[0, 1].axhline(y=mean_weight, color='red', linestyle='--',
                       label=f'平均={mean_weight:.1f}')
    axes[0, 1].set_title('每个聚类的总权重')
    axes[0, 1].set_xlabel('Cluster')
    axes[0, 1].legend()

    # Panel 3: hard (argmax) assignment of every point. Explicit bin edges at
    # k - 0.5 give one bin per cluster id; an integer bin count would instead
    # spread the bins over the observed min..max and misalign the bars
    # whenever some cluster is never selected.
    max_cluster = assign_np.argmax(axis=0)
    cluster_bins = np.arange(num_clusters + 1) - 0.5
    axes[0, 2].hist(max_cluster, bins=cluster_bins, color='coral',
                    edgecolor='white')
    axes[0, 2].set_title('每个点被分配到哪个聚类 (argmax)')
    axes[0, 2].set_xlabel('Cluster')
    axes[0, 2].set_ylabel('点数')

    # Panel 4: assignment entropy per point; the epsilon guards log(0).
    entropy = -(assign_np * np.log(assign_np + 1e-8)).sum(axis=0)
    axes[1, 0].bar(range(num_points), entropy, color='steelblue', width=1.0)
    axes[1, 0].set_title('每个点的分配熵\n(高=模糊分配, 低=确定分配)')
    axes[1, 0].set_xlabel('Point Index')
    axes[1, 0].set_ylabel('Entropy')

    # Panel 5: weight curves of the first three clusters.
    for i in range(min(3, num_clusters)):
        axes[1, 1].plot(assign_np[i], alpha=0.7, label=f'Cluster {i}')
    axes[1, 1].set_title('前3个聚类的分配权重')
    axes[1, 1].set_xlabel('Point Index')
    axes[1, 1].set_ylabel('Weight')
    axes[1, 1].legend(fontsize=8)

    # Panel 6: cluster centres projected onto their first two principal axes.
    # assumes netvlad.centroids is a (num_clusters, feature_dim) tensor --
    # TODO confirm against the NetVLAD class.
    centroids = netvlad.centroids.detach().numpy()
    # PCA: centre the data, take the right singular vectors as principal
    # axes, and project the *centred* data onto the first two of them.
    centered = centroids - centroids.mean(axis=0)
    _, _, vt = np.linalg.svd(centered, full_matrices=False)
    centroids_2d = centered @ vt[:2].T
    axes[1, 2].scatter(centroids_2d[:, 0], centroids_2d[:, 1],
                       c=np.arange(centroids_2d.shape[0]),
                       cmap='tab20', s=200, edgecolors='black')
    for i, (pc1, pc2) in enumerate(centroids_2d):
        axes[1, 2].annotate(str(i), (pc1, pc2),
                            fontsize=10, ha='center', va='center')
    axes[1, 2].set_title('聚类中心 PCA 2D 可视化')
    axes[1, 2].set_xlabel('PC1')
    axes[1, 2].set_ylabel('PC2')

    plt.suptitle('NetVLAD 软分配机制', fontsize=14, fontweight='bold')
    plt.tight_layout()
    path = os.path.join(OUTPUT_DIR, 'netvlad_soft_assignment.png')
    plt.savefig(path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f' [保存] {path}')
def visualize_vlad_structure():
    """Compare the VLAD descriptors of two random "scenes" and plot them.

    A NetVLAD(fea_size=128, num_clusters=16) model in eval mode encodes:

    * scene A: a seeded random feature map of shape (1, 128, 150, 1),
    * scene B: a second random feature map of the same shape,
    * scene A plus Gaussian noise scaled by 0.1.

    The cosine similarity A vs noisy-A ("same scene") and A vs B ("different
    scene") are printed together with their difference. A three-panel figure
    then shows descriptor A, descriptor B (each reshaped to 16 x 128) and
    their absolute difference, and is saved as ``netvlad_vlad_structure.png``
    inside OUTPUT_DIR. Nothing is returned.
    """
    print('\n--- VLAD向量结构可视化 ---')
    netvlad = NetVLAD(fea_size=128, num_clusters=16)
    netvlad.eval()

    # Two independent random feature sets stand in for two different scenes.
    torch.manual_seed(42)
    fea1 = torch.randn(1, 128, 150, 1)  # scene A
    # Scene B is the next draw from the same seeded generator, so it differs
    # from A while remaining reproducible.
    fea2 = torch.randn(1, 128, 150, 1)
    with torch.no_grad():
        vlad1 = netvlad(fea1)[0]  # descriptor of the single batch item
        vlad2 = netvlad(fea2)[0]

    # A lightly perturbed copy of scene A; its descriptor is expected to stay
    # close to the clean one.
    fea1_noisy = fea1 + 0.1 * torch.randn(1, 128, 150, 1)
    with torch.no_grad():
        vlad1_noisy = netvlad(fea1_noisy)[0]

    sim_same = torch.nn.functional.cosine_similarity(vlad1, vlad1_noisy, dim=0)
    sim_diff = torch.nn.functional.cosine_similarity(vlad1, vlad2, dim=0)
    print(f'同场景(加噪声) VLAD相似度: {sim_same.item():.4f}')
    print(f'不同场景 VLAD相似度: {sim_diff.item():.4f}')
    print(f'区分度 (同-异): {sim_same.item() - sim_diff.item():.4f}')

    # View each flat descriptor as (clusters, feature dims) for display.
    vlad1_2d = vlad1.view(16, 128).numpy()
    vlad2_2d = vlad2.view(16, 128).numpy()

    # The third panel shows |A - B|, so it must be labelled with the A-vs-B
    # similarity (sim_diff), not the A-vs-noisy-A similarity.
    panels = (
        (vlad1_2d, 'RdBu_r', 'VLAD场景A (16×128)'),
        (vlad2_2d, 'RdBu_r', 'VLAD场景B (16×128)'),
        (np.abs(vlad1_2d - vlad2_2d), 'YlOrRd',
         f'|差异| (cos_sim={sim_diff.item():.3f})'),
    )

    fig, axes = plt.subplots(1, 3, figsize=(18, 5))
    for ax, (data, cmap, title) in zip(axes, panels):
        image = ax.imshow(data, cmap=cmap, aspect='auto')
        ax.set_title(title)
        ax.set_xlabel('Feature Dim')
        ax.set_ylabel('Cluster')
        plt.colorbar(image, ax=ax)

    plt.suptitle('NetVLAD 全局描述子结构', fontsize=14, fontweight='bold')
    plt.tight_layout()
    path = os.path.join(OUTPUT_DIR, 'netvlad_vlad_structure.png')
    plt.savefig(path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f' [保存] {path}')
def _draw_pipeline_steps(ax, title, steps):
    """Write ``steps`` top to bottom as monospace text on a frameless axis."""
    ax.set_title(title, fontsize=13, fontweight='bold')
    for row, step in enumerate(steps):
        # Axes-relative coordinates: start near the top, one row per step.
        ax.text(0.1, 0.95 - row * 0.09, step, transform=ax.transAxes,
                fontsize=10, family='monospace')
    ax.axis('off')


def compare_netvlad_variants():
    """Run NetVLAD and NetVLADLoupe once each and draw their pipelines.

    Both models are built with 128-dim features and 16 clusters (NetVLADLoupe
    additionally with output_dim=256), switched to eval mode, and fed seeded
    random inputs: (2, 128, 150, 1) for NetVLAD and (2, 150, 128) for
    NetVLADLoupe. Parameter counts and input/output shapes are printed, and a
    two-column text diagram of the processing steps is saved as
    ``netvlad_variants.png`` inside OUTPUT_DIR. Nothing is returned.
    """
    print('\n--- NetVLAD vs NetVLADLoupe 对比 ---')
    netvlad = NetVLAD(fea_size=128, num_clusters=16)
    netvlad_loupe = NetVLADLoupe(feature_size=128, cluster_size=16, output_dim=256)
    # Inference only: use eval mode like the other demos in this file. The
    # Loupe pipeline listed below includes a BatchNorm step, which in training
    # mode would normalise with batch statistics and update its running
    # averages as a side effect of this forward pass.
    netvlad.eval()
    netvlad_loupe.eval()

    torch.manual_seed(42)
    x = torch.randn(2, 128, 150, 1)     # NetVLAD input (B, C, H, W)
    x_loupe = torch.randn(2, 150, 128)  # NetVLADLoupe input (B, N, C)
    with torch.no_grad():
        v1 = netvlad(x)
        v2 = netvlad_loupe(x_loupe)

    print(f'NetVLAD: {sum(p.numel() for p in netvlad.parameters()):,} params')
    print(f' 输入: {list(x.shape)} → 输出: {list(v1.shape)}')
    print(f'NetVLADLoupe: {sum(p.numel() for p in netvlad_loupe.parameters()):,} params')
    print(f' 输入: {list(x_loupe.shape)} → 输出: {list(v2.shape)}')

    # Schematic: one text column per variant.
    steps_vlad = [
        '输入: (B, 128, 150, 1)',
        '↓ Conv2d(128→16) + Softmax',
        '软分配: (B, 16, 150, 1)',
        '↓ 残差 = x - centroids',
        '残差: (B, 16, 150, 128)',
        '↓ sum(软分配 × 残差)',
        'VLAD: (B, 16, 128)',
        '↓ L2归一化 (per cluster)',
        '↓ flatten + L2归一化',
        '输出: (B, 2048)',
    ]
    steps_loupe = [
        '输入: (B, N, 128)',
        '↓ x @ cluster_weights',
        '↓ Softmax + BatchNorm',
        '软分配: (B, N, 16)',
        '↓ activation @ x',
        '↓ 减去中心校正项 a',
        '↓ L2归一化',
        '↓ MLP: 2048 → 256',
        '↓ Context Gating',
        '输出: (B, 256)',
    ]

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    _draw_pipeline_steps(axes[0], 'NetVLAD (论文使用)', steps_vlad)
    _draw_pipeline_steps(axes[1], 'NetVLADLoupe', steps_loupe)

    plt.suptitle('NetVLAD 两种变体对比', fontsize=14, fontweight='bold')
    plt.tight_layout()
    path = os.path.join(OUTPUT_DIR, 'netvlad_variants.png')
    plt.savefig(path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f' [保存] {path}')
def main():
    """Run every demo in order, then print a textual summary of NetVLAD.

    The four demo functions are executed in sequence (basic test, soft
    assignment figure, VLAD structure figure, variant comparison). Afterwards
    a fixed summary text and the output directory are printed.
    """
    rule = '=' * 60
    summary = """
NetVLAD (全局描述子聚合):
论文中使用:
- cluster_num: 16
- feature_size: 128
- 输出: 2048维全局描述子
VLAD计算步骤:
1. Soft Assignment: soft_assign = Softmax(Conv2d(128→16)(x))
每个局部特征被软分配到16个聚类中心
2. Residual: residual = x - centroids
计算特征与每个聚类中心的残差
3. VLAD Core: vlad = Σ(soft_assign × residual) / Σsoft_assign
按聚类聚合加权残差
4. Normalization:
- 逐聚类 L2 norm
- flatten
- 全局 L2 norm
最终VLAD融合:
vlads = sigmoid(w) × vlad_fusion + (1-sigmoid(w)) × vlad_bev
其中 w 是可学习参数
VLAD vs 平均池化:
- 平均池化: 丢失空间分布信息
- VLAD: 通过聚类保留了"哪些类型的特征在哪里出现"的信息
"""

    # Header banner.
    print(rule)
    print('NetVLAD 全局描述子 结构与功能可视化')
    print(rule)

    # Demos run in a fixed order; each one prints its own section header.
    demos = (
        test_netvlad_basic,
        visualize_soft_assignment,
        visualize_vlad_structure,
        compare_netvlad_variants,
    )
    for demo in demos:
        demo()

    # Closing summary.
    print('\n' + rule)
    print('结构总结:')
    print(rule)
    print(summary)
    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')


# Only run the demos when executed as a script, not when imported.
if __name__ == '__main__':
    main()