fusion_LCD/network_learning/08_full_pipeline_demo.py

"""
完整流水线 Demo: 端到端网络结构可视化
=====================================
集成所有子网络，展示从输入到输出的完整数据流。

运行模式:
  python 08_full_pipeline_demo.py --mode bev    # 仅BEV分支
  python 08_full_pipeline_demo.py --mode img    # 仅图像分支
  python 08_full_pipeline_demo.py --mode fusion # 完整融合模式
"""

import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')

import sys
import os
import argparse
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from net import Fusion, BEVHead, ImgHead, FusionHead
from BEVNet import RICNN
from ALIKE.alnet import ALNet
from netvlad import NetVLAD
from uot import UOTHead

OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'output')
os.makedirs(OUTPUT_DIR, exist_ok=True)


def create_dummy_batch_dict(mode='fusion'):
    """创建模拟的batch_dict"""
    B = 2  # batch中1对 (query + positive)
    batch_dict = {
        'batch_size': 2 * B,
    }

    if mode in ('fusion', 'bev'):
        batch_dict['bev'] = torch.randn(2 * B, 7, 320, 320)
        batch_dict['bev'][:, :3] = torch.sigmoid(batch_dict['bev'][:, :3])  # 可视通道
        batch_dict['bev'][:, 2:3] = (batch_dict['bev'][:, 2:3] > 0.3).float()  # guider mask

    if mode in ('fusion', 'img'):
        batch_dict['img'] = torch.randint(0, 256, (2 * B, 5, 192, 576)).float()

    if mode == 'fusion':
        # 模拟 relation: (B, max_len, K, 2)
        max_len, K = 200, 11  # K=1+10: last dim is bev coord
        batch_dict['relation'] = torch.zeros(2 * B, max_len, K, 2, dtype=torch.long)
        for i in range(2 * B):
            n_valid = 150
            batch_dict['relation'][i, :n_valid, :K - 1, 0] = torch.randint(0, 576, (n_valid, K - 1))
            batch_dict['relation'][i, :n_valid, :K - 1, 1] = torch.randint(0, 192, (n_valid, K - 1))
            batch_dict['relation'][i, :n_valid, K - 1, 0] = torch.randint(0, 320, (n_valid,))
            batch_dict['relation'][i, :n_valid, K - 1, 1] = torch.randint(0, 320, (n_valid,))

        # pose_to_frame (训练时需要)
        angle = 0.3
        pose = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)
        pose[:, 0, 0] = torch.cos(torch.tensor(angle))
        pose[:, 0, 1] = -torch.sin(torch.tensor(angle))
        pose[:, 1, 0] = torch.sin(torch.tensor(angle))
        pose[:, 1, 1] = torch.cos(torch.tensor(angle))
        pose[:, 0, 3] = 2.0
        pose[:, 1, 3] = -1.0
        batch_dict['pose_to_frame'] = pose.clone()

        batch_dict['pose_query'] = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)
        batch_dict['pose_positive'] = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)

        batch_dict['label_score'] = torch.zeros(B, 320, 320, 2)
        batch_dict['id_query'] = torch.arange(B)
        batch_dict['id_positive'] = torch.arange(B)
        batch_dict['sequence'] = torch.zeros(B, dtype=torch.long)

    return batch_dict


def run_bev_only():
    """仅BEV分支"""
    print('\n' + '=' * 60)
    print('模式: BEV Only (仅点云分支)')
    print('=' * 60)

    cfg = {
        'flag': 'bev',
        'kpts_number_bev': 150,
        'kpts_number_img': 150,
        'cluster_num_bev': 16,
        'cluster_num_img': 16,
        'cluster_num_fusion': 16,
        'sinkhorn_iter': 5,
        'vlad_size': 256,
    }

    model = Fusion(cfg)
    model.eval()
    total_params = sum(p.numel() for p in model.parameters())
    print(f'模型参数量: {total_params:,} ({total_params / 1e6:.2f}M)')

    batch_dict = create_dummy_batch_dict('bev')

    with torch.no_grad():
        output = model(batch_dict)

    print('\n输出:')
    for k, v in output.items():
        if isinstance(v, torch.Tensor):
            print(f'  {k:30s}: {list(v.shape)}')
        else:
            print(f'  {k:30s}: {v}')

    # 可视化BEV分支数据流
    fig, axes = plt.subplots(2, 4, figsize=(18, 9))

    # BEV输入 (3个可视通道)
    if 'bev' in output or 'bev' in batch_dict:
        bev_in = batch_dict['bev'][0, :3].permute(1, 2, 0).numpy()
        axes[0, 0].imshow(bev_in)
        axes[0, 0].set_title('BEV输入 (3通道)')
        axes[0, 0].axis('off')

    # Score Map
    if 'score_bev' in output:
        axes[0, 1].imshow(output['score_bev'][0].numpy(), cmap='hot')
        axes[0, 1].set_title('BEV Score Map')
        axes[0, 1].axis('off')

    # 关键点位置
    if 'key_points' in output and 'pixels_kpt' in output:
        bev_show = batch_dict['bev'][0, :3].permute(1, 2, 0).numpy()
        axes[0, 2].imshow(bev_show)
        kpt = output['pixels_kpt'][0].numpy()
        axes[0, 2].scatter(kpt[:, 1], kpt[:, 0], c='red', s=5, alpha=0.8)
        axes[0, 2].set_title(f'BEV Top-{len(kpt)} 关键点')
        axes[0, 2].axis('off')

    # Descriptor Map (第一通道)
    if 'fea_bev' in output:
        axes[0, 3].imshow(output['fea_bev'][0, 0].numpy(), cmap='viridis')
        axes[0, 3].set_title('BEV Descriptor ch0')
        axes[0, 3].axis('off')

    # 关键点特征相似度
    if 'fea_kpt_original' in output:
        fea = output['fea_kpt_original']
        # query vs positive 的相似度
        B = fea.shape[0] // 2
        sim = torch.nn.functional.cosine_similarity(
            fea[:B].permute(0, 2, 1).unsqueeze(-1),
            fea[B:].permute(0, 2, 1).unsqueeze(-2),
            dim=1
        )[0]
        im = axes[1, 0].imshow(sim.numpy(), cmap='RdYlBu_r', vmin=-1, vmax=1)
        axes[1, 0].set_title('Query-Positive 特征相似度')
        axes[1, 0].set_xlabel('Positive'); axes[1, 0].set_ylabel('Query')
        plt.colorbar(im, ax=axes[1, 0])

    # VLAD
    if 'vlads' in output:
        vlad = output['vlads'][0].view(16, 128).numpy()
        im = axes[1, 1].imshow(vlad, cmap='RdBu_r', aspect='auto')
        axes[1, 1].set_title('VLAD描述子 (16×128)')
        axes[1, 1].set_xlabel('Feature Dim'); axes[1, 1].set_ylabel('Cluster')
        plt.colorbar(im, ax=axes[1, 1])

    # 数据流图
    axes[1, 2].set_title('BEV分支数据流')
    flow = [
        'bev (7,320,320)',
        '→ x = bev[:3] (可视BEV)',
        '→ points = bev[3:7] (坐标)',
        '→ RICNN前向',
        '→ score_bev (1,320,320)',
        '→ fea_bev (128,320,320)',
        '→ NMS + Top-K(150)',
        '→ key_points (150,4)',
        '→ fea_kpt (128,150)',
        '→ EncodePosition',
        '→ NetVLAD → vlad_bev (2048)',
    ]
    for i, f in enumerate(flow):
        axes[1, 2].text(0.1, 0.95 - i * 0.1, f, transform=axes[1, 2].transAxes,
                        fontsize=9, family='monospace')
    axes[1, 2].axis('off')

    # 参数量饼图
    axes[1, 3].set_title('BEV分支参数分布')
    modules = dict(model.bev.feature_extractor.named_children())
    sizes = []
    labels = []
    for name, mod in modules.items():
        p = sum(pm.numel() for pm in mod.parameters())
        if p > 0:
            sizes.append(p)
            labels.append(f'{name}\n({p/1e3:.0f}K)')
    axes[1, 3].pie(sizes, labels=labels, autopct='%1.1f%%', textprops={'fontsize': 8})

    plt.suptitle('BEV Only 模式: 点云分支可视化', fontsize=14, fontweight='bold')
    plt.tight_layout()
    path = os.path.join(OUTPUT_DIR, 'full_pipeline_bev.png')
    plt.savefig(path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f'[保存] {path}')


def run_img_only():
    """仅图像分支"""
    print('\n' + '=' * 60)
    print('模式: Image Only (仅图像分支)')
    print('=' * 60)

    cfg = {
        'flag': 'img',
        'kpts_number_bev': 150,
        'kpts_number_img': 150,
        'cluster_num_bev': 16,
        'cluster_num_img': 16,
        'cluster_num_fusion': 16,
        'sinkhorn_iter': 5,
        'vlad_size': 256,
    }

    model = Fusion(cfg)
    model.eval()
    total_params = sum(p.numel() for p in model.parameters())
    print(f'模型参数量: {total_params:,} ({total_params / 1e6:.2f}M)')

    batch_dict = create_dummy_batch_dict('img')

    with torch.no_grad():
        output = model(batch_dict)

    print('\n输出:')
    for k, v in output.items():
        if isinstance(v, torch.Tensor):
            print(f'  {k:30s}: {list(v.shape)}')
        else:
            print(f'  {k:30s}: {v}')

    # 可视化
    fig, axes = plt.subplots(2, 4, figsize=(18, 9))

    # 输入图像
    img_in = batch_dict['img'][0, :3].permute(1, 2, 0).numpy().astype(np.uint8)
    axes[0, 0].imshow(img_in)
    axes[0, 0].set_title('图像输入 (192×576)')
    axes[0, 0].axis('off')

    # Score Map
    if 'score_img' in output:
        axes[0, 1].imshow(output['score_img'][0, 0].numpy(), cmap='hot')
        axes[0, 1].set_title('图像 Score Map')
        axes[0, 1].axis('off')

    # 关键点
    if 'key_pixels' in output:
        axes[0, 2].imshow(img_in)
        kpt = output['key_pixels'][0].numpy()
        axes[0, 2].scatter(kpt[:, 1], kpt[:, 0], c='red', s=5, alpha=0.8)
        axes[0, 2].set_title(f'Top-{len(kpt)} 关键点')
        axes[0, 2].axis('off')

    # Descriptor Map
    if 'fea_img' in output:
        axes[0, 3].imshow(output['fea_img'][0, 0].numpy(), cmap='viridis')
        axes[0, 3].set_title('图像 Descriptor ch0')
        axes[0, 3].axis('off')

    # 关键点特征相似度
    if 'fea_kpl' in output:
        fea = output['fea_kpl']
        B = fea.shape[0] // 2
        sim = torch.nn.functional.cosine_similarity(
            fea[:B].permute(0, 2, 1).unsqueeze(-1),
            fea[B:].permute(0, 2, 1).unsqueeze(-2),
            dim=1
        )[0]
        im = axes[1, 0].imshow(sim.numpy(), cmap='RdYlBu_r', vmin=-1, vmax=1)
        axes[1, 0].set_title('Query-Positive 特征相似度')
        plt.colorbar(im, ax=axes[1, 0])

    # 数据流图
    axes[1, 1].set_title('图像分支数据流')
    flow = [
        'img (5,192,576)',
        '→ x = img[:3]/255',
        '→ ALNet前向',
        '→ score_img (1,192,576)',
        '→ fea_img (128,192,576)',
        '→ NMS(2) + Top-K(150)',
        '→ key_pixels (150,2)',
        '→ fea_kpl (128,150)',
    ]
    for i, f in enumerate(flow):
        axes[1, 1].text(0.1, 0.95 - i * 0.11, f, transform=axes[1, 1].transAxes,
                        fontsize=9, family='monospace')
    axes[1, 1].axis('off')

    # 参数量饼图
    axes[1, 2].set_title('图像分支参数分布')
    modules = dict(model.img.feature_extractor.named_children())
    sizes = []
    labels = []
    for name, mod in modules.items():
        p = sum(pm.numel() for pm in mod.parameters())
        if p > 0:
            sizes.append(p)
            labels.append(f'{name}\n({p/1e3:.0f}K)')
    axes[1, 2].pie(sizes, labels=labels, autopct='%1.1f%%', textprops={'fontsize': 8})

    axes[1, 3].axis('off')

    plt.suptitle('Image Only 模式: 图像分支可视化', fontsize=14, fontweight='bold')
    plt.tight_layout()
    path = os.path.join(OUTPUT_DIR, 'full_pipeline_img.png')
    plt.savefig(path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f'[保存] {path}')


def run_fusion():
    """完整融合模式"""
    print('\n' + '=' * 60)
    print('模式: Fusion (完整融合)')
    print('=' * 60)

    cfg = {
        'flag': 'fusion',
        'kpts_number_bev': 150,
        'kpts_number_img': 150,
        'cluster_num_bev': 16,
        'cluster_num_img': 16,
        'cluster_num_fusion': 16,
        'sinkhorn_iter': 5,
        'vlad_size': 256,
    }

    model = Fusion(cfg)
    model.eval()
    total_params = sum(p.numel() for p in model.parameters())
    print(f'模型参数量: {total_params:,} ({total_params / 1e6:.2f}M)')

    batch_dict = create_dummy_batch_dict('fusion')

    with torch.no_grad():
        output = model(batch_dict)

    print('\n输出:')
    for k, v in output.items():
        if isinstance(v, torch.Tensor):
            print(f'  {k:30s}: {list(v.shape)}')
        else:
            print(f'  {k:30s}: {v}')

    # 可视化融合数据流
    fig, axes = plt.subplots(3, 4, figsize=(22, 15))

    # BEV输入
    bev_in = batch_dict['bev'][0, :3].permute(1, 2, 0).numpy()
    axes[0, 0].imshow(bev_in)
    axes[0, 0].set_title('BEV 输入 (320×320)')
    axes[0, 0].axis('off')

    # 图像输入
    img_in = batch_dict['img'][0, :3].permute(1, 2, 0).numpy().astype(np.uint8)
    axes[0, 1].imshow(img_in)
    axes[0, 1].set_title('图像输入 (192×576)')
    axes[0, 1].axis('off')

    # Score maps
    if 'score_bev' in output:
        axes[0, 2].imshow(output['score_bev'][0].numpy(), cmap='hot')
        axes[0, 2].set_title('BEV Score')
        axes[0, 2].axis('off')
    if 'score_img' in output:
        axes[0, 3].imshow(output['score_img'][0, 0].numpy(), cmap='hot')
        axes[0, 3].set_title('Image Score')
        axes[0, 3].axis('off')

    # 融合特征空间中的相似度
    if 'fea_kpt_original' in output and 'fea_kpt_fusion' in output:
        fea_orig = output['fea_kpt_original']
        fea_fusion = output['fea_kpt_fusion']
        B = fea_orig.shape[0] // 2

        sim_orig = torch.nn.functional.cosine_similarity(
            fea_orig[:B].permute(0, 2, 1).unsqueeze(-1),
            fea_orig[B:].permute(0, 2, 1).unsqueeze(-2),
            dim=1
        )[0].numpy()

        sim_fusion = torch.nn.functional.cosine_similarity(
            fea_fusion[:B].permute(0, 2, 1).unsqueeze(-1),
            fea_fusion[B:].permute(0, 2, 1).unsqueeze(-2),
            dim=1
        )[0].numpy()

        im1 = axes[1, 0].imshow(sim_orig, cmap='RdYlBu_r', vmin=-1, vmax=1)
        axes[1, 0].set_title('原始特征 相似度 (150×150)')
        plt.colorbar(im1, ax=axes[1, 0])

        im2 = axes[1, 1].imshow(sim_fusion, cmap='RdYlBu_r', vmin=-1, vmax=1)
        axes[1, 1].set_title('融合特征 相似度 (150×150)')
        plt.colorbar(im2, ax=axes[1, 1])

        axes[1, 2].imshow(np.abs(sim_orig - sim_fusion), cmap='YlOrRd')
        axes[1, 2].set_title('相似度变化 |差异|')
        plt.colorbar(im2, ax=axes[1, 2])

    # VLAD
    if 'vlads' in output:
        vlad = output['vlads'][0].view(16, 128).numpy()
        im = axes[1, 3].imshow(vlad, cmap='RdBu_r', aspect='auto')
        axes[1, 3].set_title('VLAD 融合 (16×128)')
        plt.colorbar(im, ax=axes[1, 3])

    # 整体架构图
    axes[2, 0].set_title('完整架构')
    arch = [
        '┌─ BEVHead ─────────────┐',
        '│ RICNN + EncodePos     │',
        '│ → fea_kpt_original    │',
        '│ → vlad_bev            │',
        '└───────────────────────┘',
        '┌─ ImgHead ─────────────┐',
        '│ ALNet + NMS           │',
        '│ → fea_kpl             │',
        '│ → fea_img             │',
        '└───────────────────────┘',
        '┌─ FusionHead ──────────┐',
        '│ LocalPool + Converter │',
        '│ Generator + FusionHead│',
        '│ → fea_kpt_fusion      │',
        '└───────────────────────────────────────────────────────┘',
        '  VLAD = w·vlad_fusion + (1-w)·vlad_bev'
    ]
    for i, a in enumerate(arch):
        axes[2, 0].text(0.05, 0.98 - i * 0.075, a, transform=axes[2, 0].transAxes,
                        fontsize=7.5, family='monospace')
    axes[2, 0].axis('off')

    # 模块参数对比
    axes[2, 1].set_title('各模块参数量')
    module_names = []
    module_params = []
    for name, mod in model.named_children():
        p = sum(pm.numel() for pm in mod.parameters())
        if p > 0:
            module_names.append(name)
            module_params.append(p)
    colors = plt.cm.Set3(np.linspace(0, 1, len(module_names)))
    axes[2, 1].barh(range(len(module_names)), module_params, color=colors)
    axes[2, 1].set_yticks(range(len(module_names)))
    axes[2, 1].set_yticklabels(module_names, fontsize=8)
    for i, p in enumerate(module_params):
        axes[2, 1].text(p, i, f' {p/1e3:.0f}K', va='center', fontsize=8)

    # 数据流汇总
    axes[2, 2].set_title('融合模式数据流')
    flow = [
        'img, bev, relation 输入',
        '├─ ImgHead → ALNet',
        '│   ├─ score_img',
        '│   ├─ fea_img (密集描述子)',
        '│   └─ fea_kpl (关键点)',
        '├─ BEVHead → RICNN',
        '│   ├─ score_bev',
        '│   ├─ fea_bev (密集描述子)',
        '│   ├─ fea_kpt_original',
        '│   └─ vlad_bev',
        '└─ FusionHead',
        '    ├─ GridSample → fea_pl_dual, fea_pt_dual',
        '    ├─ Converters → 跨模态转换',
        '    ├─ Generator → 全景特征',
        '    ├─ FusionHead → 融合特征',
        '    └─ NetVLAD → vlad_fusion',
        '最终: vlads = w·vlad_fusion + (1-w)·vlad_bev',
        '     UOT: → transformation (位姿)',
    ]
    for i, f in enumerate(flow):
        axes[2, 2].text(0.05, 0.98 - i * 0.06, f, transform=axes[2, 2].transAxes,
                        fontsize=7.5, family='monospace')
    axes[2, 2].axis('off')

    axes[2, 3].axis('off')

    plt.suptitle('Fusion 模式: 完整跨模态融合可视化', fontsize=14, fontweight='bold')
    plt.tight_layout()
    path = os.path.join(OUTPUT_DIR, 'full_pipeline_fusion.png')
    plt.savefig(path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f'[保存] {path}')


def main():
    parser = argparse.ArgumentParser(description='全流水线可视化')
    parser.add_argument('--mode', type=str, default='all',
                        choices=['all', 'bev', 'img', 'fusion'],
                        help='运行模式')
    args = parser.parse_args()

    if args.mode in ('all', 'bev'):
        run_bev_only()
    if args.mode in ('all', 'img'):
        run_img_only()
    if args.mode in ('all', 'fusion'):
        run_fusion()

    print(f'\n所有可视化结果保存在: {OUTPUT_DIR}')


if __name__ == '__main__':
    main()