From e66077997de856109dcdf4d1d23738a53006d55b Mon Sep 17 00:00:00 2001 From: MobKBK <@> Date: Sat, 11 Apr 2026 09:41:19 +0800 Subject: [PATCH] =?UTF-8?q?=E7=9B=B4=E6=8E=A5=E7=BB=88=E7=AB=AF=E8=BE=93?= =?UTF-8?q?=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- auto_eval_checkpoints.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/auto_eval_checkpoints.py b/auto_eval_checkpoints.py index 869bb83..11692c0 100644 --- a/auto_eval_checkpoints.py +++ b/auto_eval_checkpoints.py @@ -191,17 +191,36 @@ def run_one_eval(project_dir: Path, train_script: Path, result_name: str, gpu: s "auto_eval", ] - proc = subprocess.run( + print(f"[INFO] Running command: {' '.join(cmd)}") + print(f"[INFO] CUDA_VISIBLE_DEVICES={gpu}") + + proc = subprocess.Popen( cmd, cwd=str(project_dir), env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, + bufsize=1, + universal_newlines=True, ) - print(proc.stdout) - return proc.returncode + try: + assert proc.stdout is not None + for line in proc.stdout: + print(line, end="") + proc.wait() + return proc.returncode + except KeyboardInterrupt: + print("\n[WARN] 收到 Ctrl+C,正在终止当前测试子进程...") + proc.terminate() + try: + proc.wait(timeout=5) + except Exception: + print("[WARN] 子进程未及时退出,强制 kill") + proc.kill() + proc.wait() + raise def collect_epoch_rows(all_rows: List[Dict], epoch: int) -> List[Dict]: