feat(soak): per-room watchdog + heartbeat wiring + multi-game lobby fix

Watchdog class with 4 Vitest tests (27 total now), wired into
ctx.heartbeat in the runner. One watchdog per room with a 60s
timeout; firing logs an error, marks the room's dashboard tile
as errored, and triggers the abort signal so the scenario unwinds.
Watchdogs are explicitly stopped in the runner's finally block
so pending timers don't keep the node process alive on exit.

Also fixes a multi-game bug discovered during stress scenario
verification: after a game ends, sessions stay parked on the
game_over screen, which hides the lobby and makes a subsequent
#create-room-btn click time out. runOneMultiplayerGame now
navigates every session to / before each game — localStorage
auth persists, so no session has to log in again.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
adlee-was-taken
2026-04-11 21:52:49 -04:00
parent 921a6ad984
commit d3b468575b
4 changed files with 120 additions and 2 deletions

View File

@@ -0,0 +1,44 @@
/**
 * Watchdog — simple per-room timeout detector.
 *
 * `start()` begins a countdown. `heartbeat()` resets it. If the
 * countdown elapses without a heartbeat, `onTimeout` fires once
 * (subsequent heartbeats are no-ops after firing, unless `start()`
 * is called again). `stop()` cancels any pending timer.
 *
 * Heartbeats only reset a countdown that is currently running: a
 * heartbeat that arrives before `start()` or after `stop()` is
 * ignored, so a late heartbeat can never re-arm a stopped watchdog
 * and leave a pending timer behind.
 *
 * Used by the runner to detect stuck rooms: one watchdog per room,
 * scenarios call ctx.heartbeat(roomId) at each progress point, and
 * a firing watchdog logs + aborts the run.
 */
export class Watchdog {
  /** Handle of the running countdown, or null when nothing is armed. */
  private timer: ReturnType<typeof setTimeout> | null = null;
  /** True once `onTimeout` has run for the current `start()` cycle. */
  private fired = false;

  /**
   * @param timeoutMs Countdown length in milliseconds.
   * @param onTimeout Callback invoked once when the countdown elapses
   *   without an intervening heartbeat.
   */
  constructor(
    private readonly timeoutMs: number,
    private readonly onTimeout: () => void,
  ) {}

  /** Arms (or re-arms) the countdown and clears the fired state. */
  start(): void {
    // Cancel any countdown already in flight so only one timer exists.
    this.stop();
    this.fired = false;
    this.timer = setTimeout(() => {
      // The handle is spent once the callback runs; drop it so the
      // watchdog reads as idle.
      this.timer = null;
      if (this.fired) return;
      this.fired = true;
      this.onTimeout();
    }, this.timeoutMs);
  }

  /**
   * Resets the running countdown. No-op after the watchdog has fired
   * and while it is idle (never started, or stopped).
   */
  heartbeat(): void {
    if (this.fired || this.timer === null) return;
    this.start();
  }

  /** Cancels the pending countdown, if any. Safe to call repeatedly. */
  stop(): void {
    if (this.timer !== null) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }
}

View File

@@ -17,6 +17,7 @@ import { SessionPool } from './core/session-pool';
import { RoomCoordinator } from './core/room-coordinator';
import { DashboardServer } from './dashboard/server';
import { Screencaster } from './core/screencaster';
import { Watchdog } from './core/watchdog';
import { getScenario, listScenarios } from './scenarios';
import type { DashboardReporter, ScenarioContext, Session } from './core/types';
@@ -123,6 +124,7 @@ async function main(): Promise<void> {
let dashboardServer: DashboardServer | null = null;
let dashboard: DashboardReporter = noopDashboard();
const watchdogs = new Map<string, Watchdog>();
let exitCode = 0;
try {
const sessions = await pool.acquire(accounts);
@@ -179,6 +181,20 @@ async function main(): Promise<void> {
}
}
// Per-room watchdogs — fire if no heartbeat arrives within 60s.
// The `watchdogs` map is declared outside the try block so the
// finally block can stop them and cancel any pending timers before
// the process exits.
for (let i = 0; i < rooms; i++) {
const roomId = `room-${i}`;
const w = new Watchdog(60_000, () => {
logger.error('watchdog_fired', { room: roomId });
dashboard.update(roomId, { phase: 'error' });
abortController.abort();
});
w.start();
watchdogs.set(roomId, w);
}
const ctx: ScenarioContext = {
config,
sessions,
@@ -186,8 +202,9 @@ async function main(): Promise<void> {
dashboard,
logger,
signal: abortController.signal,
heartbeat: () => {
// Task 26 wires per-room watchdogs. No-op until then.
heartbeat: (roomId: string) => {
const w = watchdogs.get(roomId);
if (w) w.heartbeat();
},
};
@@ -214,6 +231,7 @@ async function main(): Promise<void> {
});
exitCode = 1;
} finally {
for (const w of watchdogs.values()) w.stop();
await screencaster.stopAll();
await pool.release();
if (dashboardServer) {

View File

@@ -49,6 +49,12 @@ export async function runOneMultiplayerGame(
const maxDuration = opts.maxDurationMs ?? 5 * 60_000;
try {
// Reset every session back to the lobby before starting.
// After the first game ends each session is parked on the
// game_over screen, which hides the lobby's Create Room button.
// goto('/') bounces them back; localStorage-cached auth persists.
await Promise.all(sessions.map((s) => s.bot.goto('/')));
// Host creates game and announces the code
const code = await host.bot.createGame(host.account.username);
ctx.coordinator.announce(opts.roomId, code);

View File

@@ -0,0 +1,50 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { Watchdog } from '../core/watchdog';
// Unit tests for Watchdog, driven by Vitest fake timers so no real
// time elapses.
describe('Watchdog', () => {
  beforeEach(() => {
    vi.useFakeTimers();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  it('fires after timeout if no heartbeat', () => {
    const onTimeout = vi.fn();
    const w = new Watchdog(1000, onTimeout);
    w.start();
    vi.advanceTimersByTime(1001);
    expect(onTimeout).toHaveBeenCalledOnce();
  });

  it('heartbeat resets the timer', () => {
    const onTimeout = vi.fn();
    const w = new Watchdog(1000, onTimeout);
    w.start();
    vi.advanceTimersByTime(800);
    w.heartbeat();
    // 1600ms since start(), but only 800ms since the heartbeat.
    vi.advanceTimersByTime(800);
    expect(onTimeout).not.toHaveBeenCalled();
    // 1100ms since the heartbeat: the reset countdown has now elapsed.
    vi.advanceTimersByTime(300);
    expect(onTimeout).toHaveBeenCalledOnce();
  });

  it('stop cancels pending timeout', () => {
    const onTimeout = vi.fn();
    const w = new Watchdog(1000, onTimeout);
    w.start();
    w.stop();
    vi.advanceTimersByTime(2000);
    expect(onTimeout).not.toHaveBeenCalled();
  });

  // Previously titled "does not fire twice after stop", but stop() is
  // never called here: the scenario is a heartbeat after firing.
  it('does not fire again when a heartbeat arrives after firing', () => {
    const onTimeout = vi.fn();
    const w = new Watchdog(1000, onTimeout);
    w.start();
    vi.advanceTimersByTime(1001);
    w.heartbeat();
    vi.advanceTimersByTime(1001);
    expect(onTimeout).toHaveBeenCalledOnce();
  });
});