feat(soak): per-room watchdog + heartbeat wiring + multi-game lobby fix
Watchdog class with 4 Vitest tests (27 total now), wired into ctx.heartbeat in the runner. One watchdog per room with a 60s timeout; firing logs an error, marks the room's dashboard tile as errored, and triggers the abort signal so the scenario unwinds. Watchdogs are explicitly stopped in the runner's finally block so pending timers don't keep the node process alive on exit.

Also fixes a multi-game bug discovered during stress scenario verification: after a game ends, sessions stay parked on the game_over screen, which hides the lobby and makes a subsequent #create-room-btn click time out. runOneMultiplayerGame now navigates every session to / before each game — localStorage auth persists, so no session has to log in again.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
44
tests/soak/core/watchdog.ts
Normal file
44
tests/soak/core/watchdog.ts
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
/**
 * Watchdog — simple per-room timeout detector.
 *
 * Lifecycle:
 *  - `start()` arms (or re-arms) the countdown and clears the "fired" state.
 *  - `heartbeat()` pushes the deadline back by `timeoutMs`, but only while
 *    the watchdog is armed. It is a no-op if the watchdog was never started,
 *    has been stopped, or has already fired.
 *  - If the countdown elapses without a heartbeat, `onTimeout` is invoked
 *    exactly once; only a new `start()` can arm the watchdog again.
 *  - `stop()` cancels any pending timer. After `stop()` no timer is left
 *    pending, and a late heartbeat cannot create one.
 *
 * Used by the runner to detect stuck rooms: one watchdog per room,
 * scenarios call ctx.heartbeat(roomId) at each progress point, and
 * a firing watchdog logs + aborts the run.
 */
export class Watchdog {
  /** Handle of the pending countdown, or `null` when nothing is armed. */
  private timer: ReturnType<typeof setTimeout> | null = null;
  /** True once `onTimeout` has run for the current `start()` cycle. */
  private fired = false;

  /**
   * @param timeoutMs Maximum silence, in milliseconds, tolerated between
   *   heartbeats before `onTimeout` is invoked.
   * @param onTimeout Callback invoked once when the countdown elapses.
   */
  constructor(
    private readonly timeoutMs: number,
    private readonly onTimeout: () => void,
  ) {}

  /** Arms the watchdog, cancelling any countdown that is already pending. */
  start(): void {
    this.stop();
    this.fired = false;
    this.arm();
  }

  /**
   * Signals progress: restarts the countdown from zero.
   *
   * Ignored unless a countdown is currently pending, so a heartbeat that
   * arrives after `stop()` (e.g. from a scenario that is still unwinding)
   * cannot resurrect the timer and keep the process alive.
   */
  heartbeat(): void {
    if (this.fired || this.timer === null) return;
    clearTimeout(this.timer);
    this.arm();
  }

  /** Cancels the pending countdown, if any. Safe to call repeatedly. */
  stop(): void {
    if (this.timer !== null) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }

  /** Schedules the timeout callback `timeoutMs` from now. */
  private arm(): void {
    this.timer = setTimeout(() => {
      // The handle is spent once the callback runs; drop it so the
      // watchdog reads as "not armed" from here on.
      this.timer = null;
      if (this.fired) return;
      this.fired = true;
      this.onTimeout();
    }, this.timeoutMs);
  }
}
|
||||||
@@ -17,6 +17,7 @@ import { SessionPool } from './core/session-pool';
|
|||||||
import { RoomCoordinator } from './core/room-coordinator';
|
import { RoomCoordinator } from './core/room-coordinator';
|
||||||
import { DashboardServer } from './dashboard/server';
|
import { DashboardServer } from './dashboard/server';
|
||||||
import { Screencaster } from './core/screencaster';
|
import { Screencaster } from './core/screencaster';
|
||||||
|
import { Watchdog } from './core/watchdog';
|
||||||
import { getScenario, listScenarios } from './scenarios';
|
import { getScenario, listScenarios } from './scenarios';
|
||||||
import type { DashboardReporter, ScenarioContext, Session } from './core/types';
|
import type { DashboardReporter, ScenarioContext, Session } from './core/types';
|
||||||
|
|
||||||
@@ -123,6 +124,7 @@ async function main(): Promise<void> {
|
|||||||
|
|
||||||
let dashboardServer: DashboardServer | null = null;
|
let dashboardServer: DashboardServer | null = null;
|
||||||
let dashboard: DashboardReporter = noopDashboard();
|
let dashboard: DashboardReporter = noopDashboard();
|
||||||
|
const watchdogs = new Map<string, Watchdog>();
|
||||||
let exitCode = 0;
|
let exitCode = 0;
|
||||||
try {
|
try {
|
||||||
const sessions = await pool.acquire(accounts);
|
const sessions = await pool.acquire(accounts);
|
||||||
@@ -179,6 +181,20 @@ async function main(): Promise<void> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Per-room watchdogs — fire if no heartbeat arrives within 60s.
|
||||||
|
// Declared at outer scope so the finally block can stop them and
|
||||||
|
// drain any pending timers before the process exits.
|
||||||
|
for (let i = 0; i < rooms; i++) {
|
||||||
|
const roomId = `room-${i}`;
|
||||||
|
const w = new Watchdog(60_000, () => {
|
||||||
|
logger.error('watchdog_fired', { room: roomId });
|
||||||
|
dashboard.update(roomId, { phase: 'error' });
|
||||||
|
abortController.abort();
|
||||||
|
});
|
||||||
|
w.start();
|
||||||
|
watchdogs.set(roomId, w);
|
||||||
|
}
|
||||||
|
|
||||||
const ctx: ScenarioContext = {
|
const ctx: ScenarioContext = {
|
||||||
config,
|
config,
|
||||||
sessions,
|
sessions,
|
||||||
@@ -186,8 +202,9 @@ async function main(): Promise<void> {
|
|||||||
dashboard,
|
dashboard,
|
||||||
logger,
|
logger,
|
||||||
signal: abortController.signal,
|
signal: abortController.signal,
|
||||||
heartbeat: () => {
|
heartbeat: (roomId: string) => {
|
||||||
// Task 26 wires per-room watchdogs. No-op until then.
|
const w = watchdogs.get(roomId);
|
||||||
|
if (w) w.heartbeat();
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -214,6 +231,7 @@ async function main(): Promise<void> {
|
|||||||
});
|
});
|
||||||
exitCode = 1;
|
exitCode = 1;
|
||||||
} finally {
|
} finally {
|
||||||
|
for (const w of watchdogs.values()) w.stop();
|
||||||
await screencaster.stopAll();
|
await screencaster.stopAll();
|
||||||
await pool.release();
|
await pool.release();
|
||||||
if (dashboardServer) {
|
if (dashboardServer) {
|
||||||
|
|||||||
@@ -49,6 +49,12 @@ export async function runOneMultiplayerGame(
|
|||||||
const maxDuration = opts.maxDurationMs ?? 5 * 60_000;
|
const maxDuration = opts.maxDurationMs ?? 5 * 60_000;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// Reset every session back to the lobby before starting.
|
||||||
|
// After the first game ends each session is parked on the
|
||||||
|
// game_over screen, which hides the lobby's Create Room button.
|
||||||
|
// goto('/') bounces them back; localStorage-cached auth persists.
|
||||||
|
await Promise.all(sessions.map((s) => s.bot.goto('/')));
|
||||||
|
|
||||||
// Host creates game and announces the code
|
// Host creates game and announces the code
|
||||||
const code = await host.bot.createGame(host.account.username);
|
const code = await host.bot.createGame(host.account.username);
|
||||||
ctx.coordinator.announce(opts.roomId, code);
|
ctx.coordinator.announce(opts.roomId, code);
|
||||||
|
|||||||
50
tests/soak/tests/watchdog.test.ts
Normal file
50
tests/soak/tests/watchdog.test.ts
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||||
|
import { Watchdog } from '../core/watchdog';
|
||||||
|
|
||||||
|
// Unit tests for Watchdog. Fake timers let each case move the clock
// deterministically instead of waiting on real timeouts.
describe('Watchdog', () => {
  beforeEach(() => {
    vi.useFakeTimers();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  it('fires after timeout if no heartbeat', () => {
    const onTimeout = vi.fn();
    const w = new Watchdog(1000, onTimeout);
    w.start();
    vi.advanceTimersByTime(1001);
    expect(onTimeout).toHaveBeenCalledOnce();
  });

  it('heartbeat resets the timer', () => {
    const onTimeout = vi.fn();
    const w = new Watchdog(1000, onTimeout);
    w.start();
    vi.advanceTimersByTime(800);
    w.heartbeat();
    // 1600ms since start(), but only 800ms since the heartbeat.
    vi.advanceTimersByTime(800);
    expect(onTimeout).not.toHaveBeenCalled();
    // 1100ms since the heartbeat: the reset deadline has now passed.
    vi.advanceTimersByTime(300);
    expect(onTimeout).toHaveBeenCalledOnce();
  });

  it('stop cancels pending timeout', () => {
    const onTimeout = vi.fn();
    const w = new Watchdog(1000, onTimeout);
    w.start();
    w.stop();
    vi.advanceTimersByTime(2000);
    expect(onTimeout).not.toHaveBeenCalled();
  });

  // This case never calls stop(); it checks that a heartbeat arriving
  // after the watchdog has already fired does not re-arm it.
  it('does not fire twice when heartbeat arrives after firing', () => {
    const onTimeout = vi.fn();
    const w = new Watchdog(1000, onTimeout);
    w.start();
    vi.advanceTimersByTime(1001);
    w.heartbeat();
    vi.advanceTimersByTime(1001);
    expect(onTimeout).toHaveBeenCalledOnce();
  });
});
|
||||||
Reference in New Issue
Block a user