diff --git a/client/admin.html b/client/admin.html index 32fc9d9..71aeebf 100644 --- a/client/admin.html +++ b/client/admin.html @@ -400,6 +400,8 @@
+ + diff --git a/client/index.html b/client/index.html index 8af551f..9add284 100644 --- a/client/index.html +++ b/client/index.html @@ -55,7 +55,7 @@

- + @@ -288,7 +288,7 @@

Waiting for host to start the game...

- + diff --git a/pyproject.toml b/pyproject.toml index 90b8d53..0a1727c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "golfgame" -version = "3.1.6" +version = "3.3.4" description = "6-Card Golf card game with AI opponents" readme = "README.md" requires-python = ">=3.11" diff --git a/tests/soak/core/artifacts.ts b/tests/soak/core/artifacts.ts new file mode 100644 index 0000000..eeeb947 --- /dev/null +++ b/tests/soak/core/artifacts.ts @@ -0,0 +1,121 @@ +/** + * Artifacts — capture session debugging info on scenario failure. + * + * When runner.ts hits an unrecoverable error during a scenario, it + * calls `artifacts.captureAll(liveSessions)` which dumps one + * screenshot + HTML snapshot + game state JSON + console tail per + * session into `tests/soak/artifacts/{runId}/`. + * + * Successful runs get a lightweight `summary.json` written at the + * same path so post-run inspection has something to grep. + * + * `pruneOldRuns` sweeps run dirs older than maxAgeMs on startup so + * the artifacts directory doesn't grow unbounded. + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import type { Session, Logger } from './types'; + +export interface ArtifactsOptions { + runId: string; + /** Absolute path to the artifacts root, e.g. /path/to/tests/soak/artifacts */ + rootDir: string; + logger: Logger; +} + +export class Artifacts { + readonly runDir: string; + + constructor(private opts: ArtifactsOptions) { + this.runDir = path.join(opts.rootDir, opts.runId); + fs.mkdirSync(this.runDir, { recursive: true }); + } + + /** Capture screenshot + HTML + state + console tail for one session. 
*/ + async captureSession(session: Session, roomId: string): Promise { + const dir = path.join(this.runDir, roomId); + fs.mkdirSync(dir, { recursive: true }); + const prefix = session.key; + + try { + const png = await session.page.screenshot({ fullPage: true }); + fs.writeFileSync(path.join(dir, `${prefix}.png`), png); + } catch (err) { + this.opts.logger.warn('artifact_screenshot_failed', { + session: session.key, + error: err instanceof Error ? err.message : String(err), + }); + } + + try { + const html = await session.page.content(); + fs.writeFileSync(path.join(dir, `${prefix}.html`), html); + } catch (err) { + this.opts.logger.warn('artifact_html_failed', { + session: session.key, + error: err instanceof Error ? err.message : String(err), + }); + } + + try { + const state = await session.bot.getGameState(); + fs.writeFileSync( + path.join(dir, `${prefix}.state.json`), + JSON.stringify(state, null, 2), + ); + } catch (err) { + this.opts.logger.warn('artifact_state_failed', { + session: session.key, + error: err instanceof Error ? err.message : String(err), + }); + } + + try { + const errors = session.bot.getConsoleErrors?.() ?? []; + fs.writeFileSync(path.join(dir, `${prefix}.console.txt`), errors.join('\n')); + } catch { + // ignore — not all bot flavors expose console errors + } + } + + /** + * Best-effort capture for every live session. We don't know which + * room each session belongs to at this level, so everything lands + * under `room-unknown/` unless callers partition sessions first. + */ + async captureAll(sessions: Session[]): Promise { + await Promise.all( + sessions.map((s) => this.captureSession(s, 'room-unknown')), + ); + } + + writeSummary(summary: object): void { + fs.writeFileSync( + path.join(this.runDir, 'summary.json'), + JSON.stringify(summary, null, 2), + ); + } +} + +/** Prune run directories older than `maxAgeMs`. Called on runner startup. 
*/ +export function pruneOldRuns( + rootDir: string, + maxAgeMs: number, + logger: Logger, +): void { + if (!fs.existsSync(rootDir)) return; + const now = Date.now(); + for (const entry of fs.readdirSync(rootDir)) { + const full = path.join(rootDir, entry); + try { + const stat = fs.statSync(full); + if (stat.isDirectory() && now - stat.mtimeMs > maxAgeMs) { + fs.rmSync(full, { recursive: true, force: true }); + logger.info('artifact_pruned', { runId: entry }); + } + } catch { + // ignore — best effort + } + } +} diff --git a/tests/soak/core/session-pool.ts b/tests/soak/core/session-pool.ts index 7b3318d..b4e63c1 100644 --- a/tests/soak/core/session-pool.ts +++ b/tests/soak/core/session-pool.ts @@ -259,8 +259,13 @@ export class SessionPool { // a typical 1920×1080 display. Two windows side-by-side still fit // horizontally; if the user runs more than 2 rooms in tiled mode // the extra windows will overlap and need to be arranged manually. + // + // baseURL is set on every context so relative goto('/') calls + // (used between games to bounce back to the lobby) resolve to + // the target server instead of failing with "invalid URL". const context = await targetBrowser.newContext({ ...this.opts.contextOptions, + baseURL: this.opts.targetUrl, ...(useHeaded ? 
{ viewport: { width: 960, height: 900 } } : {}), }); await this.injectAuth(context, account); diff --git a/tests/soak/runner.ts b/tests/soak/runner.ts index 2c19238..a0ff64d 100644 --- a/tests/soak/runner.ts +++ b/tests/soak/runner.ts @@ -18,6 +18,7 @@ import { RoomCoordinator } from './core/room-coordinator'; import { DashboardServer } from './dashboard/server'; import { Screencaster } from './core/screencaster'; import { Watchdog } from './core/watchdog'; +import { Artifacts, pruneOldRuns } from './core/artifacts'; import { getScenario, listScenarios } from './scenarios'; import type { DashboardReporter, ScenarioContext, Session } from './core/types'; @@ -72,6 +73,13 @@ async function main(): Promise { cli, }); + // Artifacts: instantiate now so both failure path + success summary + // can reach it. Prune old runs (>7d) on startup so the directory + // doesn't grow unbounded. + const artifactsRoot = path.resolve(__dirname, 'artifacts'); + const artifacts = new Artifacts({ runId, rootDir: artifactsRoot, logger }); + pruneOldRuns(artifactsRoot, 7 * 24 * 3600 * 1000, logger); + // Resolve final config: scenarioDefaults → env → CLI (later wins) const config = mergeConfig( cli as Record, @@ -115,13 +123,47 @@ async function main(): Promise { const abortController = new AbortController(); + // Graceful shutdown: first signal flips abort, scenarios finish the + // current turn then unwind. 10 seconds later, if cleanup is still + // hanging, the runner force-exits. A second Ctrl-C skips the wait. 
+ let forceExitTimer: NodeJS.Timeout | null = null; const onSignal = (sig: string) => { + if (abortController.signal.aborted) { + logger.warn('force_exit', { signal: sig }); + process.exit(130); + } logger.warn('signal_received', { signal: sig }); abortController.abort(); + forceExitTimer = setTimeout(() => { + logger.error('graceful_shutdown_timeout'); + process.exit(130); + }, 10_000); }; process.on('SIGINT', () => onSignal('SIGINT')); process.on('SIGTERM', () => onSignal('SIGTERM')); + // Health probes: every 30s GET `${targetUrl}/health` (NOTE(review): smoke.sh probes /api/health; confirm which path the server serves). Three consecutive failures + // abort the run with a fatal error so staging outages don't get + // misattributed to harness bugs. + let healthFailures = 0; + const healthTimer = setInterval(async () => { + try { + const res = await fetch(`${targetUrl}/health`); + if (!res.ok) throw new Error(`status ${res.status}`); + healthFailures = 0; + } catch (err) { + healthFailures++; + logger.warn('health_probe_failed', { + consecutive: healthFailures, + error: err instanceof Error ? err.message : String(err), + }); + if (healthFailures >= 3) { + logger.error('health_fatal', { consecutive: healthFailures }); + abortController.abort(); + } + } + }, 30_000); + let dashboardServer: DashboardServer | null = null; let dashboard: DashboardReporter = noopDashboard(); const watchdogs = new Map(); @@ -217,6 +259,15 @@ async function main(): Promise { console.log(`Games completed: ${result.gamesCompleted}`); console.log(`Errors: ${result.errors.length}`); console.log(`Duration: ${(result.durationMs / 1000).toFixed(1)}s`); + artifacts.writeSummary({ + runId, + scenario: scenario.name, + targetUrl, + gamesCompleted: result.gamesCompleted, + errors: result.errors, + durationMs: result.durationMs, + customMetrics: result.customMetrics, + }); if (result.errors.length > 0) { console.log('Errors:'); for (const e of result.errors) { @@ -229,8 +280,23 @@ async function main(): Promise { error: err instanceof Error ? 
err.message : String(err), stack: err instanceof Error ? err.stack : undefined, }); + // Best-effort artifact capture from still-live sessions. The pool's + // activeSessions field is private but accessible for this error path — + // we want every frame we can grab before release() tears them down. + try { + const liveSessions = (pool as unknown as { activeSessions: Session[] }).activeSessions; + if (liveSessions && liveSessions.length > 0) { + await artifacts.captureAll(liveSessions); + } + } catch (captureErr) { + logger.warn('artifact_capture_failed', { + error: captureErr instanceof Error ? captureErr.message : String(captureErr), + }); + } exitCode = 1; } finally { + clearInterval(healthTimer); + if (forceExitTimer) clearTimeout(forceExitTimer); for (const w of watchdogs.values()) w.stop(); await screencaster.stopAll(); await pool.release(); diff --git a/tests/soak/scenarios/shared/multiplayer-game.ts b/tests/soak/scenarios/shared/multiplayer-game.ts index 09b7162..187a567 100644 --- a/tests/soak/scenarios/shared/multiplayer-game.ts +++ b/tests/soak/scenarios/shared/multiplayer-game.ts @@ -55,9 +55,15 @@ export async function runOneMultiplayerGame( // goto('/') bounces them back; localStorage-cached auth persists. await Promise.all(sessions.map((s) => s.bot.goto('/'))); + // Use a unique coordinator key per game-start so Deferreds don't + // carry stale room codes from previous games. The coordinator's + // Promises only resolve once — reusing `opts.roomId` across games + // would make joiners receive the first game's code on every game. 
+ const coordKey = `${opts.roomId}-${Date.now()}`; + // Host creates game and announces the code const code = await host.bot.createGame(host.account.username); - ctx.coordinator.announce(opts.roomId, code); + ctx.coordinator.announce(coordKey, code); ctx.heartbeat(opts.roomId); ctx.dashboard.update(opts.roomId, { phase: 'lobby' }); ctx.logger.info('room_created', { room: opts.roomId, code }); @@ -65,7 +71,7 @@ export async function runOneMultiplayerGame( // Joiners join concurrently await Promise.all( joiners.map(async (joiner) => { - const awaited = await ctx.coordinator.await(opts.roomId); + const awaited = await ctx.coordinator.await(coordKey); await joiner.bot.joinGame(awaited, joiner.account.username); }), ); diff --git a/tests/soak/scripts/smoke.sh b/tests/soak/scripts/smoke.sh new file mode 100755 index 0000000..6b2125f --- /dev/null +++ b/tests/soak/scripts/smoke.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Soak harness smoke test — end-to-end canary against local dev. +# Expected runtime: ~60 seconds. +set -euo pipefail + +cd "$(dirname "$0")/.." + +: "${TEST_URL:=http://localhost:8000}" +: "${SOAK_INVITE_CODE:=SOAKTEST}" + +echo "Smoke target: $TEST_URL" +echo "Invite code: $SOAK_INVITE_CODE" + +# 1. Health probe +curl -fsS "$TEST_URL/api/health" > /dev/null || { + echo "FAIL: target server unreachable at $TEST_URL" + exit 1 +} + +# 2. Ensure minimum accounts +if [ ! -f .env.stresstest ]; then + echo "Seeding accounts..." + bun run seed -- --count=4 +fi + +# 3. Run minimum viable scenario +TEST_URL="$TEST_URL" SOAK_INVITE_CODE="$SOAK_INVITE_CODE" \ + bun run soak -- \ + --scenario=populate \ + --accounts=2 \ + --rooms=1 \ + --cpus-per-room=0 \ + --games-per-room=1 \ + --holes=1 \ + --watch=none + +echo "Smoke PASSED"