Batched remaining harness tasks (27-30, 33):
Task 27 — Artifact capture on failure: screenshots, HTML snapshots,
game state JSON, and console error tails are captured into
tests/soak/artifacts/<run-id>/ when a scenario throws. Successful
runs get a summary.json. Old runs (>7d) are pruned on startup.
Task 28 — Graceful shutdown: the first SIGINT/SIGTERM flips the abort
signal (scenarios finish their current turn, then unwind). If cleanup
is still hanging 10s later, a hard kill fires. Double Ctrl-C = immediate exit.
Exit codes: 0 success, 1 errors, 2 interrupted.
Task 29 — Periodic health probes: every 30s GET /health against the
target server. Three consecutive failures abort the run with
health_fatal, preventing staging outages from being misattributed
to harness bugs. Corrected endpoint from /api/health to /health
per server/routers/health.py.
Task 30 — Smoke test script: tests/soak/scripts/smoke.sh, a 60s
end-to-end canary that health-probes the target, seeds if needed,
and runs one minimal populate game.
Task 33 — Version bump to v3.3.4: both index.html footers (was
v3.1.6), new footer added to admin.html (had none), pyproject.toml.
Also fixes discovered during stress testing:
- SessionPool sets baseURL on all contexts so relative goto('/')
resolves correctly between games (was "invalid URL" error)
- RoomCoordinator key is now unique per game-start (Date.now()
suffix) so Deferred promises don't carry stale room codes from
previous games
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
122 lines
3.7 KiB
TypeScript
122 lines
3.7 KiB
TypeScript
/**
 * Artifacts — capture session debugging info on scenario failure.
 *
 * When runner.ts hits an unrecoverable error during a scenario, it
 * calls `artifacts.captureAll(liveSessions)` which dumps one
 * screenshot + HTML snapshot + game state JSON + console tail per
 * session into `tests/soak/artifacts/<run-id>/`.
 *
 * Successful runs get a lightweight `summary.json` written at the
 * same path so post-run inspection has something to grep.
 *
 * `pruneOldRuns` sweeps run dirs older than maxAgeMs on startup so
 * the artifacts directory doesn't grow unbounded.
 */
|
|
|
|
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
import type { Session, Logger } from './types';
|
|
|
|
/** Construction parameters for the `Artifacts` writer. */
export interface ArtifactsOptions {
  /** Absolute path to the artifacts root, e.g. /path/to/tests/soak/artifacts */
  rootDir: string;
  /** Name of the per-run subdirectory that is created beneath `rootDir`. */
  runId: string;
  /** Sink for the warnings emitted when an individual capture step fails. */
  logger: Logger;
}
|
|
|
|
/**
 * Writes debugging artifacts for one soak run into `<rootDir>/<runId>/`.
 *
 * Session capture is strictly best-effort: it runs while the runner is
 * already handling a scenario failure, so a problem while capturing is
 * logged as a warning and never thrown — otherwise it could mask the
 * error that triggered the capture in the first place.
 */
export class Artifacts {
  /** Directory holding every artifact of this run: `<rootDir>/<runId>`. */
  readonly runDir: string;

  /**
   * Resolves the run directory and creates it (including missing parents).
   *
   * @param opts run id, artifacts root and logger
   * @throws if the run directory cannot be created
   */
  constructor(private opts: ArtifactsOptions) {
    this.runDir = path.join(opts.rootDir, opts.runId);
    fs.mkdirSync(this.runDir, { recursive: true });
  }

  /**
   * Capture screenshot + HTML + state + console tail for one session.
   *
   * Files are written to `<runDir>/<roomId>/` and named after
   * `session.key` (`.png`, `.html`, `.state.json`, `.console.txt`).
   * The four steps are independent: a failing step logs a warning and
   * the remaining steps still run.
   *
   * @param session live session to snapshot
   * @param roomId name of the subdirectory the files are grouped under
   */
  async captureSession(session: Session, roomId: string): Promise<void> {
    const dir = path.join(this.runDir, roomId);
    const prefix = session.key;

    try {
      fs.mkdirSync(dir, { recursive: true });
    } catch (err) {
      // Without the directory every write below would fail too, so report
      // once and stop instead of rejecting (and emitting four more warnings).
      this.warnFailure('artifact_dir_failed', session, err);
      return;
    }

    await this.attempt('artifact_screenshot_failed', session, async () => {
      const png = await session.page.screenshot({ fullPage: true });
      fs.writeFileSync(path.join(dir, `${prefix}.png`), png);
    });

    await this.attempt('artifact_html_failed', session, async () => {
      const html = await session.page.content();
      fs.writeFileSync(path.join(dir, `${prefix}.html`), html);
    });

    await this.attempt('artifact_state_failed', session, async () => {
      const state = await session.bot.getGameState();
      fs.writeFileSync(
        path.join(dir, `${prefix}.state.json`),
        JSON.stringify(state, null, 2),
      );
    });

    await this.attempt('artifact_console_failed', session, () => {
      // Not all bot flavors expose console errors; the optional call covers
      // that case, so anything caught here is a genuine failure worth logging.
      const errors = session.bot.getConsoleErrors?.() ?? [];
      fs.writeFileSync(path.join(dir, `${prefix}.console.txt`), errors.join('\n'));
    });
  }

  /**
   * Best-effort capture for every live session. We don't know which
   * room each session belongs to at this level, so everything lands
   * under `room-unknown/` unless callers partition sessions first.
   *
   * Sessions are captured concurrently.
   */
  async captureAll(sessions: Session[]): Promise<void> {
    await Promise.all(
      sessions.map((s) => this.captureSession(s, 'room-unknown')),
    );
  }

  /**
   * Write `summary` as pretty-printed JSON to `<runDir>/summary.json`.
   *
   * @throws if the file cannot be written or `summary` cannot be serialized
   */
  writeSummary(summary: object): void {
    fs.writeFileSync(
      path.join(this.runDir, 'summary.json'),
      JSON.stringify(summary, null, 2),
    );
  }

  /** Run one capture step, turning any failure into a logged warning. */
  private async attempt(
    event: string,
    session: Session,
    step: () => void | Promise<void>,
  ): Promise<void> {
    try {
      await step();
    } catch (err) {
      this.warnFailure(event, session, err);
    }
  }

  /** Log a capture failure for `session` under the given event name. */
  private warnFailure(event: string, session: Session, err: unknown): void {
    this.opts.logger.warn(event, {
      session: session.key,
      error: err instanceof Error ? err.message : String(err),
    });
  }
}
|
|
|
|
/**
 * Prune run directories older than `maxAgeMs`. Called on runner startup.
 *
 * Only directories directly under `rootDir` are considered; age is taken
 * from the directory's mtime. Pruning is best-effort: this function never
 * throws for filesystem problems — an unreadable root or a failing entry is
 * logged as a warning and skipped, so housekeeping cannot abort startup.
 *
 * @param rootDir artifacts root whose children are run directories
 * @param maxAgeMs directories whose mtime is more than this many
 *   milliseconds in the past are removed
 * @param logger receives `artifact_pruned` per removed directory and
 *   `artifact_prune_failed` per failure
 */
export function pruneOldRuns(
  rootDir: string,
  maxAgeMs: number,
  logger: Logger,
): void {
  // A missing root simply means no run has written artifacts yet.
  if (!fs.existsSync(rootDir)) return;

  let entries: string[];
  try {
    entries = fs.readdirSync(rootDir);
  } catch (err) {
    // rootDir exists but is not a readable directory (e.g. a plain file, or
    // removed since the check above).
    logger.warn('artifact_prune_failed', {
      rootDir,
      error: err instanceof Error ? err.message : String(err),
    });
    return;
  }

  const now = Date.now();
  for (const entry of entries) {
    const full = path.join(rootDir, entry);
    try {
      const stat = fs.statSync(full);
      if (stat.isDirectory() && now - stat.mtimeMs > maxAgeMs) {
        fs.rmSync(full, { recursive: true, force: true });
        logger.info('artifact_pruned', { runId: entry });
      }
    } catch (err) {
      // Best effort: report the entry and keep sweeping the rest.
      logger.warn('artifact_prune_failed', {
        runId: entry,
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }
}
|