fix(server): mark games abandoned on room teardown + staging leaderboard

When handle_player_leave emptied a room or handle_end_game was invoked,
the room was removed from memory without touching games_v2. Periodic
cleanup only scans in-memory rooms, so those rows were stranded as
status='active' forever — staging had 42 orphans accumulated over 5h.

- event_store.update_game_abandoned: guarded UPDATE (status='active' only)
- GameLogger.log_game_abandoned{,_async}: fire-and-forget wrapper
- handle_end_game + handle_player_leave: flip status before remove_room
- LEADERBOARD_INCLUDE_TEST_DEFAULT: env override so staging can show
  soak-harness accounts by default; prod keeps them hidden

Verified on staging: 42 orphans swept on restart, soak accounts now
visible on /api/stats/leaderboard (rank 1-4).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
adlee-was-taken
2026-04-18 00:37:49 -04:00
parent 70498b1c33
commit d5f8eef6b3
7 changed files with 230 additions and 3 deletions

View File

@@ -171,6 +171,12 @@ class ServerConfig:
# Rate limiting # Rate limiting
RATE_LIMIT_ENABLED: bool = True RATE_LIMIT_ENABLED: bool = True
# Leaderboard: include soak-harness test accounts by default when the
# client doesn't pass ?include_test. Production keeps this False so real
# users never see synthetic traffic; staging can flip it True so bring-up
# traffic actually shows on the board.
LEADERBOARD_INCLUDE_TEST_DEFAULT: bool = False
# Error tracking (Sentry) # Error tracking (Sentry)
SENTRY_DSN: str = "" SENTRY_DSN: str = ""
@@ -216,6 +222,7 @@ class ServerConfig:
MATCHMAKING_MAX_PLAYERS=get_env_int("MATCHMAKING_MAX_PLAYERS", 4), MATCHMAKING_MAX_PLAYERS=get_env_int("MATCHMAKING_MAX_PLAYERS", 4),
ADMIN_EMAILS=admin_emails, ADMIN_EMAILS=admin_emails,
RATE_LIMIT_ENABLED=get_env_bool("RATE_LIMIT_ENABLED", True), RATE_LIMIT_ENABLED=get_env_bool("RATE_LIMIT_ENABLED", True),
LEADERBOARD_INCLUDE_TEST_DEFAULT=get_env_bool("LEADERBOARD_INCLUDE_TEST_DEFAULT", False),
SENTRY_DSN=get_env("SENTRY_DSN", ""), SENTRY_DSN=get_env("SENTRY_DSN", ""),
card_values=CardValues( card_values=CardValues(
ACE=get_env_int("CARD_ACE", 1), ACE=get_env_int("CARD_ACE", 1),

View File

@@ -508,6 +508,14 @@ async def handle_end_game(data: dict, ctx: ConnectionContext, *, room_manager, c
pass pass
ctx.current_room.cpu_turn_task = None ctx.current_room.cpu_turn_task = None
# Mark the DB row abandoned before we lose the room (and its game_log_id)
# from memory — otherwise games_v2 would be stranded as 'active' forever.
if ctx.current_room.game_log_id:
game_logger = get_logger()
if game_logger:
game_logger.log_game_abandoned(ctx.current_room.game_log_id)
ctx.current_room.game_log_id = None
await ctx.current_room.broadcast({ await ctx.current_room.broadcast({
"type": "game_ended", "type": "game_ended",
"reason": "Host ended the game", "reason": "Host ended the game",

View File

@@ -916,6 +916,14 @@ async def handle_player_leave(room: Room, player_id: str):
# Check both is_empty() AND human_player_count() — CPU players keep rooms # Check both is_empty() AND human_player_count() — CPU players keep rooms
# technically non-empty, but a room with only CPUs is an abandoned room. # technically non-empty, but a room with only CPUs is an abandoned room.
if room.is_empty() or room.human_player_count() == 0: if room.is_empty() or room.human_player_count() == 0:
# Mark games_v2 abandoned while we still hold the game_log_id. After
# remove_room() the row would be stranded as 'active' — periodic
# cleanup only scans in-memory rooms.
if room.game_log_id:
game_logger = get_logger()
if game_logger:
game_logger.log_game_abandoned(room.game_log_id)
room.game_log_id = None
# Remove all remaining CPU players to release their profiles # Remove all remaining CPU players to release their profiles
for cpu in list(room.get_cpu_players()): for cpu in list(room.get_cpu_players()):
room.remove_player(cpu.id) room.remove_player(cpu.id)

View File

@@ -12,6 +12,7 @@ from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Header, Query from fastapi import APIRouter, Depends, HTTPException, Header, Query
from pydantic import BaseModel from pydantic import BaseModel
from config import config
from models.user import User from models.user import User
from services.stats_service import StatsService from services.stats_service import StatsService
@@ -159,7 +160,7 @@ async def get_leaderboard(
metric: str = Query("wins", pattern="^(wins|win_rate|avg_score|knockouts|streak|rating)$"), metric: str = Query("wins", pattern="^(wins|win_rate|avg_score|knockouts|streak|rating)$"),
limit: int = Query(50, ge=1, le=100), limit: int = Query(50, ge=1, le=100),
offset: int = Query(0, ge=0), offset: int = Query(0, ge=0),
include_test: bool = Query(False, description="Include soak-harness test accounts"), include_test: Optional[bool] = Query(None, description="Include soak-harness test accounts. Defaults to LEADERBOARD_INCLUDE_TEST_DEFAULT env (False in prod)."),
service: StatsService = Depends(get_stats_service_dep), service: StatsService = Depends(get_stats_service_dep),
): ):
""" """
@@ -173,9 +174,14 @@ async def get_leaderboard(
- streak: Best win streak - streak: Best win streak
Players must have 5+ games to appear on leaderboards. Players must have 5+ games to appear on leaderboards.
By default, soak-harness test accounts are hidden. Soak-harness test accounts are hidden unless include_test is passed,
or LEADERBOARD_INCLUDE_TEST_DEFAULT is set True on the server (staging).
""" """
entries = await service.get_leaderboard(metric, limit, offset, include_test) effective_include_test = (
include_test if include_test is not None
else config.LEADERBOARD_INCLUDE_TEST_DEFAULT
)
entries = await service.get_leaderboard(metric, limit, offset, effective_include_test)
return { return {
"metric": metric, "metric": metric,

View File

@@ -164,6 +164,24 @@ class GameLogger:
# Not in async context - skip (simulations don't need this) # Not in async context - skip (simulations don't need this)
pass pass
async def log_game_abandoned_async(self, game_id: str) -> None:
"""Mark game as abandoned (room emptied before GAME_OVER)."""
try:
await self.event_store.update_game_abandoned(game_id)
log.debug(f"Logged game abandoned: {game_id}")
except Exception as e:
log.error(f"Failed to log game abandoned: {e}")
def log_game_abandoned(self, game_id: str) -> None:
"""Sync wrapper: fires async task in async context, no-op otherwise."""
if not game_id:
return
try:
asyncio.get_running_loop()
asyncio.create_task(self.log_game_abandoned_async(game_id))
except RuntimeError:
pass
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
# Move Logging # Move Logging
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------

View File

@@ -432,6 +432,23 @@ class EventStore:
winner_id, winner_id,
) )
async def update_game_abandoned(self, game_id: str) -> None:
"""
Mark a game as abandoned — used when a room empties before the game
reaches GAME_OVER, so games_v2 never leaks as stranded 'active'.
Only flips rows that are still active so a legitimately completed game
never gets reverted.
"""
async with self.pool.acquire() as conn:
await conn.execute(
"""
UPDATE games_v2
SET status = 'abandoned', completed_at = NOW()
WHERE id = $1 AND status = 'active'
""",
game_id,
)
async def get_active_games(self) -> list[dict]: async def get_active_games(self) -> list[dict]:
""" """
Get all active games for recovery on server restart. Get all active games for recovery on server restart.

View File

@@ -0,0 +1,163 @@
# SPDX-License-Identifier: GPL-3.0-or-later
"""
Tests for game lifecycle logging — ensuring games_v2 rows never leak as
stranded 'active' when a room is removed from memory without the game
transitioning to GAME_OVER.
Context: on staging we observed 42 games stuck in status='active' because
handle_player_leave and handle_end_game removed the room from memory
without updating games_v2. The periodic cleanup only scans in-memory rooms,
so those rows were orphaned forever.
These tests pin down the fix:
1. GameLogger.log_game_abandoned_async calls event_store.update_game_abandoned
2. handle_end_game marks the game abandoned when the host ends the game
"""
import asyncio
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from game import GameOptions
from room import Room, RoomManager
from services.game_logger import GameLogger
from handlers import ConnectionContext, handle_end_game
from test_handlers import MockWebSocket, make_ctx
# ---------------------------------------------------------------------------
# GameLogger.log_game_abandoned — unit
# ---------------------------------------------------------------------------
class TestLogGameAbandoned:
@pytest.mark.asyncio
async def test_calls_update_game_abandoned(self):
"""log_game_abandoned_async delegates to event_store.update_game_abandoned."""
event_store = MagicMock()
event_store.update_game_abandoned = AsyncMock()
logger = GameLogger(event_store)
await logger.log_game_abandoned_async("game-uuid-123")
event_store.update_game_abandoned.assert_awaited_once_with("game-uuid-123")
@pytest.mark.asyncio
async def test_sync_wrapper_fires_task(self):
"""Sync log_game_abandoned fires an async task in async context."""
event_store = MagicMock()
event_store.update_game_abandoned = AsyncMock()
logger = GameLogger(event_store)
logger.log_game_abandoned("game-uuid-456")
# Let the fire-and-forget task run
await asyncio.sleep(0)
await asyncio.sleep(0)
event_store.update_game_abandoned.assert_awaited_once_with("game-uuid-456")
def test_sync_wrapper_noop_on_empty_id(self):
"""Empty game_id is a no-op (nothing to abandon)."""
event_store = MagicMock()
event_store.update_game_abandoned = AsyncMock()
logger = GameLogger(event_store)
logger.log_game_abandoned("")
logger.log_game_abandoned(None)
event_store.update_game_abandoned.assert_not_called()
@pytest.mark.asyncio
async def test_swallows_db_exceptions(self):
"""DB errors are logged, not re-raised (fire-and-forget guarantee)."""
event_store = MagicMock()
event_store.update_game_abandoned = AsyncMock(side_effect=Exception("db down"))
logger = GameLogger(event_store)
# Must not raise
await logger.log_game_abandoned_async("game-uuid-789")
# ---------------------------------------------------------------------------
# handle_end_game integration
# ---------------------------------------------------------------------------
class TestHandleEndGameMarksAbandoned:
@pytest.mark.asyncio
async def test_marks_game_abandoned_before_room_removal(self):
"""When host ends the game, games_v2 must be marked abandoned."""
rm = RoomManager()
room = rm.create_room()
host_ws = MockWebSocket()
room.add_player("host", "Host", host_ws)
room.get_player("host").is_host = True
room.game_log_id = "game-uuid-end"
mock_logger = MagicMock()
mock_logger.log_game_abandoned = MagicMock()
ctx = make_ctx(websocket=host_ws, player_id="host", room=room)
with patch("handlers.get_logger", return_value=mock_logger):
await handle_end_game(
{},
ctx,
room_manager=rm,
cleanup_room_profiles=lambda _code: None,
)
mock_logger.log_game_abandoned.assert_called_once_with("game-uuid-end")
assert room.code not in rm.rooms # room still removed
@pytest.mark.asyncio
async def test_no_log_when_game_log_id_missing(self):
"""If the game never logged a start, there's nothing to mark abandoned."""
rm = RoomManager()
room = rm.create_room()
host_ws = MockWebSocket()
room.add_player("host", "Host", host_ws)
room.get_player("host").is_host = True
# room.game_log_id stays None
mock_logger = MagicMock()
mock_logger.log_game_abandoned = MagicMock()
ctx = make_ctx(websocket=host_ws, player_id="host", room=room)
with patch("handlers.get_logger", return_value=mock_logger):
await handle_end_game(
{},
ctx,
room_manager=rm,
cleanup_room_profiles=lambda _code: None,
)
mock_logger.log_game_abandoned.assert_not_called()
@pytest.mark.asyncio
async def test_non_host_cannot_trigger_abandonment(self):
"""Only the host ends games — non-host requests are rejected unchanged."""
rm = RoomManager()
room = rm.create_room()
room.add_player("host", "Host", MockWebSocket())
room.get_player("host").is_host = True
joiner_ws = MockWebSocket()
room.add_player("joiner", "Joiner", joiner_ws)
room.game_log_id = "game-uuid-untouchable"
mock_logger = MagicMock()
mock_logger.log_game_abandoned = MagicMock()
ctx = make_ctx(websocket=joiner_ws, player_id="joiner", room=room)
with patch("handlers.get_logger", return_value=mock_logger):
await handle_end_game(
{},
ctx,
room_manager=rm,
cleanup_room_profiles=lambda _code: None,
)
mock_logger.log_game_abandoned.assert_not_called()
assert room.code in rm.rooms # room still exists