fix(server): mark games abandoned on room teardown + staging leaderboard
When handle_player_leave emptied a room or handle_end_game was invoked,
the room was removed from memory without touching games_v2. Periodic
cleanup only scans in-memory rooms, so those rows were stranded as
status='active' forever — staging had 42 orphans accumulated over 5h.
- event_store.update_game_abandoned: guarded UPDATE (status='active' only)
- GameLogger.log_game_abandoned{,_async}: fire-and-forget wrapper
- handle_end_game + handle_player_leave: flip status before remove_room
- LEADERBOARD_INCLUDE_TEST_DEFAULT: env override so staging can show
soak-harness accounts by default; prod keeps them hidden
Verified on staging: 42 orphans swept on restart, soak accounts now
visible on /api/stats/leaderboard (rank 1-4).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -171,6 +171,12 @@ class ServerConfig:
|
||||
# Rate limiting
|
||||
RATE_LIMIT_ENABLED: bool = True
|
||||
|
||||
# Leaderboard: include soak-harness test accounts by default when the
|
||||
# client doesn't pass ?include_test. Production keeps this False so real
|
||||
# users never see synthetic traffic; staging can flip it True so bring-up
|
||||
# traffic actually shows on the board.
|
||||
LEADERBOARD_INCLUDE_TEST_DEFAULT: bool = False
|
||||
|
||||
# Error tracking (Sentry)
|
||||
SENTRY_DSN: str = ""
|
||||
|
||||
@@ -216,6 +222,7 @@ class ServerConfig:
|
||||
MATCHMAKING_MAX_PLAYERS=get_env_int("MATCHMAKING_MAX_PLAYERS", 4),
|
||||
ADMIN_EMAILS=admin_emails,
|
||||
RATE_LIMIT_ENABLED=get_env_bool("RATE_LIMIT_ENABLED", True),
|
||||
LEADERBOARD_INCLUDE_TEST_DEFAULT=get_env_bool("LEADERBOARD_INCLUDE_TEST_DEFAULT", False),
|
||||
SENTRY_DSN=get_env("SENTRY_DSN", ""),
|
||||
card_values=CardValues(
|
||||
ACE=get_env_int("CARD_ACE", 1),
|
||||
|
||||
@@ -508,6 +508,14 @@ async def handle_end_game(data: dict, ctx: ConnectionContext, *, room_manager, c
|
||||
pass
|
||||
ctx.current_room.cpu_turn_task = None
|
||||
|
||||
# Mark the DB row abandoned before we lose the room (and its game_log_id)
|
||||
# from memory — otherwise games_v2 would be stranded as 'active' forever.
|
||||
if ctx.current_room.game_log_id:
|
||||
game_logger = get_logger()
|
||||
if game_logger:
|
||||
game_logger.log_game_abandoned(ctx.current_room.game_log_id)
|
||||
ctx.current_room.game_log_id = None
|
||||
|
||||
await ctx.current_room.broadcast({
|
||||
"type": "game_ended",
|
||||
"reason": "Host ended the game",
|
||||
|
||||
@@ -916,6 +916,14 @@ async def handle_player_leave(room: Room, player_id: str):
|
||||
# Check both is_empty() AND human_player_count() — CPU players keep rooms
|
||||
# technically non-empty, but a room with only CPUs is an abandoned room.
|
||||
if room.is_empty() or room.human_player_count() == 0:
|
||||
# Mark games_v2 abandoned while we still hold the game_log_id. After
|
||||
# remove_room() the row would be stranded as 'active' — periodic
|
||||
# cleanup only scans in-memory rooms.
|
||||
if room.game_log_id:
|
||||
game_logger = get_logger()
|
||||
if game_logger:
|
||||
game_logger.log_game_abandoned(room.game_log_id)
|
||||
room.game_log_id = None
|
||||
# Remove all remaining CPU players to release their profiles
|
||||
for cpu in list(room.get_cpu_players()):
|
||||
room.remove_player(cpu.id)
|
||||
|
||||
@@ -12,6 +12,7 @@ from typing import Optional
|
||||
from fastapi import APIRouter, Depends, HTTPException, Header, Query
|
||||
from pydantic import BaseModel
|
||||
|
||||
from config import config
|
||||
from models.user import User
|
||||
from services.stats_service import StatsService
|
||||
|
||||
@@ -159,7 +160,7 @@ async def get_leaderboard(
|
||||
metric: str = Query("wins", pattern="^(wins|win_rate|avg_score|knockouts|streak|rating)$"),
|
||||
limit: int = Query(50, ge=1, le=100),
|
||||
offset: int = Query(0, ge=0),
|
||||
include_test: bool = Query(False, description="Include soak-harness test accounts"),
|
||||
include_test: Optional[bool] = Query(None, description="Include soak-harness test accounts. Defaults to LEADERBOARD_INCLUDE_TEST_DEFAULT env (False in prod)."),
|
||||
service: StatsService = Depends(get_stats_service_dep),
|
||||
):
|
||||
"""
|
||||
@@ -173,9 +174,14 @@ async def get_leaderboard(
|
||||
- streak: Best win streak
|
||||
|
||||
Players must have 5+ games to appear on leaderboards.
|
||||
By default, soak-harness test accounts are hidden.
|
||||
Soak-harness test accounts are hidden unless include_test is passed,
|
||||
or LEADERBOARD_INCLUDE_TEST_DEFAULT is set True on the server (staging).
|
||||
"""
|
||||
entries = await service.get_leaderboard(metric, limit, offset, include_test)
|
||||
effective_include_test = (
|
||||
include_test if include_test is not None
|
||||
else config.LEADERBOARD_INCLUDE_TEST_DEFAULT
|
||||
)
|
||||
entries = await service.get_leaderboard(metric, limit, offset, effective_include_test)
|
||||
|
||||
return {
|
||||
"metric": metric,
|
||||
|
||||
@@ -164,6 +164,24 @@ class GameLogger:
|
||||
# Not in async context - skip (simulations don't need this)
|
||||
pass
|
||||
|
||||
async def log_game_abandoned_async(self, game_id: str) -> None:
|
||||
"""Mark game as abandoned (room emptied before GAME_OVER)."""
|
||||
try:
|
||||
await self.event_store.update_game_abandoned(game_id)
|
||||
log.debug(f"Logged game abandoned: {game_id}")
|
||||
except Exception as e:
|
||||
log.error(f"Failed to log game abandoned: {e}")
|
||||
|
||||
def log_game_abandoned(self, game_id: str) -> None:
|
||||
"""Sync wrapper: fires async task in async context, no-op otherwise."""
|
||||
if not game_id:
|
||||
return
|
||||
try:
|
||||
asyncio.get_running_loop()
|
||||
asyncio.create_task(self.log_game_abandoned_async(game_id))
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Move Logging
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
@@ -432,6 +432,23 @@ class EventStore:
|
||||
winner_id,
|
||||
)
|
||||
|
||||
async def update_game_abandoned(self, game_id: str) -> None:
|
||||
"""
|
||||
Mark a game as abandoned — used when a room empties before the game
|
||||
reaches GAME_OVER, so games_v2 never leaks as stranded 'active'.
|
||||
Only flips rows that are still active so a legitimately completed game
|
||||
never gets reverted.
|
||||
"""
|
||||
async with self.pool.acquire() as conn:
|
||||
await conn.execute(
|
||||
"""
|
||||
UPDATE games_v2
|
||||
SET status = 'abandoned', completed_at = NOW()
|
||||
WHERE id = $1 AND status = 'active'
|
||||
""",
|
||||
game_id,
|
||||
)
|
||||
|
||||
async def get_active_games(self) -> list[dict]:
|
||||
"""
|
||||
Get all active games for recovery on server restart.
|
||||
|
||||
163
server/test_game_lifecycle_logging.py
Normal file
163
server/test_game_lifecycle_logging.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
"""
|
||||
Tests for game lifecycle logging — ensuring games_v2 rows never leak as
|
||||
stranded 'active' when a room is removed from memory without the game
|
||||
transitioning to GAME_OVER.
|
||||
|
||||
Context: on staging we observed 42 games stuck in status='active' because
|
||||
handle_player_leave and handle_end_game removed the room from memory
|
||||
without updating games_v2. The periodic cleanup only scans in-memory rooms,
|
||||
so those rows were orphaned forever.
|
||||
|
||||
These tests pin down the fix:
|
||||
1. GameLogger.log_game_abandoned_async calls event_store.update_game_abandoned
|
||||
2. handle_end_game marks the game abandoned when the host ends the game
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from game import GameOptions
|
||||
from room import Room, RoomManager
|
||||
from services.game_logger import GameLogger
|
||||
from handlers import ConnectionContext, handle_end_game
|
||||
from test_handlers import MockWebSocket, make_ctx
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GameLogger.log_game_abandoned — unit
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestLogGameAbandoned:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_calls_update_game_abandoned(self):
|
||||
"""log_game_abandoned_async delegates to event_store.update_game_abandoned."""
|
||||
event_store = MagicMock()
|
||||
event_store.update_game_abandoned = AsyncMock()
|
||||
logger = GameLogger(event_store)
|
||||
|
||||
await logger.log_game_abandoned_async("game-uuid-123")
|
||||
|
||||
event_store.update_game_abandoned.assert_awaited_once_with("game-uuid-123")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_sync_wrapper_fires_task(self):
|
||||
"""Sync log_game_abandoned fires an async task in async context."""
|
||||
event_store = MagicMock()
|
||||
event_store.update_game_abandoned = AsyncMock()
|
||||
logger = GameLogger(event_store)
|
||||
|
||||
logger.log_game_abandoned("game-uuid-456")
|
||||
# Let the fire-and-forget task run
|
||||
await asyncio.sleep(0)
|
||||
await asyncio.sleep(0)
|
||||
|
||||
event_store.update_game_abandoned.assert_awaited_once_with("game-uuid-456")
|
||||
|
||||
def test_sync_wrapper_noop_on_empty_id(self):
|
||||
"""Empty game_id is a no-op (nothing to abandon)."""
|
||||
event_store = MagicMock()
|
||||
event_store.update_game_abandoned = AsyncMock()
|
||||
logger = GameLogger(event_store)
|
||||
|
||||
logger.log_game_abandoned("")
|
||||
logger.log_game_abandoned(None)
|
||||
|
||||
event_store.update_game_abandoned.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_swallows_db_exceptions(self):
|
||||
"""DB errors are logged, not re-raised (fire-and-forget guarantee)."""
|
||||
event_store = MagicMock()
|
||||
event_store.update_game_abandoned = AsyncMock(side_effect=Exception("db down"))
|
||||
logger = GameLogger(event_store)
|
||||
|
||||
# Must not raise
|
||||
await logger.log_game_abandoned_async("game-uuid-789")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# handle_end_game integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestHandleEndGameMarksAbandoned:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_marks_game_abandoned_before_room_removal(self):
|
||||
"""When host ends the game, games_v2 must be marked abandoned."""
|
||||
rm = RoomManager()
|
||||
room = rm.create_room()
|
||||
host_ws = MockWebSocket()
|
||||
room.add_player("host", "Host", host_ws)
|
||||
room.get_player("host").is_host = True
|
||||
room.game_log_id = "game-uuid-end"
|
||||
|
||||
mock_logger = MagicMock()
|
||||
mock_logger.log_game_abandoned = MagicMock()
|
||||
|
||||
ctx = make_ctx(websocket=host_ws, player_id="host", room=room)
|
||||
|
||||
with patch("handlers.get_logger", return_value=mock_logger):
|
||||
await handle_end_game(
|
||||
{},
|
||||
ctx,
|
||||
room_manager=rm,
|
||||
cleanup_room_profiles=lambda _code: None,
|
||||
)
|
||||
|
||||
mock_logger.log_game_abandoned.assert_called_once_with("game-uuid-end")
|
||||
assert room.code not in rm.rooms # room still removed
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_log_when_game_log_id_missing(self):
|
||||
"""If the game never logged a start, there's nothing to mark abandoned."""
|
||||
rm = RoomManager()
|
||||
room = rm.create_room()
|
||||
host_ws = MockWebSocket()
|
||||
room.add_player("host", "Host", host_ws)
|
||||
room.get_player("host").is_host = True
|
||||
# room.game_log_id stays None
|
||||
|
||||
mock_logger = MagicMock()
|
||||
mock_logger.log_game_abandoned = MagicMock()
|
||||
|
||||
ctx = make_ctx(websocket=host_ws, player_id="host", room=room)
|
||||
|
||||
with patch("handlers.get_logger", return_value=mock_logger):
|
||||
await handle_end_game(
|
||||
{},
|
||||
ctx,
|
||||
room_manager=rm,
|
||||
cleanup_room_profiles=lambda _code: None,
|
||||
)
|
||||
|
||||
mock_logger.log_game_abandoned.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_host_cannot_trigger_abandonment(self):
|
||||
"""Only the host ends games — non-host requests are rejected unchanged."""
|
||||
rm = RoomManager()
|
||||
room = rm.create_room()
|
||||
room.add_player("host", "Host", MockWebSocket())
|
||||
room.get_player("host").is_host = True
|
||||
joiner_ws = MockWebSocket()
|
||||
room.add_player("joiner", "Joiner", joiner_ws)
|
||||
room.game_log_id = "game-uuid-untouchable"
|
||||
|
||||
mock_logger = MagicMock()
|
||||
mock_logger.log_game_abandoned = MagicMock()
|
||||
|
||||
ctx = make_ctx(websocket=joiner_ws, player_id="joiner", room=room)
|
||||
|
||||
with patch("handlers.get_logger", return_value=mock_logger):
|
||||
await handle_end_game(
|
||||
{},
|
||||
ctx,
|
||||
room_manager=rm,
|
||||
cleanup_room_profiles=lambda _code: None,
|
||||
)
|
||||
|
||||
mock_logger.log_game_abandoned.assert_not_called()
|
||||
assert room.code in rm.rooms # room still exists
|
||||
Reference in New Issue
Block a user