fix(server): mark games abandoned on room teardown + staging leaderboard
When handle_player_leave emptied a room or handle_end_game was invoked,
the room was removed from memory without touching games_v2. Periodic
cleanup only scans in-memory rooms, so those rows were stranded as
status='active' forever — staging had 42 orphans accumulated over 5h.
- event_store.update_game_abandoned: guarded UPDATE (status='active' only)
- GameLogger.log_game_abandoned{,_async}: fire-and-forget wrapper
- handle_end_game + handle_player_leave: flip status before remove_room
- LEADERBOARD_INCLUDE_TEST_DEFAULT: env override so staging can show
soak-harness accounts by default; prod keeps them hidden
Verified on staging: 42 orphans swept on restart, soak accounts now
visible on /api/stats/leaderboard (rank 1-4).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -171,6 +171,12 @@ class ServerConfig:
|
|||||||
# Rate limiting
|
# Rate limiting
|
||||||
RATE_LIMIT_ENABLED: bool = True
|
RATE_LIMIT_ENABLED: bool = True
|
||||||
|
|
||||||
|
# Leaderboard: include soak-harness test accounts by default when the
|
||||||
|
# client doesn't pass ?include_test. Production keeps this False so real
|
||||||
|
# users never see synthetic traffic; staging can flip it True so bring-up
|
||||||
|
# traffic actually shows on the board.
|
||||||
|
LEADERBOARD_INCLUDE_TEST_DEFAULT: bool = False
|
||||||
|
|
||||||
# Error tracking (Sentry)
|
# Error tracking (Sentry)
|
||||||
SENTRY_DSN: str = ""
|
SENTRY_DSN: str = ""
|
||||||
|
|
||||||
@@ -216,6 +222,7 @@ class ServerConfig:
|
|||||||
MATCHMAKING_MAX_PLAYERS=get_env_int("MATCHMAKING_MAX_PLAYERS", 4),
|
MATCHMAKING_MAX_PLAYERS=get_env_int("MATCHMAKING_MAX_PLAYERS", 4),
|
||||||
ADMIN_EMAILS=admin_emails,
|
ADMIN_EMAILS=admin_emails,
|
||||||
RATE_LIMIT_ENABLED=get_env_bool("RATE_LIMIT_ENABLED", True),
|
RATE_LIMIT_ENABLED=get_env_bool("RATE_LIMIT_ENABLED", True),
|
||||||
|
LEADERBOARD_INCLUDE_TEST_DEFAULT=get_env_bool("LEADERBOARD_INCLUDE_TEST_DEFAULT", False),
|
||||||
SENTRY_DSN=get_env("SENTRY_DSN", ""),
|
SENTRY_DSN=get_env("SENTRY_DSN", ""),
|
||||||
card_values=CardValues(
|
card_values=CardValues(
|
||||||
ACE=get_env_int("CARD_ACE", 1),
|
ACE=get_env_int("CARD_ACE", 1),
|
||||||
|
|||||||
@@ -508,6 +508,14 @@ async def handle_end_game(data: dict, ctx: ConnectionContext, *, room_manager, c
|
|||||||
pass
|
pass
|
||||||
ctx.current_room.cpu_turn_task = None
|
ctx.current_room.cpu_turn_task = None
|
||||||
|
|
||||||
|
# Mark the DB row abandoned before we lose the room (and its game_log_id)
|
||||||
|
# from memory — otherwise games_v2 would be stranded as 'active' forever.
|
||||||
|
if ctx.current_room.game_log_id:
|
||||||
|
game_logger = get_logger()
|
||||||
|
if game_logger:
|
||||||
|
game_logger.log_game_abandoned(ctx.current_room.game_log_id)
|
||||||
|
ctx.current_room.game_log_id = None
|
||||||
|
|
||||||
await ctx.current_room.broadcast({
|
await ctx.current_room.broadcast({
|
||||||
"type": "game_ended",
|
"type": "game_ended",
|
||||||
"reason": "Host ended the game",
|
"reason": "Host ended the game",
|
||||||
|
|||||||
@@ -916,6 +916,14 @@ async def handle_player_leave(room: Room, player_id: str):
|
|||||||
# Check both is_empty() AND human_player_count() — CPU players keep rooms
|
# Check both is_empty() AND human_player_count() — CPU players keep rooms
|
||||||
# technically non-empty, but a room with only CPUs is an abandoned room.
|
# technically non-empty, but a room with only CPUs is an abandoned room.
|
||||||
if room.is_empty() or room.human_player_count() == 0:
|
if room.is_empty() or room.human_player_count() == 0:
|
||||||
|
# Mark games_v2 abandoned while we still hold the game_log_id. After
|
||||||
|
# remove_room() the row would be stranded as 'active' — periodic
|
||||||
|
# cleanup only scans in-memory rooms.
|
||||||
|
if room.game_log_id:
|
||||||
|
game_logger = get_logger()
|
||||||
|
if game_logger:
|
||||||
|
game_logger.log_game_abandoned(room.game_log_id)
|
||||||
|
room.game_log_id = None
|
||||||
# Remove all remaining CPU players to release their profiles
|
# Remove all remaining CPU players to release their profiles
|
||||||
for cpu in list(room.get_cpu_players()):
|
for cpu in list(room.get_cpu_players()):
|
||||||
room.remove_player(cpu.id)
|
room.remove_player(cpu.id)
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from typing import Optional
|
|||||||
from fastapi import APIRouter, Depends, HTTPException, Header, Query
|
from fastapi import APIRouter, Depends, HTTPException, Header, Query
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from config import config
|
||||||
from models.user import User
|
from models.user import User
|
||||||
from services.stats_service import StatsService
|
from services.stats_service import StatsService
|
||||||
|
|
||||||
@@ -159,7 +160,7 @@ async def get_leaderboard(
|
|||||||
metric: str = Query("wins", pattern="^(wins|win_rate|avg_score|knockouts|streak|rating)$"),
|
metric: str = Query("wins", pattern="^(wins|win_rate|avg_score|knockouts|streak|rating)$"),
|
||||||
limit: int = Query(50, ge=1, le=100),
|
limit: int = Query(50, ge=1, le=100),
|
||||||
offset: int = Query(0, ge=0),
|
offset: int = Query(0, ge=0),
|
||||||
include_test: bool = Query(False, description="Include soak-harness test accounts"),
|
include_test: Optional[bool] = Query(None, description="Include soak-harness test accounts. Defaults to LEADERBOARD_INCLUDE_TEST_DEFAULT env (False in prod)."),
|
||||||
service: StatsService = Depends(get_stats_service_dep),
|
service: StatsService = Depends(get_stats_service_dep),
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -173,9 +174,14 @@ async def get_leaderboard(
|
|||||||
- streak: Best win streak
|
- streak: Best win streak
|
||||||
|
|
||||||
Players must have 5+ games to appear on leaderboards.
|
Players must have 5+ games to appear on leaderboards.
|
||||||
By default, soak-harness test accounts are hidden.
|
Soak-harness test accounts are hidden unless include_test is passed,
|
||||||
|
or LEADERBOARD_INCLUDE_TEST_DEFAULT is set True on the server (staging).
|
||||||
"""
|
"""
|
||||||
entries = await service.get_leaderboard(metric, limit, offset, include_test)
|
effective_include_test = (
|
||||||
|
include_test if include_test is not None
|
||||||
|
else config.LEADERBOARD_INCLUDE_TEST_DEFAULT
|
||||||
|
)
|
||||||
|
entries = await service.get_leaderboard(metric, limit, offset, effective_include_test)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"metric": metric,
|
"metric": metric,
|
||||||
|
|||||||
@@ -164,6 +164,24 @@ class GameLogger:
|
|||||||
# Not in async context - skip (simulations don't need this)
|
# Not in async context - skip (simulations don't need this)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
async def log_game_abandoned_async(self, game_id: str) -> None:
|
||||||
|
"""Mark game as abandoned (room emptied before GAME_OVER)."""
|
||||||
|
try:
|
||||||
|
await self.event_store.update_game_abandoned(game_id)
|
||||||
|
log.debug(f"Logged game abandoned: {game_id}")
|
||||||
|
except Exception as e:
|
||||||
|
log.error(f"Failed to log game abandoned: {e}")
|
||||||
|
|
||||||
|
def log_game_abandoned(self, game_id: str) -> None:
|
||||||
|
"""Sync wrapper: fires async task in async context, no-op otherwise."""
|
||||||
|
if not game_id:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
asyncio.get_running_loop()
|
||||||
|
asyncio.create_task(self.log_game_abandoned_async(game_id))
|
||||||
|
except RuntimeError:
|
||||||
|
pass
|
||||||
|
|
||||||
# -------------------------------------------------------------------------
|
# -------------------------------------------------------------------------
|
||||||
# Move Logging
|
# Move Logging
|
||||||
# -------------------------------------------------------------------------
|
# -------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -432,6 +432,23 @@ class EventStore:
|
|||||||
winner_id,
|
winner_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def update_game_abandoned(self, game_id: str) -> None:
|
||||||
|
"""
|
||||||
|
Mark a game as abandoned — used when a room empties before the game
|
||||||
|
reaches GAME_OVER, so games_v2 never leaks as stranded 'active'.
|
||||||
|
Only flips rows that are still active so a legitimately completed game
|
||||||
|
never gets reverted.
|
||||||
|
"""
|
||||||
|
async with self.pool.acquire() as conn:
|
||||||
|
await conn.execute(
|
||||||
|
"""
|
||||||
|
UPDATE games_v2
|
||||||
|
SET status = 'abandoned', completed_at = NOW()
|
||||||
|
WHERE id = $1 AND status = 'active'
|
||||||
|
""",
|
||||||
|
game_id,
|
||||||
|
)
|
||||||
|
|
||||||
async def get_active_games(self) -> list[dict]:
|
async def get_active_games(self) -> list[dict]:
|
||||||
"""
|
"""
|
||||||
Get all active games for recovery on server restart.
|
Get all active games for recovery on server restart.
|
||||||
|
|||||||
163
server/test_game_lifecycle_logging.py
Normal file
163
server/test_game_lifecycle_logging.py
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
Tests for game lifecycle logging — ensuring games_v2 rows never leak as
|
||||||
|
stranded 'active' when a room is removed from memory without the game
|
||||||
|
transitioning to GAME_OVER.
|
||||||
|
|
||||||
|
Context: on staging we observed 42 games stuck in status='active' because
|
||||||
|
handle_player_leave and handle_end_game removed the room from memory
|
||||||
|
without updating games_v2. The periodic cleanup only scans in-memory rooms,
|
||||||
|
so those rows were orphaned forever.
|
||||||
|
|
||||||
|
These tests pin down the fix:
|
||||||
|
1. GameLogger.log_game_abandoned_async calls event_store.update_game_abandoned
|
||||||
|
2. handle_end_game marks the game abandoned when the host ends the game
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
from game import GameOptions
|
||||||
|
from room import Room, RoomManager
|
||||||
|
from services.game_logger import GameLogger
|
||||||
|
from handlers import ConnectionContext, handle_end_game
|
||||||
|
from test_handlers import MockWebSocket, make_ctx
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# GameLogger.log_game_abandoned — unit
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestLogGameAbandoned:
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_calls_update_game_abandoned(self):
|
||||||
|
"""log_game_abandoned_async delegates to event_store.update_game_abandoned."""
|
||||||
|
event_store = MagicMock()
|
||||||
|
event_store.update_game_abandoned = AsyncMock()
|
||||||
|
logger = GameLogger(event_store)
|
||||||
|
|
||||||
|
await logger.log_game_abandoned_async("game-uuid-123")
|
||||||
|
|
||||||
|
event_store.update_game_abandoned.assert_awaited_once_with("game-uuid-123")
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_sync_wrapper_fires_task(self):
|
||||||
|
"""Sync log_game_abandoned fires an async task in async context."""
|
||||||
|
event_store = MagicMock()
|
||||||
|
event_store.update_game_abandoned = AsyncMock()
|
||||||
|
logger = GameLogger(event_store)
|
||||||
|
|
||||||
|
logger.log_game_abandoned("game-uuid-456")
|
||||||
|
# Let the fire-and-forget task run
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
event_store.update_game_abandoned.assert_awaited_once_with("game-uuid-456")
|
||||||
|
|
||||||
|
def test_sync_wrapper_noop_on_empty_id(self):
|
||||||
|
"""Empty game_id is a no-op (nothing to abandon)."""
|
||||||
|
event_store = MagicMock()
|
||||||
|
event_store.update_game_abandoned = AsyncMock()
|
||||||
|
logger = GameLogger(event_store)
|
||||||
|
|
||||||
|
logger.log_game_abandoned("")
|
||||||
|
logger.log_game_abandoned(None)
|
||||||
|
|
||||||
|
event_store.update_game_abandoned.assert_not_called()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_swallows_db_exceptions(self):
|
||||||
|
"""DB errors are logged, not re-raised (fire-and-forget guarantee)."""
|
||||||
|
event_store = MagicMock()
|
||||||
|
event_store.update_game_abandoned = AsyncMock(side_effect=Exception("db down"))
|
||||||
|
logger = GameLogger(event_store)
|
||||||
|
|
||||||
|
# Must not raise
|
||||||
|
await logger.log_game_abandoned_async("game-uuid-789")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# handle_end_game integration
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestHandleEndGameMarksAbandoned:
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_marks_game_abandoned_before_room_removal(self):
|
||||||
|
"""When host ends the game, games_v2 must be marked abandoned."""
|
||||||
|
rm = RoomManager()
|
||||||
|
room = rm.create_room()
|
||||||
|
host_ws = MockWebSocket()
|
||||||
|
room.add_player("host", "Host", host_ws)
|
||||||
|
room.get_player("host").is_host = True
|
||||||
|
room.game_log_id = "game-uuid-end"
|
||||||
|
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
mock_logger.log_game_abandoned = MagicMock()
|
||||||
|
|
||||||
|
ctx = make_ctx(websocket=host_ws, player_id="host", room=room)
|
||||||
|
|
||||||
|
with patch("handlers.get_logger", return_value=mock_logger):
|
||||||
|
await handle_end_game(
|
||||||
|
{},
|
||||||
|
ctx,
|
||||||
|
room_manager=rm,
|
||||||
|
cleanup_room_profiles=lambda _code: None,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_logger.log_game_abandoned.assert_called_once_with("game-uuid-end")
|
||||||
|
assert room.code not in rm.rooms # room still removed
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_no_log_when_game_log_id_missing(self):
|
||||||
|
"""If the game never logged a start, there's nothing to mark abandoned."""
|
||||||
|
rm = RoomManager()
|
||||||
|
room = rm.create_room()
|
||||||
|
host_ws = MockWebSocket()
|
||||||
|
room.add_player("host", "Host", host_ws)
|
||||||
|
room.get_player("host").is_host = True
|
||||||
|
# room.game_log_id stays None
|
||||||
|
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
mock_logger.log_game_abandoned = MagicMock()
|
||||||
|
|
||||||
|
ctx = make_ctx(websocket=host_ws, player_id="host", room=room)
|
||||||
|
|
||||||
|
with patch("handlers.get_logger", return_value=mock_logger):
|
||||||
|
await handle_end_game(
|
||||||
|
{},
|
||||||
|
ctx,
|
||||||
|
room_manager=rm,
|
||||||
|
cleanup_room_profiles=lambda _code: None,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_logger.log_game_abandoned.assert_not_called()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_non_host_cannot_trigger_abandonment(self):
|
||||||
|
"""Only the host ends games — non-host requests are rejected unchanged."""
|
||||||
|
rm = RoomManager()
|
||||||
|
room = rm.create_room()
|
||||||
|
room.add_player("host", "Host", MockWebSocket())
|
||||||
|
room.get_player("host").is_host = True
|
||||||
|
joiner_ws = MockWebSocket()
|
||||||
|
room.add_player("joiner", "Joiner", joiner_ws)
|
||||||
|
room.game_log_id = "game-uuid-untouchable"
|
||||||
|
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
mock_logger.log_game_abandoned = MagicMock()
|
||||||
|
|
||||||
|
ctx = make_ctx(websocket=joiner_ws, player_id="joiner", room=room)
|
||||||
|
|
||||||
|
with patch("handlers.get_logger", return_value=mock_logger):
|
||||||
|
await handle_end_game(
|
||||||
|
{},
|
||||||
|
ctx,
|
||||||
|
room_manager=rm,
|
||||||
|
cleanup_room_profiles=lambda _code: None,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_logger.log_game_abandoned.assert_not_called()
|
||||||
|
assert room.code in rm.rooms # room still exists
|
||||||
Reference in New Issue
Block a user