Fix V2 race conditions, auth gaps, serialization bugs, and async stats

Phase 1 - Critical Fixes: - Add game_lock (asyncio.Lock) to Room class for serializing mutations - Wrap all game action handlers in lock to prevent race conditions - Split Card.to_dict into to_dict (full data) and to_client_dict (hidden) - Fix CardState.from_dict to handle missing rank/suit gracefully - Fix GameOptions reconstruction in recovery_service (dict -> object) - Extend state cache TTL from 4h to 24h, add touch_game method Phase 2 - Security: - Add optional WebSocket authentication via token query param - Use authenticated user ID/name when available - Add auth support to spectator WebSocket endpoint Phase 3 - Performance: - Make stats processing async (fire-and-forget) to avoid blocking game completion notifications Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-27 16:27:30 -05:00
parent 1dbfb3f14b
commit f27020f21b
8 changed files with 446 additions and 231 deletions
--- a/server/game.py
+++ b/server/game.py
@@ -140,18 +140,37 @@ class Card:
        """
        Convert card to dictionary for JSON serialization.
        Always includes full card data (rank/suit/face_up) for server-side
        caching and event sourcing. Use to_client_dict() for client views
        that should hide face-down cards.
        Args:
-            reveal: If True, show card details even if face-down.
+            reveal: Ignored for backwards compatibility. Use to_client_dict() instead.
        Returns:
-            Dict with card info, or just {face_up: False} if hidden.
+            Dict with full card info (suit, rank, face_up).
        """
        if self.face_up or reveal:
        return {
            "suit": self.suit.value,
            "rank": self.rank.value,
            "face_up": self.face_up,
        }
    def to_client_dict(self) -> dict:
        """
        Convert card to dictionary for client display.
        Hides card details if face-down to prevent cheating.
        Returns:
            Dict with card info, or just {face_up: False} if hidden.
        """
        if self.face_up:
            return {
                "suit": self.suit.value,
                "rank": self.rank.value,
                "face_up": True,
            }
        return {"face_up": False}
    def value(self) -> int:
@@ -390,7 +409,9 @@ class Player:
        Returns:
            List of card dictionaries.
        """
-        return [card.to_dict(reveal) for card in self.cards]
+        if reveal:
            return [card.to_dict() for card in self.cards]
        return [card.to_client_dict() for card in self.cards]
 class GamePhase(Enum):
--- a/server/main.py
+++ b/server/main.py
@@ -21,6 +21,26 @@ from game_log import get_logger
 # Import production components
 from logging_config import setup_logging
 # Initialize Sentry if configured
 if config.SENTRY_DSN:
    try:
        import sentry_sdk
        from sentry_sdk.integrations.fastapi import FastApiIntegration
        from sentry_sdk.integrations.starlette import StarletteIntegration
        sentry_sdk.init(
            dsn=config.SENTRY_DSN,
            environment=config.ENVIRONMENT,
            traces_sample_rate=0.1 if config.ENVIRONMENT == "production" else 1.0,
            integrations=[
                StarletteIntegration(transaction_style="endpoint"),
                FastApiIntegration(transaction_style="endpoint"),
            ],
        )
        logging.getLogger(__name__).info("Sentry error tracking initialized")
    except ImportError:
        logging.getLogger(__name__).warning("sentry-sdk not installed, error tracking disabled")
 # Configure logging based on environment
 setup_logging(
    level=config.LOG_LEVEL,
@@ -243,8 +263,75 @@ app.add_middleware(
    environment=config.ENVIRONMENT,
 )
-# Note: Rate limiting middleware is added after app startup when Redis is available
+# Rate limiting middleware (uses global _rate_limiter set in lifespan)
-# See _add_rate_limit_middleware() called from a startup event if needed
+# We create a wrapper that safely handles the case when rate limiter isn't ready
 from starlette.middleware.base import BaseHTTPMiddleware
 from starlette.responses import JSONResponse
 class LazyRateLimitMiddleware(BaseHTTPMiddleware):
    """Rate limiting middleware that uses the global _rate_limiter when available."""
    async def dispatch(self, request, call_next):
        global _rate_limiter
        # Skip if rate limiter not initialized or disabled
        if not _rate_limiter or not config.RATE_LIMIT_ENABLED:
            return await call_next(request)
        # Import here to avoid circular imports
        from services.ratelimit import RATE_LIMITS
        path = request.url.path
        # Skip health checks and static files
        if path in ("/health", "/ready", "/metrics"):
            return await call_next(request)
        if path.endswith((".js", ".css", ".html", ".ico", ".png", ".jpg", ".svg")):
            return await call_next(request)
        # Determine rate limit tier
        if path.startswith("/api/auth"):
            limit, window = RATE_LIMITS["api_auth"]
        elif path == "/api/rooms" and request.method == "POST":
            limit, window = RATE_LIMITS["api_create_room"]
        elif "email" in path or "verify" in path:
            limit, window = RATE_LIMITS["email_send"]
        elif path.startswith("/api"):
            limit, window = RATE_LIMITS["api_general"]
        else:
            return await call_next(request)
        # Get client key and check rate limit
        client_key = _rate_limiter.get_client_key(request)
        full_key = f"{path}:{client_key}"
        allowed, info = await _rate_limiter.is_allowed(full_key, limit, window)
        if allowed:
            response = await call_next(request)
        else:
            response = JSONResponse(
                status_code=429,
                content={
                    "error": "Rate limit exceeded",
                    "message": f"Too many requests. Please wait {info['reset']} seconds.",
                    "retry_after": info["reset"],
                },
            )
        # Add rate limit headers
        response.headers["X-RateLimit-Limit"] = str(info["limit"])
        response.headers["X-RateLimit-Remaining"] = str(info["remaining"])
        response.headers["X-RateLimit-Reset"] = str(info["reset"])
        if not allowed:
            response.headers["Retry-After"] = str(info["reset"])
        return response
 app.add_middleware(LazyRateLimitMiddleware)
 room_manager = RoomManager()
@@ -309,7 +396,23 @@ async def require_admin(user: User = Depends(require_user)) -> User:
 async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()
    # Extract token from query param for optional authentication
    token = websocket.query_params.get("token")
    authenticated_user = None
    if token and _auth_service:
        try:
            authenticated_user = await _auth_service.get_user_from_token(token)
        except Exception as e:
            logger.debug(f"WebSocket auth failed: {e}")
    # Use authenticated user ID if available, otherwise generate random UUID
    if authenticated_user:
        player_id = str(authenticated_user.id)
        logger.debug(f"WebSocket authenticated as user {player_id}")
    else:
        player_id = str(uuid.uuid4())
        logger.debug(f"WebSocket connected anonymously as {player_id}")
    current_room: Room | None = None
    try:
@@ -319,6 +422,9 @@ async def websocket_endpoint(websocket: WebSocket):
            if msg_type == "create_room":
                player_name = data.get("player_name", "Player")
                # Use authenticated user's name if available
                if authenticated_user and authenticated_user.display_name:
                    player_name = authenticated_user.display_name
                room = room_manager.create_room()
                room.add_player(player_id, player_name, websocket)
                current_room = room
@@ -327,6 +433,7 @@ async def websocket_endpoint(websocket: WebSocket):
                    "type": "room_created",
                    "room_code": room.code,
                    "player_id": player_id,
                    "authenticated": authenticated_user is not None,
                })
                await room.broadcast({
@@ -360,6 +467,9 @@ async def websocket_endpoint(websocket: WebSocket):
                    })
                    continue
                # Use authenticated user's name if available
                if authenticated_user and authenticated_user.display_name:
                    player_name = authenticated_user.display_name
                room.add_player(player_id, player_name, websocket)
                current_room = room
@@ -367,6 +477,7 @@ async def websocket_endpoint(websocket: WebSocket):
                    "type": "room_joined",
                    "room_code": room.code,
                    "player_id": player_id,
                    "authenticated": authenticated_user is not None,
                })
                await room.broadcast({
@@ -487,11 +598,12 @@ async def websocket_endpoint(websocket: WebSocket):
                num_decks = max(1, min(3, num_decks))
                num_rounds = max(1, min(18, num_rounds))
                async with current_room.game_lock:
                    current_room.game.start_game(num_decks, num_rounds, options)
                    # Log game start for AI analysis
-                logger = get_logger()
+                    game_logger = get_logger()
-                current_room.game_log_id = logger.log_game_start(
+                    current_room.game_log_id = game_logger.log_game_start(
                        room_code=current_room.code,
                        num_players=len(current_room.players),
                        options=options,
@@ -520,6 +632,7 @@ async def websocket_endpoint(websocket: WebSocket):
                    continue
                positions = data.get("positions", [])
                async with current_room.game_lock:
                    if current_room.game.flip_initial_cards(player_id, positions):
                        await broadcast_game_state(current_room)
@@ -531,6 +644,7 @@ async def websocket_endpoint(websocket: WebSocket):
                    continue
                source = data.get("source", "deck")
                async with current_room.game_lock:
                    # Capture discard top before draw (for logging decision context)
                    discard_before_draw = current_room.game.discard_top()
                    card = current_room.game.draw_card(player_id, source)
@@ -555,7 +669,7 @@ async def websocket_endpoint(websocket: WebSocket):
                        # Send drawn card only to the player who drew
                        await websocket.send_json({
                            "type": "card_drawn",
-                        "card": card.to_dict(reveal=True),
+                            "card": card.to_dict(),
                            "source": source,
                        })
@@ -566,6 +680,7 @@ async def websocket_endpoint(websocket: WebSocket):
                    continue
                position = data.get("position", 0)
                async with current_room.game_lock:
                    # Capture drawn card before swap for logging
                    drawn_card = current_room.game.drawn_card
                    player = current_room.game.get_player(player_id)
@@ -596,6 +711,7 @@ async def websocket_endpoint(websocket: WebSocket):
                if not current_room:
                    continue
                async with current_room.game_lock:
                    # Capture drawn card before discard for logging
                    drawn_card = current_room.game.drawn_card
                    player = current_room.game.get_player(player_id)
@@ -637,6 +753,7 @@ async def websocket_endpoint(websocket: WebSocket):
                    continue
                position = data.get("position", 0)
                async with current_room.game_lock:
                    player = current_room.game.get_player(player_id)
                    current_room.game.flip_and_end_turn(player_id, position)
@@ -662,6 +779,7 @@ async def websocket_endpoint(websocket: WebSocket):
                if not current_room:
                    continue
                async with current_room.game_lock:
                    player = current_room.game.get_player(player_id)
                    if current_room.game.skip_flip_and_end_turn(player_id):
                        # Log skip flip decision for human player
@@ -685,6 +803,7 @@ async def websocket_endpoint(websocket: WebSocket):
                    continue
                position = data.get("position", 0)
                async with current_room.game_lock:
                    player = current_room.game.get_player(player_id)
                    if current_room.game.flip_card_as_action(player_id, position):
                        # Log flip-as-action for human player
@@ -709,6 +828,7 @@ async def websocket_endpoint(websocket: WebSocket):
                if not current_room:
                    continue
                async with current_room.game_lock:
                    player = current_room.game.get_player(player_id)
                    if current_room.game.knock_early(player_id):
                        # Log knock early for human player
@@ -736,6 +856,7 @@ async def websocket_endpoint(websocket: WebSocket):
                if not room_player or not room_player.is_host:
                    continue
                async with current_room.game_lock:
                    if current_room.game.start_next_round():
                        # CPU players do their initial flips
                        for cpu in current_room.get_cpu_players():
@@ -796,6 +917,38 @@ async def websocket_endpoint(websocket: WebSocket):
            await handle_player_leave(current_room, player_id)
 async def _process_stats_safe(room: Room):
    """
    Process game stats in a fire-and-forget manner.
    This is called via asyncio.create_task to avoid blocking game completion
    notifications while stats are being processed.
    """
    try:
        # Build mapping - for non-CPU players, the player_id is their user_id
        # (assigned during authentication or as a session UUID)
        player_user_ids = {}
        for player_id, room_player in room.players.items():
            if not room_player.is_cpu:
                player_user_ids[player_id] = player_id
        # Find winner
        winner_id = None
        if room.game.players:
            winner = min(room.game.players, key=lambda p: p.total_score)
            winner_id = winner.id
        await _stats_service.process_game_from_state(
            players=room.game.players,
            winner_id=winner_id,
            num_rounds=room.game.num_rounds,
            player_user_ids=player_user_ids,
        )
        logger.debug(f"Stats processed for room {room.code}")
    except Exception as e:
        logger.error(f"Failed to process game stats: {e}")
 async def broadcast_game_state(room: Room):
    """Broadcast game state to all human players in a room."""
    # Notify spectators if spectator manager is available
@@ -842,30 +995,9 @@ async def broadcast_game_state(room: Room):
                game_logger.log_game_end(room.game_log_id)
                room.game_log_id = None  # Clear to avoid duplicate logging
-            # Process stats for authenticated players
+            # Process stats asynchronously (fire-and-forget) to avoid delaying game over notifications
            if _stats_service and room.game.players:
-                try:
+                asyncio.create_task(_process_stats_safe(room))
                    # Build mapping - for non-CPU players, the player_id is their user_id
                    # (assigned during authentication or as a session UUID)
                    player_user_ids = {}
                    for player_id, room_player in room.players.items():
                        if not room_player.is_cpu:
                            player_user_ids[player_id] = player_id
                    # Find winner
                    winner_id = None
                    if room.game.players:
                        winner = min(room.game.players, key=lambda p: p.total_score)
                        winner_id = winner.id
                    await _stats_service.process_game_from_state(
                        players=room.game.players,
                        winner_id=winner_id,
                        num_rounds=room.game.num_rounds,
                        player_user_ids=player_user_ids,
                    )
                except Exception as e:
                    logger.error(f"Failed to process game stats: {e}")
            scores = [
                {"name": p.name, "total": p.total_score, "rounds_won": p.rounds_won}
--- a/server/models/game_state.py
+++ b/server/models/game_state.py
@@ -52,12 +52,38 @@ class CardState:
    @classmethod
    def from_dict(cls, d: dict) -> "CardState":
-        """Create from dictionary."""
+        """
-        return cls(
+        Create from dictionary.
-            rank=d["rank"],
+
-            suit=d["suit"],
+        Handles both full card data and minimal face-down data gracefully.
-            face_up=d.get("face_up", False),
+
-        )
+        Args:
            d: Dictionary with card data. May contain:
               - Full data: {rank, suit, face_up}
               - Minimal face-down: {face_up: False}
        Returns:
            CardState instance.
        Raises:
            ValueError: If face_up is True but rank/suit are missing.
        """
        face_up = d.get("face_up", False)
        rank = d.get("rank")
        suit = d.get("suit")
        # If card is face-up, we must have rank and suit
        if face_up and (rank is None or suit is None):
            raise ValueError("Face-up card must have rank and suit")
        # For face-down cards with missing data, use placeholder values
        # This handles improperly serialized data from older versions
        if rank is None:
            rank = "?"  # Placeholder for unknown
        if suit is None:
            suit = "?"  # Placeholder for unknown
        return cls(rank=rank, suit=suit, face_up=face_up)
@dataclass
--- a/server/room.py
+++ b/server/room.py
@@ -11,6 +11,7 @@ A Room contains:
    - Settings for number of decks, rounds, etc.
 """
 import asyncio
 import random
 import string
 from dataclasses import dataclass, field
@@ -57,6 +58,7 @@ class Room:
        game: The Game instance containing actual game state.
        settings: Room settings (decks, rounds, etc.).
        game_log_id: SQLite log ID for analytics (if logging enabled).
        game_lock: asyncio.Lock for serializing game mutations to prevent race conditions.
    """
    code: str
@@ -64,6 +66,7 @@ class Room:
    game: Game = field(default_factory=Game)
    settings: dict = field(default_factory=lambda: {"decks": 1, "rounds": 1})
    game_log_id: Optional[str] = None
    game_lock: asyncio.Lock = field(default_factory=asyncio.Lock)
    def add_player(
        self,
--- a/server/routers/replay.py
+++ b/server/routers/replay.py
@@ -420,9 +420,19 @@ async def spectate_game(websocket: WebSocket, room_code: str):
    WebSocket endpoint for spectating live games.
    Spectators receive real-time game state updates but cannot interact.
    Supports optional authentication via token query parameter.
    """
    await websocket.accept()
    # Optional authentication for spectators
    token = websocket.query_params.get("token")
    spectator_user = None
    if token and _auth_service:
        try:
            spectator_user = await _auth_service.get_user_from_token(token)
        except Exception:
            pass  # Anonymous spectator
    if not _spectator_manager or not _room_manager:
        await websocket.close(code=4003, reason="Spectator service unavailable")
        return
@@ -449,6 +459,7 @@ async def spectate_game(websocket: WebSocket, room_code: str):
            "game_state": game_state,
            "spectator_count": _spectator_manager.get_spectator_count(game_id),
            "players": room.player_list(),
            "authenticated": spectator_user is not None,
        })
        # Keep connection alive
--- a/server/services/recovery_service.py
+++ b/server/services/recovery_service.py
@@ -309,6 +309,7 @@ class RecoveryService:
            Reconstructed game state.
        """
        from models.game_state import GamePhase, PlayerState
        from game import GameOptions
        state = RebuiltGameState(game_id=d["game_id"])
        state.room_code = d.get("room_code", "")
@@ -318,7 +319,12 @@ class RecoveryService:
        state.current_player_idx = d.get("current_player_idx", 0)
        state.player_order = d.get("player_order", [])
        state.deck_remaining = d.get("deck_remaining", 0)
-        state.options = d.get("options", {})
+        # Reconstruct GameOptions as proper object for attribute access
        options_dict = d.get("options", {})
        if isinstance(options_dict, dict):
            state.options = GameOptions(**options_dict)
        else:
            state.options = options_dict
        state.sequence_num = d.get("sequence_num", 0)
        state.finisher_id = d.get("finisher_id")
        state.host_id = d.get("host_id")
--- a/server/stores/state_cache.py
+++ b/server/stores/state_cache.py
@@ -39,9 +39,9 @@ class StateCache:
    ACTIVE_ROOMS_KEY = "golf:rooms:active"
    PLAYER_ROOM_KEY = "golf:player:{player_id}:room"
-    # TTLs
+    # TTLs - extended to 24 hours to prevent active games from expiring
-    ROOM_TTL = timedelta(hours=4)  # Inactive rooms expire
+    ROOM_TTL = timedelta(hours=24)  # Inactive rooms expire
-    GAME_TTL = timedelta(hours=4)
+    GAME_TTL = timedelta(hours=24)
    def __init__(self, redis_client: redis.Redis):
        """
@@ -360,6 +360,18 @@ class StateCache:
        await pipe.execute()
    async def touch_game(self, game_id: str) -> None:
        """
        Refresh game TTL on any activity.
        Call this on game actions to prevent active games from expiring.
        Args:
            game_id: Game UUID to refresh.
        """
        key = self.GAME_KEY.format(game_id=game_id)
        await self.redis.expire(key, int(self.GAME_TTL.total_seconds()))
 # Global state cache instance (initialized on first use)
 _state_cache: Optional[StateCache] = None
--- a/server/test_game.py
+++ b/server/test_game.py
@@ -251,9 +251,13 @@ class TestDrawDiscardMechanics:
        player = self.game.get_player("p1")
        # Card is face down
        assert player.cards[0].face_up is False
-        # to_dict doesn't reveal it
+        # to_client_dict hides face-down card details from clients
-        card_dict = player.cards[0].to_dict(reveal=False)
+        card_dict = player.cards[0].to_client_dict()
        assert "rank" not in card_dict
        # But to_dict always includes full data for server-side caching
        full_dict = player.cards[0].to_dict()
        assert "rank" in full_dict
        assert "suit" in full_dict
 # =============================================================================