Fix V2 race conditions, auth gaps, serialization bugs, and async stats

Phase 1 - Critical Fixes:
- Add game_lock (asyncio.Lock) to Room class for serializing mutations
- Wrap all game action handlers in lock to prevent race conditions
- Split Card.to_dict into to_dict (full data) and to_client_dict (hidden)
- Fix CardState.from_dict to handle missing rank/suit gracefully
- Fix GameOptions reconstruction in recovery_service (dict -> object)
- Extend state cache TTL from 4h to 24h, add touch_game method

Phase 2 - Security:
- Add optional WebSocket authentication via token query param
- Use authenticated user ID/name when available
- Add auth support to spectator WebSocket endpoint

Phase 3 - Performance:
- Make stats processing async (fire-and-forget) to avoid blocking
  game completion notifications

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Aaron D. Lee 2026-01-27 16:27:30 -05:00
parent 1dbfb3f14b
commit f27020f21b
8 changed files with 446 additions and 231 deletions

View File

@ -140,18 +140,37 @@ class Card:
""" """
Convert card to dictionary for JSON serialization. Convert card to dictionary for JSON serialization.
Always includes full card data (rank/suit/face_up) for server-side
caching and event sourcing. Use to_client_dict() for client views
that should hide face-down cards.
Args: Args:
reveal: If True, show card details even if face-down. reveal: Ignored for backwards compatibility. Use to_client_dict() instead.
Returns: Returns:
Dict with card info, or just {face_up: False} if hidden. Dict with full card info (suit, rank, face_up).
""" """
if self.face_up or reveal:
return { return {
"suit": self.suit.value, "suit": self.suit.value,
"rank": self.rank.value, "rank": self.rank.value,
"face_up": self.face_up, "face_up": self.face_up,
} }
def to_client_dict(self) -> dict:
"""
Convert card to dictionary for client display.
Hides card details if face-down to prevent cheating.
Returns:
Dict with card info, or just {face_up: False} if hidden.
"""
if self.face_up:
return {
"suit": self.suit.value,
"rank": self.rank.value,
"face_up": True,
}
return {"face_up": False} return {"face_up": False}
def value(self) -> int: def value(self) -> int:
@ -390,7 +409,9 @@ class Player:
Returns: Returns:
List of card dictionaries. List of card dictionaries.
""" """
return [card.to_dict(reveal) for card in self.cards] if reveal:
return [card.to_dict() for card in self.cards]
return [card.to_client_dict() for card in self.cards]
class GamePhase(Enum): class GamePhase(Enum):

View File

@ -21,6 +21,26 @@ from game_log import get_logger
# Import production components # Import production components
from logging_config import setup_logging from logging_config import setup_logging
# Initialize Sentry if configured
if config.SENTRY_DSN:
try:
import sentry_sdk
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.starlette import StarletteIntegration
sentry_sdk.init(
dsn=config.SENTRY_DSN,
environment=config.ENVIRONMENT,
traces_sample_rate=0.1 if config.ENVIRONMENT == "production" else 1.0,
integrations=[
StarletteIntegration(transaction_style="endpoint"),
FastApiIntegration(transaction_style="endpoint"),
],
)
logging.getLogger(__name__).info("Sentry error tracking initialized")
except ImportError:
logging.getLogger(__name__).warning("sentry-sdk not installed, error tracking disabled")
# Configure logging based on environment # Configure logging based on environment
setup_logging( setup_logging(
level=config.LOG_LEVEL, level=config.LOG_LEVEL,
@ -243,8 +263,75 @@ app.add_middleware(
environment=config.ENVIRONMENT, environment=config.ENVIRONMENT,
) )
# Note: Rate limiting middleware is added after app startup when Redis is available # Rate limiting middleware (uses global _rate_limiter set in lifespan)
# See _add_rate_limit_middleware() called from a startup event if needed # We create a wrapper that safely handles the case when rate limiter isn't ready
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
class LazyRateLimitMiddleware(BaseHTTPMiddleware):
"""Rate limiting middleware that uses the global _rate_limiter when available."""
async def dispatch(self, request, call_next):
global _rate_limiter
# Skip if rate limiter not initialized or disabled
if not _rate_limiter or not config.RATE_LIMIT_ENABLED:
return await call_next(request)
# Import here to avoid circular imports
from services.ratelimit import RATE_LIMITS
path = request.url.path
# Skip health checks and static files
if path in ("/health", "/ready", "/metrics"):
return await call_next(request)
if path.endswith((".js", ".css", ".html", ".ico", ".png", ".jpg", ".svg")):
return await call_next(request)
# Determine rate limit tier
if path.startswith("/api/auth"):
limit, window = RATE_LIMITS["api_auth"]
elif path == "/api/rooms" and request.method == "POST":
limit, window = RATE_LIMITS["api_create_room"]
elif "email" in path or "verify" in path:
limit, window = RATE_LIMITS["email_send"]
elif path.startswith("/api"):
limit, window = RATE_LIMITS["api_general"]
else:
return await call_next(request)
# Get client key and check rate limit
client_key = _rate_limiter.get_client_key(request)
full_key = f"{path}:{client_key}"
allowed, info = await _rate_limiter.is_allowed(full_key, limit, window)
if allowed:
response = await call_next(request)
else:
response = JSONResponse(
status_code=429,
content={
"error": "Rate limit exceeded",
"message": f"Too many requests. Please wait {info['reset']} seconds.",
"retry_after": info["reset"],
},
)
# Add rate limit headers
response.headers["X-RateLimit-Limit"] = str(info["limit"])
response.headers["X-RateLimit-Remaining"] = str(info["remaining"])
response.headers["X-RateLimit-Reset"] = str(info["reset"])
if not allowed:
response.headers["Retry-After"] = str(info["reset"])
return response
app.add_middleware(LazyRateLimitMiddleware)
room_manager = RoomManager() room_manager = RoomManager()
@ -309,7 +396,23 @@ async def require_admin(user: User = Depends(require_user)) -> User:
async def websocket_endpoint(websocket: WebSocket): async def websocket_endpoint(websocket: WebSocket):
await websocket.accept() await websocket.accept()
# Extract token from query param for optional authentication
token = websocket.query_params.get("token")
authenticated_user = None
if token and _auth_service:
try:
authenticated_user = await _auth_service.get_user_from_token(token)
except Exception as e:
logger.debug(f"WebSocket auth failed: {e}")
# Use authenticated user ID if available, otherwise generate random UUID
if authenticated_user:
player_id = str(authenticated_user.id)
logger.debug(f"WebSocket authenticated as user {player_id}")
else:
player_id = str(uuid.uuid4()) player_id = str(uuid.uuid4())
logger.debug(f"WebSocket connected anonymously as {player_id}")
current_room: Room | None = None current_room: Room | None = None
try: try:
@ -319,6 +422,9 @@ async def websocket_endpoint(websocket: WebSocket):
if msg_type == "create_room": if msg_type == "create_room":
player_name = data.get("player_name", "Player") player_name = data.get("player_name", "Player")
# Use authenticated user's name if available
if authenticated_user and authenticated_user.display_name:
player_name = authenticated_user.display_name
room = room_manager.create_room() room = room_manager.create_room()
room.add_player(player_id, player_name, websocket) room.add_player(player_id, player_name, websocket)
current_room = room current_room = room
@ -327,6 +433,7 @@ async def websocket_endpoint(websocket: WebSocket):
"type": "room_created", "type": "room_created",
"room_code": room.code, "room_code": room.code,
"player_id": player_id, "player_id": player_id,
"authenticated": authenticated_user is not None,
}) })
await room.broadcast({ await room.broadcast({
@ -360,6 +467,9 @@ async def websocket_endpoint(websocket: WebSocket):
}) })
continue continue
# Use authenticated user's name if available
if authenticated_user and authenticated_user.display_name:
player_name = authenticated_user.display_name
room.add_player(player_id, player_name, websocket) room.add_player(player_id, player_name, websocket)
current_room = room current_room = room
@ -367,6 +477,7 @@ async def websocket_endpoint(websocket: WebSocket):
"type": "room_joined", "type": "room_joined",
"room_code": room.code, "room_code": room.code,
"player_id": player_id, "player_id": player_id,
"authenticated": authenticated_user is not None,
}) })
await room.broadcast({ await room.broadcast({
@ -487,11 +598,12 @@ async def websocket_endpoint(websocket: WebSocket):
num_decks = max(1, min(3, num_decks)) num_decks = max(1, min(3, num_decks))
num_rounds = max(1, min(18, num_rounds)) num_rounds = max(1, min(18, num_rounds))
async with current_room.game_lock:
current_room.game.start_game(num_decks, num_rounds, options) current_room.game.start_game(num_decks, num_rounds, options)
# Log game start for AI analysis # Log game start for AI analysis
logger = get_logger() game_logger = get_logger()
current_room.game_log_id = logger.log_game_start( current_room.game_log_id = game_logger.log_game_start(
room_code=current_room.code, room_code=current_room.code,
num_players=len(current_room.players), num_players=len(current_room.players),
options=options, options=options,
@ -520,6 +632,7 @@ async def websocket_endpoint(websocket: WebSocket):
continue continue
positions = data.get("positions", []) positions = data.get("positions", [])
async with current_room.game_lock:
if current_room.game.flip_initial_cards(player_id, positions): if current_room.game.flip_initial_cards(player_id, positions):
await broadcast_game_state(current_room) await broadcast_game_state(current_room)
@ -531,6 +644,7 @@ async def websocket_endpoint(websocket: WebSocket):
continue continue
source = data.get("source", "deck") source = data.get("source", "deck")
async with current_room.game_lock:
# Capture discard top before draw (for logging decision context) # Capture discard top before draw (for logging decision context)
discard_before_draw = current_room.game.discard_top() discard_before_draw = current_room.game.discard_top()
card = current_room.game.draw_card(player_id, source) card = current_room.game.draw_card(player_id, source)
@ -555,7 +669,7 @@ async def websocket_endpoint(websocket: WebSocket):
# Send drawn card only to the player who drew # Send drawn card only to the player who drew
await websocket.send_json({ await websocket.send_json({
"type": "card_drawn", "type": "card_drawn",
"card": card.to_dict(reveal=True), "card": card.to_dict(),
"source": source, "source": source,
}) })
@ -566,6 +680,7 @@ async def websocket_endpoint(websocket: WebSocket):
continue continue
position = data.get("position", 0) position = data.get("position", 0)
async with current_room.game_lock:
# Capture drawn card before swap for logging # Capture drawn card before swap for logging
drawn_card = current_room.game.drawn_card drawn_card = current_room.game.drawn_card
player = current_room.game.get_player(player_id) player = current_room.game.get_player(player_id)
@ -596,6 +711,7 @@ async def websocket_endpoint(websocket: WebSocket):
if not current_room: if not current_room:
continue continue
async with current_room.game_lock:
# Capture drawn card before discard for logging # Capture drawn card before discard for logging
drawn_card = current_room.game.drawn_card drawn_card = current_room.game.drawn_card
player = current_room.game.get_player(player_id) player = current_room.game.get_player(player_id)
@ -637,6 +753,7 @@ async def websocket_endpoint(websocket: WebSocket):
continue continue
position = data.get("position", 0) position = data.get("position", 0)
async with current_room.game_lock:
player = current_room.game.get_player(player_id) player = current_room.game.get_player(player_id)
current_room.game.flip_and_end_turn(player_id, position) current_room.game.flip_and_end_turn(player_id, position)
@ -662,6 +779,7 @@ async def websocket_endpoint(websocket: WebSocket):
if not current_room: if not current_room:
continue continue
async with current_room.game_lock:
player = current_room.game.get_player(player_id) player = current_room.game.get_player(player_id)
if current_room.game.skip_flip_and_end_turn(player_id): if current_room.game.skip_flip_and_end_turn(player_id):
# Log skip flip decision for human player # Log skip flip decision for human player
@ -685,6 +803,7 @@ async def websocket_endpoint(websocket: WebSocket):
continue continue
position = data.get("position", 0) position = data.get("position", 0)
async with current_room.game_lock:
player = current_room.game.get_player(player_id) player = current_room.game.get_player(player_id)
if current_room.game.flip_card_as_action(player_id, position): if current_room.game.flip_card_as_action(player_id, position):
# Log flip-as-action for human player # Log flip-as-action for human player
@ -709,6 +828,7 @@ async def websocket_endpoint(websocket: WebSocket):
if not current_room: if not current_room:
continue continue
async with current_room.game_lock:
player = current_room.game.get_player(player_id) player = current_room.game.get_player(player_id)
if current_room.game.knock_early(player_id): if current_room.game.knock_early(player_id):
# Log knock early for human player # Log knock early for human player
@ -736,6 +856,7 @@ async def websocket_endpoint(websocket: WebSocket):
if not room_player or not room_player.is_host: if not room_player or not room_player.is_host:
continue continue
async with current_room.game_lock:
if current_room.game.start_next_round(): if current_room.game.start_next_round():
# CPU players do their initial flips # CPU players do their initial flips
for cpu in current_room.get_cpu_players(): for cpu in current_room.get_cpu_players():
@ -796,6 +917,38 @@ async def websocket_endpoint(websocket: WebSocket):
await handle_player_leave(current_room, player_id) await handle_player_leave(current_room, player_id)
async def _process_stats_safe(room: Room):
"""
Process game stats in a fire-and-forget manner.
This is called via asyncio.create_task to avoid blocking game completion
notifications while stats are being processed.
"""
try:
# Build mapping - for non-CPU players, the player_id is their user_id
# (assigned during authentication or as a session UUID)
player_user_ids = {}
for player_id, room_player in room.players.items():
if not room_player.is_cpu:
player_user_ids[player_id] = player_id
# Find winner
winner_id = None
if room.game.players:
winner = min(room.game.players, key=lambda p: p.total_score)
winner_id = winner.id
await _stats_service.process_game_from_state(
players=room.game.players,
winner_id=winner_id,
num_rounds=room.game.num_rounds,
player_user_ids=player_user_ids,
)
logger.debug(f"Stats processed for room {room.code}")
except Exception as e:
logger.error(f"Failed to process game stats: {e}")
async def broadcast_game_state(room: Room): async def broadcast_game_state(room: Room):
"""Broadcast game state to all human players in a room.""" """Broadcast game state to all human players in a room."""
# Notify spectators if spectator manager is available # Notify spectators if spectator manager is available
@ -842,30 +995,9 @@ async def broadcast_game_state(room: Room):
game_logger.log_game_end(room.game_log_id) game_logger.log_game_end(room.game_log_id)
room.game_log_id = None # Clear to avoid duplicate logging room.game_log_id = None # Clear to avoid duplicate logging
# Process stats for authenticated players # Process stats asynchronously (fire-and-forget) to avoid delaying game over notifications
if _stats_service and room.game.players: if _stats_service and room.game.players:
try: asyncio.create_task(_process_stats_safe(room))
# Build mapping - for non-CPU players, the player_id is their user_id
# (assigned during authentication or as a session UUID)
player_user_ids = {}
for player_id, room_player in room.players.items():
if not room_player.is_cpu:
player_user_ids[player_id] = player_id
# Find winner
winner_id = None
if room.game.players:
winner = min(room.game.players, key=lambda p: p.total_score)
winner_id = winner.id
await _stats_service.process_game_from_state(
players=room.game.players,
winner_id=winner_id,
num_rounds=room.game.num_rounds,
player_user_ids=player_user_ids,
)
except Exception as e:
logger.error(f"Failed to process game stats: {e}")
scores = [ scores = [
{"name": p.name, "total": p.total_score, "rounds_won": p.rounds_won} {"name": p.name, "total": p.total_score, "rounds_won": p.rounds_won}

View File

@ -52,12 +52,38 @@ class CardState:
@classmethod @classmethod
def from_dict(cls, d: dict) -> "CardState": def from_dict(cls, d: dict) -> "CardState":
"""Create from dictionary.""" """
return cls( Create from dictionary.
rank=d["rank"],
suit=d["suit"], Handles both full card data and minimal face-down data gracefully.
face_up=d.get("face_up", False),
) Args:
d: Dictionary with card data. May contain:
- Full data: {rank, suit, face_up}
- Minimal face-down: {face_up: False}
Returns:
CardState instance.
Raises:
ValueError: If face_up is True but rank/suit are missing.
"""
face_up = d.get("face_up", False)
rank = d.get("rank")
suit = d.get("suit")
# If card is face-up, we must have rank and suit
if face_up and (rank is None or suit is None):
raise ValueError("Face-up card must have rank and suit")
# For face-down cards with missing data, use placeholder values
# This handles improperly serialized data from older versions
if rank is None:
rank = "?" # Placeholder for unknown
if suit is None:
suit = "?" # Placeholder for unknown
return cls(rank=rank, suit=suit, face_up=face_up)
@dataclass @dataclass

View File

@ -11,6 +11,7 @@ A Room contains:
- Settings for number of decks, rounds, etc. - Settings for number of decks, rounds, etc.
""" """
import asyncio
import random import random
import string import string
from dataclasses import dataclass, field from dataclasses import dataclass, field
@ -57,6 +58,7 @@ class Room:
game: The Game instance containing actual game state. game: The Game instance containing actual game state.
settings: Room settings (decks, rounds, etc.). settings: Room settings (decks, rounds, etc.).
game_log_id: SQLite log ID for analytics (if logging enabled). game_log_id: SQLite log ID for analytics (if logging enabled).
game_lock: asyncio.Lock for serializing game mutations to prevent race conditions.
""" """
code: str code: str
@ -64,6 +66,7 @@ class Room:
game: Game = field(default_factory=Game) game: Game = field(default_factory=Game)
settings: dict = field(default_factory=lambda: {"decks": 1, "rounds": 1}) settings: dict = field(default_factory=lambda: {"decks": 1, "rounds": 1})
game_log_id: Optional[str] = None game_log_id: Optional[str] = None
game_lock: asyncio.Lock = field(default_factory=asyncio.Lock)
def add_player( def add_player(
self, self,

View File

@ -420,9 +420,19 @@ async def spectate_game(websocket: WebSocket, room_code: str):
WebSocket endpoint for spectating live games. WebSocket endpoint for spectating live games.
Spectators receive real-time game state updates but cannot interact. Spectators receive real-time game state updates but cannot interact.
Supports optional authentication via token query parameter.
""" """
await websocket.accept() await websocket.accept()
# Optional authentication for spectators
token = websocket.query_params.get("token")
spectator_user = None
if token and _auth_service:
try:
spectator_user = await _auth_service.get_user_from_token(token)
except Exception:
pass # Anonymous spectator
if not _spectator_manager or not _room_manager: if not _spectator_manager or not _room_manager:
await websocket.close(code=4003, reason="Spectator service unavailable") await websocket.close(code=4003, reason="Spectator service unavailable")
return return
@ -449,6 +459,7 @@ async def spectate_game(websocket: WebSocket, room_code: str):
"game_state": game_state, "game_state": game_state,
"spectator_count": _spectator_manager.get_spectator_count(game_id), "spectator_count": _spectator_manager.get_spectator_count(game_id),
"players": room.player_list(), "players": room.player_list(),
"authenticated": spectator_user is not None,
}) })
# Keep connection alive # Keep connection alive

View File

@ -309,6 +309,7 @@ class RecoveryService:
Reconstructed game state. Reconstructed game state.
""" """
from models.game_state import GamePhase, PlayerState from models.game_state import GamePhase, PlayerState
from game import GameOptions
state = RebuiltGameState(game_id=d["game_id"]) state = RebuiltGameState(game_id=d["game_id"])
state.room_code = d.get("room_code", "") state.room_code = d.get("room_code", "")
@ -318,7 +319,12 @@ class RecoveryService:
state.current_player_idx = d.get("current_player_idx", 0) state.current_player_idx = d.get("current_player_idx", 0)
state.player_order = d.get("player_order", []) state.player_order = d.get("player_order", [])
state.deck_remaining = d.get("deck_remaining", 0) state.deck_remaining = d.get("deck_remaining", 0)
state.options = d.get("options", {}) # Reconstruct GameOptions as proper object for attribute access
options_dict = d.get("options", {})
if isinstance(options_dict, dict):
state.options = GameOptions(**options_dict)
else:
state.options = options_dict
state.sequence_num = d.get("sequence_num", 0) state.sequence_num = d.get("sequence_num", 0)
state.finisher_id = d.get("finisher_id") state.finisher_id = d.get("finisher_id")
state.host_id = d.get("host_id") state.host_id = d.get("host_id")

View File

@ -39,9 +39,9 @@ class StateCache:
ACTIVE_ROOMS_KEY = "golf:rooms:active" ACTIVE_ROOMS_KEY = "golf:rooms:active"
PLAYER_ROOM_KEY = "golf:player:{player_id}:room" PLAYER_ROOM_KEY = "golf:player:{player_id}:room"
# TTLs # TTLs - extended to 24 hours to prevent active games from expiring
ROOM_TTL = timedelta(hours=4) # Inactive rooms expire ROOM_TTL = timedelta(hours=24) # Inactive rooms expire
GAME_TTL = timedelta(hours=4) GAME_TTL = timedelta(hours=24)
def __init__(self, redis_client: redis.Redis): def __init__(self, redis_client: redis.Redis):
""" """
@ -360,6 +360,18 @@ class StateCache:
await pipe.execute() await pipe.execute()
async def touch_game(self, game_id: str) -> None:
"""
Refresh game TTL on any activity.
Call this on game actions to prevent active games from expiring.
Args:
game_id: Game UUID to refresh.
"""
key = self.GAME_KEY.format(game_id=game_id)
await self.redis.expire(key, int(self.GAME_TTL.total_seconds()))
# Global state cache instance (initialized on first use) # Global state cache instance (initialized on first use)
_state_cache: Optional[StateCache] = None _state_cache: Optional[StateCache] = None

View File

@ -251,9 +251,13 @@ class TestDrawDiscardMechanics:
player = self.game.get_player("p1") player = self.game.get_player("p1")
# Card is face down # Card is face down
assert player.cards[0].face_up is False assert player.cards[0].face_up is False
# to_dict doesn't reveal it # to_client_dict hides face-down card details from clients
card_dict = player.cards[0].to_dict(reveal=False) card_dict = player.cards[0].to_client_dict()
assert "rank" not in card_dict assert "rank" not in card_dict
# But to_dict always includes full data for server-side caching
full_dict = player.cards[0].to_dict()
assert "rank" in full_dict
assert "suit" in full_dict
# ============================================================================= # =============================================================================