Add auto-cleanup of stale game rooms after 5 minutes of inactivity

Rooms that sit idle (no player actions or CPU turns) for longer than
ROOM_IDLE_TIMEOUT_SECONDS (default 300s) are now automatically cleaned
up by a background task that checks every 60 seconds: CPU tasks are
cancelled, players are notified with a room_expired message, their
WebSockets are closed, and the room is removed from memory.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
adlee-was-taken 2026-02-25 12:17:57 -05:00
parent 7001232658
commit 82aa3dfb3e
4 changed files with 91 additions and 0 deletions

View File

@ -142,6 +142,7 @@ class ServerConfig:
MAX_PLAYERS_PER_ROOM: int = 6 MAX_PLAYERS_PER_ROOM: int = 6
ROOM_TIMEOUT_MINUTES: int = 60 ROOM_TIMEOUT_MINUTES: int = 60
ROOM_CODE_LENGTH: int = 4 ROOM_CODE_LENGTH: int = 4
ROOM_IDLE_TIMEOUT_SECONDS: int = 300 # 5 minutes of inactivity
# Security (for future auth system) # Security (for future auth system)
SECRET_KEY: str = "" SECRET_KEY: str = ""
@ -198,6 +199,7 @@ class ServerConfig:
MAX_PLAYERS_PER_ROOM=get_env_int("MAX_PLAYERS_PER_ROOM", 6), MAX_PLAYERS_PER_ROOM=get_env_int("MAX_PLAYERS_PER_ROOM", 6),
ROOM_TIMEOUT_MINUTES=get_env_int("ROOM_TIMEOUT_MINUTES", 60), ROOM_TIMEOUT_MINUTES=get_env_int("ROOM_TIMEOUT_MINUTES", 60),
ROOM_CODE_LENGTH=get_env_int("ROOM_CODE_LENGTH", 4), ROOM_CODE_LENGTH=get_env_int("ROOM_CODE_LENGTH", 4),
ROOM_IDLE_TIMEOUT_SECONDS=get_env_int("ROOM_IDLE_TIMEOUT_SECONDS", 300),
SECRET_KEY=get_env("SECRET_KEY", ""), SECRET_KEY=get_env("SECRET_KEY", ""),
INVITE_ONLY=get_env_bool("INVITE_ONLY", True), INVITE_ONLY=get_env_bool("INVITE_ONLY", True),
DAILY_OPEN_SIGNUPS=get_env_int("DAILY_OPEN_SIGNUPS", 0), DAILY_OPEN_SIGNUPS=get_env_int("DAILY_OPEN_SIGNUPS", 0),

View File

@ -69,6 +69,7 @@ async def handle_create_room(data: dict, ctx: ConnectionContext, *, room_manager
player_name = ctx.authenticated_user.username if ctx.authenticated_user else data.get("player_name", "Player") player_name = ctx.authenticated_user.username if ctx.authenticated_user else data.get("player_name", "Player")
room = room_manager.create_room() room = room_manager.create_room()
room.add_player(ctx.player_id, player_name, ctx.websocket, ctx.auth_user_id) room.add_player(ctx.player_id, player_name, ctx.websocket, ctx.auth_user_id)
room.touch()
ctx.current_room = room ctx.current_room = room
await ctx.websocket.send_json({ await ctx.websocket.send_json({
@ -114,6 +115,7 @@ async def handle_join_room(data: dict, ctx: ConnectionContext, *, room_manager,
return return
room.add_player(ctx.player_id, player_name, ctx.websocket, ctx.auth_user_id) room.add_player(ctx.player_id, player_name, ctx.websocket, ctx.auth_user_id)
room.touch()
ctx.current_room = room ctx.current_room = room
await ctx.websocket.send_json({ await ctx.websocket.send_json({
@ -189,6 +191,7 @@ async def handle_remove_cpu(data: dict, ctx: ConnectionContext, **kw) -> None:
async def handle_start_game(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None: async def handle_start_game(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
room_player = ctx.current_room.get_player(ctx.player_id) room_player = ctx.current_room.get_player(ctx.player_id)
if not room_player or not room_player.is_host: if not room_player or not room_player.is_host:
@ -235,6 +238,7 @@ async def handle_start_game(data: dict, ctx: ConnectionContext, *, broadcast_gam
async def handle_flip_initial(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None: async def handle_flip_initial(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
positions = data.get("positions", []) positions = data.get("positions", [])
async with ctx.current_room.game_lock: async with ctx.current_room.game_lock:
@ -250,6 +254,7 @@ async def handle_flip_initial(data: dict, ctx: ConnectionContext, *, broadcast_g
async def handle_draw(data: dict, ctx: ConnectionContext, *, broadcast_game_state, **kw) -> None: async def handle_draw(data: dict, ctx: ConnectionContext, *, broadcast_game_state, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
source = data.get("source", "deck") source = data.get("source", "deck")
async with ctx.current_room.game_lock: async with ctx.current_room.game_lock:
@ -277,6 +282,7 @@ async def handle_draw(data: dict, ctx: ConnectionContext, *, broadcast_game_stat
async def handle_swap(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None: async def handle_swap(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
position = data.get("position", 0) position = data.get("position", 0)
async with ctx.current_room.game_lock: async with ctx.current_room.game_lock:
@ -303,6 +309,7 @@ async def handle_swap(data: dict, ctx: ConnectionContext, *, broadcast_game_stat
async def handle_discard(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None: async def handle_discard(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
async with ctx.current_room.game_lock: async with ctx.current_room.game_lock:
drawn_card = ctx.current_room.game.drawn_card drawn_card = ctx.current_room.game.drawn_card
@ -349,6 +356,7 @@ async def handle_cancel_draw(data: dict, ctx: ConnectionContext, *, broadcast_ga
async def handle_flip_card(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None: async def handle_flip_card(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
position = data.get("position", 0) position = data.get("position", 0)
async with ctx.current_room.game_lock: async with ctx.current_room.game_lock:
@ -370,6 +378,7 @@ async def handle_flip_card(data: dict, ctx: ConnectionContext, *, broadcast_game
async def handle_skip_flip(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None: async def handle_skip_flip(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
async with ctx.current_room.game_lock: async with ctx.current_room.game_lock:
player = ctx.current_room.game.get_player(ctx.player_id) player = ctx.current_room.game.get_player(ctx.player_id)
@ -386,6 +395,7 @@ async def handle_skip_flip(data: dict, ctx: ConnectionContext, *, broadcast_game
async def handle_flip_as_action(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None: async def handle_flip_as_action(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
position = data.get("position", 0) position = data.get("position", 0)
async with ctx.current_room.game_lock: async with ctx.current_room.game_lock:
@ -406,6 +416,7 @@ async def handle_flip_as_action(data: dict, ctx: ConnectionContext, *, broadcast
async def handle_knock_early(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None: async def handle_knock_early(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
async with ctx.current_room.game_lock: async with ctx.current_room.game_lock:
player = ctx.current_room.game.get_player(ctx.player_id) player = ctx.current_room.game.get_player(ctx.player_id)
@ -424,6 +435,7 @@ async def handle_knock_early(data: dict, ctx: ConnectionContext, *, broadcast_ga
async def handle_next_round(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None: async def handle_next_round(data: dict, ctx: ConnectionContext, *, broadcast_game_state, check_and_run_cpu_turn, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
room_player = ctx.current_room.get_player(ctx.player_id) room_player = ctx.current_room.get_player(ctx.player_id)
if not room_player or not room_player.is_host: if not room_player or not room_player.is_host:
@ -467,6 +479,7 @@ async def handle_leave_game(data: dict, ctx: ConnectionContext, *, handle_player
async def handle_end_game(data: dict, ctx: ConnectionContext, *, room_manager, cleanup_room_profiles, **kw) -> None: async def handle_end_game(data: dict, ctx: ConnectionContext, *, room_manager, cleanup_room_profiles, **kw) -> None:
if not ctx.current_room: if not ctx.current_room:
return return
ctx.current_room.touch()
room_player = ctx.current_room.get_player(ctx.player_id) room_player = ctx.current_room.get_player(ctx.player_id)
if not room_player or not room_player.is_host: if not room_player or not room_player.is_host:

View File

@ -64,6 +64,7 @@ _matchmaking_service = None
_replay_service = None _replay_service = None
_spectator_manager = None _spectator_manager = None
_leaderboard_refresh_task = None _leaderboard_refresh_task = None
_room_cleanup_task = None
_redis_client = None _redis_client = None
_rate_limiter = None _rate_limiter = None
_shutdown_event = asyncio.Event() _shutdown_event = asyncio.Event()
@ -83,6 +84,60 @@ async def _periodic_leaderboard_refresh():
logger.error(f"Leaderboard refresh failed: {e}") logger.error(f"Leaderboard refresh failed: {e}")
async def _close_expired_connections(room):
    """Tell every human player the room expired, then close their WebSocket.

    Sending and closing are attempted independently so that a failed send
    (for example on a half-dead connection) does not leave the socket open.
    """
    for player in list(room.players.values()):
        websocket = player.websocket
        if not websocket or player.is_cpu:
            continue
        try:
            await websocket.send_json({
                "type": "room_expired",
                "message": "Room closed due to inactivity",
            })
        except Exception as e:
            # Best effort: the peer may already be gone.
            logger.debug(f"Could not notify player in room {room.code}: {e}")
        try:
            await websocket.close(code=4002, reason="Room expired")
        except Exception as e:
            # Best effort: the socket may already be closed.
            logger.debug(f"Could not close socket in room {room.code}: {e}")


async def _expire_idle_room(room, idle_seconds):
    """Tear down a single idle room: CPU task, connections, CPU players, registry."""
    logger.info(
        f"Cleaning up stale room {room.code} "
        f"(idle {int(idle_seconds)}s, "
        f"{len(room.players)} players)"
    )

    # Cancel CPU turn task
    task = room.cpu_turn_task
    if task:
        task.cancel()
        # asyncio.wait() does not re-raise the awaited task's outcome, so a
        # CancelledError raised here can only mean that the cleanup task
        # itself was cancelled; it is allowed to propagate so shutdown works.
        await asyncio.wait({task})
        if not task.cancelled() and task.exception() is not None:
            # Retrieve the exception so it is not reported as "never retrieved".
            logger.warning(
                f"CPU turn task for room {room.code} ended with error: "
                f"{task.exception()!r}"
            )
        room.cpu_turn_task = None

    # Notify and close human WebSocket connections
    await _close_expired_connections(room)

    # Clean up players and profiles
    room_code = room.code
    for cpu in list(room.get_cpu_players()):
        room.remove_player(cpu.id)
    cleanup_room_profiles(room_code)
    room_manager.remove_room(room_code)


async def _periodic_room_cleanup():
    """Periodic task to clean up rooms idle for longer than ROOM_IDLE_TIMEOUT_SECONDS.

    Wakes up every 60 seconds, collects the rooms whose ``last_activity`` is
    older than the configured timeout and expires them one by one. The loop
    ends when the task is cancelled; any other error is logged and the loop
    continues with the next cycle.
    """
    import time

    while True:
        try:
            await asyncio.sleep(60)
            timeout = config.ROOM_IDLE_TIMEOUT_SECONDS
            now = time.time()
            # Snapshot first: expiring a room removes it from room_manager.rooms.
            stale_rooms = [
                room for room in room_manager.rooms.values()
                if now - room.last_activity > timeout
            ]

            cleaned = 0
            for room in stale_rooms:
                # Expiring earlier rooms awaits, so this room may have been
                # touched since the snapshot was taken; re-check before acting.
                idle_seconds = time.time() - room.last_activity
                if idle_seconds <= timeout:
                    continue
                try:
                    await _expire_idle_room(room, idle_seconds)
                    cleaned += 1
                except Exception as e:
                    # One broken room must not block cleanup of the others.
                    # CancelledError is not an Exception subclass and still
                    # propagates to the outer handler.
                    logger.error(f"Failed to clean up stale room {room.code}: {e}")

            if cleaned:
                logger.info(f"Cleaned up {cleaned} stale room(s)")
        except asyncio.CancelledError:
            break
        except Exception as e:
            logger.error(f"Room cleanup failed: {e}")
async def _init_redis(): async def _init_redis():
"""Initialize Redis client, rate limiter, and signup limiter.""" """Initialize Redis client, rate limiter, and signup limiter."""
global _redis_client, _rate_limiter global _redis_client, _rate_limiter
@ -254,6 +309,14 @@ async def _shutdown_services():
reset_all_profiles() reset_all_profiles()
logger.info("All rooms and CPU profiles cleaned up") logger.info("All rooms and CPU profiles cleaned up")
if _room_cleanup_task:
_room_cleanup_task.cancel()
try:
await _room_cleanup_task
except asyncio.CancelledError:
pass
logger.info("Room cleanup task stopped")
if _leaderboard_refresh_task: if _leaderboard_refresh_task:
_leaderboard_refresh_task.cancel() _leaderboard_refresh_task.cancel()
try: try:
@ -312,6 +375,11 @@ async def lifespan(app: FastAPI):
room_manager=room_manager, room_manager=room_manager,
) )
# Start periodic room cleanup
global _room_cleanup_task
_room_cleanup_task = asyncio.create_task(_periodic_room_cleanup())
logger.info(f"Room cleanup task started (timeout={config.ROOM_IDLE_TIMEOUT_SECONDS}s)")
logger.info(f"Golf server started (environment={config.ENVIRONMENT})") logger.info(f"Golf server started (environment={config.ENVIRONMENT})")
yield yield
@ -761,6 +829,8 @@ async def _run_cpu_chain(room: Room):
if not room_player or not room_player.is_cpu: if not room_player or not room_player.is_cpu:
return return
room.touch()
# Brief pause before CPU starts - animations are faster now # Brief pause before CPU starts - animations are faster now
await asyncio.sleep(0.25) await asyncio.sleep(0.25)

View File

@ -14,6 +14,7 @@ A Room contains:
import asyncio import asyncio
import random import random
import string import string
import time
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -70,6 +71,11 @@ class Room:
game_log_id: Optional[str] = None game_log_id: Optional[str] = None
game_lock: asyncio.Lock = field(default_factory=asyncio.Lock) game_lock: asyncio.Lock = field(default_factory=asyncio.Lock)
cpu_turn_task: Optional[asyncio.Task] = None cpu_turn_task: Optional[asyncio.Task] = None
last_activity: float = field(default_factory=time.time)
def touch(self) -> None:
    """Record the current wall-clock time as this room's most recent activity."""
    current_time = time.time()
    self.last_activity = current_time
def add_player( def add_player(
self, self,