Add house rule presets and comparison mode to simulation runner

Enable testing AI behavior under different rule sets via CLI:
- --preset flag for named configurations (baseline, eagle_eye, etc.)
- --rules flag for custom comma-separated rules
- --compare flag for side-by-side preset comparison with metrics
- Improved dumb move detection for negative_pairs_keep_value rule

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
adlee-was-taken 2026-02-05 22:51:20 -05:00
parent c615c8b433
commit cd05930b69

View File

@ -6,15 +6,19 @@ No server/websocket needed - runs games directly.
Usage: Usage:
python simulate.py [num_games] [num_players] python simulate.py [num_games] [num_players]
python simulate.py 100 --rules use_jokers,eagle_eye
python simulate.py 100 --preset competitive
python simulate.py 100 --compare baseline eagle_eye negative_pairs
Examples: Examples:
python simulate.py 10 # Run 10 games with 4 players each python simulate.py 10 # Run 10 games with 4 players each
python simulate.py 50 2 # Run 50 games with 2 players each python simulate.py 50 2 # Run 50 games with 2 players each
python simulate.py 100 --preset eagle_eye
python simulate.py detail --preset competitive
""" """
import asyncio import argparse
import random import random
import sys
from typing import Optional from typing import Optional
from game import Game, Player, GamePhase, GameOptions from game import Game, Player, GamePhase, GameOptions
@ -27,6 +31,78 @@ from game import Rank
from game_log import GameLogger from game_log import GameLogger
# Named rule presets for quick configuration.
#
# Each key is a preset name accepted on the command line; each value is the
# set of GameOptions keyword arguments that the preset switches on. Every
# flag listed here is enabled (True); anything not listed is left at the
# GameOptions default.
RULE_PRESETS: dict[str, dict] = {
    # Default classic rules, no special options
    "baseline": {},
    # Jokers only
    "jokers": {"use_jokers": True},
    # Jokers plus the eagle_eye rule
    "eagle_eye": {"use_jokers": True, "eagle_eye": True},
    # Jokers plus negative_pairs_keep_value
    "negative_pairs": {"use_jokers": True, "negative_pairs_keep_value": True},
    # Single-bonus presets
    "four_kind": {"four_of_a_kind": True},
    "wolfpack": {"wolfpack": True},
    # Knock penalty together with knock bonus
    "competitive": {"knock_penalty": True, "knock_bonus": True},
    # Jokers with lucky_swing, eagle_eye and negative_pairs_keep_value
    "wild": {
        "use_jokers": True,
        "lucky_swing": True,
        "eagle_eye": True,
        "negative_pairs_keep_value": True,
    },
    # Every bonus-style rule at once
    "all_bonuses": {
        "knock_bonus": True,
        "underdog_bonus": True,
        "four_of_a_kind": True,
        "wolfpack": True,
    },
}
def get_preset_options(preset_name: str) -> GameOptions:
    """Build the GameOptions for one of the named presets in RULE_PRESETS.

    Every preset is layered on the same fixed base settings
    (``initial_flips=2``, ``flip_mode="never"``); the preset's own entries
    are forwarded to GameOptions as additional keyword arguments.

    Args:
        preset_name: Key to look up in RULE_PRESETS.

    Returns:
        A new GameOptions instance configured for the preset.

    Raises:
        ValueError: If ``preset_name`` is not a key of RULE_PRESETS. The
            message lists the available preset names in sorted order.
    """
    preset_rules = RULE_PRESETS.get(preset_name)
    if preset_rules is None:
        known_presets = ", ".join(sorted(RULE_PRESETS))
        raise ValueError(f"Unknown preset '{preset_name}'. Available: {known_presets}")

    return GameOptions(initial_flips=2, flip_mode="never", **preset_rules)
def _is_toggle_rule(rule: str) -> bool:
    """Return True if ``rule`` names a boolean option of GameOptions."""
    import dataclasses

    if dataclasses.is_dataclass(GameOptions):
        # Inspect declared fields rather than arbitrary class attributes so
        # methods, properties and dunder names are never mistaken for rules.
        return any(
            field.name == rule
            and (isinstance(field.default, bool) or field.type in (bool, "bool"))
            for field in dataclasses.fields(GameOptions)
        )
    # NOTE(review): GameOptions is defined in another module; if it is not a
    # dataclass, fall back to requiring a boolean class-level default.
    return isinstance(getattr(GameOptions, rule, None), bool)


def parse_rules_string(rules_str: str) -> GameOptions:
    """Parse comma-separated rule names into GameOptions.

    Each non-empty, whitespace-stripped name is enabled (set to ``True``) on
    top of the fixed base settings ``initial_flips=2`` and
    ``flip_mode="never"``. An empty or missing string yields the base
    settings only.

    Only boolean options may be named. The previous ``hasattr`` check let
    through any attribute of GameOptions, so naming ``initial_flips`` or
    ``flip_mode`` crashed with a duplicate-keyword ``TypeError`` and naming
    any other non-boolean option silently assigned ``True`` to it.

    Args:
        rules_str: Comma-separated rule names, e.g. ``"use_jokers,eagle_eye"``.

    Returns:
        A new GameOptions instance with the named rules enabled.

    Raises:
        ValueError: If a name is one of the fixed base settings, or is not a
            boolean option of GameOptions.
    """
    base_settings = {"initial_flips": 2, "flip_mode": "never"}

    enabled = {}
    for raw_name in (rules_str or "").split(","):
        rule = raw_name.strip()
        if not rule:
            # Tolerate stray commas such as "a,,b" or a trailing ",".
            continue
        if rule in base_settings:
            # Would otherwise be passed twice to GameOptions(...).
            raise ValueError(
                f"Rule '{rule}' is a fixed simulation setting and cannot be toggled."
            )
        if not _is_toggle_rule(rule):
            raise ValueError(f"Unknown rule '{rule}'. Check GameOptions for valid fields.")
        enabled[rule] = True

    return GameOptions(**base_settings, **enabled)
class SimulationStats: class SimulationStats:
"""Track simulation statistics.""" """Track simulation statistics."""
@ -254,21 +330,32 @@ def run_cpu_turn(
if swap_pos is not None: if swap_pos is not None:
old_card = player.cards[swap_pos] old_card = player.cards[swap_pos]
partner_pos = get_column_partner_position(swap_pos)
partner = player.cards[partner_pos]
# Check for dumb moves: swapping good card for bad # Check for dumb moves: swapping good card for bad
drawn_val = get_ai_card_value(drawn, game.options) drawn_val = get_ai_card_value(drawn, game.options)
old_val = get_ai_card_value(old_card, game.options) old_val = get_ai_card_value(old_card, game.options)
# Only flag as dumb if:
# 1. Old card was face-up and good (value <= 1)
# 2. We're putting a worse card in
# 3. We're NOT creating a pair (pairing is a valid reason to replace a good card)
# 4. We're NOT in a forced-swap-from-discard situation
creates_pair = partner.face_up and partner.rank == drawn.rank
if old_card.face_up and old_val < drawn_val and old_val <= 1: if old_card.face_up and old_val < drawn_val and old_val <= 1:
if not creates_pair:
stats.record_dumb_move("swapped_good_for_bad") stats.record_dumb_move("swapped_good_for_bad")
# Check for dumb move: creating bad pair with negative card # Check for dumb move: creating bad pair with negative card
partner_pos = get_column_partner_position(swap_pos)
partner = player.cards[partner_pos]
if (partner.face_up and if (partner.face_up and
partner.rank == drawn.rank and partner.rank == drawn.rank and
drawn_val < 0 and drawn_val < 0 and
not (game.options.eagle_eye and drawn.rank == Rank.JOKER)): not (game.options.eagle_eye and drawn.rank == Rank.JOKER) and
not game.options.negative_pairs_keep_value):
stats.record_dumb_move("paired_negative") stats.record_dumb_move("paired_negative")
print(f" !!! PAIRED NEGATIVE: {player.name} paired {drawn.rank.value} "
f"at pos {swap_pos} (partner at {partner_pos})")
game.swap_card(player.id, swap_pos) game.swap_card(player.id, swap_pos)
action = "swap" action = "swap"
@ -399,23 +486,31 @@ def run_game(
def run_simulation( def run_simulation(
num_games: int = 10, num_games: int = 10,
num_players: int = 4, num_players: int = 4,
options: Optional[GameOptions] = None,
verbose: bool = True verbose: bool = True
): ) -> SimulationStats:
"""Run multiple games and report statistics.""" """Run multiple games and report statistics."""
if options is None:
options = GameOptions(initial_flips=2, flip_mode="never")
# Build description of active rules
active_rules = []
for field_name in ["use_jokers", "eagle_eye", "negative_pairs_keep_value",
"knock_penalty", "knock_bonus", "four_of_a_kind",
"wolfpack", "lucky_swing", "underdog_bonus"]:
if getattr(options, field_name, False):
active_rules.append(field_name)
rules_desc = ", ".join(active_rules) if active_rules else "baseline (no special rules)"
print(f"\nRunning {num_games} games with {num_players} players each...") print(f"\nRunning {num_games} games with {num_players} players each...")
print(f"Rules: {rules_desc}")
print("=" * 50) print("=" * 50)
logger = GameLogger() logger = GameLogger()
stats = SimulationStats() stats = SimulationStats()
# Default options
options = GameOptions(
initial_flips=2,
flip_mode="never",
use_jokers=False,
)
for i in range(num_games): for i in range(num_games):
players = create_cpu_players(num_players) players = create_cpu_players(num_players)
@ -438,22 +533,32 @@ def run_simulation(
print(" python game_analyzer.py blunders") print(" python game_analyzer.py blunders")
print(" python game_analyzer.py summary") print(" python game_analyzer.py summary")
return stats
def run_detailed_game(num_players: int = 4):
def run_detailed_game(num_players: int = 4, options: Optional[GameOptions] = None):
"""Run a single game with detailed output.""" """Run a single game with detailed output."""
if options is None:
options = GameOptions(initial_flips=2, flip_mode="never")
# Build description of active rules
active_rules = []
for field_name in ["use_jokers", "eagle_eye", "negative_pairs_keep_value",
"knock_penalty", "knock_bonus", "four_of_a_kind",
"wolfpack", "lucky_swing", "underdog_bonus"]:
if getattr(options, field_name, False):
active_rules.append(field_name)
rules_desc = ", ".join(active_rules) if active_rules else "baseline (no special rules)"
print(f"\nRunning detailed game with {num_players} players...") print(f"\nRunning detailed game with {num_players} players...")
print(f"Rules: {rules_desc}")
print("=" * 50) print("=" * 50)
logger = GameLogger() logger = GameLogger()
stats = SimulationStats() stats = SimulationStats()
options = GameOptions(
initial_flips=2,
flip_mode="never",
use_jokers=False,
)
players_with_profiles = create_cpu_players(num_players) players_with_profiles = create_cpu_players(num_players)
game = Game() game = Game()
@ -533,13 +638,155 @@ def run_detailed_game(num_players: int = 4):
print("Run: python game_analyzer.py game", game_id, winner.name) print("Run: python game_analyzer.py game", game_id, winner.name)
def compare_rule_sets(presets: list[str], num_games: int = 100, num_players: int = 4):
    """Run simulations with different rule sets and compare results.

    Each preset is simulated with ``run_simulation`` (non-verbose) and the
    collected SimulationStats are printed as two tables: an overall summary
    (average score, dumb-move rate, swap/discard split) and a per-category
    dumb-move breakdown.

    Args:
        presets: Preset names to compare. Repeated names are simulated and
            reported once, in order of first appearance.
        num_games: Number of games to run per preset.
        num_players: Number of players in each game.

    Raises:
        ValueError: If any preset name is unknown. All names are resolved
            before the first simulation starts, so a typo fails immediately
            rather than after earlier presets have already been run.
    """
    # De-duplicate while preserving order: results are keyed by name, so a
    # repeated preset would only overwrite its own earlier run.
    unique_presets = list(dict.fromkeys(presets))

    # Resolve everything up front so invalid names fail fast.
    options_by_preset = {name: get_preset_options(name) for name in unique_presets}

    print(f"\nComparing {len(unique_presets)} rule sets with {num_games} games each...")
    print("=" * 60)

    results: dict[str, SimulationStats] = {}
    for preset in unique_presets:
        print(f"\n{'='*60}")
        print(f"RUNNING PRESET: {preset}")
        print(f"{'='*60}")
        results[preset] = run_simulation(
            num_games, num_players, options_by_preset[preset], verbose=False
        )

    # Print comparison summary
    print("\n")
    print("=" * 70)
    print("COMPARISON SUMMARY")
    print("=" * 70)

    # Header
    print(f"\n{'Preset':<20} {'Avg Score':<12} {'Dumb %':<10} {'Swap %':<10} {'Discard %':<10}")
    print("-" * 70)

    for preset in unique_presets:
        stats = results[preset]

        # Average score across all players and all games
        all_scores = [
            score for scores in stats.player_scores.values() for score in scores
        ]
        avg_score = sum(all_scores) / len(all_scores) if all_scores else 0

        # Swap vs discard split over every recorded decision bucket
        total_swaps = sum(actions.get("swap", 0) for actions in stats.decisions.values())
        total_discards = sum(actions.get("discard", 0) for actions in stats.decisions.values())
        total_actions = total_swaps + total_discards
        swap_pct = (total_swaps / total_actions * 100) if total_actions > 0 else 0
        discard_pct = (total_discards / total_actions * 100) if total_actions > 0 else 0

        # NOTE(review): dumb_move_rate is printed as-is under a "Dumb %"
        # heading; confirm in SimulationStats whether it is already a percentage.
        print(f"{preset:<20} {avg_score:<12.1f} {stats.dumb_move_rate:<10.3f} {swap_pct:<10.1f} {discard_pct:<10.1f}")

    # Detailed dumb move breakdown
    print("\n\nDUMB MOVE BREAKDOWN BY PRESET:")
    print("-" * 70)
    print(f"{'Preset':<20} {'Jokers':<8} {'2s':<8} {'Kings':<8} {'BadTake':<8} {'NegPair':<8} {'BadSwap':<8}")
    print("-" * 70)

    for preset in unique_presets:
        stats = results[preset]
        print(f"{preset:<20} {stats.discarded_jokers:<8} {stats.discarded_twos:<8} "
              f"{stats.discarded_kings:<8} {stats.took_bad_card_without_pair:<8} "
              f"{stats.paired_negative_cards:<8} {stats.swapped_good_for_bad:<8}")
def main():
    """Main entry point with argparse CLI.

    Modes, selected from the parsed arguments:

    * ``--compare PRESET [PRESET ...]``: run each preset and print a
      side-by-side comparison (100 games each if ``num_games`` is
      ``detail``).
    * ``detail`` as the first positional: run one detailed game.
    * otherwise: run ``num_games`` games as a batch simulation.

    All invalid input (unknown preset or rule, non-integer game count,
    conflicting flags) is reported through ``parser.error``, which prints a
    usage message and exits with status 2, instead of surfacing as an
    uncaught traceback.
    """
    parser = argparse.ArgumentParser(
        description="Golf AI Simulation Runner - test AI behavior under different rule sets",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python simulate.py 100 # 100 games, baseline rules
python simulate.py 100 4 # 100 games, 4 players
python simulate.py 100 --preset eagle_eye # Use eagle_eye preset
python simulate.py 100 --rules use_jokers,knock_penalty
python simulate.py 100 --compare baseline eagle_eye negative_pairs
python simulate.py detail --preset competitive # Single detailed game
Available presets:
baseline - Classic rules (no special options)
jokers - Jokers enabled
eagle_eye - Jokers + eagle_eye rule
negative_pairs - Jokers + negative pairs keep value
four_kind - Four of a kind bonus
wolfpack - Wolfpack bonus
competitive - Knock penalty + knock bonus
wild - Jokers + lucky_swing + eagle_eye + negative_pairs
all_bonuses - All bonus rules enabled
"""
    )
    parser.add_argument(
        "num_games",
        nargs="?",
        default="10",
        help="Number of games to run, or 'detail' for a single detailed game"
    )
    parser.add_argument(
        "num_players",
        nargs="?",
        type=int,
        default=4,
        help="Number of players (default: 4)"
    )
    parser.add_argument(
        "--preset",
        type=str,
        help="Use a named rule preset (e.g., eagle_eye, competitive)"
    )
    parser.add_argument(
        "--rules",
        type=str,
        help="Comma-separated list of rules to enable (e.g., use_jokers,knock_penalty)"
    )
    parser.add_argument(
        "--compare",
        nargs="+",
        metavar="PRESET",
        help="Compare multiple presets side-by-side"
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="Reduce output verbosity"
    )
    args = parser.parse_args()

    # Reject contradictory flag combinations before doing any work.
    if args.preset and args.rules:
        parser.error("Cannot use both --preset and --rules")
    if args.compare and (args.preset or args.rules):
        # Compare mode builds its own options per preset; a single
        # --preset/--rules selection would otherwise be silently ignored.
        parser.error("Cannot combine --compare with --preset or --rules")

    # num_games is kept as a string by argparse because it doubles as the
    # 'detail' mode switch; convert it here with a proper usage error.
    detail_mode = args.num_games == "detail"
    num_games = None
    if not detail_mode:
        try:
            num_games = int(args.num_games)
        except ValueError:
            parser.error(
                f"num_games must be an integer or 'detail', got '{args.num_games}'"
            )

    # Handle compare mode
    if args.compare:
        try:
            # Validate every name first so a typo fails before simulating.
            for preset_name in args.compare:
                get_preset_options(preset_name)
        except ValueError as exc:
            parser.error(str(exc))
        compare_rule_sets(args.compare, 100 if detail_mode else num_games, args.num_players)
        return

    # Determine options (None lets the runners fall back to their defaults)
    options = None
    try:
        if args.preset:
            options = get_preset_options(args.preset)
        elif args.rules:
            options = parse_rules_string(args.rules)
    except ValueError as exc:
        parser.error(str(exc))

    # Handle detail mode
    if detail_mode:
        run_detailed_game(args.num_players, options)
        return

    # Standard batch simulation
    run_simulation(num_games, args.num_players, options, verbose=not args.quiet)
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) > 1 and sys.argv[1] == "detail": main()
# Detailed single game
num_players = int(sys.argv[2]) if len(sys.argv) > 2 else 4
run_detailed_game(num_players)
else:
# Batch simulation
num_games = int(sys.argv[1]) if len(sys.argv) > 1 else 10
num_players = int(sys.argv[2]) if len(sys.argv) > 2 else 4
run_simulation(num_games, num_players)