"""Process supervisor — starts and monitors all Vigilar subsystems.""" import logging import signal import sys import time from multiprocessing import Process from pathlib import Path from typing import Any from vigilar.config import VigilarConfig from vigilar.storage.db import get_db_path, init_db log = logging.getLogger(__name__) class SubsystemProcess: """Wrapper for a subsystem running in a child process.""" def __init__(self, name: str, target: Any, args: tuple = ()): self.name = name self.target = target self.args = args self.process: Process | None = None self.restart_count = 0 self.max_restarts = 10 self.last_restart: float = 0 def start(self) -> None: self.process = Process(target=self.target, args=self.args, name=self.name, daemon=True) self.process.start() log.info("Started %s (pid=%s)", self.name, self.process.pid) def is_alive(self) -> bool: return self.process is not None and self.process.is_alive() def stop(self, timeout: float = 5.0) -> None: if self.process and self.process.is_alive(): log.info("Stopping %s (pid=%s)", self.name, self.process.pid) self.process.terminate() self.process.join(timeout=timeout) if self.process.is_alive(): log.warning("Force-killing %s", self.name) self.process.kill() self.process.join(timeout=2) def maybe_restart(self) -> bool: """Restart if crashed, with backoff. Returns True if restarted.""" if self.is_alive(): return False if self.restart_count >= self.max_restarts: log.error("%s exceeded max restarts (%d), giving up", self.name, self.max_restarts) return False backoff = min(2 ** self.restart_count, 60) elapsed = time.monotonic() - self.last_restart if elapsed < backoff: return False self.restart_count += 1 self.last_restart = time.monotonic() log.warning("Restarting %s (attempt %d)", self.name, self.restart_count) self.start() return True def _run_web(cfg: VigilarConfig) -> None: """Run the Flask web server in a subprocess.""" from vigilar.web.app import create_app app = create_app(cfg) app.run(host=cfg.web.host, port=cfg.web.port, debug=False, use_reloader=False) def _run_event_processor(cfg: VigilarConfig) -> None: """Run the event processor in a subprocess.""" from vigilar.events.processor import EventProcessor processor = EventProcessor(cfg) processor.run() def _run_ups_monitor(cfg: VigilarConfig) -> None: """Run the UPS monitor in a subprocess.""" from vigilar.ups.monitor import UPSMonitor monitor = UPSMonitor(cfg) monitor.run() def run_supervisor(cfg: VigilarConfig) -> None: """Main supervisor loop — starts all subsystems, monitors, restarts on crash.""" # Initialize database db_path = get_db_path(cfg.system.data_dir) init_db(db_path) # Ensure directories exist Path(cfg.system.data_dir).mkdir(parents=True, exist_ok=True) Path(cfg.system.recordings_dir).mkdir(parents=True, exist_ok=True) Path(cfg.system.hls_dir).mkdir(parents=True, exist_ok=True) # Build subsystem list subsystems: list[SubsystemProcess] = [] # Web server subsystems.append(SubsystemProcess("web", _run_web, (cfg,))) # Camera manager (manages its own child processes internally) from vigilar.camera.manager import CameraManager camera_mgr = CameraManager(cfg) if cfg.cameras else None # Event processor subsystems.append(SubsystemProcess("event-processor", _run_event_processor, (cfg,))) # Phase 7 — Sensor bridge if cfg.sensors: from vigilar.sensors.bridge import run_sensor_bridge subsystems.append(SubsystemProcess("sensor-bridge", run_sensor_bridge, (cfg,))) # Phase 8 — UPS monitor if cfg.ups.enabled: subsystems.append(SubsystemProcess("ups-monitor", _run_ups_monitor, (cfg,))) # Presence monitor if cfg.presence.enabled and cfg.presence.members: from vigilar.presence.monitor import run_presence_monitor subsystems.append(SubsystemProcess("presence-monitor", run_presence_monitor, (cfg,))) # Health monitor if cfg.health.enabled: from vigilar.health.monitor import run_health_monitor subsystems.append(SubsystemProcess("health-monitor", run_health_monitor, (cfg,))) # Handle signals for clean shutdown shutdown_requested = False def handle_signal(signum: int, frame: Any) -> None: nonlocal shutdown_requested if shutdown_requested: log.warning("Force shutdown") sys.exit(1) shutdown_requested = True log.info("Shutdown requested (signal %d)", signum) signal.signal(signal.SIGTERM, handle_signal) signal.signal(signal.SIGINT, handle_signal) # Start all subsystems log.info("Starting %d subsystems", len(subsystems)) for sub in subsystems: sub.start() if camera_mgr: camera_mgr.start() log.info("Vigilar is running") # Monitor loop try: while not shutdown_requested: for sub in subsystems: if not sub.is_alive(): sub.maybe_restart() if camera_mgr: camera_mgr.check_and_restart() time.sleep(2) except KeyboardInterrupt: pass # Shutdown log.info("Shutting down subsystems...") if camera_mgr: camera_mgr.stop() for sub in reversed(subsystems): sub.stop() log.info("Vigilar stopped")