Task 1 — Presence: ping family phones, derive household state (EMPTY/KIDS_HOME/ADULTS_HOME/ALL_HOME), configurable departure delay, per-member roles, auto-arm actions via MQTT. Task 2 — Detection: MobileNet-SSD v2 via OpenCV DNN for person/vehicle classification. Vehicle color/size fingerprinting for known car matching. Zone-based filtering per camera. Model download script. Task 3 — Health: periodic disk/MQTT/subsystem checks, auto-prune oldest non-starred recordings on disk pressure, daily digest builder. 126 tests passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
178 lines
5.6 KiB
Python
178 lines
5.6 KiB
Python
"""Process supervisor — starts and monitors all Vigilar subsystems."""
|
|
|
|
import logging
|
|
import signal
|
|
import sys
|
|
import time
|
|
from multiprocessing import Process
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from vigilar.config import VigilarConfig
|
|
from vigilar.storage.db import get_db_path, init_db
|
|
|
|
# Module-level logger, named after this module, used by the supervisor code below.
log = logging.getLogger(__name__)
|
|
|
|
|
|
class SubsystemProcess:
    """Wrapper for a subsystem running in a child process.

    Stores the callable and arguments needed to (re)spawn the subsystem as a
    daemonic ``multiprocessing.Process`` and keeps the bookkeeping used for
    restart backoff.

    Attributes:
        name: Label used for the process name and in log messages.
        target: Callable executed in the child process.
        args: Positional arguments passed to ``target``.
        process: The most recently started ``Process``, or ``None`` before
            the first ``start()``.
        restart_count: Number of restarts performed so far (never reset here).
        max_restarts: Restart attempts allowed before giving up.
        last_restart: ``time.monotonic()`` reading taken at the last restart.
    """

    def __init__(self, name: str, target: Any, args: tuple = ()):
        self.name = name
        self.target = target
        self.args = args
        self.process: Process | None = None
        self.restart_count = 0
        self.max_restarts = 10
        # Starts at 0, so the first restart is only delayed if the monotonic
        # clock itself reads less than the initial one-second backoff.
        self.last_restart: float = 0
        # True once the "giving up" error has been emitted; callers poll
        # maybe_restart() in a loop and would otherwise repeat it every cycle.
        self._gave_up_logged = False

    def start(self) -> None:
        """Spawn a fresh daemonic child process running ``target(*args)``."""
        self.process = Process(target=self.target, args=self.args, name=self.name, daemon=True)
        self.process.start()
        log.info("Started %s (pid=%s)", self.name, self.process.pid)

    def is_alive(self) -> bool:
        """Return True if a child has been started and is still running."""
        return self.process is not None and self.process.is_alive()

    def stop(self, timeout: float = 5.0) -> None:
        """Terminate the child, escalating to kill if it does not exit.

        Does nothing when no child was started or it has already exited.

        Args:
            timeout: Seconds to wait after ``terminate()`` before killing.
        """
        if self.process and self.process.is_alive():
            log.info("Stopping %s (pid=%s)", self.name, self.process.pid)
            self.process.terminate()
            self.process.join(timeout=timeout)
            if self.process.is_alive():
                log.warning("Force-killing %s", self.name)
                self.process.kill()
                self.process.join(timeout=2)

    def maybe_restart(self) -> bool:
        """Restart if crashed, with backoff. Returns True if restarted.

        The backoff is ``min(2 ** restart_count, 60)`` seconds measured from
        the previous restart. Once ``restart_count`` reaches ``max_restarts``
        no further restarts are attempted and the give-up error is logged a
        single time rather than on every call.
        """
        if self.is_alive():
            return False
        if self.restart_count >= self.max_restarts:
            if not self._gave_up_logged:
                log.error("%s exceeded max restarts (%d), giving up", self.name, self.max_restarts)
                self._gave_up_logged = True
            return False

        backoff = min(2 ** self.restart_count, 60)
        elapsed = time.monotonic() - self.last_restart
        if elapsed < backoff:
            return False

        # Re-arm the give-up message in case the caller raised the limit or
        # reset the counter after a previous give-up.
        self._gave_up_logged = False
        self.restart_count += 1
        self.last_restart = time.monotonic()
        log.warning("Restarting %s (attempt %d)", self.name, self.restart_count)
        self.start()
        return True
|
|
|
|
|
|
def _run_web(cfg: VigilarConfig) -> None:
    """Child-process entry point that serves the Flask web application.

    Builds the app from ``cfg`` and runs it on the host and port given by
    ``cfg.web``, with debug mode and the reloader switched off.
    """
    # Function-local import: resolved when this entry point runs, not when
    # the supervisor module is imported.
    from vigilar.web.app import create_app

    server = create_app(cfg)
    web_cfg = cfg.web
    server.run(
        host=web_cfg.host,
        port=web_cfg.port,
        debug=False,
        use_reloader=False,
    )
|
|
|
|
|
|
def _run_event_processor(cfg: VigilarConfig) -> None:
    """Child-process entry point for the event processor.

    Constructs an ``EventProcessor`` from ``cfg`` and hands control to its
    ``run()`` method.
    """
    # Function-local import: resolved when this entry point runs.
    from vigilar.events.processor import EventProcessor

    EventProcessor(cfg).run()
|
|
|
|
|
|
def _run_ups_monitor(cfg: VigilarConfig) -> None:
    """Child-process entry point for the UPS monitor.

    Constructs a ``UPSMonitor`` from ``cfg`` and hands control to its
    ``run()`` method.
    """
    # Function-local import: resolved when this entry point runs.
    from vigilar.ups.monitor import UPSMonitor

    UPSMonitor(cfg).run()
|
|
|
|
|
|
def run_supervisor(cfg: VigilarConfig) -> None:
    """Main supervisor loop — starts all subsystems, monitors, restarts on crash.

    Sequence:
      1. Create the data, recordings and HLS directories.
      2. Initialize the database.
      3. Build the list of subsystems enabled in ``cfg`` (web and event
         processor always; sensor bridge, UPS, presence and health monitors
         depending on configuration) plus an optional camera manager.
      4. Install SIGTERM/SIGINT handlers, start everything, and poll every
         two seconds, restarting dead subsystems.
      5. On shutdown request, stop the camera manager, then the subsystems
         in reverse start order.

    Args:
        cfg: Loaded Vigilar configuration.
    """
    # Ensure directories exist. This is done before the database is
    # initialized because the database path is derived from data_dir; on a
    # fresh install the directory may not exist yet.
    Path(cfg.system.data_dir).mkdir(parents=True, exist_ok=True)
    Path(cfg.system.recordings_dir).mkdir(parents=True, exist_ok=True)
    Path(cfg.system.hls_dir).mkdir(parents=True, exist_ok=True)

    # Initialize database
    db_path = get_db_path(cfg.system.data_dir)
    init_db(db_path)

    # Build subsystem list
    subsystems: list[SubsystemProcess] = []

    # Web server
    subsystems.append(SubsystemProcess("web", _run_web, (cfg,)))

    # Camera manager (manages its own child processes internally)
    from vigilar.camera.manager import CameraManager
    camera_mgr = CameraManager(cfg) if cfg.cameras else None

    # Event processor
    subsystems.append(SubsystemProcess("event-processor", _run_event_processor, (cfg,)))

    # Phase 7 — Sensor bridge
    if cfg.sensors:
        from vigilar.sensors.bridge import run_sensor_bridge
        subsystems.append(SubsystemProcess("sensor-bridge", run_sensor_bridge, (cfg,)))

    # Phase 8 — UPS monitor
    if cfg.ups.enabled:
        subsystems.append(SubsystemProcess("ups-monitor", _run_ups_monitor, (cfg,)))

    # Presence monitor
    if cfg.presence.enabled and cfg.presence.members:
        from vigilar.presence.monitor import run_presence_monitor
        subsystems.append(SubsystemProcess("presence-monitor", run_presence_monitor, (cfg,)))

    # Health monitor
    if cfg.health.enabled:
        from vigilar.health.monitor import run_health_monitor
        subsystems.append(SubsystemProcess("health-monitor", run_health_monitor, (cfg,)))

    # Handle signals for clean shutdown
    shutdown_requested = False

    def handle_signal(signum: int, frame: Any) -> None:
        """First signal requests a graceful stop; a second one exits at once."""
        nonlocal shutdown_requested
        if shutdown_requested:
            log.warning("Force shutdown")
            sys.exit(1)
        shutdown_requested = True
        log.info("Shutdown requested (signal %d)", signum)

    # NOTE(review): handlers are installed before the children are started.
    # With the "fork" start method children would inherit them, in which case
    # a single SIGTERM from SubsystemProcess.stop() would only set the child's
    # copy of the flag — confirm subsystems install their own handlers.
    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    # Start all subsystems
    log.info("Starting %d subsystems", len(subsystems))
    for sub in subsystems:
        sub.start()

    if camera_mgr:
        camera_mgr.start()

    log.info("Vigilar is running")

    # Monitor loop
    try:
        while not shutdown_requested:
            for sub in subsystems:
                if not sub.is_alive():
                    sub.maybe_restart()
            if camera_mgr:
                camera_mgr.check_and_restart()
            time.sleep(2)
    except KeyboardInterrupt:
        # SIGINT is normally routed to handle_signal; this only guards the
        # case where a KeyboardInterrupt still reaches the loop.
        pass

    # Shutdown
    log.info("Shutting down subsystems...")
    if camera_mgr:
        camera_mgr.stop()
    # Stop in reverse start order.
    for sub in reversed(subsystems):
        sub.stop()

    log.info("Vigilar stopped")
|