vigilar/vigilar/main.py
Aaron D. Lee 8314a61815 Add presence detection, person/vehicle AI detection, health monitoring
Task 1 — Presence: ping family phones, derive household state
(EMPTY/KIDS_HOME/ADULTS_HOME/ALL_HOME), configurable departure delay,
per-member roles, auto-arm actions via MQTT.

Task 2 — Detection: MobileNet-SSD v2 via OpenCV DNN for person/vehicle
classification. Vehicle color/size fingerprinting for known car matching.
Zone-based filtering per camera. Model download script.

Task 3 — Health: periodic disk/MQTT/subsystem checks, auto-prune oldest
non-starred recordings on disk pressure, daily digest builder.

126 tests passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 00:06:45 -04:00

178 lines
5.6 KiB
Python

"""Process supervisor — starts and monitors all Vigilar subsystems."""
import logging
import signal
import sys
import time
from multiprocessing import Process
from pathlib import Path
from typing import Any
from vigilar.config import VigilarConfig
from vigilar.storage.db import get_db_path, init_db
# Module-level logger, named after this module via __name__; used by the
# supervisor and by SubsystemProcess for lifecycle messages.
log = logging.getLogger(__name__)
class SubsystemProcess:
    """Wrapper for a subsystem running in a child process.

    Stores the callable and arguments needed to (re)create the child process,
    together with the bookkeeping used for crash recovery: the number of
    restarts attempted so far and the monotonic time of the last one.

    ``restart_count`` is never reset by this class, so ``max_restarts`` bounds
    the total number of restarts over the lifetime of the wrapper.
    """

    def __init__(self, name: str, target: Any, args: tuple = (), max_restarts: int = 10):
        """Record how to launch the subsystem; no process is started yet.

        Args:
            name: Label used for the child process and in log messages.
            target: Callable executed in the child process.
            args: Positional arguments passed to ``target``.
            max_restarts: Restart attempts allowed before giving up.
        """
        self.name = name
        self.target = target
        self.args = args
        # None until start() has been called at least once.
        self.process: Process | None = None
        self.restart_count = 0
        self.max_restarts = max_restarts
        # time.monotonic() value taken at the most recent restart (0 = never).
        self.last_restart: float = 0
        # True once the "giving up" error has been emitted, so that repeated
        # polling of maybe_restart() does not repeat it on every call.
        self._gave_up_logged = False

    def start(self) -> None:
        """Create a new daemonic child process for ``target`` and start it."""
        self.process = Process(target=self.target, args=self.args, name=self.name, daemon=True)
        self.process.start()
        log.info("Started %s (pid=%s)", self.name, self.process.pid)

    def is_alive(self) -> bool:
        """Return True if a child process exists and is still running."""
        return self.process is not None and self.process.is_alive()

    def stop(self, timeout: float = 5.0) -> None:
        """Terminate the child, escalating to kill if it does not exit.

        Does nothing when no child is running.

        Args:
            timeout: Seconds to wait after ``terminate()`` before force-killing.
        """
        if self.process and self.process.is_alive():
            log.info("Stopping %s (pid=%s)", self.name, self.process.pid)
            self.process.terminate()
            self.process.join(timeout=timeout)
            if self.process.is_alive():
                log.warning("Force-killing %s", self.name)
                self.process.kill()
                self.process.join(timeout=2)

    def maybe_restart(self) -> bool:
        """Restart if crashed, with backoff. Returns True if restarted.

        Returns False when the child is still alive, when the restart limit
        has been reached, or when the backoff delay since the previous restart
        has not yet elapsed.
        """
        if self.is_alive():
            return False
        if self.restart_count >= self.max_restarts:
            # This method is meant to be polled; report the give-up only once
            # instead of emitting the same error on every poll.
            if not self._gave_up_logged:
                log.error("%s exceeded max restarts (%d), giving up", self.name, self.max_restarts)
                self._gave_up_logged = True
            return False
        # Exponential backoff capped at 60 seconds. The exponent is clamped
        # (2 ** 6 already exceeds the cap) so a large restart limit never
        # builds huge integers; the resulting delays are unchanged.
        backoff = min(2 ** min(self.restart_count, 6), 60)
        elapsed = time.monotonic() - self.last_restart
        if elapsed < backoff:
            return False
        self.restart_count += 1
        self.last_restart = time.monotonic()
        self._gave_up_logged = False
        log.warning("Restarting %s (attempt %d)", self.name, self.restart_count)
        self.start()
        return True
def _run_web(cfg: VigilarConfig) -> None:
    """Child-process entry point: build the Flask app from *cfg* and serve it."""
    # Imported inside the function rather than at module level (presumably so
    # the web stack is only loaded in the process that needs it).
    from vigilar.web.app import create_app

    # Debug mode and the reloader are both explicitly disabled.
    create_app(cfg).run(
        host=cfg.web.host,
        port=cfg.web.port,
        debug=False,
        use_reloader=False,
    )
def _run_event_processor(cfg: VigilarConfig) -> None:
    """Child-process entry point: construct an EventProcessor for *cfg* and run it."""
    # Imported inside the function rather than at module level.
    from vigilar.events.processor import EventProcessor

    EventProcessor(cfg).run()
def _run_ups_monitor(cfg: VigilarConfig) -> None:
    """Child-process entry point: construct a UPSMonitor for *cfg* and run it."""
    # Imported inside the function rather than at module level.
    from vigilar.ups.monitor import UPSMonitor

    UPSMonitor(cfg).run()
def run_supervisor(cfg: VigilarConfig) -> None:
    """Main supervisor loop — starts all subsystems, monitors, restarts on crash.

    Steps, in order:
      1. Create the data, recordings and HLS directories, then initialize the
         database.
      2. Build the list of subsystems enabled by ``cfg`` (web and event
         processor are always included).
      3. Install SIGTERM/SIGINT handlers, start everything, and poll every two
         seconds, restarting subsystems that have died.
      4. On shutdown, stop the camera manager and then the subsystems in
         reverse start order.

    Args:
        cfg: Loaded Vigilar configuration.
    """
    # Ensure directories exist before the database is initialized, so that
    # init_db never runs against a data directory that has not been created
    # yet (e.g. the first run on a fresh host).
    Path(cfg.system.data_dir).mkdir(parents=True, exist_ok=True)
    Path(cfg.system.recordings_dir).mkdir(parents=True, exist_ok=True)
    Path(cfg.system.hls_dir).mkdir(parents=True, exist_ok=True)

    # Initialize database
    db_path = get_db_path(cfg.system.data_dir)
    init_db(db_path)

    # Build subsystem list
    subsystems: list[SubsystemProcess] = []

    # Web server
    subsystems.append(SubsystemProcess("web", _run_web, (cfg,)))

    # Camera manager (manages its own child processes internally).
    # Imported only when cameras are configured, matching how the other
    # optional subsystems below are imported.
    camera_mgr = None
    if cfg.cameras:
        from vigilar.camera.manager import CameraManager

        camera_mgr = CameraManager(cfg)

    # Event processor
    subsystems.append(SubsystemProcess("event-processor", _run_event_processor, (cfg,)))

    # Phase 7 — Sensor bridge
    if cfg.sensors:
        from vigilar.sensors.bridge import run_sensor_bridge

        subsystems.append(SubsystemProcess("sensor-bridge", run_sensor_bridge, (cfg,)))

    # Phase 8 — UPS monitor
    if cfg.ups.enabled:
        subsystems.append(SubsystemProcess("ups-monitor", _run_ups_monitor, (cfg,)))

    # Presence monitor
    if cfg.presence.enabled and cfg.presence.members:
        from vigilar.presence.monitor import run_presence_monitor

        subsystems.append(SubsystemProcess("presence-monitor", run_presence_monitor, (cfg,)))

    # Health monitor
    if cfg.health.enabled:
        from vigilar.health.monitor import run_health_monitor

        subsystems.append(SubsystemProcess("health-monitor", run_health_monitor, (cfg,)))

    # Handle signals for clean shutdown
    shutdown_requested = False

    def handle_signal(signum: int, frame: Any) -> None:
        """First signal requests a graceful shutdown; a second one exits at once."""
        nonlocal shutdown_requested
        if shutdown_requested:
            log.warning("Force shutdown")
            sys.exit(1)
        shutdown_requested = True
        log.info("Shutdown requested (signal %d)", signum)

    # NOTE(review): the handlers are installed before the children are started.
    # If the multiprocessing start method is "fork", children inherit them —
    # confirm that subsystems install their own handlers where that matters.
    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    # Start all subsystems
    log.info("Starting %d subsystems", len(subsystems))
    for sub in subsystems:
        sub.start()
    if camera_mgr:
        camera_mgr.start()
    log.info("Vigilar is running")

    # Monitor loop
    try:
        while not shutdown_requested:
            for sub in subsystems:
                if not sub.is_alive():
                    sub.maybe_restart()
            if camera_mgr:
                camera_mgr.check_and_restart()
            time.sleep(2)
    except KeyboardInterrupt:
        # SIGINT is routed to handle_signal above; this is only a fallback.
        pass

    # Shutdown
    log.info("Shutting down subsystems...")
    if camera_mgr:
        camera_mgr.stop()
    # Stop in reverse start order.
    for sub in reversed(subsystems):
        sub.stop()
    log.info("Vigilar stopped")