Complete project rebrand for better positioning in the press freedom and digital security space. FieldWitness communicates both field deployment and evidence testimony — appropriate for the target audience of journalists, NGOs, and human rights organizations. Rename mapping: - soosef → fieldwitness (package, CLI, all imports) - soosef.stegasoo → fieldwitness.stego - soosef.verisoo → fieldwitness.attest - ~/.soosef/ → ~/.fwmetadata/ (innocuous data dir name) - SOOSEF_DATA_DIR → FIELDWITNESS_DATA_DIR - SoosefConfig → FieldWitnessConfig - SoosefError → FieldWitnessError Also includes: - License switch from MIT to GPL-3.0 - C2PA bridge module (Phase 0-2 MVP): cert.py, export.py, vendor_assertions.py - README repositioned to lead with provenance/federation, stego backgrounded - Threat model skeleton at docs/security/threat-model.md - Planning docs: docs/planning/c2pa-integration.md, docs/planning/gtm-feasibility.md Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
251 lines
6.3 KiB
Python
251 lines
6.3 KiB
Python
"""
|
|
File-based Temporary Storage
|
|
|
|
Stores temp files on disk instead of in-memory dict.
|
|
This allows multiple Gunicorn workers to share temp files
|
|
and survives service restarts within the expiry window.
|
|
|
|
Files are stored in a temp directory with:
|
|
- {file_id}.data - The actual file data
|
|
- {file_id}.json - Metadata (filename, timestamp, mime_type, etc.)
|
|
|
|
IMPORTANT: This module ONLY manages files in the temp directory.
|
|
It does NOT touch instance/ (auth database) or any other directories.
|
|
|
|
By default, all temp files are written to fieldwitness.paths.TEMP_DIR (the
temp/ folder of the FieldWitness data directory) so that the killswitch's
destroy_temp_files step covers them.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import platform
|
|
import subprocess
|
|
import time
|
|
from pathlib import Path
|
|
from threading import Lock
|
|
|
|
import fieldwitness.paths as paths
|
|
|
|
# Default temp directory: paths.TEMP_DIR, the same location the killswitch
# purges, so the killswitch can reach every file written here.
# NOTE(review): older comments spell this path as ~/.fieldwitness/temp/ —
# confirm the actual location against fieldwitness.paths.
DEFAULT_TEMP_DIR: Path = paths.TEMP_DIR

# Serialises the mutating operations in this module. This is a
# threading.Lock, so it only coordinates threads inside one process.
_lock = Lock()

# Directory currently used for temp storage; reassigned by init().
_temp_dir: Path = DEFAULT_TEMP_DIR
|
|
|
|
|
|
def init(temp_dir: Path | str | None = None) -> None:
|
|
"""Initialize temp storage with optional custom directory."""
|
|
global _temp_dir
|
|
_temp_dir = Path(temp_dir) if temp_dir else DEFAULT_TEMP_DIR
|
|
_temp_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
def _data_path(file_id: str) -> Path:
    """Return where the payload for ``file_id`` lives in the temp directory."""
    filename = f"{file_id}.data"
    return _temp_dir / filename
|
|
|
|
|
|
def _meta_path(file_id: str) -> Path:
    """Return where the JSON metadata for ``file_id`` lives in the temp directory."""
    filename = f"{file_id}.json"
    return _temp_dir / filename
|
|
|
|
|
|
def _thumb_path(thumb_id: str) -> Path:
    """Return where the thumbnail ``thumb_id`` lives in the temp directory."""
    filename = f"{thumb_id}.thumb"
    return _temp_dir / filename
|
|
|
|
|
|
def _secure_delete(path: Path) -> None:
|
|
"""Overwrite and delete a file. Best-effort on flash storage."""
|
|
if not path.exists():
|
|
return
|
|
|
|
if platform.system() == "Linux":
|
|
try:
|
|
subprocess.run(
|
|
["shred", "-u", "-z", "-n", "3", str(path)],
|
|
timeout=30,
|
|
capture_output=True,
|
|
)
|
|
return
|
|
except (subprocess.TimeoutExpired, FileNotFoundError):
|
|
pass
|
|
|
|
# Fallback: overwrite with zeros then delete
|
|
try:
|
|
size = path.stat().st_size
|
|
with open(path, "r+b") as f:
|
|
f.write(b"\x00" * size)
|
|
f.flush()
|
|
os.fsync(f.fileno())
|
|
path.unlink()
|
|
except OSError:
|
|
# Last resort: plain unlink so we don't leave data stranded
|
|
path.unlink(missing_ok=True)
|
|
|
|
|
|
def save_temp_file(file_id: str, data: bytes, metadata: dict) -> None:
    """
    Save a temp file with its metadata.

    The metadata is serialised before anything is written, so a dict that
    cannot be encoded as JSON does not leave a payload file behind. The
    metadata file is written under a temporary name and moved into place
    with ``os.replace`` so that readers never observe a half-written JSON
    document.

    Args:
        file_id: Unique identifier for the file
        data: File contents as bytes
        metadata: Dict with filename, mime_type, timestamp, etc. If it has
            no "timestamp" key, one is added to the caller's dict in place.

    Raises:
        TypeError: If ``metadata`` is not JSON-serialisable.
        OSError: If writing either file fails.
    """
    init()  # Ensure directory exists

    with _lock:
        # Add timestamp if not present
        if "timestamp" not in metadata:
            metadata["timestamp"] = time.time()

        # Serialise first: fail before touching the disk.
        encoded = json.dumps(metadata)

        # Write data file
        _data_path(file_id).write_bytes(data)

        # Write metadata atomically. The staging name does not end in
        # ".json", and carries the pid so two processes never share it.
        meta_file = _meta_path(file_id)
        staging = meta_file.with_name(f"{meta_file.name}.{os.getpid()}.tmp")
        staging.write_text(encoded)
        os.replace(staging, meta_file)
|
|
|
|
|
|
def get_temp_file(file_id: str) -> dict | None:
    """
    Get a temp file and its metadata.

    Args:
        file_id: Identifier the file was saved under.

    Returns:
        Dict with 'data' (bytes) and all metadata fields, or None if either
        file is missing, unreadable, or the metadata is not a JSON object.
        The 'data' key always holds the file contents, even if the stored
        metadata happens to contain a key of the same name.
    """
    init()

    try:
        # Metadata first: it is the cheap read, and a failure here avoids
        # loading the payload for nothing.
        metadata = json.loads(_meta_path(file_id).read_text())
        data = _data_path(file_id).read_bytes()
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None

    if not isinstance(metadata, dict):
        return None

    # 'data' goes last so a metadata field cannot shadow the payload.
    return {**metadata, "data": data}
|
|
|
|
|
|
def has_temp_file(file_id: str) -> bool:
    """Report whether both the data file and the metadata file are present."""
    init()
    if not _data_path(file_id).exists():
        return False
    return _meta_path(file_id).exists()
|
|
|
|
|
|
def delete_temp_file(file_id: str) -> None:
    """Securely remove the data file and the metadata file for ``file_id``."""
    init()

    with _lock:
        # Payload first, then its metadata.
        for target in (_data_path(file_id), _meta_path(file_id)):
            _secure_delete(target)
|
|
|
|
|
|
def save_thumbnail(thumb_id: str, data: bytes) -> None:
    """Write thumbnail bytes to the temp directory under ``thumb_id``."""
    init()

    with _lock:
        destination = _thumb_path(thumb_id)
        destination.write_bytes(data)
|
|
|
|
|
|
def get_thumbnail(thumb_id: str) -> bytes | None:
    """Return the stored thumbnail bytes, or None if absent or unreadable."""
    init()

    source = _thumb_path(thumb_id)
    contents = None
    if source.exists():
        try:
            contents = source.read_bytes()
        except OSError:
            # Unreadable is reported the same way as missing.
            contents = None
    return contents
|
|
|
|
|
|
def delete_thumbnail(thumb_id: str) -> None:
    """Securely remove the thumbnail stored under ``thumb_id``."""
    init()

    with _lock:
        victim = _thumb_path(thumb_id)
        _secure_delete(victim)
|
|
|
|
|
|
def cleanup_expired(max_age_seconds: float) -> int:
    """
    Securely delete expired temp files.

    Every ``*.json`` metadata file in the temp directory is inspected. An
    entry is removed when its "timestamp" is older than ``max_age_seconds``
    (a missing timestamp counts as 0), or when its metadata cannot be read
    or interpreted. Removal covers the data file, the ``{file_id}_thumb``
    thumbnail, and the metadata file, so unreadable metadata never leaves
    an orphaned payload behind.

    Args:
        max_age_seconds: Maximum age in seconds before expiry

    Returns:
        Number of entries (metadata files) deleted
    """
    init()

    now = time.time()
    deleted = 0

    with _lock:
        # Find all metadata files
        for meta_file in _temp_dir.glob("*.json"):
            file_id = meta_file.stem

            try:
                metadata = json.loads(meta_file.read_text())
                expired = now - metadata.get("timestamp", 0) > max_age_seconds
            except (OSError, ValueError, TypeError, AttributeError):
                # Unreadable file, invalid JSON or encoding, non-dict
                # document, or non-numeric timestamp: treat as corrupted and
                # remove the whole entry.
                expired = True

            if not expired:
                continue

            try:
                _secure_delete(_data_path(file_id))
                # Also delete thumbnail if exists
                _secure_delete(_thumb_path(f"{file_id}_thumb"))
                # Metadata goes last: if an earlier step fails, the metadata
                # survives and the next sweep retries this entry.
                _secure_delete(meta_file)
            except OSError:
                # One undeletable file must not stop the rest of the sweep.
                continue

            deleted += 1

    return deleted
|
|
|
|
|
|
def cleanup_all() -> int:
    """
    Securely wipe every regular file in the temp directory.

    Intended to be called on service start/stop. Subdirectories and other
    non-file entries are left alone.

    Returns:
        Number of files deleted
    """
    init()

    removed = 0

    with _lock:
        for entry in _temp_dir.iterdir():
            if not entry.is_file():
                continue
            _secure_delete(entry)
            removed += 1

    return removed
|
|
|
|
|
|
def get_stats() -> dict:
    """Get temp storage statistics.

    Only ``*.data`` files are considered. A file that disappears between
    being listed and being measured (for example, deleted by a concurrent
    cleanup) is left out of both the count and the size total, so the two
    figures always describe the same set of files.

    Returns:
        Dict with "file_count", "total_size_bytes" and "temp_dir" (the temp
        directory as a string).
    """
    init()

    file_count = 0
    total_size = 0
    for data_file in _temp_dir.glob("*.data"):
        try:
            total_size += data_file.stat().st_size
        except FileNotFoundError:
            # Vanished after the glob; skip rather than fail.
            continue
        file_count += 1

    return {
        "file_count": file_count,
        "total_size_bytes": total_size,
        "temp_dir": str(_temp_dir),
    }
|