fieldwitness/frontends/web/temp_storage.py
Aaron D. Lee 490f9d4a1d Rebrand SooSeF to FieldWitness
Complete project rebrand for better positioning in the press freedom
and digital security space. FieldWitness communicates both field
deployment and evidence testimony — appropriate for the target audience
of journalists, NGOs, and human rights organizations.

Rename mapping:
- soosef → fieldwitness (package, CLI, all imports)
- soosef.stegasoo → fieldwitness.stego
- soosef.verisoo → fieldwitness.attest
- ~/.soosef/ → ~/.fwmetadata/ (innocuous data dir name)
- SOOSEF_DATA_DIR → FIELDWITNESS_DATA_DIR
- SoosefConfig → FieldWitnessConfig
- SoosefError → FieldWitnessError

Also includes:
- License switch from MIT to GPL-3.0
- C2PA bridge module (Phase 0-2 MVP): cert.py, export.py, vendor_assertions.py
- README repositioned to lead with provenance/federation, stego backgrounded
- Threat model skeleton at docs/security/threat-model.md
- Planning docs: docs/planning/c2pa-integration.md, docs/planning/gtm-feasibility.md

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 15:05:13 -04:00

251 lines
6.3 KiB
Python

"""
File-based Temporary Storage
Stores temp files on disk instead of in-memory dict.
This allows multiple Gunicorn workers to share temp files
and survives service restarts within the expiry window.
Files are stored in a temp directory with:
- {file_id}.data - The actual file data
- {file_id}.json - Metadata (filename, timestamp, mime_type, etc.)
IMPORTANT: This module ONLY manages files in the temp directory.
It does NOT touch instance/ (auth database) or any other directories.
All temp files are written to ~/.fieldwitness/temp/ (fieldwitness.paths.TEMP_DIR) so
that the killswitch's destroy_temp_files step covers them.
"""
import json
import os
import platform
import subprocess
import time
from pathlib import Path
from threading import Lock
import fieldwitness.paths as paths
# Default temp directory — always paths.TEMP_DIR so the killswitch's
# destroy_temp_files step (which purges paths.TEMP_DIR) can reach every file
# written here. NOTE(review): the actual location is defined in
# fieldwitness.paths; ~/.fieldwitness/temp/ may predate the ~/.fwmetadata/
# data-dir rename — confirm against paths.py.
DEFAULT_TEMP_DIR: Path = paths.TEMP_DIR
# Serializes save/delete operations across threads in this process.
# (threading.Lock is process-local; it does not coordinate across
# Gunicorn workers.)
_lock = Lock()
# Active temp directory; rebound by init() when a custom dir is supplied.
_temp_dir: Path = DEFAULT_TEMP_DIR
def init(temp_dir: Path | str | None = None) -> None:
    """Initialize temp storage, creating the directory if needed.

    Args:
        temp_dir: Optional custom directory; any falsy value (None, "")
            selects DEFAULT_TEMP_DIR.
    """
    global _temp_dir
    if temp_dir:
        _temp_dir = Path(temp_dir)
    else:
        _temp_dir = DEFAULT_TEMP_DIR
    _temp_dir.mkdir(parents=True, exist_ok=True)
def _data_path(file_id: str) -> Path:
    """Return the on-disk path holding the raw bytes for *file_id*."""
    return _temp_dir / (file_id + ".data")
def _meta_path(file_id: str) -> Path:
    """Return the on-disk path holding the JSON metadata for *file_id*."""
    return _temp_dir / (file_id + ".json")
def _thumb_path(thumb_id: str) -> Path:
    """Return the on-disk path holding thumbnail bytes for *thumb_id*."""
    return _temp_dir / (thumb_id + ".thumb")
def _secure_delete(path: Path) -> None:
"""Overwrite and delete a file. Best-effort on flash storage."""
if not path.exists():
return
if platform.system() == "Linux":
try:
subprocess.run(
["shred", "-u", "-z", "-n", "3", str(path)],
timeout=30,
capture_output=True,
)
return
except (subprocess.TimeoutExpired, FileNotFoundError):
pass
# Fallback: overwrite with zeros then delete
try:
size = path.stat().st_size
with open(path, "r+b") as f:
f.write(b"\x00" * size)
f.flush()
os.fsync(f.fileno())
path.unlink()
except OSError:
# Last resort: plain unlink so we don't leave data stranded
path.unlink(missing_ok=True)
def save_temp_file(file_id: str, data: bytes, metadata: dict) -> None:
    """
    Save a temp file with its metadata.

    Args:
        file_id: Unique identifier for the file
        data: File contents as bytes
        metadata: Dict with filename, mime_type, timestamp, etc. The
            caller's dict is NOT modified; a "timestamp" key is added
            to the stored copy when absent.
    """
    init()  # Ensure directory exists
    with _lock:
        # BUG FIX: previously the caller's dict was mutated in place
        # (shared-argument mutation). Work on a shallow copy instead.
        meta = dict(metadata)
        meta.setdefault("timestamp", time.time())
        # Write data file
        _data_path(file_id).write_bytes(data)
        # Write metadata
        _meta_path(file_id).write_text(json.dumps(meta))
def get_temp_file(file_id: str) -> dict | None:
    """
    Load a temp file together with its metadata.

    Returns:
        Dict with 'data' (bytes) plus every stored metadata field, or
        None when either companion file is missing or unreadable.
    """
    init()
    data_file = _data_path(file_id)
    meta_file = _meta_path(file_id)
    if not (data_file.exists() and meta_file.exists()):
        return None
    try:
        result = {"data": data_file.read_bytes()}
        result.update(json.loads(meta_file.read_text()))
        return result
    except (OSError, json.JSONDecodeError):
        return None
def has_temp_file(file_id: str) -> bool:
    """Return True when both the data and metadata files are present."""
    init()
    return all(p.exists() for p in (_data_path(file_id), _meta_path(file_id)))
def delete_temp_file(file_id: str) -> None:
    """Securely remove both the data and metadata for *file_id*."""
    init()
    with _lock:
        for victim in (_data_path(file_id), _meta_path(file_id)):
            _secure_delete(victim)
def save_thumbnail(thumb_id: str, data: bytes) -> None:
    """Persist thumbnail bytes under *thumb_id*."""
    init()
    with _lock:
        target = _thumb_path(thumb_id)
        target.write_bytes(data)
def get_thumbnail(thumb_id: str) -> bytes | None:
    """Return thumbnail bytes for *thumb_id*, or None if absent/unreadable."""
    init()
    try:
        # EAFP: a missing file raises FileNotFoundError, a subclass of
        # OSError, so one handler covers both the not-found and the
        # read-error cases exactly as the exists()-then-read form did.
        return _thumb_path(thumb_id).read_bytes()
    except OSError:
        return None
def delete_thumbnail(thumb_id: str) -> None:
    """Securely remove the thumbnail stored under *thumb_id*."""
    init()
    target = _thumb_path(thumb_id)
    with _lock:
        _secure_delete(target)
def cleanup_expired(max_age_seconds: float) -> int:
    """
    Securely delete expired temp files.

    Args:
        max_age_seconds: Maximum age in seconds before expiry

    Returns:
        Number of files deleted
    """
    init()
    now = time.time()
    deleted = 0
    with _lock:
        # Find all metadata files
        for meta_file in _temp_dir.glob("*.json"):
            file_id = meta_file.stem
            try:
                metadata = json.loads(meta_file.read_text())
                timestamp = metadata.get("timestamp", 0)
                if now - timestamp > max_age_seconds:
                    _secure_delete(_data_path(file_id))
                    _secure_delete(meta_file)
                    # Also delete thumbnail if exists
                    _secure_delete(_thumb_path(f"{file_id}_thumb"))
                    deleted += 1
            except (OSError, json.JSONDecodeError):
                # BUG FIX: on corrupted/unreadable metadata, only the .json
                # was removed, stranding the .data payload (and thumbnail)
                # on disk indefinitely — a retention bug in a module whose
                # whole point is secure cleanup. Remove the companions too.
                _secure_delete(_data_path(file_id))
                _secure_delete(meta_file)
                _secure_delete(_thumb_path(f"{file_id}_thumb"))
                deleted += 1
    return deleted
def cleanup_all() -> int:
    """
    Securely wipe every file in the temp directory.

    Intended for service start/stop so no stale payloads survive.

    Returns:
        Number of files deleted
    """
    init()
    count = 0
    with _lock:
        for entry in _temp_dir.iterdir():
            if not entry.is_file():
                continue
            _secure_delete(entry)
            count += 1
    return count
def get_stats() -> dict:
    """Get temp storage statistics.

    Returns:
        Dict with "file_count", "total_size_bytes", and "temp_dir" keys.
    """
    init()
    files = list(_temp_dir.glob("*.data"))
    total_size = 0
    for f in files:
        # BUG FIX (race): the old exists()-then-stat() could still raise if
        # a file was deleted between the two calls (e.g. by cleanup in
        # another thread/worker). EAFP: stat and skip files that vanished.
        try:
            total_size += f.stat().st_size
        except OSError:
            pass
    return {
        "file_count": len(files),
        "total_size_bytes": total_size,
        "temp_dir": str(_temp_dir),
    }