fieldwitness/frontends/web/blueprints/dropbox.py
Aaron D. Lee 171e51643c
Some checks failed
CI / lint (push) Failing after 53s
CI / typecheck (push) Failing after 30s
Add extract-then-strip EXIF pipeline for attestation intake
Resolves the tension between steganography (strip everything to
protect sources) and attestation (preserve evidence of provenance):

- New soosef.metadata module with extract_and_classify() and
  extract_strip_pipeline() — classifies EXIF fields as evidentiary
  (GPS, timestamp — valuable for proving provenance) vs dangerous
  (device serial, firmware — could identify the source)
- Drop box now uses extract-then-strip: attests ORIGINAL bytes (hash
  matches what source submitted), extracts evidentiary EXIF into
  attestation metadata, strips dangerous fields, stores clean copy
- Attest route gains strip_device option: when enabled, includes
  GPS/timestamp in attestation but excludes device serial/firmware
- Stego encode unchanged: still strips all metadata from carriers
  (correct for steganography threat model)

The key insight: for stego, the carrier is a vessel (strip everything).
For attestation, EXIF is the evidence (extract, classify, preserve
selectively). Both hashes (original + stripped) are recorded so the
relationship between raw submission and stored copy is provable.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-01 20:57:36 -04:00

240 lines
8.9 KiB
Python

"""
Source drop box blueprint — anonymous, token-gated file submission.
Provides a SecureDrop-like intake that lives inside SooSeF:
- Admin creates a time-limited upload token
- Source opens the token URL in a browser (no account needed)
- Files are uploaded, auto-attested against the original bytes on receipt, and stored as a metadata-stripped copy
- Source receives a one-time receipt code to confirm delivery
- Token self-destructs after use or timeout
"""
from __future__ import annotations
import hashlib
import json
import os
import secrets
from datetime import UTC, datetime, timedelta
from pathlib import Path
from auth import admin_required, login_required
from flask import Blueprint, Response, flash, redirect, render_template, request, url_for
from soosef.audit import log_action
from soosef.paths import TEMP_DIR
# Flask blueprint for the drop box; every route below is mounted under /dropbox.
bp = Blueprint("dropbox", __name__, url_prefix="/dropbox")
# In-memory token store, keyed by the URL-safe token string. Because it lives
# only in this module's memory, its contents do not outlive the process.
# In production, this should be persisted to SQLite.
# Token format: {token: {created_at, expires_at, max_files, label, used, receipts[]}}
#   created_at / expires_at: ISO-8601 strings (UTC)
#   max_files: upload quota for the token; used: files accepted so far
#   receipts: list of receipt codes issued for accepted files
_tokens: dict[str, dict] = {}
# Directory where the stripped copies of uploaded files are written.
_TOKEN_DIR = TEMP_DIR / "dropbox"
def _ensure_token_dir() -> None:
    """Create the drop box storage directory and restrict it to the owner.

    The directory is created with mode ``0o700`` up front so it is not
    briefly accessible with umask-default permissions between creation and
    the ``chmod`` call. ``mkdir`` ignores ``mode`` when the directory already
    exists, so the explicit ``chmod`` is kept to tighten a pre-existing
    directory as well.
    """
    _TOKEN_DIR.mkdir(mode=0o700, parents=True, exist_ok=True)
    _TOKEN_DIR.chmod(0o700)
@bp.route("/admin", methods=["GET", "POST"])
@admin_required
def admin():
    """Admin panel for creating and managing drop box tokens.

    A POST dispatches on the ``action`` form field:

    - ``create``: mints a new upload token from the ``label``, ``hours`` and
      ``max_files`` fields, writes an audit entry, and flashes the upload URL.
      ``hours`` and ``max_files`` must be positive whole numbers; anything
      else is rejected with a flashed error instead of an unhandled exception.
    - ``revoke``: deletes the token given in the ``token`` field, if present,
      and writes an audit entry.

    On every request, expired tokens are purged before the token list is
    rendered with the ``dropbox/admin.html`` template.
    """
    if request.method == "POST":
        action = request.form.get("action")
        if action == "create":
            label = request.form.get("label", "").strip() or "Unnamed source"
            # One timestamp for both fields so created_at/expires_at differ
            # by exactly the requested lifetime.
            now = datetime.now(UTC)
            try:
                hours = int(request.form.get("hours", 24))
                max_files = int(request.form.get("max_files", 10))
                if hours < 1 or max_files < 1:
                    raise ValueError("hours and max_files must be positive")
                # Absurdly large lifetimes overflow timedelta/datetime.
                expires_at = now + timedelta(hours=hours)
            except (ValueError, OverflowError):
                # NOTE(review): assumes the admin template renders the
                # "error" flash category — confirm against the template.
                flash("Hours and max files must be positive whole numbers.", "error")
            else:
                token = secrets.token_urlsafe(32)
                _tokens[token] = {
                    "created_at": now.isoformat(),
                    "expires_at": expires_at.isoformat(),
                    "max_files": max_files,
                    "label": label,
                    "used": 0,
                    "receipts": [],
                }
                # Only a token prefix goes to the audit log; the full token
                # is the upload credential.
                log_action(
                    actor=request.environ.get("REMOTE_USER", "admin"),
                    action="dropbox.token_created",
                    target=token[:8],
                    outcome="success",
                    source="web",
                )
                upload_url = url_for("dropbox.upload", token=token, _external=True)
                flash(f"Drop box created. Share this URL with your source: {upload_url}", "success")
        elif action == "revoke":
            token = request.form.get("token", "")
            if _tokens.pop(token, None) is not None:
                # Mirror the audit trail written on creation.
                log_action(
                    actor=request.environ.get("REMOTE_USER", "admin"),
                    action="dropbox.token_revoked",
                    target=token[:8],
                    outcome="success",
                    source="web",
                )
                flash("Token revoked.", "success")

    # Clean expired tokens. The list is built first so the dict is not
    # mutated while it is being iterated.
    now = datetime.now(UTC)
    expired = [t for t, d in _tokens.items() if datetime.fromisoformat(d["expires_at"]) < now]
    for t in expired:
        del _tokens[t]

    return render_template("dropbox/admin.html", tokens=_tokens)
def _validate_token(token: str) -> dict | None:
    """Look up *token* and return its record if it may still accept uploads.

    Returns ``None`` when the token is unknown, when it has expired (in which
    case it is also removed from the store), or when its upload quota has
    been used up.
    """
    record = _tokens.get(token)
    if record is None:
        return None

    expiry = datetime.fromisoformat(record["expires_at"])
    if expiry < datetime.now(UTC):
        # Expired tokens are dropped eagerly on lookup.
        _tokens.pop(token)
        return None

    quota_left = record["used"] < record["max_files"]
    return record if quota_left else None
def _safe_upload_name(raw_name: str | None) -> str:
    """Reduce a client-supplied filename to one safe ASCII path component.

    Directory parts (either separator style) are dropped, every character
    other than ASCII letters, digits, ``.``, ``_`` and ``-`` becomes ``_``,
    leading dots are removed, and the result is capped at 128 characters.
    Falls back to ``"upload"`` when nothing usable remains.
    """
    # Some clients send Windows-style paths; normalise before taking the tail.
    base = (raw_name or "").replace("\\", "/").rsplit("/", 1)[-1]
    cleaned = "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "._-" else "_" for ch in base
    )
    cleaned = cleaned.lstrip(".")[:128]
    return cleaned or "upload"


def _attestation_metadata(label: str, extraction) -> dict:
    """Build the attestation metadata dict for one drop box submission.

    Starts from the fixed ``source``/``label``/``stripped_sha256`` fields and
    adds every entry of ``extraction.evidentiary``, converted as follows:
    objects with ``isoformat`` are rendered through it, dataclass instances
    through ``dataclasses.asdict``, dicts are passed through unchanged, and
    anything else is stringified.
    """
    from dataclasses import asdict

    metadata = {
        "source": "dropbox",
        "label": label,
        "stripped_sha256": extraction.stripped_sha256,
    }
    for key, value in extraction.evidentiary.items():
        if hasattr(value, "isoformat"):
            metadata[key] = value.isoformat()
        elif hasattr(value, "__dataclass_fields__"):
            metadata[key] = asdict(value)
        elif isinstance(value, dict):
            metadata[key] = value
        else:
            metadata[key] = str(value)
    return metadata


def _attest_submission(raw_data: bytes, label: str, extraction, sha256: str) -> None:
    """Best-effort attestation of the ORIGINAL submitted bytes.

    Any failure (including the attestation modules failing to import) is
    logged and swallowed so that an attestation problem never fails the
    upload itself. Nothing is attested when no private key is available.
    """
    import logging

    try:
        from blueprints.attest import _get_private_key, _get_storage
        from soosef.verisoo.attestation import create_attestation

        attest_metadata = _attestation_metadata(label, extraction)
        private_key = _get_private_key()
        if private_key:
            attestation = create_attestation(raw_data, private_key, metadata=attest_metadata)
            storage = _get_storage()
            storage.append_record(attestation.record)
    except Exception:
        # Deliberately broad: attestation is best-effort. Log only a hash
        # prefix, never the client filename.
        logging.getLogger(__name__).warning(
            "Drop box attestation failed for submission %s", sha256[:16], exc_info=True
        )


@bp.route("/upload/<token>", methods=["GET", "POST"])
def upload(token):
    """Source-facing upload page. No authentication required.

    GET returns a minimal HTML upload form. POST accepts the ``files``
    multipart field and, for each non-empty file until the token's quota is
    reached:

    1. runs the extract-then-strip pipeline on the submitted bytes,
    2. writes the stripped copy to the drop box directory under a name built
       from the original SHA-256 prefix and a sanitised filename,
    3. attempts (best-effort) to attest the ORIGINAL bytes, and
    4. issues a one-time receipt code.

    Responses: 404 plain text for an invalid/expired/exhausted token, 400
    plain text when no non-empty file was submitted, otherwise a plain-text
    receipt listing each accepted file.
    """
    token_data = _validate_token(token)
    if token_data is None:
        return Response(
            "This upload link has expired or is invalid.",
            status=404,
            content_type="text/plain",
        )

    if request.method == "POST":
        files = request.files.getlist("files")
        if not files:
            return Response("No files provided.", status=400, content_type="text/plain")

        # Function-scope import, hoisted out of the per-file loop.
        from soosef.metadata import extract_strip_pipeline

        _ensure_token_dir()
        receipts = []
        for f in files:
            if token_data["used"] >= token_data["max_files"]:
                break
            raw_data = f.read()
            if not raw_data:
                continue

            # Extract-then-strip pipeline:
            # 1. Extract EXIF into attestation metadata (evidentiary fields)
            # 2. Attest the ORIGINAL bytes (hash matches what source submitted)
            # 3. Strip metadata from the stored copy (protect source device info)
            extraction, stripped_data = extract_strip_pipeline(raw_data)

            # SHA-256 of what the source actually submitted
            sha256 = extraction.original_sha256

            # Save the stripped copy. The client-supplied filename is
            # untrusted, so only a sanitised single component reaches the path.
            dest = _TOKEN_DIR / f"{sha256[:16]}_{_safe_upload_name(f.filename)}"
            dest.write_bytes(stripped_data)

            _attest_submission(raw_data, token_data["label"], extraction, sha256)

            # Generate receipt code
            receipt_code = secrets.token_hex(8)
            receipts.append({
                "filename": f.filename,
                "sha256": sha256,
                "receipt_code": receipt_code,
                "received_at": datetime.now(UTC).isoformat(),
            })
            token_data["used"] += 1
            token_data["receipts"].append(receipt_code)

        if not receipts:
            # Every part was empty (e.g. the form was submitted with no file
            # selected); do not claim that files were received.
            return Response("No files provided.", status=400, content_type="text/plain")

        remaining = token_data["max_files"] - token_data["used"]

        # Return receipt codes as plain text (minimal fingerprint)
        parts = ["FILES RECEIVED\n", "=" * 40, "\n\n"]
        for r in receipts:
            parts.append(
                f"File: {r['filename']}\n"
                f"Receipt: {r['receipt_code']}\n"
                f"SHA-256: {r['sha256']}\n\n"
            )
        parts.append(f"Remaining uploads on this link: {remaining}\n")
        parts.append("\nSave your receipt codes. They confirm your submission was received.\n")
        return Response("".join(parts), content_type="text/plain")

    # GET — show upload form (minimal, no SooSeF branding for source safety)
    remaining = token_data["max_files"] - token_data["used"]
    return f"""<!DOCTYPE html>
<html><head><title>Secure Upload</title>
<style>body{{font-family:sans-serif;max-width:600px;margin:40px auto;padding:20px}}
input[type=file]{{margin:10px 0}}button{{padding:10px 20px}}</style></head>
<body>
<h2>Secure File Upload</h2>
<p>Select files to upload. You may upload up to {remaining} file(s).</p>
<p>Your files will be timestamped on receipt. No account or personal information is required.</p>
<form method="POST" enctype="multipart/form-data">
<input type="file" name="files" multiple accept="image/*,.pdf,.doc,.docx,.txt"><br>
<button type="submit">Upload</button>
</form>
<p style="color:#666;font-size:12px">This link will expire automatically. Do not bookmark it.</p>
</body></html>"""
@bp.route("/verify-receipt", methods=["POST"])
def verify_receipt():
    """Confirm to a source whether a receipt code matches a recorded submission.

    Reads the ``code`` form field. Responds 400 when it is blank, 200 when
    the code appears in the receipts of any token currently in the store,
    and 404 otherwise. All responses are plain text.
    """
    code = request.form.get("code", "").strip()
    if code == "":
        return Response("No receipt code provided.", status=400, content_type="text/plain")

    # Receipts live on their token record, so scan every token still in memory.
    is_known = any(code in entry["receipts"] for entry in _tokens.values())
    if not is_known:
        return Response(
            f"Receipt {code} was not found. It may have expired.",
            status=404,
            content_type="text/plain",
        )

    return Response(
        f"Receipt {code} is VALID. Your submission was received.",
        content_type="text/plain",
    )