fieldwitness/frontends/web/blueprints/dropbox.py
Aaron D. Lee f557cac45a
Some checks failed
CI / lint (push) Failing after 56s
CI / typecheck (push) Failing after 29s
Implement 6 evidence lifecycle features
1. Client-side SHA-256 in drop box: browser computes and displays
   file fingerprints via SubtleCrypto before upload. Receipt codes
   are HMAC-derived from file hash so source can verify
   correspondence. Source sees hash before submitting.

2. Drop box token persistence: replaced in-memory dict with SQLite
   (dropbox.db). Tokens and receipts survive server restarts.
   Receipt verification now returns filename, SHA-256, and timestamp.

3. RFC 3161 trusted timestamps + manual anchors: new
   federation/anchors.py with get_chain_head_anchor(),
   submit_rfc3161(), save_anchor(), and manual export format.
   CLI: `soosef chain anchor [--tsa URL]`. A single anchor
   implicitly timestamps every preceding chain record.

4. Derived work lineage: attestation metadata supports
   derived_from (parent record ID) and derivation_type
   (crop, redact, brightness, etc.) for tracking edits
   through the chain of custody.

5. Self-contained evidence package: new soosef.evidence module
   with export_evidence_package() producing a ZIP with images,
   attestation records, chain data, public key, standalone
   verify.py script, and README.

6. Cold archive export: new soosef.archive module with
   export_cold_archive() bundling chain.bin, verisoo log,
   LMDB index, keys, anchors, trusted keys, ALGORITHMS.txt
   documenting all crypto, and verification instructions.
   Designed for OAIS (ISO 14721) alignment.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-01 21:04:20 -04:00

357 lines
13 KiB
Python

"""
Source drop box blueprint — anonymous, token-gated file submission.
Provides a SecureDrop-like intake that lives inside SooSeF:
- Admin creates a time-limited upload token
- Source opens the token URL in a browser (no account needed)
- Files are uploaded, EXIF-stripped, and auto-attested on receipt
- Source receives a one-time receipt code to confirm delivery
- Token self-destructs after use or timeout
"""
from __future__ import annotations
import hashlib
import json
import os
import secrets
from datetime import UTC, datetime, timedelta
from pathlib import Path
from auth import admin_required, login_required
from flask import Blueprint, Response, flash, redirect, render_template, request, url_for
from soosef.audit import log_action
from soosef.paths import AUTH_DIR, TEMP_DIR
# Blueprint for the drop box views; every route registered on it is served under /dropbox.
bp = Blueprint("dropbox", __name__, url_prefix="/dropbox")
# Directory where upload() writes the metadata-stripped copy of each submitted file.
_TOKEN_DIR = TEMP_DIR / "dropbox"
# SQLite database file holding the tokens and receipts tables (see _get_db()).
_DB_PATH = AUTH_DIR / "dropbox.db"
def _ensure_token_dir():
    """Create the drop box storage directory and restrict it to the owner.

    Missing parent directories are created as needed. The final directory
    always ends up with mode 0o700, whether it was just created or already
    existed.
    """
    # Ask for 0o700 at creation time so a freshly created directory is never
    # briefly accessible to other users. mkdir's mode is filtered by the
    # process umask and ignored when the directory already exists, so the
    # explicit chmod below is still required to guarantee the final mode.
    _TOKEN_DIR.mkdir(mode=0o700, parents=True, exist_ok=True)
    _TOKEN_DIR.chmod(0o700)
def _get_db():
    """Open the drop box SQLite database, creating the schema if needed.

    The parent directory of the database file is created when missing, and
    the ``tokens`` and ``receipts`` tables are created if they do not exist.

    Returns:
        An open ``sqlite3.Connection`` whose ``row_factory`` is
        ``sqlite3.Row``. The caller owns the connection and must close it.

    Raises:
        sqlite3.Error: If the schema cannot be created. The connection is
            closed before the error propagates.
    """
    import sqlite3

    _DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(_DB_PATH))
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("""CREATE TABLE IF NOT EXISTS tokens (
token TEXT PRIMARY KEY,
label TEXT NOT NULL,
created_at TEXT NOT NULL,
expires_at TEXT NOT NULL,
max_files INTEGER NOT NULL,
used INTEGER NOT NULL DEFAULT 0
)""")
        # NOTE: this function never issues PRAGMA foreign_keys=ON, and SQLite
        # leaves foreign-key enforcement off by default, so the FOREIGN KEY
        # clause below is declarative only.
        conn.execute("""CREATE TABLE IF NOT EXISTS receipts (
receipt_code TEXT PRIMARY KEY,
token TEXT NOT NULL,
filename TEXT,
sha256 TEXT,
received_at TEXT,
FOREIGN KEY (token) REFERENCES tokens(token)
)""")
        conn.commit()
    except Exception:
        # Do not hand back (or leak) a half-initialised connection.
        conn.close()
        raise
    return conn
def _get_token(token: str) -> dict | None:
    """Load one token row from SQLite together with its receipt codes.

    An expired token is deleted from the ``tokens`` table as a side effect;
    its rows in ``receipts`` are left untouched.

    Args:
        token: The token string to look up.

    Returns:
        The token row as a dict with an extra ``"receipts"`` key (list of
        receipt codes), or ``None`` if the token is missing or has expired.
    """
    conn = _get_db()
    # try/finally guarantees the connection is released on every path,
    # including when a query or the timestamp parse raises.
    try:
        row = conn.execute("SELECT * FROM tokens WHERE token = ?", (token,)).fetchone()
        if not row:
            return None
        if datetime.fromisoformat(row["expires_at"]) < datetime.now(UTC):
            # Lazily purge the expired token the first time it is looked up.
            conn.execute("DELETE FROM tokens WHERE token = ?", (token,))
            conn.commit()
            return None
        data = dict(row)
        receipts = conn.execute(
            "SELECT receipt_code FROM receipts WHERE token = ?", (token,)
        ).fetchall()
        data["receipts"] = [r["receipt_code"] for r in receipts]
        return data
    finally:
        conn.close()
def _get_all_tokens() -> dict[str, dict]:
    """Load every non-expired token, keyed by token string.

    Expired rows are deleted from the ``tokens`` table first; rows in
    ``receipts`` are not touched by that cleanup.

    Returns:
        A mapping of token string to the token row as a dict, each with an
        extra ``"receipts"`` key holding the list of its receipt codes.
    """
    conn = _get_db()
    # try/finally guarantees the connection is released even if a query fails.
    try:
        now = datetime.now(UTC).isoformat()
        # Expiry is compared as text: both sides are ISO-8601 strings
        # produced by datetime.isoformat() on UTC-aware datetimes.
        conn.execute("DELETE FROM tokens WHERE expires_at < ?", (now,))
        conn.commit()
        rows = conn.execute("SELECT * FROM tokens").fetchall()
        result = {}
        for row in rows:
            data = dict(row)
            receipts = conn.execute(
                "SELECT receipt_code FROM receipts WHERE token = ?", (row["token"],)
            ).fetchall()
            data["receipts"] = [r["receipt_code"] for r in receipts]
            result[row["token"]] = data
        return result
    finally:
        conn.close()
@bp.route("/admin", methods=["GET", "POST"])
@admin_required
def admin():
    """Admin panel for creating and revoking drop box tokens.

    POST with ``action=create`` reads ``label``, ``hours`` and ``max_files``
    from the form, stores a new random token, writes an audit entry and
    flashes the shareable upload URL. Invalid numeric input is rejected with
    a flash message instead of an unhandled exception.

    POST with ``action=revoke`` deletes the token named by the ``token`` form
    field together with its receipts, and writes an audit entry when a token
    row was actually removed.

    Returns:
        The rendered ``dropbox/admin.html`` template listing the current
        non-expired tokens.
    """
    if request.method == "POST":
        action = request.form.get("action")
        actor = request.environ.get("REMOTE_USER", "admin")
        if action == "create":
            label = request.form.get("label", "").strip() or "Unnamed source"
            try:
                hours = int(request.form.get("hours", 24))
                max_files = int(request.form.get("max_files", 10))
                # max_files is stored in an SQLite INTEGER column (signed
                # 64-bit); anything larger would fail at insert time.
                if hours < 1 or not 1 <= max_files <= 2**63 - 1:
                    raise ValueError("hours and max_files must be positive")
                # Sample the clock once so created_at and expires_at are
                # exactly `hours` apart.
                created_at = datetime.now(UTC)
                # Absurdly large lifetimes overflow timedelta/datetime.
                expires_at = created_at + timedelta(hours=hours)
            except (ValueError, OverflowError):
                flash("Lifetime (hours) and file limit must be positive whole numbers.", "error")
                return render_template("dropbox/admin.html", tokens=_get_all_tokens())
            token = secrets.token_urlsafe(32)
            conn = _get_db()
            try:
                conn.execute(
                    "INSERT INTO tokens (token, label, created_at, expires_at, max_files, used) "
                    "VALUES (?, ?, ?, ?, ?, 0)",
                    (token, label, created_at.isoformat(), expires_at.isoformat(), max_files),
                )
                conn.commit()
            finally:
                conn.close()
            # Only a short prefix of the token goes into the audit log.
            log_action(
                actor=actor,
                action="dropbox.token_created",
                target=token[:8],
                outcome="success",
                source="web",
            )
            upload_url = url_for("dropbox.upload", token=token, _external=True)
            flash(f"Drop box created. Share this URL with your source: {upload_url}", "success")
        elif action == "revoke":
            tok = request.form.get("token", "")
            conn = _get_db()
            try:
                # Receipts first, then the token they reference.
                conn.execute("DELETE FROM receipts WHERE token = ?", (tok,))
                removed = conn.execute("DELETE FROM tokens WHERE token = ?", (tok,)).rowcount
                conn.commit()
            finally:
                conn.close()
            if removed:
                # Mirror the audit entry written on creation.
                log_action(
                    actor=actor,
                    action="dropbox.token_revoked",
                    target=tok[:8],
                    outcome="success",
                    source="web",
                )
            flash("Token revoked.", "success")
    return render_template("dropbox/admin.html", tokens=_get_all_tokens())
def _validate_token(token: str) -> dict | None:
    """Return the token's data if it can still accept uploads, else ``None``.

    A token is usable when ``_get_token`` finds it and its ``used`` counter
    is still below ``max_files``.
    """
    record = _get_token(token)
    if record is not None and record["used"] < record["max_files"]:
        return record
    return None
def _safe_upload_name(filename: str | None) -> str:
    """Reduce a client-supplied filename to one conservative path component."""
    import re

    # Clients may send a full path with either separator; keep the last part.
    leaf = (filename or "").replace("\\", "/").rsplit("/", 1)[-1]
    # Allow only a small safe alphabet so the name cannot escape the storage
    # directory or carry control characters; keep the tail so the extension
    # survives truncation.
    leaf = re.sub(r"[^A-Za-z0-9._-]", "_", leaf).lstrip(".")[-100:]
    return leaf or "upload"


def _attest_submission(raw_data: bytes, extraction, label: str, sha256: str) -> None:
    """Best-effort attestation of the original bytes; never raises."""
    import logging
    from dataclasses import asdict

    try:
        from soosef.verisoo.attestation import create_attestation

        from blueprints.attest import _get_private_key, _get_storage

        attest_metadata = {
            "source": "dropbox",
            "label": label,
            "stripped_sha256": extraction.stripped_sha256,
        }
        # Copy the evidentiary fields from the extraction into the
        # attestation metadata in a serialisable form.
        for key, value in extraction.evidentiary.items():
            if hasattr(value, "isoformat"):
                attest_metadata[key] = value.isoformat()
            elif hasattr(value, "__dataclass_fields__"):
                attest_metadata[key] = asdict(value)
            elif isinstance(value, dict):
                attest_metadata[key] = value
            else:
                attest_metadata[key] = str(value)
        private_key = _get_private_key()
        if private_key:
            attestation = create_attestation(raw_data, private_key, metadata=attest_metadata)
            _get_storage().append_record(attestation.record)
    except Exception:
        # Attestation must not fail the upload, but a silent failure would
        # leave received files unattested with no trace. Log only a hash
        # prefix, never the client-supplied filename.
        logging.getLogger(__name__).exception(
            "Drop box auto-attestation failed for submission %s", sha256[:16]
        )


def _upload_form_html(remaining: int) -> str:
    """Return the unbranded upload page with client-side SHA-256 hashing."""
    # File names are inserted with textContent / text nodes, never innerHTML,
    # so a crafted file name cannot inject markup into the page.
    return f"""<!DOCTYPE html>
<html><head><title>Secure Upload</title>
<style>
body{{font-family:sans-serif;max-width:600px;margin:40px auto;padding:20px;color:#333}}
input[type=file]{{margin:10px 0}}
button{{padding:10px 20px;font-size:16px}}
#hashes{{background:#f5f5f5;padding:10px;border-radius:4px;font-family:monospace;
font-size:12px;margin:10px 0;display:none;white-space:pre-wrap}}
.hash-label{{color:#666;font-size:11px}}
</style></head>
<body>
<h2>Secure File Upload</h2>
<p>Select files to upload. You may upload up to {remaining} file(s).</p>
<p>Your files will be fingerprinted in your browser before upload. Save the
fingerprints — they prove exactly what you submitted.</p>
<form method="POST" enctype="multipart/form-data" id="uploadForm">
<input type="file" name="files" id="fileInput" multiple
accept="image/*,.pdf,.doc,.docx,.txt"><br>
<div id="hashes"></div>
<button type="submit" id="submitBtn" disabled>Computing fingerprints...</button>
</form>
<p style="color:#666;font-size:12px">This link will expire automatically. Do not bookmark it.</p>
<script>
// Client-side SHA-256 via SubtleCrypto — runs in browser, no server round-trip
async function hashFile(file) {{
const buffer = await file.arrayBuffer();
const hash = await crypto.subtle.digest('SHA-256', buffer);
return Array.from(new Uint8Array(hash)).map(b => b.toString(16).padStart(2,'0')).join('');
}}
document.getElementById('fileInput').addEventListener('change', async function() {{
const files = this.files;
const hashDiv = document.getElementById('hashes');
const btn = document.getElementById('submitBtn');
if (!files.length) {{ hashDiv.style.display='none'; btn.disabled=true; return; }}
btn.disabled = true;
btn.textContent = 'Computing fingerprints...';
hashDiv.style.display = 'block';
hashDiv.innerHTML = '';
for (const file of files) {{
const hash = await hashFile(file);
const label = document.createElement('span');
label.className = 'hash-label';
label.textContent = file.name + ':';
hashDiv.append(label, '\\n' + hash + '\\n\\n');
}}
const note = document.createElement('span');
note.className = 'hash-label';
note.textContent = 'Save these fingerprints before uploading.';
hashDiv.append(note);
btn.disabled = false;
btn.textContent = 'Upload';
}});
</script>
</body></html>"""


@bp.route("/upload/<token>", methods=["GET", "POST"])
def upload(token):
    """Source-facing upload page. No authentication required.

    GET returns a minimal HTML form that fingerprints the selected files in
    the browser. POST processes each non-empty file part until the token's
    ``max_files`` limit is reached: the metadata-stripped copy is written to
    the drop box directory, the original bytes are attested on a best-effort
    basis, and a receipt is persisted in SQLite.

    Args:
        token: Upload token taken from the URL.

    Returns:
        404 plain text for an invalid, expired or exhausted token; 400 plain
        text when no file content was submitted; otherwise the HTML form
        (GET) or a plain-text receipt listing (POST).
    """
    token_data = _validate_token(token)
    if token_data is None:
        return Response(
            "This upload link has expired or is invalid.",
            status=404,
            content_type="text/plain",
        )

    if request.method != "POST":
        # GET — minimal page, no SooSeF branding (source safety).
        return _upload_form_html(token_data["max_files"] - token_data["used"])

    files = request.files.getlist("files")
    if not files:
        return Response("No files provided.", status=400, content_type="text/plain")

    import hmac

    from soosef.metadata import extract_strip_pipeline

    _ensure_token_dir()
    receipts = []
    for f in files:
        if token_data["used"] >= token_data["max_files"]:
            break
        raw_data = f.read()
        if not raw_data:
            # Browsers send an empty part when no file was selected.
            continue

        # Extract-then-strip pipeline:
        # 1. Extract EXIF into attestation metadata (evidentiary fields)
        # 2. Attest the ORIGINAL bytes (hash matches what source submitted)
        # 3. Strip metadata from the stored copy (protect source device info)
        extraction, stripped_data = extract_strip_pipeline(raw_data)
        # SHA-256 of what the source actually submitted
        sha256 = extraction.original_sha256

        # The client controls f.filename, so only a sanitised single path
        # component is ever used to build the on-disk location.
        dest = _TOKEN_DIR / f"{sha256[:16]}_{_safe_upload_name(f.filename)}"
        dest.write_bytes(stripped_data)

        _attest_submission(raw_data, extraction, token_data["label"], sha256)

        # Receipt code derived from file hash via HMAC — the source can
        # independently verify their receipt corresponds to specific content
        receipt_code = hmac.new(token.encode(), sha256.encode(), hashlib.sha256).hexdigest()[:16]
        # One timestamp per file so the in-memory receipt and the stored
        # row agree.
        received_at = datetime.now(UTC).isoformat()
        receipts.append({
            "filename": f.filename,
            "sha256": sha256,
            "receipt_code": receipt_code,
            "received_at": received_at,
        })

        # Persist receipt and increment used count in SQLite
        conn = _get_db()
        try:
            conn.execute(
                "INSERT OR IGNORE INTO receipts (receipt_code, token, filename, sha256, received_at) "
                "VALUES (?, ?, ?, ?, ?)",
                (receipt_code, token, f.filename, sha256, received_at),
            )
            conn.execute("UPDATE tokens SET used = used + 1 WHERE token = ?", (token,))
            conn.commit()
        finally:
            conn.close()
        token_data["used"] += 1

    if not receipts:
        # Every submitted part was empty: do not claim anything was received.
        return Response("No files provided.", status=400, content_type="text/plain")

    remaining = token_data["max_files"] - token_data["used"]
    # Return receipt codes as plain text (minimal fingerprint)
    parts = ["FILES RECEIVED\n", "=" * 40, "\n\n"]
    for r in receipts:
        parts.append(f"File: {r['filename']}\n")
        parts.append(f"Receipt: {r['receipt_code']}\n")
        parts.append(f"SHA-256: {r['sha256']}\n\n")
    parts.append(f"Remaining uploads on this link: {remaining}\n")
    parts.append("\nSave your receipt codes. They confirm your submission was received.\n")
    return Response("".join(parts), content_type="text/plain")
@bp.route("/verify-receipt", methods=["POST"])
def verify_receipt():
    """Confirm to a source that a receipt code matches a stored submission.

    Reads the ``code`` form field and looks it up in the ``receipts`` table.

    Returns:
        400 plain text when no code is supplied, 404 plain text when the code
        is unknown, otherwise a plain-text summary with the stored filename,
        SHA-256 and receipt timestamp.
    """
    code = request.form.get("code", "").strip()
    if code == "":
        return Response("No receipt code provided.", status=400, content_type="text/plain")

    db = _get_db()
    cursor = db.execute(
        "SELECT filename, sha256, received_at FROM receipts WHERE receipt_code = ?", (code,)
    )
    match = cursor.fetchone()
    db.close()

    # Unknown code: answer first, leaving the success path unindented below.
    if not match:
        return Response(
            f"Receipt {code} was not found. It may have expired.",
            status=404,
            content_type="text/plain",
        )

    summary = (
        f"Receipt {code} is VALID.\n"
        f"File: {match['filename']}\n"
        f"SHA-256: {match['sha256']}\n"
        f"Received: {match['received_at']}\n"
    )
    return Response(summary, content_type="text/plain")