fieldwitness/frontends/web/blueprints/attest.py
Aaron D. Lee 5b0d90eeaf
Some checks failed
CI / lint (push) Failing after 12s
CI / typecheck (push) Failing after 12s
Fix all power-user review issues (FR-01 through FR-12)
FR-01: Fix data directory default from ~/.fieldwitness to ~/.fwmetadata
FR-02/05/07: Accept all file types for attestation (not just images)
  - Web UI, CLI, and batch now accept PDFs, CSVs, audio, video, etc.
  - Perceptual hashing for images, SHA-256-only for everything else
FR-03: Implement C2PA import path + CLI commands (export/verify/import/show)
FR-04: Fix GPS downsampling bias (math.floor → round)
FR-06: Add HTML/PDF evidence summaries for lawyers
  - Always generates summary.html, optional summary.pdf via xhtml2pdf
FR-08: Fix CLI help text ("FieldWitness -- FieldWitness" artifact)
FR-09: Centralize stray paths (trusted_keys, carrier_history, last_backup)
FR-10: Add 67 C2PA bridge tests (vendor assertions, cert, GPS, export)
FR-12: Add Tor onion service support for source drop box
  - fieldwitness serve --tor flag, persistent/transient modes
  - Killswitch covers hidden service keys

Also: bonus fix for attest/api.py hardcoded path bypassing paths.py

224 tests passing (67 new).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 20:10:37 -04:00

706 lines
27 KiB
Python

"""
Attestation blueprint — attest and verify files via Attest.
Wraps attest's attestation and verification libraries to provide:
- File attestation: upload → hash → sign → store in append-only log
- File verification: upload → hash → search log → display matches
- Verification receipt: same as verify but returns a downloadable JSON file
Supports any file type. Perceptual hashing (phash, dhash) is available for
image files only. Non-image files are attested by SHA-256 hash.
"""
from __future__ import annotations
import hashlib
import json
import socket
from datetime import UTC, datetime
from auth import login_required
from flask import Blueprint, Response, flash, redirect, render_template, request, url_for
# Blueprint exposing the attest/verify routes; registered by the web app factory.
bp = Blueprint("attest", __name__)
def _get_storage():
    """Build an attest ``LocalStorage`` rooted at fieldwitness's attestation dir."""
    from fieldwitness.attest.storage import LocalStorage
    from fieldwitness.paths import ATTESTATIONS_DIR

    storage = LocalStorage(base_path=ATTESTATIONS_DIR)
    return storage
def _get_private_key():
    """Return the local Ed25519 private key, or None when no identity exists yet."""
    from fieldwitness.attest.crypto import load_private_key
    from fieldwitness.paths import IDENTITY_PRIVATE_KEY

    if IDENTITY_PRIVATE_KEY.exists():
        return load_private_key(IDENTITY_PRIVATE_KEY)
    return None
def _wrap_in_chain(attest_record, private_key, metadata: dict | None = None):
    """Append a hash-chain entry wrapping an Attest attestation record.

    Returns the new chain record, or None when chain wrapping is disabled
    in the configuration.

    NOTE(review): ``private_key`` is never read here — the chain signing key
    is re-loaded from IDENTITY_PRIVATE_KEY below. The parameter is kept for
    caller compatibility.
    """
    import hashlib
    from cryptography.hazmat.primitives.serialization import load_pem_private_key
    from fieldwitness.config import FieldWitnessConfig
    from fieldwitness.federation.chain import ChainStore
    from fieldwitness.paths import CHAIN_DIR, IDENTITY_PRIVATE_KEY

    config = FieldWitnessConfig.load()
    if not (config.chain_enabled and config.chain_auto_wrap):
        return None
    # The chain's content hash is a digest of the serialized attest record.
    if hasattr(attest_record, "to_bytes"):
        record_bytes = attest_record.to_bytes()
    else:
        record_bytes = str(attest_record).encode()
    content_hash = hashlib.sha256(record_bytes).digest()
    # Chain signing needs a `cryptography` key object, not attest's wrapper.
    chain_private_key = load_pem_private_key(IDENTITY_PRIVATE_KEY.read_bytes(), password=None)
    # Carry over only the human-readable pieces of the attestation metadata.
    chain_metadata = {}
    if metadata:
        for src_key, dst_key in (("caption", "caption"), ("location_name", "location")):
            if src_key in metadata:
                chain_metadata[dst_key] = metadata[src_key]
    return ChainStore(CHAIN_DIR).append(
        content_hash=content_hash,
        content_type="attest/attestation-v1",
        private_key=chain_private_key,
        metadata=chain_metadata,
    )
_ALLOWED_EXTENSIONS: frozenset[str] = frozenset({
# Images
"png", "jpg", "jpeg", "bmp", "gif", "webp", "tiff", "tif", "heic", "heif", "raw",
# Documents
"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp",
"txt", "rtf", "csv", "tsv", "json", "xml", "html", "htm",
# Audio
"mp3", "wav", "m4a", "aac", "ogg", "flac", "opus", "wma",
# Video
"mp4", "mov", "avi", "mkv", "webm", "m4v", "wmv",
# Archives / data
"zip", "tar", "gz", "bz2", "xz", "7z",
# Sensor / scientific data
"gpx", "kml", "geojson", "npy", "parquet", "bin", "dat",
})
_IMAGE_EXTENSIONS: frozenset[str] = frozenset({
"png", "jpg", "jpeg", "bmp", "gif", "webp", "tiff", "tif", "heic", "heif",
})
def _allowed_file(filename: str) -> bool:
"""Return True if the filename has an extension on the allowlist."""
if not filename or "." not in filename:
return False
return filename.rsplit(".", 1)[1].lower() in _ALLOWED_EXTENSIONS
def _is_image_file(filename: str) -> bool:
"""Return True if the filename is a known image type."""
if not filename or "." not in filename:
return False
return filename.rsplit(".", 1)[1].lower() in _IMAGE_EXTENSIONS
@bp.route("/attest", methods=["GET", "POST"])
@login_required
def attest():
"""Create a provenance attestation for a file."""
# Check identity exists
private_key = _get_private_key()
has_identity = private_key is not None
if request.method == "POST":
if not has_identity:
flash(
"No identity configured. Run 'fieldwitness init' or generate one from the Keys page.",
"error",
)
return redirect(url_for("attest.attest"))
evidence_file = request.files.get("image")
if not evidence_file or not evidence_file.filename:
flash("Please select a file to attest.", "error")
return redirect(url_for("attest.attest"))
if not _allowed_file(evidence_file.filename):
flash(
"Unsupported file type. Supported types include images, documents, "
"audio, video, CSV, and sensor data files.",
"error",
)
return redirect(url_for("attest.attest"))
try:
file_data = evidence_file.read()
is_image = _is_image_file(evidence_file.filename)
# Build optional metadata
metadata = {}
caption = request.form.get("caption", "").strip()
location_name = request.form.get("location_name", "").strip()
investigation = request.form.get("investigation", "").strip()
parent_record_id = request.form.get("parent_record_id", "").strip()
derivation_type = request.form.get("derivation_type", "").strip()
if caption:
metadata["caption"] = caption
if location_name:
metadata["location_name"] = location_name
if investigation:
metadata["investigation"] = investigation
if parent_record_id:
metadata["derived_from"] = parent_record_id
metadata["derivation_type"] = derivation_type or "unspecified"
auto_exif = request.form.get("auto_exif", "on") == "on"
strip_device = request.form.get("strip_device", "on") == "on"
# Extract-then-classify: get evidentiary metadata before attestation.
# Only applicable to image files — silently skip for other types.
if is_image and auto_exif and strip_device:
try:
from fieldwitness.metadata import extract_and_classify
extraction = extract_and_classify(file_data)
# Merge evidentiary fields (GPS, timestamp) but exclude
# dangerous device fields (serial, firmware version)
for key, value in extraction.evidentiary.items():
if key not in metadata: # User metadata takes precedence
if hasattr(value, "isoformat"):
metadata[f"exif_{key}"] = value.isoformat()
elif isinstance(value, dict):
metadata[f"exif_{key}"] = value
else:
metadata[f"exif_{key}"] = str(value)
except Exception:
pass # EXIF extraction is best-effort
# Create the attestation. create_attestation() calls hash_image()
# internally; for non-image files we pre-compute hashes via
# hash_file() and use create_attestation_from_hashes() instead.
from fieldwitness.attest.attestation import create_attestation
attestation = create_attestation(
image_data=file_data,
private_key=private_key,
metadata=metadata if metadata else None,
auto_exif=is_image and auto_exif and not strip_device,
)
# Store in the append-only log
storage = _get_storage()
index = storage.append_record(attestation.record)
# Wrap in hash chain if enabled
chain_record = None
try:
chain_record = _wrap_in_chain(attestation.record, private_key, metadata)
except Exception as e:
import logging
logging.getLogger(__name__).warning("Chain wrapping failed: %s", e)
flash(
"Attestation saved, but chain wrapping failed. Check chain configuration.",
"warning",
)
# Save our own identity so we can look it up during verification
from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat
from fieldwitness.attest.models import Identity
pub_key = private_key.public_key()
pub_bytes = pub_key.public_bytes(Encoding.Raw, PublicFormat.Raw)
identity = Identity(
public_key=pub_bytes,
fingerprint=attestation.record.attestor_fingerprint,
metadata={"name": "FieldWitness Local Identity"},
)
try:
storage.save_identity(identity)
except Exception:
pass # May already exist
record = attestation.record
hashes = record.image_hashes
return render_template(
"attest/result.html",
success=True,
record_id=record.record_id,
short_id=record.short_id,
attestor=record.attestor_fingerprint,
timestamp=record.timestamp.strftime("%Y-%m-%d %H:%M:%S UTC"),
sha256=hashes.sha256,
phash=hashes.phash,
dhash=hashes.dhash,
caption=metadata.get("caption", ""),
location_name=metadata.get("location_name", ""),
exif_metadata=record.metadata,
index=index,
filename=evidence_file.filename,
is_image=is_image,
chain_index=chain_record.chain_index if chain_record else None,
)
except Exception as e:
flash(f"Attestation failed: {e}", "error")
return redirect(url_for("attest.attest"))
return render_template("attest/attest.html", has_identity=has_identity)
@bp.route("/attest/batch", methods=["POST"])
@login_required
def attest_batch():
"""Batch attestation — accepts multiple files of any supported type.
Returns JSON with results for each file (success/skip/error).
Skips files already attested (by SHA-256 match).
"""
import hashlib
private_key = _get_private_key()
if private_key is None:
return {"error": "No identity key. Run fieldwitness init first."}, 400
files = request.files.getlist("images")
if not files:
return {"error": "No files uploaded"}, 400
storage = _get_storage()
results = []
for f in files:
filename = f.filename or "unknown"
try:
if not _allowed_file(filename):
results.append({"file": filename, "status": "skipped", "reason": "unsupported file type"})
continue
file_data = f.read()
sha256 = hashlib.sha256(file_data).hexdigest()
# Skip already-attested files
existing = storage.get_records_by_image_sha256(sha256)
if existing:
results.append({"file": filename, "status": "skipped", "reason": "already attested"})
continue
from fieldwitness.attest.attestation import create_attestation
attestation = create_attestation(file_data, private_key)
index = storage.append_record(attestation.record)
# Wrap in chain if enabled
chain_index = None
config = request.app.config.get("FIELDWITNESS_CONFIG") if hasattr(request, "app") else None
if config and getattr(config, "chain_enabled", False) and getattr(config, "chain_auto_wrap", False):
try:
chain_record = _wrap_in_chain(attestation.record, private_key, {})
chain_index = chain_record.chain_index
except Exception:
pass
results.append({
"file": filename,
"status": "attested",
"record_id": attestation.record.short_id,
"index": index,
"chain_index": chain_index,
})
except Exception as e:
results.append({"file": filename, "status": "error", "error": str(e)})
attested = sum(1 for r in results if r["status"] == "attested")
skipped = sum(1 for r in results if r["status"] == "skipped")
errors = sum(1 for r in results if r["status"] == "error")
return {
"total": len(results),
"attested": attested,
"skipped": skipped,
"errors": errors,
"results": results,
}
@bp.route("/verify/batch", methods=["POST"])
@login_required
def verify_batch():
"""Batch verification — accepts multiple files of any supported type.
Returns JSON with per-file verification results. Uses SHA-256
fast path before falling back to perceptual scan (images only).
"""
files = request.files.getlist("images")
if not files:
return {"error": "No files uploaded"}, 400
results = []
for f in files:
filename = f.filename or "unknown"
try:
file_data = f.read()
result = _verify_file(file_data)
if result["matches"]:
best = result["matches"][0]
results.append({
"file": filename,
"status": "verified",
"match_type": best["match_type"],
"record_id": best["record"].short_id if hasattr(best["record"], "short_id") else "unknown",
"matches": len(result["matches"]),
})
else:
results.append({"file": filename, "status": "unverified", "matches": 0})
except Exception as e:
results.append({"file": filename, "status": "error", "error": str(e)})
verified = sum(1 for r in results if r["status"] == "verified")
unverified = sum(1 for r in results if r["status"] == "unverified")
errors = sum(1 for r in results if r["status"] == "error")
# Count by match type
exact = sum(1 for r in results if r.get("match_type") == "exact")
perceptual = verified - exact
return {
"total": len(results),
"verified": verified,
"verified_exact": exact,
"verified_perceptual": perceptual,
"unverified": unverified,
"errors": errors,
"results": results,
}
def _verify_file(file_data: bytes) -> dict:
    """Run the verification pipeline against the local attestation log.

    Any file type is accepted: images get SHA-256 plus perceptual matching,
    everything else is matched by SHA-256 only.

    Returns a dict with:
        query_hashes — ImageHashes computed from *file_data*
        matches      — list of {record, match_type, distances, attestor_name}
        record_count — total records searched
    """
    from fieldwitness.attest.hashing import compute_all_distances, hash_file, is_same_image

    query_hashes = hash_file(file_data)
    storage = _get_storage()
    stats = storage.get_stats()
    if stats.record_count == 0:
        return {"query_hashes": query_hashes, "matches": [], "record_count": 0}
    # Fast path: exact byte-for-byte matches via the SHA-256 index.
    matches = [
        {"record": rec, "match_type": "exact", "distances": {}}
        for rec in storage.get_records_by_image_sha256(query_hashes.sha256)
    ]
    # Slow path: perceptual lookup through the LMDB index (O(index), not a
    # full O(n) scan). Non-image files have no phash, so this is skipped.
    if not matches and query_hashes.phash:
        for record, _distance in storage.find_similar_images(query_hashes.phash, max_distance=10):
            distances = compute_all_distances(query_hashes, record.image_hashes)
            same, match_type = is_same_image(
                query_hashes, record.image_hashes, perceptual_threshold=10
            )
            if same:
                matches.append({
                    "record": record,
                    "match_type": match_type or "perceptual",
                    "distances": distances,
                })
    # Attach a human-readable attestor name to each match when known.
    for match in matches:
        try:
            identity = storage.load_identity(match["record"].attestor_fingerprint)
            match["attestor_name"] = (
                identity.metadata.get("name", "Unknown") if identity else "Unknown"
            )
        except Exception:
            match["attestor_name"] = "Unknown"
    return {"query_hashes": query_hashes, "matches": matches, "record_count": stats.record_count}
@bp.route("/verify", methods=["GET", "POST"])
def verify():
"""Verify an image against attestation records.
Intentionally unauthenticated: third parties (editors, fact-checkers, courts)
must be able to verify provenance without having an account on this instance.
The log read here is read-only and reveals no key material.
"""
if request.method == "POST":
evidence_file = request.files.get("image")
if not evidence_file or not evidence_file.filename:
flash("Please select a file to verify.", "error")
return redirect(url_for("attest.verify"))
if not _allowed_file(evidence_file.filename):
flash(
"Unsupported file type. Upload any image, document, audio, video, or data file.",
"error",
)
return redirect(url_for("attest.verify"))
try:
file_data = evidence_file.read()
is_image = _is_image_file(evidence_file.filename)
result = _verify_file(file_data)
query_hashes = result["query_hashes"]
matches = result["matches"]
if result["record_count"] == 0:
return render_template(
"attest/verify_result.html",
found=False,
message="No attestations in the local log yet.",
query_hashes=query_hashes,
filename=evidence_file.filename,
is_image=is_image,
matches=[],
)
return render_template(
"attest/verify_result.html",
found=len(matches) > 0,
message=(
f"Found {len(matches)} matching attestation(s)."
if matches
else "No matching attestations found."
),
query_hashes=query_hashes,
filename=evidence_file.filename,
is_image=is_image,
matches=matches,
)
except Exception as e:
flash(f"Verification failed: {e}", "error")
return redirect(url_for("attest.verify"))
return render_template("attest/verify.html")
@bp.route("/verify/receipt", methods=["POST"])
def verify_receipt():
"""Return a downloadable JSON verification receipt for court or legal use.
Accepts the same file upload as /verify. Returns a JSON file attachment
containing file hashes, all matching attestation records with full metadata,
the verification timestamp, and the verifier hostname.
Intentionally unauthenticated — same access policy as /verify.
"""
evidence_file = request.files.get("image")
if not evidence_file or not evidence_file.filename:
return Response(
json.dumps({"error": "No file provided"}),
status=400,
mimetype="application/json",
)
if not _allowed_file(evidence_file.filename):
return Response(
json.dumps({"error": "Unsupported file type"}),
status=400,
mimetype="application/json",
)
try:
result = _verify_file(evidence_file.read())
except Exception as e:
return Response(
json.dumps({"error": f"Verification failed: {e}"}),
status=500,
mimetype="application/json",
)
query_hashes = result["query_hashes"]
matches = result["matches"]
verification_ts = datetime.now(UTC).isoformat()
try:
verifier_instance = socket.gethostname()
except Exception:
verifier_instance = "unknown"
matching_records = []
for match in matches:
record = match["record"]
rec_entry: dict = {
"match_type": match["match_type"],
"attestor_fingerprint": record.attestor_fingerprint,
"attestor_name": match.get("attestor_name", "Unknown"),
"attested_at": record.timestamp.isoformat() if record.timestamp else None,
"record_id": str(record.record_id),
"short_id": str(record.short_id) if hasattr(record, "short_id") else None,
}
# Include perceptual hash distances when present (perceptual matches only)
if match.get("distances"):
rec_entry["hash_distances"] = {k: int(v) for k, v in match["distances"].items()}
# Optional fields
if getattr(record, "captured_at", None):
rec_entry["captured_at"] = record.captured_at.isoformat()
if getattr(record, "location", None):
rec_entry["location"] = record.location
if getattr(record, "metadata", None):
# Exclude any key material — only human-readable metadata
safe_meta = {
k: v
for k, v in record.metadata.items()
if k in ("caption", "location_name", "device", "software")
}
if safe_meta:
rec_entry["metadata"] = safe_meta
# Chain position proof — look up this attestation in the hash chain
try:
from fieldwitness.config import FieldWitnessConfig
from fieldwitness.federation.chain import ChainStore
from fieldwitness.federation.serialization import compute_record_hash
from fieldwitness.paths import CHAIN_DIR
chain_config = FieldWitnessConfig.load()
if chain_config.chain_enabled:
chain_store = ChainStore(CHAIN_DIR)
# Search chain for a record whose content_hash matches this attestation
content_hash_hex = getattr(record, "image_hashes", None)
if content_hash_hex and hasattr(content_hash_hex, "sha256"):
target_sha = content_hash_hex.sha256
for chain_rec in chain_store:
if chain_rec.content_hash.hex() == target_sha or chain_rec.metadata.get("attestor") == record.attestor_fingerprint:
rec_entry["chain_proof"] = {
"chain_id": chain_store.state().chain_id.hex() if chain_store.state() else None,
"chain_index": chain_rec.chain_index,
"prev_hash": chain_rec.prev_hash.hex(),
"record_hash": compute_record_hash(chain_rec).hex(),
"content_type": chain_rec.content_type,
"claimed_ts": chain_rec.claimed_ts,
}
break
except Exception:
pass # Chain proof is optional — don't fail the receipt
matching_records.append(rec_entry)
receipt = {
"schema_version": "3",
"verification_timestamp": verification_ts,
"verifier_instance": verifier_instance,
"queried_filename": evidence_file.filename,
"file_hash": {
"sha256": query_hashes.sha256,
"phash": query_hashes.phash or None,
"dhash": getattr(query_hashes, "dhash", None) or None,
},
"records_searched": result["record_count"],
"matches_found": len(matching_records),
"matching_records": matching_records,
}
# Sign the receipt with the instance's Ed25519 identity key
private_key = _get_private_key()
if private_key is not None:
from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat
pub_bytes = private_key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw)
receipt["verifier_fingerprint"] = hashlib.sha256(pub_bytes).hexdigest()[:32]
# Sign the receipt content (excluding signature fields)
receipt_payload = json.dumps(receipt, sort_keys=True, ensure_ascii=False).encode()
sig = private_key.sign(receipt_payload)
receipt["signature"] = sig.hex()
receipt["verifier_pubkey"] = pub_bytes.hex()
receipt_json = json.dumps(receipt, indent=2, ensure_ascii=False)
safe_filename = (
evidence_file.filename.rsplit(".", 1)[0]
if "." in evidence_file.filename
else evidence_file.filename
)
download_name = f"receipt_{safe_filename}_{datetime.now(UTC).strftime('%Y%m%dT%H%M%SZ')}.json"
return Response(
receipt_json,
status=200,
mimetype="application/json",
headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
)
@bp.route("/attest/log")
@login_required
def log():
"""List recent attestations with optional investigation filter."""
investigation_filter = request.args.get("investigation", "").strip()
try:
storage = _get_storage()
stats = storage.get_stats()
records = []
# Scan records, newest first, collect up to 50 matching
for i in range(stats.record_count - 1, -1, -1):
if len(records) >= 50:
break
try:
record = storage.get_record(i)
if investigation_filter:
rec_inv = getattr(record, "metadata", {}) or {}
if isinstance(rec_inv, dict) and rec_inv.get("investigation") != investigation_filter:
continue
records.append({"index": i, "record": record})
except Exception:
continue
# Collect known investigation names for filter dropdown
investigations = set()
for i in range(stats.record_count - 1, max(0, stats.record_count - 500) - 1, -1):
try:
rec = storage.get_record(i)
meta = getattr(rec, "metadata", {}) or {}
if isinstance(meta, dict) and meta.get("investigation"):
investigations.add(meta["investigation"])
except Exception:
continue
return render_template(
"attest/log.html",
records=records,
total=stats.record_count,
investigation_filter=investigation_filter,
investigations=sorted(investigations),
)
except Exception as e:
flash(f"Could not read attestation log: {e}", "error")
return render_template("attest/log.html", records=[], total=0, investigation_filter="", investigations=[])