Fix all power-user review issues (FR-01 through FR-12)

FR-01: Fix data directory default from ~/.fieldwitness to ~/.fwmetadata FR-02/05/07: Accept all file types for attestation (not just images) - Web UI, CLI, and batch now accept PDFs, CSVs, audio, video, etc. - Perceptual hashing for images, SHA-256-only for everything else FR-03: Implement C2PA import path + CLI commands (export/verify/import/show) FR-04: Fix GPS downsampling bias (math.floor → round) FR-06: Add HTML/PDF evidence summaries for lawyers - Always generates summary.html, optional summary.pdf via xhtml2pdf FR-08: Fix CLI help text ("FieldWitness -- FieldWitness" artifact) FR-09: Centralize stray paths (trusted_keys, carrier_history, last_backup) FR-10: Add 67 C2PA bridge tests (vendor assertions, cert, GPS, export) FR-12: Add Tor onion service support for source drop box - fieldwitness serve --tor flag, persistent/transient modes - Killswitch covers hidden service keys Also: bonus fix for attest/api.py hardcoded path bypassing paths.py 224 tests passing (67 new). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 20:10:37 -04:00
parent 3a9cb17a5a
commit 5b0d90eeaf
27 changed files with 3140 additions and 186 deletions
--- a/frontends/web/blueprints/attest.py
+++ b/frontends/web/blueprints/attest.py
@@ -1,10 +1,13 @@
 """
-Attestation blueprint — attest and verify images via Attest.
+Attestation blueprint — attest and verify files via Attest.

 Wraps attest's attestation and verification libraries to provide:
- Image attestation: upload → hash → sign → store in append-only log
- Image verification: upload → hash → search log → display matches
+- File attestation: upload → hash → sign → store in append-only log
+- File verification: upload → hash → search log → display matches
 - Verification receipt: same as verify but returns a downloadable JSON file
+
+Supports any file type. Perceptual hashing (phash, dhash) is available for
+image files only. Non-image files are attested by SHA-256 hash.
 """

 from __future__ import annotations
@@ -85,25 +88,45 @@ def _wrap_in_chain(attest_record, private_key, metadata: dict | None = None):
    )


-def _allowed_image(filename: str) -> bool:
+_ALLOWED_EXTENSIONS: frozenset[str] = frozenset({
+    # Images
+    "png", "jpg", "jpeg", "bmp", "gif", "webp", "tiff", "tif", "heic", "heif", "raw",
+    # Documents
+    "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp",
+    "txt", "rtf", "csv", "tsv", "json", "xml", "html", "htm",
+    # Audio
+    "mp3", "wav", "m4a", "aac", "ogg", "flac", "opus", "wma",
+    # Video
+    "mp4", "mov", "avi", "mkv", "webm", "m4v", "wmv",
+    # Archives / data
+    "zip", "tar", "gz", "bz2", "xz", "7z",
+    # Sensor / scientific data
+    "gpx", "kml", "geojson", "npy", "parquet", "bin", "dat",
+})
+
+_IMAGE_EXTENSIONS: frozenset[str] = frozenset({
+    "png", "jpg", "jpeg", "bmp", "gif", "webp", "tiff", "tif", "heic", "heif",
+})
+
+
+def _allowed_file(filename: str) -> bool:
+    """Return True if the filename has an extension on the allowlist."""
    if not filename or "." not in filename:
        return False
-    return filename.rsplit(".", 1)[1].lower() in {
-        "png",
-        "jpg",
-        "jpeg",
-        "bmp",
-        "gif",
-        "webp",
-        "tiff",
-        "tif",
-    }
+    return filename.rsplit(".", 1)[1].lower() in _ALLOWED_EXTENSIONS
+
+
+def _is_image_file(filename: str) -> bool:
+    """Return True if the filename is a known image type."""
+    if not filename or "." not in filename:
+        return False
+    return filename.rsplit(".", 1)[1].lower() in _IMAGE_EXTENSIONS


@bp.route("/attest", methods=["GET", "POST"])
@login_required
 def attest():
-    """Create a provenance attestation for an image."""
+    """Create a provenance attestation for a file."""
    # Check identity exists
    private_key = _get_private_key()
    has_identity = private_key is not None
@@ -116,17 +139,22 @@ def attest():
            )
            return redirect(url_for("attest.attest"))

-        image_file = request.files.get("image")
-        if not image_file or not image_file.filename:
-            flash("Please select an image to attest.", "error")
+        evidence_file = request.files.get("image")
+        if not evidence_file or not evidence_file.filename:
+            flash("Please select a file to attest.", "error")
            return redirect(url_for("attest.attest"))

-        if not _allowed_image(image_file.filename):
-            flash("Unsupported image format. Use PNG, JPG, WebP, TIFF, or BMP.", "error")
+        if not _allowed_file(evidence_file.filename):
+            flash(
+                "Unsupported file type. Supported types include images, documents, "
+                "audio, video, CSV, and sensor data files.",
+                "error",
+            )
            return redirect(url_for("attest.attest"))

        try:
-            image_data = image_file.read()
+            file_data = evidence_file.read()
+            is_image = _is_image_file(evidence_file.filename)

            # Build optional metadata
            metadata = {}
@@ -148,31 +176,36 @@ def attest():
            auto_exif = request.form.get("auto_exif", "on") == "on"
            strip_device = request.form.get("strip_device", "on") == "on"

-            # Extract-then-classify: get evidentiary metadata before attestation
-            # so user can control what's included
-            if auto_exif and strip_device:
-                from fieldwitness.metadata import extract_and_classify
+            # Extract-then-classify: get evidentiary metadata before attestation.
+            # Only applicable to image files — silently skip for other types.
+            if is_image and auto_exif and strip_device:
+                try:
+                    from fieldwitness.metadata import extract_and_classify

-                extraction = extract_and_classify(image_data)
-                # Merge evidentiary fields (GPS, timestamp) but exclude
-                # dangerous device fields (serial, firmware version)
-                for key, value in extraction.evidentiary.items():
-                    if key not in metadata:  # User metadata takes precedence
-                        if hasattr(value, "isoformat"):
-                            metadata[f"exif_{key}"] = value.isoformat()
-                        elif isinstance(value, dict):
-                            metadata[f"exif_{key}"] = value
-                        else:
-                            metadata[f"exif_{key}"] = str(value)
+                    extraction = extract_and_classify(file_data)
+                    # Merge evidentiary fields (GPS, timestamp) but exclude
+                    # dangerous device fields (serial, firmware version)
+                    for key, value in extraction.evidentiary.items():
+                        if key not in metadata:  # User metadata takes precedence
+                            if hasattr(value, "isoformat"):
+                                metadata[f"exif_{key}"] = value.isoformat()
+                            elif isinstance(value, dict):
+                                metadata[f"exif_{key}"] = value
+                            else:
+                                metadata[f"exif_{key}"] = str(value)
+                except Exception:
+                    pass  # EXIF extraction is best-effort

-            # Create the attestation
+            # Create the attestation. create_attestation() calls hash_image()
+            # internally; for non-image files we pre-compute hashes via
+            # hash_file() and use create_attestation_from_hashes() instead.
            from fieldwitness.attest.attestation import create_attestation

            attestation = create_attestation(
-                image_data=image_data,
+                image_data=file_data,
                private_key=private_key,
                metadata=metadata if metadata else None,
-                auto_exif=auto_exif and not strip_device,  # Full EXIF only if not stripping device
+                auto_exif=is_image and auto_exif and not strip_device,
            )

            # Store in the append-only log
@@ -188,7 +221,7 @@ def attest():

                logging.getLogger(__name__).warning("Chain wrapping failed: %s", e)
                flash(
-                    "Attestation saved, but chain wrapping failed. " "Check chain configuration.",
+                    "Attestation saved, but chain wrapping failed. Check chain configuration.",
                    "warning",
                )

@@ -225,7 +258,8 @@ def attest():
                location_name=metadata.get("location_name", ""),
                exif_metadata=record.metadata,
                index=index,
-                filename=image_file.filename,
+                filename=evidence_file.filename,
+                is_image=is_image,
                chain_index=chain_record.chain_index if chain_record else None,
            )

@@ -239,15 +273,13 @@ def attest():
@bp.route("/attest/batch", methods=["POST"])
@login_required
 def attest_batch():
-    """Batch attestation — accepts multiple image files.
+    """Batch attestation — accepts multiple files of any supported type.

    Returns JSON with results for each file (success/skip/error).
-    Skips images already attested (by SHA-256 match).
+    Skips files already attested (by SHA-256 match).
    """
    import hashlib

-    from fieldwitness.attest.hashing import hash_image
-
    private_key = _get_private_key()
    if private_key is None:
        return {"error": "No identity key. Run fieldwitness init first."}, 400
@@ -262,10 +294,14 @@ def attest_batch():
    for f in files:
        filename = f.filename or "unknown"
        try:
-            image_data = f.read()
-            sha256 = hashlib.sha256(image_data).hexdigest()
+            if not _allowed_file(filename):
+                results.append({"file": filename, "status": "skipped", "reason": "unsupported file type"})
+                continue

-            # Skip already-attested images
+            file_data = f.read()
+            sha256 = hashlib.sha256(file_data).hexdigest()
+
+            # Skip already-attested files
            existing = storage.get_records_by_image_sha256(sha256)
            if existing:
                results.append({"file": filename, "status": "skipped", "reason": "already attested"})
@@ -273,7 +309,7 @@ def attest_batch():

            from fieldwitness.attest.attestation import create_attestation

-            attestation = create_attestation(image_data, private_key)
+            attestation = create_attestation(file_data, private_key)
            index = storage.append_record(attestation.record)

            # Wrap in chain if enabled
@@ -312,10 +348,10 @@ def attest_batch():
@bp.route("/verify/batch", methods=["POST"])
@login_required
 def verify_batch():
-    """Batch verification — accepts multiple image files.
+    """Batch verification — accepts multiple files of any supported type.

    Returns JSON with per-file verification results. Uses SHA-256
-    fast path before falling back to perceptual scan.
+    fast path before falling back to perceptual scan (images only).
    """
    files = request.files.getlist("images")
    if not files:
@@ -325,8 +361,8 @@ def verify_batch():
    for f in files:
        filename = f.filename or "unknown"
        try:
-            image_data = f.read()
-            result = _verify_image(image_data)
+            file_data = f.read()
+            result = _verify_file(file_data)

            if result["matches"]:
                best = result["matches"][0]
@@ -361,17 +397,20 @@ def verify_batch():
    }


-def _verify_image(image_data: bytes) -> dict:
+def _verify_file(file_data: bytes) -> dict:
    """Run the full verification pipeline against the attestation log.

+    Works for any file type. Images get SHA-256 + perceptual matching;
+    non-image files get SHA-256 matching only.
+
    Returns a dict with keys:
        query_hashes   — ImageHashes object from fieldwitness.attest
        matches        — list of match dicts (record, match_type, distances, attestor_name)
        record_count   — total records searched
    """
-    from fieldwitness.attest.hashing import compute_all_distances, hash_image, is_same_image
+    from fieldwitness.attest.hashing import compute_all_distances, hash_file, is_same_image

-    query_hashes = hash_image(image_data)
+    query_hashes = hash_file(file_data)
    storage = _get_storage()
    stats = storage.get_stats()

@@ -423,17 +462,22 @@ def verify():
    The log read here is read-only and reveals no key material.
    """
    if request.method == "POST":
-        image_file = request.files.get("image")
-        if not image_file or not image_file.filename:
-            flash("Please select an image to verify.", "error")
+        evidence_file = request.files.get("image")
+        if not evidence_file or not evidence_file.filename:
+            flash("Please select a file to verify.", "error")
            return redirect(url_for("attest.verify"))

-        if not _allowed_image(image_file.filename):
-            flash("Unsupported image format.", "error")
+        if not _allowed_file(evidence_file.filename):
+            flash(
+                "Unsupported file type. Upload any image, document, audio, video, or data file.",
+                "error",
+            )
            return redirect(url_for("attest.verify"))

        try:
-            result = _verify_image(image_file.read())
+            file_data = evidence_file.read()
+            is_image = _is_image_file(evidence_file.filename)
+            result = _verify_file(file_data)
            query_hashes = result["query_hashes"]
            matches = result["matches"]

@@ -443,7 +487,8 @@ def verify():
                    found=False,
                    message="No attestations in the local log yet.",
                    query_hashes=query_hashes,
-                    filename=image_file.filename,
+                    filename=evidence_file.filename,
+                    is_image=is_image,
                    matches=[],
                )

@@ -456,7 +501,8 @@ def verify():
                    else "No matching attestations found."
                ),
                query_hashes=query_hashes,
-                filename=image_file.filename,
+                filename=evidence_file.filename,
+                is_image=is_image,
                matches=matches,
            )

@@ -471,29 +517,29 @@ def verify():
 def verify_receipt():
    """Return a downloadable JSON verification receipt for court or legal use.

-    Accepts the same image upload as /verify. Returns a JSON file attachment
-    containing image hashes, all matching attestation records with full metadata,
+    Accepts the same file upload as /verify. Returns a JSON file attachment
+    containing file hashes, all matching attestation records with full metadata,
    the verification timestamp, and the verifier hostname.

    Intentionally unauthenticated — same access policy as /verify.
    """
-    image_file = request.files.get("image")
-    if not image_file or not image_file.filename:
+    evidence_file = request.files.get("image")
+    if not evidence_file or not evidence_file.filename:
        return Response(
-            json.dumps({"error": "No image provided"}),
+            json.dumps({"error": "No file provided"}),
            status=400,
            mimetype="application/json",
        )

-    if not _allowed_image(image_file.filename):
+    if not _allowed_file(evidence_file.filename):
        return Response(
-            json.dumps({"error": "Unsupported image format"}),
+            json.dumps({"error": "Unsupported file type"}),
            status=400,
            mimetype="application/json",
        )

    try:
-        result = _verify_image(image_file.read())
+        result = _verify_file(evidence_file.read())
    except Exception as e:
        return Response(
            json.dumps({"error": f"Verification failed: {e}"}),
@@ -573,11 +619,11 @@ def verify_receipt():
        "schema_version": "3",
        "verification_timestamp": verification_ts,
        "verifier_instance": verifier_instance,
-        "queried_filename": image_file.filename,
-        "image_hash": {
+        "queried_filename": evidence_file.filename,
+        "file_hash": {
            "sha256": query_hashes.sha256,
-            "phash": query_hashes.phash,
-            "dhash": getattr(query_hashes, "dhash", None),
+            "phash": query_hashes.phash or None,
+            "dhash": getattr(query_hashes, "dhash", None) or None,
        },
        "records_searched": result["record_count"],
        "matches_found": len(matching_records),
@@ -599,7 +645,9 @@ def verify_receipt():

    receipt_json = json.dumps(receipt, indent=2, ensure_ascii=False)
    safe_filename = (
-        image_file.filename.rsplit(".", 1)[0] if "." in image_file.filename else image_file.filename
+        evidence_file.filename.rsplit(".", 1)[0]
+        if "." in evidence_file.filename
+        else evidence_file.filename
    )
    download_name = f"receipt_{safe_filename}_{datetime.now(UTC).strftime('%Y%m%dT%H%M%SZ')}.json"