"""Integration tests for hash_file() — all-file-type attestation hashing.""" from __future__ import annotations import hashlib import os from io import BytesIO import pytest from PIL import Image from fieldwitness.attest.hashing import hash_file from fieldwitness.attest.models import ImageHashes # --------------------------------------------------------------------------- # File creation helpers # --------------------------------------------------------------------------- def _make_png(width: int = 50, height: int = 50, color: tuple = (128, 64, 32)) -> bytes: """Create a minimal valid PNG in memory.""" img = Image.new("RGB", (width, height), color) buf = BytesIO() img.save(buf, format="PNG") return buf.getvalue() def _make_pdf() -> bytes: """Return a valid minimal PDF as raw bytes.""" return ( b"%PDF-1.4\n" b"1 0 obj<>endobj\n" b"2 0 obj<>endobj\n" b"3 0 obj<>endobj\n" b"xref\n0 4\n" b"0000000000 65535 f\r\n" b"0000000009 00000 n\r\n" b"0000000058 00000 n\r\n" b"0000000115 00000 n\r\n" b"trailer<>\n" b"startxref\n196\n%%EOF" ) def _make_csv() -> bytes: """Return a simple CSV file as bytes.""" return b"id,name,value\n1,alpha,100\n2,beta,200\n3,gamma,300\n" # --------------------------------------------------------------------------- # test_hash_image_file # --------------------------------------------------------------------------- class TestHashImageFile: def test_sha256_populated(self): hashes = hash_file(_make_png()) assert hashes.sha256 assert len(hashes.sha256) == 64 def test_phash_populated(self): hashes = hash_file(_make_png()) # phash must be a non-empty string for a valid image assert isinstance(hashes.phash, str) assert len(hashes.phash) > 0 def test_dhash_populated(self): hashes = hash_file(_make_png()) assert isinstance(hashes.dhash, str) assert len(hashes.dhash) > 0 def test_returns_image_hashes_instance(self): result = hash_file(_make_png()) assert isinstance(result, ImageHashes) def test_sha256_matches_direct_computation(self): png_data = _make_png() 
hashes = hash_file(png_data) expected = hashlib.sha256(png_data).hexdigest() assert hashes.sha256 == expected # --------------------------------------------------------------------------- # test_hash_pdf_file # --------------------------------------------------------------------------- class TestHashPdfFile: def test_sha256_populated(self): hashes = hash_file(_make_pdf()) assert hashes.sha256 assert len(hashes.sha256) == 64 def test_phash_empty_for_non_image(self): """PDF files must have phash == '' (PIL cannot decode them).""" hashes = hash_file(_make_pdf()) assert hashes.phash == "" def test_dhash_empty_for_non_image(self): hashes = hash_file(_make_pdf()) assert hashes.dhash == "" def test_sha256_correct(self): pdf_data = _make_pdf() expected = hashlib.sha256(pdf_data).hexdigest() assert hash_file(pdf_data).sha256 == expected # --------------------------------------------------------------------------- # test_hash_csv_file # --------------------------------------------------------------------------- class TestHashCsvFile: def test_sha256_populated(self): hashes = hash_file(_make_csv()) assert hashes.sha256 assert len(hashes.sha256) == 64 def test_phash_empty(self): assert hash_file(_make_csv()).phash == "" def test_dhash_empty(self): assert hash_file(_make_csv()).dhash == "" def test_sha256_correct(self): csv_data = _make_csv() assert hash_file(csv_data).sha256 == hashlib.sha256(csv_data).hexdigest() # --------------------------------------------------------------------------- # test_hash_empty_file # --------------------------------------------------------------------------- class TestHashEmptyFile: def test_does_not_crash(self): """Hashing empty bytes must not raise any exception.""" result = hash_file(b"") assert isinstance(result, ImageHashes) def test_sha256_of_empty_bytes(self): """SHA-256 of empty bytes is the well-known constant.""" empty_sha256 = hashlib.sha256(b"").hexdigest() assert hash_file(b"").sha256 == empty_sha256 def 
test_phash_and_dhash_empty_or_str(self): result = hash_file(b"") # Must be strings (possibly empty), never None assert isinstance(result.phash, str) assert isinstance(result.dhash, str) # --------------------------------------------------------------------------- # test_hash_large_file # --------------------------------------------------------------------------- class TestHashLargeFile: def test_sha256_correct_for_10mb(self): """SHA-256 must be correct for a 10 MB random payload.""" data = os.urandom(10 * 1024 * 1024) expected = hashlib.sha256(data).hexdigest() result = hash_file(data) assert result.sha256 == expected def test_large_file_does_not_raise(self): data = os.urandom(10 * 1024 * 1024) result = hash_file(data) assert isinstance(result, ImageHashes) def test_large_non_image_has_empty_perceptual_hashes(self): data = os.urandom(10 * 1024 * 1024) result = hash_file(data) assert result.phash == "" assert result.dhash == "" # --------------------------------------------------------------------------- # test_hash_file_deterministic # --------------------------------------------------------------------------- class TestHashFileDeterministic: def test_same_image_twice_identical_sha256(self): data = _make_png() h1 = hash_file(data) h2 = hash_file(data) assert h1.sha256 == h2.sha256 def test_same_image_twice_identical_phash(self): data = _make_png() h1 = hash_file(data) h2 = hash_file(data) assert h1.phash == h2.phash def test_same_image_twice_identical_dhash(self): data = _make_png() h1 = hash_file(data) h2 = hash_file(data) assert h1.dhash == h2.dhash def test_same_binary_blob_twice_identical(self): data = os.urandom(4096) h1 = hash_file(data) h2 = hash_file(data) assert h1.sha256 == h2.sha256 def test_same_csv_twice_identical(self): data = _make_csv() assert hash_file(data).sha256 == hash_file(data).sha256 # --------------------------------------------------------------------------- # test_hash_file_different_content # 
# ---------------------------------------------------------------------------


class TestHashFileDifferentContent:
    """Distinct inputs must produce distinct SHA-256 values from hash_file()."""

    def test_different_images_different_sha256(self):
        """Two PNGs that differ only in fill colour hash differently."""
        red = _make_png(color=(255, 0, 0))
        blue = _make_png(color=(0, 0, 255))
        assert hash_file(red).sha256 != hash_file(blue).sha256

    def test_different_binary_blobs_different_sha256(self):
        """Two independent 1024-byte random blobs hash differently."""
        a = os.urandom(1024)
        b = os.urandom(1024)
        # Astronomically unlikely to collide, but guard anyway so a failure
        # below cannot be blamed on identical inputs.
        assert a != b
        assert hash_file(a).sha256 != hash_file(b).sha256

    def test_different_csvs_different_sha256(self):
        """Two CSV payloads that differ in one data row hash differently."""
        csv1 = b"a,b\n1,2\n"
        csv2 = b"a,b\n3,4\n"
        assert hash_file(csv1).sha256 != hash_file(csv2).sha256

    def test_one_bit_flip_changes_sha256(self):
        """Flipping exactly one bit of the payload must change the SHA-256."""
        original = _make_pdf()
        mutated = bytearray(original)
        # XOR with 0x01 toggles only the lowest bit of the final byte, so the
        # two payloads differ by a single bit, as the test name states
        # (0xFF would invert all eight bits of that byte).
        mutated[-1] ^= 0x01
        mutated_bytes = bytes(mutated)
        # Guard: make sure the mutation really altered the payload.
        assert mutated_bytes != original
        original_hash = hash_file(original).sha256
        mutated_hash = hash_file(mutated_bytes).sha256
        assert original_hash != mutated_hash