diff --git a/frontends/web/app.py b/frontends/web/app.py index 4d97090..358c647 100644 --- a/frontends/web/app.py +++ b/frontends/web/app.py @@ -123,8 +123,8 @@ def create_app(config: SoosefConfig | None = None) -> Flask: # Stegasoo capabilities try: - from stegasoo import HAS_AUDIO_SUPPORT, get_channel_status, has_dct_support - from stegasoo.constants import ( + from soosef.stegasoo import HAS_AUDIO_SUPPORT, get_channel_status, has_dct_support + from soosef.stegasoo.constants import ( DEFAULT_PASSPHRASE_WORDS, MAX_FILE_PAYLOAD_SIZE, MAX_MESSAGE_CHARS, @@ -164,7 +164,7 @@ def create_app(config: SoosefConfig | None = None) -> Flask: # Verisoo availability try: - import verisoo # noqa: F401 + import soosef.verisoo # noqa: F401 has_verisoo = True except ImportError: @@ -241,13 +241,13 @@ def _register_stegasoo_routes(app: Flask) -> None: """ import temp_storage from auth import admin_required, login_required - from stegasoo import ( + from soosef.stegasoo import ( export_rsa_key_pem, generate_credentials, get_channel_status, load_rsa_key, ) - from stegasoo.constants import ( + from soosef.stegasoo.constants import ( DEFAULT_PASSPHRASE_WORDS, MAX_PIN_LENGTH, MIN_PASSPHRASE_WORDS, @@ -255,7 +255,7 @@ def _register_stegasoo_routes(app: Flask) -> None: TEMP_FILE_EXPIRY, VALID_RSA_SIZES, ) - from stegasoo.qr_utils import ( + from soosef.stegasoo.qr_utils import ( can_fit_in_qr, generate_qr_code, ) diff --git a/frontends/web/auth.py b/frontends/web/auth.py index 27e0a68..e8997b3 100644 --- a/frontends/web/auth.py +++ b/frontends/web/auth.py @@ -273,7 +273,7 @@ def verify_and_reset_admin_password(recovery_key: str, new_password: str) -> tup Returns: (success, message) tuple """ - from stegasoo.recovery import verify_recovery_key + from soosef.stegasoo.recovery import verify_recovery_key stored_hash = get_recovery_key_hash() if not stored_hash: diff --git a/frontends/web/blueprints/attest.py b/frontends/web/blueprints/attest.py index c3d35f0..682425b 100644 --- 
a/frontends/web/blueprints/attest.py +++ b/frontends/web/blueprints/attest.py @@ -21,7 +21,7 @@ bp = Blueprint("attest", __name__) def _get_storage(): """Get verisoo LocalStorage pointed at soosef's attestation directory.""" - from verisoo.storage import LocalStorage + from soosef.verisoo.storage import LocalStorage from soosef.paths import ATTESTATIONS_DIR @@ -30,7 +30,7 @@ def _get_storage(): def _get_private_key(): """Load the Ed25519 private key from soosef identity directory.""" - from verisoo.crypto import load_private_key + from soosef.verisoo.crypto import load_private_key from soosef.paths import IDENTITY_PRIVATE_KEY @@ -139,7 +139,7 @@ def attest(): auto_exif = request.form.get("auto_exif", "on") == "on" # Create the attestation - from verisoo.attestation import create_attestation + from soosef.verisoo.attestation import create_attestation attestation = create_attestation( image_data=image_data, @@ -167,7 +167,7 @@ def attest(): # Save our own identity so we can look it up during verification from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat - from verisoo.models import Identity + from soosef.verisoo.models import Identity pub_key = private_key.public_key() pub_bytes = pub_key.public_bytes(Encoding.Raw, PublicFormat.Raw) @@ -217,7 +217,7 @@ def _verify_image(image_data: bytes) -> dict: matches — list of match dicts (record, match_type, distances, attestor_name) record_count — total records searched """ - from verisoo.hashing import compute_all_distances, hash_image, is_same_image + from soosef.verisoo.hashing import compute_all_distances, hash_image, is_same_image query_hashes = hash_image(image_data) storage = _get_storage() diff --git a/frontends/web/stego_routes.py b/frontends/web/stego_routes.py index d3d49cb..4a67dd5 100644 --- a/frontends/web/stego_routes.py +++ b/frontends/web/stego_routes.py @@ -41,7 +41,7 @@ def register_stego_routes(app, **deps): temp_storage = deps["temp_storage"] _has_qrcode_read = 
deps.get("has_qrcode_read", False) - from stegasoo import ( + from soosef.stegasoo import ( HAS_AUDIO_SUPPORT, CapacityError, DecryptionError, @@ -60,13 +60,13 @@ def register_stego_routes(app, **deps): validate_rsa_key, validate_security_factors, ) - from stegasoo.channel import resolve_channel_key - from stegasoo.constants import ( + from soosef.stegasoo.channel import resolve_channel_key + from soosef.stegasoo.constants import ( TEMP_FILE_EXPIRY, THUMBNAIL_QUALITY, THUMBNAIL_SIZE, ) - from stegasoo.qr_utils import ( + from soosef.stegasoo.qr_utils import ( decompress_data, extract_key_from_qr, is_compressed, @@ -686,7 +686,7 @@ def register_stego_routes(app, **deps): return _error_response(result.error_message) # Pre-check payload capacity BEFORE encode (fail fast) - from stegasoo.steganography import will_fit_by_mode + from soosef.stegasoo.steganography import will_fit_by_mode payload_size = ( len(payload.data) if hasattr(payload, "data") else len(payload.encode("utf-8")) @@ -1613,8 +1613,8 @@ def register_stego_routes(app, **deps): @app.route("/about") def about(): from auth import get_current_user - from stegasoo import has_argon2 - from stegasoo.channel import get_channel_status + from soosef.stegasoo import has_argon2 + from soosef.stegasoo.channel import get_channel_status channel_status = get_channel_status() current_user = get_current_user() @@ -1644,7 +1644,7 @@ def register_stego_routes(app, **deps): @login_required def api_tools_capacity(): """Calculate image capacity for steganography.""" - from stegasoo.dct_steganography import estimate_capacity_comparison + from soosef.stegasoo.dct_steganography import estimate_capacity_comparison carrier = request.files.get("image") if not carrier: @@ -1666,7 +1666,7 @@ def register_stego_routes(app, **deps): """Strip EXIF/metadata from image.""" import io - from stegasoo.utils import strip_image_metadata + from soosef.stegasoo.utils import strip_image_metadata image_file = request.files.get("image") if not 
image_file: @@ -1689,7 +1689,7 @@ def register_stego_routes(app, **deps): @login_required def api_tools_exif(): """Read EXIF metadata from image.""" - from stegasoo.utils import read_image_exif + from soosef.stegasoo.utils import read_image_exif image_file = request.files.get("image") if not image_file: @@ -1718,7 +1718,7 @@ def register_stego_routes(app, **deps): @login_required def api_tools_exif_update(): """Update EXIF fields in image.""" - from stegasoo.utils import write_image_exif + from soosef.stegasoo.utils import write_image_exif image_file = request.files.get("image") if not image_file: @@ -1757,7 +1757,7 @@ def register_stego_routes(app, **deps): @login_required def api_tools_exif_clear(): """Remove all EXIF metadata from image.""" - from stegasoo.utils import strip_image_metadata + from soosef.stegasoo.utils import strip_image_metadata image_file = request.files.get("image") if not image_file: diff --git a/frontends/web/stego_worker.py b/frontends/web/stego_worker.py index 60743db..e988ebc 100644 --- a/frontends/web/stego_worker.py +++ b/frontends/web/stego_worker.py @@ -73,7 +73,7 @@ def _get_channel_info(resolved_key): Returns: (mode, fingerprint) tuple """ - from stegasoo import get_channel_status, has_channel_key + from soosef.stegasoo import get_channel_status, has_channel_key if resolved_key == "": return "public", None @@ -94,7 +94,7 @@ def _get_channel_info(resolved_key): def encode_operation(params: dict) -> dict: """Handle encode operation.""" logger.debug("encode_operation: mode=%s", params.get("embed_mode", "lsb")) - from stegasoo import FilePayload, encode + from soosef.stegasoo import FilePayload, encode # Decode base64 inputs carrier_data = base64.b64decode(params["carrier_b64"]) @@ -173,7 +173,7 @@ def _write_decode_progress(progress_file: str | None, percent: int, phase: str) def decode_operation(params: dict) -> dict: """Handle decode operation.""" logger.debug("decode_operation: mode=%s", params.get("embed_mode", "auto")) - from 
stegasoo import decode + from soosef.stegasoo import decode progress_file = params.get("progress_file") @@ -227,7 +227,7 @@ def decode_operation(params: dict) -> dict: def compare_operation(params: dict) -> dict: """Handle compare_modes operation.""" - from stegasoo import compare_modes + from soosef.stegasoo import compare_modes carrier_data = base64.b64decode(params["carrier_b64"]) result = compare_modes(carrier_data) @@ -240,7 +240,7 @@ def compare_operation(params: dict) -> dict: def capacity_check_operation(params: dict) -> dict: """Handle will_fit_by_mode operation.""" - from stegasoo import will_fit_by_mode + from soosef.stegasoo import will_fit_by_mode carrier_data = base64.b64decode(params["carrier_b64"]) @@ -259,7 +259,7 @@ def capacity_check_operation(params: dict) -> dict: def encode_audio_operation(params: dict) -> dict: """Handle audio encode operation (v4.3.0).""" logger.debug("encode_audio_operation: mode=%s", params.get("embed_mode", "audio_lsb")) - from stegasoo import FilePayload, encode_audio + from soosef.stegasoo import FilePayload, encode_audio carrier_data = base64.b64decode(params["carrier_b64"]) reference_data = base64.b64decode(params["reference_b64"]) @@ -324,7 +324,7 @@ def encode_audio_operation(params: dict) -> dict: def decode_audio_operation(params: dict) -> dict: """Handle audio decode operation (v4.3.0).""" logger.debug("decode_audio_operation: mode=%s", params.get("embed_mode", "audio_auto")) - from stegasoo import decode_audio + from soosef.stegasoo import decode_audio progress_file = params.get("progress_file") _write_decode_progress(progress_file, 5, "reading") @@ -370,9 +370,9 @@ def decode_audio_operation(params: dict) -> dict: def audio_info_operation(params: dict) -> dict: """Handle audio info operation (v4.3.0).""" - from stegasoo import get_audio_info - from stegasoo.audio_steganography import calculate_audio_lsb_capacity - from stegasoo.spread_steganography import calculate_audio_spread_capacity + from soosef.stegasoo 
import get_audio_info + from soosef.stegasoo.audio_steganography import calculate_audio_lsb_capacity + from soosef.stegasoo.spread_steganography import calculate_audio_spread_capacity audio_data = base64.b64decode(params["audio_b64"]) @@ -397,7 +397,7 @@ def audio_info_operation(params: dict) -> dict: def channel_status_operation(params: dict) -> dict: """Handle channel status check (v4.0.0).""" - from stegasoo import get_channel_status + from soosef.stegasoo import get_channel_status status = get_channel_status() reveal = params.get("reveal", False) diff --git a/pyproject.toml b/pyproject.toml index 1e46a22..e17f57a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,44 +40,69 @@ classifiers = [ ] dependencies = [ - "stegasoo>=4.3.0", - "verisoo>=0.1.0", "pillow>=10.0.0", "cryptography>=41.0.0", "argon2-cffi>=23.0.0", + "zstandard>=0.22.0", "cbor2>=5.6.0", "uuid-utils>=0.9.0", ] [project.optional-dependencies] -web = [ - "flask>=3.0.0", - "gunicorn>=21.0.0", - # Stegasoo web extras - "stegasoo[web]", - # Verisoo storage - "lmdb>=1.4.0", +stego-dct = [ + "numpy>=2.0.0", + "scipy>=1.10.0", + "jpeglib>=1.0.0", + "reedsolo>=1.7.0", +] +stego-audio = [ + "pydub>=0.25.0", + "numpy>=2.0.0", + "scipy>=1.10.0", + "soundfile>=0.12.0", + "reedsolo>=1.7.0", +] +stego-compression = [ + "lz4>=4.0.0", +] +attest = [ "imagehash>=4.3.0", + "lmdb>=1.4.0", "exifread>=3.0.0", ] cli = [ "click>=8.0.0", "rich>=13.0.0", - "stegasoo[cli]", - "verisoo[cli]", + "qrcode>=7.3.0", + "piexif>=1.1.0", +] +web = [ + "flask>=3.0.0", + "gunicorn>=21.0.0", + "qrcode>=7.3.0", + "pyzbar>=0.1.9", + "piexif>=1.1.0", + "soosef[attest,stego-dct]", +] +api = [ + "fastapi>=0.109.0", + "uvicorn[standard]>=0.27.0", + "python-multipart>=0.0.6", + "soosef[stego-dct]", ] fieldkit = [ "watchdog>=4.0.0", "pyudev>=0.24.0", ] +federation = [ + "aiohttp>=3.9.0", +] rpi = [ "soosef[web,cli,fieldkit]", "gpiozero>=2.0", ] all = [ - "soosef[web,cli,fieldkit]", - "stegasoo[all]", - "verisoo[all]", +
"soosef[stego-dct,stego-audio,stego-compression,attest,cli,web,api,fieldkit,federation]", ] dev = [ "soosef[all]", @@ -107,6 +132,9 @@ packages = ["src/soosef", "frontends"] [tool.hatch.build.targets.wheel.sources] "src" = "" +[tool.hatch.build.targets.wheel.force-include] +"src/soosef/stegasoo/data/bip39-words.txt" = "soosef/stegasoo/data/bip39-words.txt" + [tool.pytest.ini_options] testpaths = ["tests"] python_files = ["test_*.py"] @@ -123,6 +151,14 @@ line-length = 100 select = ["E", "F", "I", "N", "W", "UP"] ignore = ["E501"] +[tool.ruff.lint.per-file-ignores] +# YCbCr colorspace variables (R, G, B, Y, Cb, Cr) are standard names +"src/soosef/stegasoo/dct_steganography.py" = ["N803", "N806"] +# MDCT transform variables (N, X) are standard mathematical names +"src/soosef/stegasoo/spread_steganography.py" = ["N803", "N806"] +# Package __init__.py has imports after try/except and aliases - intentional structure +"src/soosef/stegasoo/__init__.py" = ["E402"] + [tool.mypy] python_version = "3.11" warn_return_any = true diff --git a/src/soosef/_availability.py b/src/soosef/_availability.py new file mode 100644 index 0000000..e2cad91 --- /dev/null +++ b/src/soosef/_availability.py @@ -0,0 +1,21 @@ +"""Runtime availability checks for optional soosef subpackages.""" + + +def has_stegasoo() -> bool: + """Check if soosef.stegasoo is importable (core deps are always present).""" + try: + import soosef.stegasoo # noqa: F401 + + return True + except ImportError: + return False + + +def has_verisoo() -> bool: + """Check if soosef.verisoo is importable (requires [attest] extra).""" + try: + import soosef.verisoo # noqa: F401 + + return True + except ImportError: + return False diff --git a/src/soosef/api.py b/src/soosef/api.py new file mode 100644 index 0000000..840e5b8 --- /dev/null +++ b/src/soosef/api.py @@ -0,0 +1,42 @@ +"""Optional unified FastAPI app combining stegasoo and verisoo APIs.
+ +Usage:: + + uvicorn soosef.api:app --host 0.0.0.0 --port 8000 + +Requires the [api] extra: pip install soosef[api] +""" + +import logging + +from fastapi import FastAPI + +logger = logging.getLogger(__name__) + +app = FastAPI( + title="SooSeF API", + version="0.1.0", + description="Unified steganography and attestation API", +) + +# Each sub-API is optional: mount it when its dependencies are installed and +# log the reason when they are not, instead of dropping the route silently. +try: + from soosef.stegasoo.api import app as stego_api + + app.mount("/stego", stego_api) +except ImportError as exc: + logger.warning("Steganography API not mounted at /stego: %s", exc) + +try: + from soosef.verisoo.api import app as verisoo_api + + app.mount("/attest", verisoo_api) +except ImportError as exc: + logger.warning("Attestation API not mounted at /attest: %s", exc) + + +@app.get("/health") +async def health(): + """Health check endpoint.""" + return {"status": "ok"} diff --git a/src/soosef/cli.py b/src/soosef/cli.py index 6c371a9..e4ebecd 100644 --- a/src/soosef/cli.py +++ b/src/soosef/cli.py @@ -226,7 +226,7 @@ def stego(): try: - from stegasoo.cli import cli as stegasoo_cli + from soosef.stegasoo.cli import cli as stegasoo_cli # Re-register stegasoo commands under the 'stego' group for name, cmd in stegasoo_cli.commands.items(): @@ -249,7 +249,7 @@ def attest(): try: - from verisoo.cli import main as verisoo_cli + from soosef.verisoo.cli import main as verisoo_cli for name, cmd in verisoo_cli.commands.items(): attest.add_command(cmd, name) @@ -288,8 +288,8 @@ def _attest_file( PublicFormat, load_pem_private_key, ) - from verisoo.attestation import create_attestation - from verisoo.models import Identity + from soosef.verisoo.attestation import create_attestation + from soosef.verisoo.models import Identity from soosef.config import SoosefConfig from soosef.federation.chain import ChainStore @@ -380,8 +380,8 @@ def batch(directory: Path, caption: str | None, extensions: str, no_exif: bool) soosef attest batch ./docs --extensions pdf,png --no-exif """ - from verisoo.crypto import load_private_key - from verisoo.storage import LocalStorage + from soosef.verisoo.crypto import load_private_key + from soosef.verisoo.storage import LocalStorage from soosef.paths import ATTESTATIONS_DIR, IDENTITY_PRIVATE_KEY @@ -1013,7
+1013,7 @@ def backfill(): private_key = load_pem_private_key(priv_pem, password=None) try: - from verisoo.storage import LocalStorage + from soosef.verisoo.storage import LocalStorage storage = LocalStorage(base_path=ATTESTATIONS_DIR) stats = storage.get_stats() diff --git a/src/soosef/keystore/manager.py b/src/soosef/keystore/manager.py index cb15cf9..6309ed4 100644 --- a/src/soosef/keystore/manager.py +++ b/src/soosef/keystore/manager.py @@ -206,7 +206,7 @@ class KeystoreManager: def set_channel_key(self, key: str) -> None: """Store a channel key.""" - from stegasoo import validate_channel_key + from soosef.stegasoo import validate_channel_key validate_channel_key(key) self._channel_key_file.parent.mkdir(parents=True, exist_ok=True) @@ -215,7 +215,7 @@ class KeystoreManager: def generate_channel_key(self) -> str: """Generate and store a new channel key.""" - from stegasoo import generate_channel_key + from soosef.stegasoo import generate_channel_key key: str = generate_channel_key() self.set_channel_key(key) @@ -245,7 +245,7 @@ class KeystoreManager: "the key in the keystore first." 
) - from stegasoo.crypto import get_channel_fingerprint + from soosef.stegasoo.crypto import get_channel_fingerprint old_key = self._channel_key_file.read_text().strip() old_fp = get_channel_fingerprint(old_key) @@ -281,7 +281,7 @@ class KeystoreManager: if self.has_channel_key(): key = self.get_channel_key() if key: - from stegasoo.crypto import get_channel_fingerprint + from soosef.stegasoo.crypto import get_channel_fingerprint channel_fp = get_channel_fingerprint(key) diff --git a/src/soosef/stegasoo/__init__.py b/src/soosef/stegasoo/__init__.py new file mode 100644 index 0000000..aaa13b7 --- /dev/null +++ b/src/soosef/stegasoo/__init__.py @@ -0,0 +1,401 @@ +""" +Stegasoo - Secure Steganography with Multi-Factor Authentication (v4.3.0) + +Changes in v4.0.0: +- Added channel key support for deployment/group isolation +- New functions: get_channel_key, get_channel_fingerprint, generate_channel_key, etc. +- encode() and decode() now accept channel_key parameter +""" + +__version__ = "4.3.0" + +# Core functionality +# Channel key management (v4.0.0) +from .channel import ( + clear_channel_key, + format_channel_key, + generate_channel_key, + get_channel_key, + get_channel_status, + has_channel_key, + set_channel_key, + validate_channel_key, +) + +# Audio support — gated by STEGASOO_AUDIO env var and dependency availability +from .constants import AUDIO_ENABLED, VIDEO_ENABLED + +# Crypto functions +from .crypto import get_active_channel_key, get_channel_fingerprint, has_argon2 +from .decode import decode, decode_file, decode_text +from .encode import encode + +# Credential generation +from .generate import ( + export_rsa_key_pem, + generate_credentials, + generate_passphrase, + generate_pin, + generate_rsa_key, + load_rsa_key, +) + +# Image utilities +from .image_utils import ( + compare_capacity, + get_image_info, +) + +# Backend registry +from .backends import EmbeddingBackend, registry as backend_registry + +# Platform presets +from .platform_presets import
PLATFORMS, get_preset + +# Steganalysis +from .steganalysis import check_image +from .backends.registry import BackendNotFoundError + +# Steganography functions +from .steganography import ( + calculate_capacity_by_mode, + compare_modes, + has_dct_support, + will_fit_by_mode, +) + +# Utilities +from .utils import generate_filename + +HAS_AUDIO_SUPPORT = AUDIO_ENABLED +HAS_VIDEO_SUPPORT = VIDEO_ENABLED + +if AUDIO_ENABLED: + from .audio_utils import ( + detect_audio_format, + get_audio_info, + has_ffmpeg_support, + validate_audio, + ) + from .decode import decode_audio + from .encode import encode_audio +else: + detect_audio_format = None + get_audio_info = None + has_ffmpeg_support = None + validate_audio = None + encode_audio = None + decode_audio = None + +# Video support — gated by STEGASOO_VIDEO env var and ffmpeg + audio deps +if VIDEO_ENABLED: + from .decode import decode_video + from .encode import encode_video + from .video_utils import ( + calculate_video_capacity, + detect_video_format, + get_video_info, + validate_video, + ) +else: + detect_video_format = None + get_video_info = None + validate_video = None + calculate_video_capacity = None + encode_video = None + decode_video = None + +# QR Code utilities - optional, may not be available +try: + from .qr_utils import ( + detect_and_crop_qr, + extract_key_from_qr, + generate_qr_code, + ) + + HAS_QR_UTILS = True +except ImportError: + HAS_QR_UTILS = False + generate_qr_code = None + extract_key_from_qr = None + detect_and_crop_qr = None + +# Validation +from .validation import ( + validate_file_payload, + validate_image, + validate_message, + validate_passphrase, + validate_pin, + validate_rsa_key, + validate_security_factors, +) + +# Validation aliases for public API +validate_reference_photo = validate_image +validate_carrier = validate_image + +# Additional validators +# Constants +from .constants import ( + DEFAULT_PASSPHRASE_WORDS, + EMBED_MODE_AUDIO_AUTO, + EMBED_MODE_AUDIO_LSB, + 
EMBED_MODE_AUDIO_SPREAD, + EMBED_MODE_AUTO, + EMBED_MODE_DCT, + EMBED_MODE_LSB, + EMBED_MODE_VIDEO_AUTO, + EMBED_MODE_VIDEO_LSB, + FORMAT_VERSION, + LOSSLESS_FORMATS, + MAX_FILE_PAYLOAD_SIZE, + MAX_IMAGE_PIXELS, + MAX_MESSAGE_SIZE, + MAX_PASSPHRASE_WORDS, + MAX_PIN_LENGTH, + MIN_IMAGE_PIXELS, + MIN_PASSPHRASE_WORDS, + MIN_PIN_LENGTH, + RECOMMENDED_PASSPHRASE_WORDS, +) + +# Exceptions +from .exceptions import ( + AudioCapacityError, + AudioError, + AudioExtractionError, + AudioTranscodeError, + AudioValidationError, + CapacityError, + CryptoError, + DecryptionError, + EmbeddingError, + EncryptionError, + ExtractionError, + ImageValidationError, + InvalidHeaderError, + InvalidMagicBytesError, + KeyDerivationError, + KeyGenerationError, + KeyPasswordError, + KeyValidationError, + MessageValidationError, + ModeMismatchError, + NoDataFoundError, + PinValidationError, + ReedSolomonError, + SecurityFactorError, + SteganographyError, + StegasooError, + UnsupportedAudioFormatError, + UnsupportedVideoFormatError, + ValidationError, + VideoCapacityError, + VideoError, + VideoExtractionError, + VideoTranscodeError, + VideoValidationError, +) + +# Models +from .models import ( + AudioCapacityInfo, + AudioEmbedStats, + AudioInfo, + CapacityComparison, + Credentials, + DecodeResult, + EncodeResult, + FilePayload, + GenerateResult, + ImageInfo, + ValidationResult, + VideoCapacityInfo, + VideoEmbedStats, + VideoInfo, +) +from .validation import ( + validate_audio_embed_mode, + validate_audio_file, + validate_dct_color_mode, + validate_dct_output_format, + validate_embed_mode, +) + +# Aliases for backward compatibility +MIN_MESSAGE_LENGTH = 1 +MAX_MESSAGE_LENGTH = MAX_MESSAGE_SIZE +MAX_PAYLOAD_SIZE = MAX_MESSAGE_SIZE +# MAX_FILE_PAYLOAD_SIZE imported from constants above +SUPPORTED_IMAGE_FORMATS = LOSSLESS_FORMATS +LSB_BYTES_PER_PIXEL = 3 / 8 +DCT_BYTES_PER_PIXEL = 0.125 + +__all__ = [ + # Version + "__version__", + # Core + "encode", + "decode", + "decode_file", + "decode_text", + 
# Audio (v4.3.0) + "encode_audio", + "decode_audio", + "detect_audio_format", + "get_audio_info", + "has_ffmpeg_support", + "validate_audio", + "HAS_AUDIO_SUPPORT", + "HAS_VIDEO_SUPPORT", + "validate_audio_embed_mode", + "validate_audio_file", + # Video (v4.4.0) + "encode_video", + "decode_video", + "detect_video_format", + "get_video_info", + "validate_video", + "calculate_video_capacity", + # Generation + "generate_pin", + "generate_passphrase", + "generate_rsa_key", + "generate_credentials", + "export_rsa_key_pem", + "load_rsa_key", + # Channel key management (v4.0.0) + "generate_channel_key", + "get_channel_key", + "set_channel_key", + "clear_channel_key", + "has_channel_key", + "get_channel_status", + "validate_channel_key", + "format_channel_key", + "get_active_channel_key", + "get_channel_fingerprint", + # Image utilities + "get_image_info", + "compare_capacity", + # Utilities + "generate_filename", + # Crypto + "has_argon2", + # Backends + "EmbeddingBackend", + "backend_registry", + "BackendNotFoundError", + # Platform presets + "get_preset", + "PLATFORMS", + # Steganalysis + "check_image", + # Steganography + "has_dct_support", + "calculate_capacity_by_mode", + "compare_modes", + "will_fit_by_mode", + # QR utilities + "generate_qr_code", + "extract_key_from_qr", + "detect_and_crop_qr", + "HAS_QR_UTILS", + # Validation + "validate_reference_photo", + "validate_carrier", + "validate_message", + "validate_file_payload", + "validate_passphrase", + "validate_pin", + "validate_rsa_key", + "validate_security_factors", + "validate_embed_mode", + "validate_dct_output_format", + "validate_dct_color_mode", + "validate_channel_key", + # Models + "ImageInfo", + "CapacityComparison", + "GenerateResult", + "EncodeResult", + "DecodeResult", + "FilePayload", + "Credentials", + "ValidationResult", + # Audio models + "AudioEmbedStats", + "AudioInfo", + "AudioCapacityInfo", + # Video models + "VideoEmbedStats", + "VideoInfo", + "VideoCapacityInfo", + # Exceptions + 
"StegasooError", + "ValidationError", + "PinValidationError", + "MessageValidationError", + "ImageValidationError", + "KeyValidationError", + "SecurityFactorError", + "CryptoError", + "EncryptionError", + "DecryptionError", + "KeyDerivationError", + "KeyGenerationError", + "KeyPasswordError", + "SteganographyError", + "CapacityError", + "ExtractionError", + "EmbeddingError", + "InvalidHeaderError", + "InvalidMagicBytesError", + "ReedSolomonError", + "NoDataFoundError", + "ModeMismatchError", + # Audio exceptions + "AudioError", + "AudioValidationError", + "AudioCapacityError", + "AudioExtractionError", + "AudioTranscodeError", + "UnsupportedAudioFormatError", + # Video exceptions + "VideoError", + "VideoValidationError", + "VideoCapacityError", + "VideoExtractionError", + "VideoTranscodeError", + "UnsupportedVideoFormatError", + # Constants + "FORMAT_VERSION", + "MIN_PASSPHRASE_WORDS", + "RECOMMENDED_PASSPHRASE_WORDS", + "DEFAULT_PASSPHRASE_WORDS", + "MAX_PASSPHRASE_WORDS", + "MIN_PIN_LENGTH", + "MAX_PIN_LENGTH", + "MIN_MESSAGE_LENGTH", + "MAX_MESSAGE_LENGTH", + "MAX_MESSAGE_SIZE", + "MAX_PAYLOAD_SIZE", + "MAX_FILE_PAYLOAD_SIZE", + "MIN_IMAGE_PIXELS", + "MAX_IMAGE_PIXELS", + "SUPPORTED_IMAGE_FORMATS", + "LOSSLESS_FORMATS", + "LSB_BYTES_PER_PIXEL", + "DCT_BYTES_PER_PIXEL", + "EMBED_MODE_LSB", + "EMBED_MODE_DCT", + "EMBED_MODE_AUTO", + # Audio constants + "EMBED_MODE_AUDIO_LSB", + "EMBED_MODE_AUDIO_SPREAD", + "EMBED_MODE_AUDIO_AUTO", + # Video constants + "EMBED_MODE_VIDEO_LSB", + "EMBED_MODE_VIDEO_AUTO", +] diff --git a/src/soosef/stegasoo/api.py b/src/soosef/stegasoo/api.py new file mode 100644 index 0000000..3e1f0a5 --- /dev/null +++ b/src/soosef/stegasoo/api.py @@ -0,0 +1,2256 @@ +#!/usr/bin/env python3 +""" +Stegasoo REST API (v4.3.0) + +FastAPI-based REST API for steganography operations. +Supports both text messages and file embedding. 
+ +CHANGES in v4.3.0: +- Audio steganography endpoints (/audio/*) +- LSB and spread spectrum (DSSS) audio embedding modes +- Audio info and capacity checking + +CHANGES in v4.2.1: +- API key authentication (X-API-Key header) +- TLS support with self-signed certificates +- /auth/* endpoints for key management + +CHANGES in v4.2.0: +- Async encode/decode operations (run in thread pool) +- Server can handle concurrent requests without blocking + +CHANGES in v4.0.0: +- Added channel key support for deployment/group isolation +- New /channel endpoints for key management +- channel_key parameter on encode/decode endpoints +- Messages encoded with channel key require same key to decode + +CHANGES in v3.2.0: +- Removed date dependency from all operations +- Renamed day_phrase → passphrase +- No date_str parameters needed +- Simplified API for asynchronous communications + +NEW in v3.0: LSB and DCT embedding modes. +NEW in v3.0.1: DCT color mode and JPEG output format. +""" + +import asyncio +import base64 +import logging +import os +import sys +from functools import partial +from pathlib import Path +from typing import Literal + +# Configure logging for API frontend +_log_level = os.environ.get("STEGASOO_LOG_LEVEL", "").strip().upper() +if _log_level and hasattr(logging, _log_level): + logging.basicConfig( + level=getattr(logging, _log_level), + format="[%(asctime)s.%(msecs)03d] [%(levelname)s] [%(name)s] %(message)s", + datefmt="%H:%M:%S", + stream=sys.stderr, + ) +elif os.environ.get("STEGASOO_DEBUG", "").strip() in ("1", "true", "yes"): + logging.basicConfig( + level=logging.DEBUG, + format="[%(asctime)s.%(msecs)03d] [%(levelname)s] [%(name)s] %(message)s", + datefmt="%H:%M:%S", + stream=sys.stderr, + ) +api_logger = logging.getLogger("stegasoo.api") + +from fastapi import Depends, FastAPI, File, Form, HTTPException, Query, UploadFile +from fastapi.responses import JSONResponse, Response +from pydantic import BaseModel, Field + +# API Key Authentication +try: + from 
.api_auth import ( + add_api_key, + get_api_key_status, + is_auth_enabled, + list_api_keys, + remove_api_key, + require_api_key, + ) +except ImportError: + from .api_auth import ( + add_api_key, + get_api_key_status, + list_api_keys, + remove_api_key, + require_api_key, + ) + +# No sys.path bootstrap is needed here: this module is only importable as +# soosef.stegasoo.api, so the soosef package is already on the import path. + +from soosef.stegasoo import ( + HAS_AUDIO_SUPPORT, + MAX_FILE_PAYLOAD_SIZE, + CapacityError, + DecryptionError, + FilePayload, + StegasooError, + __version__, + calculate_capacity_by_mode, + clear_channel_key, + compare_modes, + decode, + encode, + generate_channel_key, + generate_credentials, + get_channel_status, + has_argon2, + has_dct_support, + set_channel_key, + validate_channel_key, + validate_image, + will_fit_by_mode, +) + +# Audio steganography (v4.3.0) - conditionally imported +if HAS_AUDIO_SUPPORT: + from soosef.stegasoo import decode_audio, encode_audio, get_audio_info + from soosef.stegasoo.audio_steganography import calculate_audio_lsb_capacity + from soosef.stegasoo.spread_steganography import calculate_audio_spread_capacity +from soosef.stegasoo.constants import ( + DEFAULT_PASSPHRASE_WORDS, + MAX_PASSPHRASE_WORDS, + MAX_PIN_LENGTH, + MIN_PASSPHRASE_WORDS, + MIN_PIN_LENGTH, + VALID_RSA_SIZES, +) + +# QR Code utilities +try: + from soosef.stegasoo.qr_utils import ( + extract_key_from_qr, + generate_qr_ascii, + generate_qr_code, + has_qr_read, + has_qr_write, + ) + + HAS_QR_READ = has_qr_read() + HAS_QR_WRITE = has_qr_write() +except ImportError: + HAS_QR_READ = False + HAS_QR_WRITE = False + extract_key_from_qr = None + generate_qr_code = None + generate_qr_ascii = None + + +# ============================================================================ +# FASTAPI APP +# ============================================================================ + +app = FastAPI( + title="Stegasoo API", + description=""" +Secure steganography with hybrid authentication.
Supports text messages and file embedding. + +## Version 4.0.0 Changes + +- **Channel key support** - Deployment/group isolation for messages +- **New /channel endpoints** - Generate, view, and manage channel keys +- **channel_key parameter** - Added to encode/decode endpoints + +## Version 3.2.0 Changes + +- **No date parameters needed** - Encode and decode anytime without tracking dates +- **Single passphrase** - No daily rotation, just use your passphrase +- **True asynchronous communications** - Perfect for dead drops and delayed delivery + +## Embedding Modes (v3.0) + +- **LSB mode** (default): Spatial LSB embedding, full color output, higher capacity +- **DCT mode**: Frequency domain embedding, ~20% capacity, better stealth + +## DCT Options (v3.0.1) + +- **dct_color_mode**: 'grayscale' (default) or 'color' (preserves original colors) +- **dct_output_format**: 'png' (lossless) or 'jpeg' (smaller, more natural) + +Use the `/modes` endpoint to check availability and `/compare` to compare capacities. 
+""", + version=__version__, + docs_url="/docs", + redoc_url="/redoc", +) + + +# ============================================================================ +# TYPE ALIASES +# ============================================================================ + +EmbedModeType = Literal["lsb", "dct"] +ExtractModeType = Literal["auto", "lsb", "dct"] +DctColorModeType = Literal["grayscale", "color"] +DctOutputFormatType = Literal["png", "jpeg"] +AudioEmbedModeType = Literal["audio_lsb", "audio_spread"] +AudioExtractModeType = Literal["audio_auto", "audio_lsb", "audio_spread"] + + +# ============================================================================ +# MODELS +# ============================================================================ + + +class GenerateRequest(BaseModel): + use_pin: bool = True + use_rsa: bool = False + pin_length: int = Field(default=6, ge=MIN_PIN_LENGTH, le=MAX_PIN_LENGTH) + rsa_bits: int = Field(default=2048) + words_per_passphrase: int = Field( + default=DEFAULT_PASSPHRASE_WORDS, + ge=MIN_PASSPHRASE_WORDS, + le=MAX_PASSPHRASE_WORDS, + description="Words per passphrase (v3.2.0: default increased to 4)", + ) + + +class GenerateResponse(BaseModel): + passphrase: str = Field(description="Single passphrase (v3.2.0: no daily rotation)") + pin: str | None = None + rsa_key_pem: str | None = None + entropy: dict[str, int] + # Legacy field for compatibility + phrases: dict[str, str] | None = Field( + default=None, description="Deprecated: Use 'passphrase' instead" + ) + + +class EncodeRequest(BaseModel): + message: str + reference_photo_base64: str + carrier_image_base64: str + passphrase: str = Field(description="Passphrase (v3.2.0: renamed from day_phrase)") + pin: str = "" + rsa_key_base64: str | None = None + rsa_password: str | None = None + # Channel key (v4.0.0) + channel_key: str | None = Field( + default=None, + description="Channel key for deployment isolation. 
class EncodeFileRequest(BaseModel):
    """Request for embedding a file (base64-encoded)."""

    # Payload: raw file bytes (base64) with the name/MIME type stored alongside.
    file_data_base64: str
    filename: str
    mime_type: str | None = None
    # Images and credentials, same fields as the text encode request.
    reference_photo_base64: str
    carrier_image_base64: str
    passphrase: str = Field(description="Passphrase (v3.2.0: renamed from day_phrase)")
    pin: str = ""
    rsa_key_base64: str | None = None
    rsa_password: str | None = None
    # Channel key (v4.0.0)
    channel_key: str | None = Field(
        default=None,
        description="Channel key for deployment isolation. null=auto (use server config), ''=public mode, 'XXXX-...'=explicit key",
    )
    embed_mode: EmbedModeType = Field(
        default="lsb",
        description="Embedding mode: 'lsb' (default, color) or 'dct' (requires scipy)",
    )
    dct_output_format: DctOutputFormatType = Field(
        default="png",
        description="DCT output format: 'png' (lossless) or 'jpeg' (smaller). Only applies to DCT mode.",
    )
    dct_color_mode: DctColorModeType = Field(
        default="grayscale",
        description="DCT color mode: 'grayscale' (default) or 'color' (preserves colors). Only applies to DCT mode.",
    )
# Result shared by POST /encode and POST /encode/file.
class EncodeResponse(BaseModel):
    stego_image_base64: str
    filename: str
    capacity_used_percent: float
    embed_mode: str = Field(description="Embedding mode used: 'lsb' or 'dct'")
    output_format: str = Field(
        default="png", description="Output format: 'png' or 'jpeg' (for DCT mode)"
    )
    color_mode: str = Field(
        default="color",
        description="Color mode: 'color' (LSB/DCT color) or 'grayscale' (DCT grayscale)",
    )
    # Channel key info (v4.0.0)
    channel_mode: str = Field(default="public", description="Channel mode: 'public' or 'private'")
    channel_fingerprint: str | None = Field(
        default=None, description="Channel key fingerprint (if private mode)"
    )
    # Legacy fields (v3.2.0: no longer used in crypto)
    date_used: str | None = Field(
        default=None, description="Deprecated: Date no longer used in v3.2.0"
    )
    day_of_week: str | None = Field(
        default=None, description="Deprecated: Date no longer used in v3.2.0"
    )


# Body of POST /decode: stego image, reference photo and the credentials.
class DecodeRequest(BaseModel):
    stego_image_base64: str
    reference_photo_base64: str
    passphrase: str = Field(description="Passphrase (v3.2.0: renamed from day_phrase)")
    pin: str = ""
    rsa_key_base64: str | None = None
    rsa_password: str | None = None
    # Channel key (v4.0.0)
    channel_key: str | None = Field(
        default=None,
        description="Channel key for decryption. null=auto (use server config), ''=public mode, 'XXXX-...'=explicit key",
    )
    embed_mode: ExtractModeType = Field(
        default="auto", description="Extraction mode: 'auto' (default), 'lsb', or 'dct'"
    )
class DecodeResponse(BaseModel):
    """Response for decode - can be text or file."""

    payload_type: str  # 'text' or 'file'
    message: str | None = None  # For text
    file_data_base64: str | None = None  # For file (base64-encoded)
    filename: str | None = None  # For file
    mime_type: str | None = None  # For file


class ModeCapacity(BaseModel):
    """Capacity info for a single mode."""

    capacity_bytes: int
    capacity_kb: float
    available: bool
    output_format: str


# Image dimensions plus capacity figures, optionally broken down per mode.
class ImageInfoResponse(BaseModel):
    width: int
    height: int
    pixels: int
    capacity_bytes: int = Field(description="LSB mode capacity (for backwards compatibility)")
    capacity_kb: int = Field(description="LSB mode capacity in KB")
    modes: dict[str, ModeCapacity] | None = Field(
        default=None, description="Capacity by embedding mode (v3.0+)"
    )


class CompareModesRequest(BaseModel):
    """Request for comparing embedding modes."""

    carrier_image_base64: str
    payload_size: int | None = Field(
        default=None, description="Optional payload size to check if it fits"
    )


class CompareModesResponse(BaseModel):
    """Response comparing LSB and DCT modes."""

    width: int
    height: int
    lsb: dict
    dct: dict
    # Filled in by /compare only when the request carried a payload size.
    payload_check: dict | None = None
    recommendation: str


class DctModeInfo(BaseModel):
    """Detailed DCT mode information."""

    available: bool
    name: str
    description: str
    output_formats: list[str]
    color_modes: list[str]
    capacity_ratio: str
    requires: str


class ChannelStatusResponse(BaseModel):
    """Response for channel key status (v4.0.0)."""

    mode: str = Field(description="'public' or 'private'")
    configured: bool = Field(description="Whether a channel key is configured")
    fingerprint: str | None = Field(default=None, description="Key fingerprint (partial)")
    source: str | None = Field(default=None, description="Where the key comes from")
    key: str | None = Field(default=None, description="Full key (only if reveal=true)")
class ChannelGenerateResponse(BaseModel):
    """Response for channel key generation (v4.0.0)."""

    key: str = Field(description="Generated channel key")
    fingerprint: str = Field(description="Key fingerprint")
    saved: bool = Field(default=False, description="Whether key was saved to config")
    save_location: str | None = Field(default=None, description="Where key was saved")


class ChannelSetRequest(BaseModel):
    """Request to set channel key (v4.0.0)."""

    key: str = Field(description="Channel key to set")
    # Plain str here; the /channel/set route rejects anything but 'user'/'project'.
    location: str = Field(default="user", description="'user' or 'project'")


class AuthStatusResponse(BaseModel):
    """Response for API key authentication status."""

    enabled: bool = Field(description="Whether API key auth is enabled")
    total_keys: int = Field(description="Total number of configured API keys")
    user_keys: int = Field(description="Keys in user config")
    project_keys: int = Field(description="Keys in project config")
    env_configured: bool = Field(description="Whether env var key is set")


class AuthKeyInfo(BaseModel):
    """Info about a single API key (not the actual key)."""

    name: str
    created: str


class ModesResponse(BaseModel):
    """Response showing available embedding modes."""

    lsb: dict
    dct: DctModeInfo
    audio: dict | None = Field(default=None, description="Audio steganography modes (v4.3.0)")
    # Channel key status (v4.0.0)
    channel: dict | None = Field(default=None, description="Channel key status (v4.0.0)")


# Payload of GET /: library version, optional-feature flags and limits.
class StatusResponse(BaseModel):
    version: str
    has_argon2: bool
    has_qrcode_read: bool
    has_qrcode_write: bool  # v4.2.0: QR generation capability
    has_dct: bool
    has_audio: bool = Field(default=False, description="Audio steganography support (v4.3.0)")
    max_payload_kb: int
    available_modes: list[str]
    dct_features: dict | None = Field(default=None, description="DCT mode features (v3.0.1+)")
    # Channel key status (v4.0.0)
    channel: dict | None = Field(default=None, description="Channel key status (v4.0.0)")
    breaking_changes: dict = Field(description="v4.0.0 breaking changes")
# Result of POST /extract-key-from-qr; failures are reported via `error`.
class QrExtractResponse(BaseModel):
    success: bool
    key_pem: str | None = None
    error: str | None = None


class QrGenerateRequest(BaseModel):
    """Request to generate QR code from RSA key."""

    key_pem: str = Field(..., description="RSA private key in PEM format")
    output_format: str = Field(
        default="png",
        description="Output format: 'png', 'jpg', or 'ascii'",
    )
    compress: bool = Field(
        default=True,
        description="Compress key data with zstd (recommended for larger keys)",
    )


class QrGenerateResponse(BaseModel):
    """Response containing generated QR code."""

    success: bool
    format: str | None = None
    qr_data: str | None = Field(
        default=None,
        description="Base64-encoded image data (for png/jpg) or ASCII string",
    )
    error: str | None = None


class WillFitRequest(BaseModel):
    """Request to check if payload will fit."""

    carrier_image_base64: str
    payload_size: int
    embed_mode: EmbedModeType = "lsb"


class WillFitResponse(BaseModel):
    """Response for will_fit check."""

    fits: bool
    payload_size: int
    capacity: int
    usage_percent: float
    headroom: int
    mode: str


# Generic error envelope: short error text plus optional detail.
class ErrorResponse(BaseModel):
    error: str
    detail: str | None = None


# --- Audio models (v4.3.0) ---


class AudioEncodeRequest(BaseModel):
    """Request to encode a text message into audio."""

    message: str
    reference_photo_base64: str
    carrier_audio_base64: str
    passphrase: str = Field(description="Passphrase for key derivation")
    pin: str = ""
    rsa_key_base64: str | None = None
    rsa_password: str | None = None
    channel_key: str | None = Field(
        default=None,
        description="Channel key for deployment isolation. null=auto, ''=public, 'XXXX-...'=explicit",
    )
    embed_mode: AudioEmbedModeType = Field(
        default="audio_lsb",
        description="Embedding mode: 'audio_lsb' (default) or 'audio_spread' (DSSS)",
    )
    chip_tier: int | None = Field(
        default=None,
        description="Spread spectrum chip tier: 0=lossless(256), 1=high_lossy(512), 2=low_lossy(1024). Only for audio_spread.",
    )
class AudioEncodeFileRequest(BaseModel):
    """Request to encode a file into audio."""

    # Payload: raw file bytes (base64) with the name/MIME type stored alongside.
    file_data_base64: str
    filename: str
    mime_type: str | None = None
    # Carrier and credentials, same fields as the audio text-encode request.
    reference_photo_base64: str
    carrier_audio_base64: str
    passphrase: str = Field(description="Passphrase for key derivation")
    pin: str = ""
    rsa_key_base64: str | None = None
    rsa_password: str | None = None
    channel_key: str | None = Field(
        default=None,
        description="Channel key for deployment isolation. null=auto, ''=public, 'XXXX-...'=explicit",
    )
    embed_mode: AudioEmbedModeType = Field(
        default="audio_lsb",
        description="Embedding mode: 'audio_lsb' (default) or 'audio_spread' (DSSS)",
    )
    chip_tier: int | None = Field(
        default=None,
        description="Spread spectrum chip tier: 0=lossless(256), 1=high_lossy(512), 2=low_lossy(1024). Only for audio_spread.",
    )
class AudioEncodeResponse(BaseModel):
    """Response from audio encode operations."""

    stego_audio_base64: str
    embed_mode: str = Field(description="Embedding mode used: 'audio_lsb' or 'audio_spread'")
    stats: dict = Field(description="Embedding statistics (samples_modified, capacity_used, etc.)")
    channel_mode: str = Field(default="public", description="Channel mode: 'public' or 'private'")
    channel_fingerprint: str | None = Field(
        default=None, description="Channel key fingerprint (if private mode)"
    )


class AudioDecodeRequest(BaseModel):
    """Request to decode a message or file from stego audio."""

    stego_audio_base64: str
    reference_photo_base64: str
    passphrase: str = Field(description="Passphrase for key derivation")
    pin: str = ""
    rsa_key_base64: str | None = None
    rsa_password: str | None = None
    channel_key: str | None = Field(
        default=None,
        description="Channel key for decryption. null=auto, ''=public, 'XXXX-...'=explicit",
    )
    embed_mode: AudioExtractModeType = Field(
        default="audio_auto",
        description="Extraction mode: 'audio_auto' (default), 'audio_lsb', or 'audio_spread'",
    )


class AudioInfoResponse(BaseModel):
    """Response with audio file metadata and capacity info."""

    sample_rate: int
    channels: int
    duration_seconds: float
    num_samples: int
    format: str
    bit_depth: int | None = None
    bitrate: int | None = None
    capacity_lsb: int = Field(description="LSB mode capacity in bytes")
    capacity_spread: int = Field(description="Spread spectrum mode capacity in bytes")


class AudioCapacityRequest(BaseModel):
    """Request to check if a payload fits in audio carrier."""

    carrier_audio_base64: str
    # ge=1: a zero-byte payload is rejected at validation time.
    payload_size: int = Field(ge=1, description="Payload size in bytes")
    embed_mode: AudioEmbedModeType = Field(
        default="audio_lsb", description="Embedding mode to check capacity for"
    )


class AudioCapacityResponse(BaseModel):
    """Response for audio capacity check."""

    fits: bool
    payload_size: int
    capacity_bytes: int
    usage_percent: float
    embed_mode: str
check.""" + + fits: bool + payload_size: int + capacity_bytes: int + usage_percent: float + embed_mode: str + + +# ============================================================================ +# HELPER: RESOLVE CHANNEL KEY +# ============================================================================ + + +def _resolve_channel_key(channel_key: str | None) -> str | None: + """ + Resolve channel key from API parameter. + + Wrapper around library's resolve_channel_key with HTTP exception handling. + + Returns: + Resolved channel key to pass to encode/decode + + Raises: + HTTPException: If key format is invalid + """ + from soosef.stegasoo.channel import resolve_channel_key + + try: + return resolve_channel_key(channel_key) + except (ValueError, FileNotFoundError) as e: + raise HTTPException(400, str(e)) + + +def _get_channel_info(channel_key: str | None) -> tuple[str, str | None]: + """ + Get channel mode and fingerprint for response. + + Uses library's get_channel_response_info for consistent formatting. + + Returns: + (mode, fingerprint) tuple + """ + from soosef.stegasoo.channel import get_channel_response_info + + info = get_channel_response_info(channel_key) + return info["mode"], info.get("fingerprint") + + +# ============================================================================ +# HELPER: ASYNC EXECUTION +# ============================================================================ + + +async def run_in_thread(func, *args, **kwargs): + """ + Run a CPU-bound function in a thread pool. + + This allows the FastAPI server to handle other requests while + encode/decode operations are running. Essential for Pi deployments + where operations can take several seconds. + + Usage: + result = await run_in_thread(encode, message=msg, carrier_image=carrier, ...) 
# ============================================================================
# ROUTES - STATUS & INFO
# ============================================================================


@app.get("/", response_model=StatusResponse)
async def root():
    """Get API status and configuration."""
    # LSB is always offered; DCT and audio are added when their support
    # checks pass.
    modes = ["lsb"]
    dct_features = None
    if has_dct_support():
        modes.append("dct")
        dct_features = {
            "output_formats": ["png", "jpeg"],
            "color_modes": ["grayscale", "color"],
            "default_output_format": "png",
            "default_color_mode": "grayscale",
        }

    # Channel key status (v4.0.0) - the full key is never part of this summary.
    status = get_channel_status()
    channel_summary = {
        "mode": status["mode"],
        "configured": status["configured"],
        "fingerprint": status.get("fingerprint"),
        "source": status.get("source"),
    }

    # Audio modes (v4.3.0)
    if HAS_AUDIO_SUPPORT:
        modes.extend(("audio_lsb", "audio_spread"))

    v3_notes = {
        "date_removed": "No date_str parameter needed - encode/decode anytime",
        "passphrase_renamed": "day_phrase → passphrase (single passphrase, no daily rotation)",
    }
    return StatusResponse(
        version=__version__,
        has_argon2=has_argon2(),
        has_qrcode_read=HAS_QR_READ,
        has_qrcode_write=HAS_QR_WRITE,
        has_dct=has_dct_support(),
        has_audio=HAS_AUDIO_SUPPORT,
        max_payload_kb=MAX_FILE_PAYLOAD_SIZE // 1024,
        available_modes=modes,
        dct_features=dct_features,
        channel=channel_summary,
        breaking_changes={
            "v4_channel_key": "Messages encoded with channel key require same key to decode",
            "format_version": 5,
            "backward_compatible": False,
            "v3_notes": v3_notes,
        },
    )


@app.get("/modes", response_model=ModesResponse)
async def api_modes():
    """
    Get available embedding modes and their status.

    v4.0.0: Also includes channel key status.
    """
    # Channel status (no "source" entry here, unlike the root endpoint).
    status = get_channel_status()
    channel_summary = {
        "mode": status["mode"],
        "configured": status["configured"],
        "fingerprint": status.get("fingerprint"),
    }

    # Audio modes (v4.3.0): described only when audio support is present.
    audio_summary = (
        {
            "available": True,
            "modes": {
                "audio_lsb": {
                    "name": "Audio LSB",
                    "description": "Embed in audio sample LSBs, high capacity",
                    "output_format": "WAV",
                },
                "audio_spread": {
                    "name": "Spread Spectrum (DSSS)",
                    "description": "Direct-sequence spread spectrum with Reed-Solomon ECC, better stealth",
                    "output_format": "WAV",
                },
            },
            "supported_formats": ["WAV", "FLAC", "MP3", "OGG", "AAC", "M4A"],
            "output_format": "WAV",
            "requires": "soundfile",
        }
        if HAS_AUDIO_SUPPORT
        else None
    )

    lsb_summary = {
        "available": True,
        "name": "Spatial LSB",
        "description": "Embed in pixel LSBs, outputs PNG/BMP",
        "output_format": "PNG (color)",
        "capacity_ratio": "100%",
    }
    dct_summary = DctModeInfo(
        available=has_dct_support(),
        name="DCT Domain",
        description="Embed in DCT coefficients, frequency domain steganography",
        output_formats=["png", "jpeg"],
        color_modes=["grayscale", "color"],
        capacity_ratio="~20% of LSB",
        requires="scipy",
    )
    return ModesResponse(
        lsb=lsb_summary,
        dct=dct_summary,
        audio=audio_summary,
        channel=channel_summary,
    )
# ============================================================================
# ROUTES - CHANNEL KEY (v4.0.0)
# ============================================================================


# NOTE(review): unlike the other /channel routes, this one declares no
# Depends(require_api_key), yet reveal=true returns the full channel key.
# Confirm that unauthenticated access is intended.
@app.get("/channel/status", response_model=ChannelStatusResponse)
async def api_channel_status(
    reveal: bool = Query(False, description="Include full key in response")
):
    """
    Get current channel key status.

    v4.0.0: New endpoint for channel key management.

    Returns mode (public/private), fingerprint, and source.
    Use reveal=true to include the full key.
    """
    status = get_channel_status()

    response = ChannelStatusResponse(
        mode=status["mode"],
        configured=status["configured"],
        fingerprint=status.get("fingerprint"),
        source=status.get("source"),
    )
    # The key is exposed only on explicit request and when one is configured.
    if reveal and status["configured"]:
        response.key = status.get("key")
    return response
@app.post("/channel/generate", response_model=ChannelGenerateResponse)
async def api_channel_generate(
    _: str = Depends(require_api_key),
    save: bool = Query(False, description="Save to user config"),
    save_project: bool = Query(False, description="Save to project config"),
):
    """
    Generate a new channel key.

    v4.0.0: New endpoint for channel key management.

    Optionally saves to user config (~/.stegasoo/channel.key) or
    project config (./config/channel.key).
    """
    if save and save_project:
        raise HTTPException(400, "Cannot use both save and save_project")

    new_key = generate_channel_key()
    # Masked form: only the first and last four characters stay visible.
    masked = f"{new_key[:4]}-••••-••••-••••-••••-••••-••••-{new_key[-4:]}"

    # At most one flag is set here; map it to a config location and the
    # path reported back to the caller.
    if save:
        target, reported_path = "user", "~/.stegasoo/channel.key"
    elif save_project:
        target, reported_path = "project", "./config/channel.key"
    else:
        target, reported_path = None, None

    if target is not None:
        set_channel_key(new_key, location=target)

    return ChannelGenerateResponse(
        key=new_key,
        fingerprint=masked,
        saved=target is not None,
        save_location=reported_path,
    )


@app.post("/channel/set")
async def api_channel_set(request: ChannelSetRequest, _: str = Depends(require_api_key)):
    """
    Set/save a channel key to config.

    v4.0.0: New endpoint for channel key management.
    """
    # Key format is checked first, then the target location.
    if not validate_channel_key(request.key):
        raise HTTPException(
            400, "Invalid channel key format. Expected: XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX"
        )
    if request.location not in ("user", "project"):
        raise HTTPException(400, "location must be 'user' or 'project'")

    set_channel_key(request.key, location=request.location)

    # Report what the library now considers the active key.
    current = get_channel_status()
    return {
        "success": True,
        "location": current.get("source"),
        "fingerprint": current.get("fingerprint"),
    }
@app.delete("/channel")
async def api_channel_clear(
    _: str = Depends(require_api_key),
    location: str = Query("user", description="'user', 'project', or 'all'"),
):
    """
    Clear/remove channel key from config.

    v4.0.0: New endpoint for channel key management.

    Note: Does not affect environment variables.
    """
    # Expand the requested location into the config locations to clear.
    if location == "all":
        targets = ("user", "project")
    elif location in ("user", "project"):
        targets = (location,)
    else:
        raise HTTPException(400, "location must be 'user', 'project', or 'all'")

    for target in targets:
        clear_channel_key(location=target)

    # A key may still be active from a source that was not cleared.
    current = get_channel_status()
    return {
        "success": True,
        "mode": current["mode"],
        "still_configured": current["configured"],
        "remaining_source": current.get("source"),
    }


# ============================================================================
# ROUTES - AUTHENTICATION (v4.2.1)
# ============================================================================


@app.get("/auth/status", response_model=AuthStatusResponse)
async def api_auth_status():
    """
    Get API key authentication status.

    v4.2.1: New endpoint for auth status.
    Returns whether auth is enabled and key counts.
    """
    info = get_api_key_status()
    # Copy exactly the fields the response model declares.
    wanted = ("enabled", "total_keys", "user_keys", "project_keys", "env_configured")
    return AuthStatusResponse(**{field: info[field] for field in wanted})
@app.get("/auth/keys", response_model=list[AuthKeyInfo])
async def api_auth_list_keys(
    location: str = Query("user", description="'user' or 'project'"),
    _: str = Depends(require_api_key),
):
    """
    List configured API keys (names only, not actual keys).

    v4.2.1: New endpoint for auth management.
    Requires authentication.
    """
    if location not in ("user", "project"):
        raise HTTPException(400, "location must be 'user' or 'project'")

    # Only name and creation time are copied into the response.
    listing = []
    for entry in list_api_keys(location):
        listing.append(AuthKeyInfo(name=entry["name"], created=entry["created"]))
    return listing


@app.post("/auth/keys")
async def api_auth_create_key(
    name: str = Query(..., description="Name for the new API key"),
    location: str = Query("user", description="'user' or 'project'"),
    _: str = Depends(require_api_key),
):
    """
    Create a new API key.

    v4.2.1: New endpoint for auth management.
    Returns the key ONCE - it cannot be retrieved again!
    Requires authentication (or no keys configured yet).
    """
    if location not in ("user", "project"):
        raise HTTPException(400, "location must be 'user' or 'project'")

    try:
        new_key = add_api_key(name, location)
    except ValueError as e:
        raise HTTPException(400, str(e))

    return {
        "success": True,
        "name": name,
        "key": new_key,
        "warning": "Save this key now! It cannot be retrieved again.",
    }


@app.delete("/auth/keys")
async def api_auth_delete_key(
    name: str = Query(..., description="Name of key to delete"),
    location: str = Query("user", description="'user' or 'project'"),
    _: str = Depends(require_api_key),
):
    """
    Delete an API key by name.

    v4.2.1: New endpoint for auth management.
    Requires authentication.
    """
    if location not in ("user", "project"):
        raise HTTPException(400, "location must be 'user' or 'project'")

    # A falsy result from remove_api_key is reported as "not found".
    if not remove_api_key(name, location):
        raise HTTPException(404, f"Key '{name}' not found in {location} config")
    return {"success": True, "deleted": name}
@app.post("/compare", response_model=CompareModesResponse)
async def api_compare_modes(request: CompareModesRequest, _: str = Depends(require_api_key)):
    """
    Compare LSB and DCT embedding modes for a carrier image.

    Returns capacity for both modes and recommendation.
    Optionally checks if a specific payload size would fit.
    """
    try:
        carrier = base64.b64decode(request.carrier_image_base64)
        comparison = compare_modes(carrier)
        lsb_info = comparison["lsb"]
        dct_info = comparison["dct"]

        response = CompareModesResponse(
            width=comparison["width"],
            height=comparison["height"],
            lsb={
                "capacity_bytes": lsb_info["capacity_bytes"],
                "capacity_kb": round(lsb_info["capacity_kb"], 1),
                "available": True,
                "output_format": lsb_info["output"],
            },
            dct={
                "capacity_bytes": dct_info["capacity_bytes"],
                "capacity_kb": round(dct_info["capacity_kb"], 1),
                "available": dct_info["available"],
                "output_formats": ["png", "jpeg"],
                "color_modes": ["grayscale", "color"],
                "ratio_vs_lsb_percent": round(dct_info["ratio_vs_lsb"], 1),
            },
            recommendation=(
                "lsb" if not dct_info["available"] else "dct for stealth, lsb for capacity"
            ),
        )

        # Compare against None, not truthiness: an explicit payload_size of 0
        # is a real request and must still produce a payload_check.
        if request.payload_size is not None:
            fits_lsb = request.payload_size <= lsb_info["capacity_bytes"]
            fits_dct = request.payload_size <= dct_info["capacity_bytes"]

            response.payload_check = {
                "size_bytes": request.payload_size,
                "fits_lsb": fits_lsb,
                "fits_dct": fits_dct,
            }

            # Update recommendation based on payload
            if fits_dct and dct_info["available"]:
                response.recommendation = "dct (payload fits, better stealth)"
            elif fits_lsb:
                response.recommendation = "lsb (payload too large for dct)"
            else:
                response.recommendation = "none (payload too large for both modes)"

        return response

    except Exception as e:
        # Any failure is reported as a 500; the cause is chained so it shows
        # up in server-side tracebacks.
        raise HTTPException(500, str(e)) from e
@app.post("/will-fit", response_model=WillFitResponse)
async def api_will_fit(request: WillFitRequest, _: str = Depends(require_api_key)):
    """
    Check if a payload of given size will fit in the carrier image.

    Supports both LSB and DCT modes.
    """
    mode = request.embed_mode
    try:
        # Validate mode
        if mode == "dct" and not has_dct_support():
            raise HTTPException(400, "DCT mode requires scipy. Install with: pip install scipy")

        carrier_bytes = base64.b64decode(request.carrier_image_base64)
        fit = will_fit_by_mode(request.payload_size, carrier_bytes, embed_mode=mode)

        return WillFitResponse(
            fits=fit["fits"],
            payload_size=fit["payload_size"],
            capacity=fit["capacity"],
            usage_percent=round(fit["usage_percent"], 1),
            headroom=fit["headroom"],
            mode=mode,
        )

    except HTTPException:
        # Let the 400 raised above through instead of wrapping it as a 500.
        raise
    except Exception as e:
        raise HTTPException(500, str(e))


# ============================================================================
# ROUTES - QR CODE
# ============================================================================


@app.post("/extract-key-from-qr", response_model=QrExtractResponse)
async def api_extract_key_from_qr(
    _: str = Depends(require_api_key),
    qr_image: UploadFile = File(..., description="QR code image containing RSA key"),
):
    """
    Extract RSA key from a QR code image.

    Supports both compressed (STEGASOO-Z: prefix) and uncompressed keys.
    Returns the PEM-encoded key if found.
    """
    if not HAS_QR_READ:
        raise HTTPException(501, "QR code reading not available. Install pyzbar and libzbar.")

    try:
        uploaded = await qr_image.read()
        pem = extract_key_from_qr(uploaded)
    except Exception as e:
        # Failures are reported in the response body rather than as HTTP errors.
        return QrExtractResponse(success=False, error=str(e))

    if not pem:
        return QrExtractResponse(success=False, error="No valid RSA key found in QR code")
    return QrExtractResponse(success=True, key_pem=pem)
@app.post("/generate-key-qr", response_model=QrGenerateResponse)
async def api_generate_key_qr(request: QrGenerateRequest, _: str = Depends(require_api_key)):
    """
    Generate QR code from an RSA private key.

    Supports PNG, JPG, and ASCII output formats.
    Uses zstd compression by default for better QR code density.
    """
    if not HAS_QR_WRITE:
        raise HTTPException(501, "QR code generation not available. Install qrcode library.")

    try:
        # Format names are matched case-insensitively.
        fmt = request.output_format.lower()

        if fmt == "ascii":
            text_qr = generate_qr_ascii(
                request.key_pem,
                compress=request.compress,
                invert=False,
            )
            return QrGenerateResponse(success=True, format="ascii", qr_data=text_qr)

        if fmt in ("png", "jpg", "jpeg"):
            import base64

            image_bytes = generate_qr_code(
                request.key_pem,
                compress=request.compress,
                output_format=fmt,
            )
            # Image bytes travel inside the JSON response as base64 text.
            encoded = base64.b64encode(image_bytes).decode("ascii")
            return QrGenerateResponse(success=True, format=fmt, qr_data=encoded)

        return QrGenerateResponse(
            success=False,
            error=f"Unsupported format: {fmt}. Use 'png', 'jpg', or 'ascii'",
        )

    except ValueError as e:
        # ValueError text is passed through as-is; other failures get a prefix.
        return QrGenerateResponse(success=False, error=str(e))
    except Exception as e:
        return QrGenerateResponse(success=False, error=f"QR generation failed: {e}")
# ============================================================================
# ROUTES - GENERATE
# ============================================================================


@app.post("/generate", response_model=GenerateResponse)
async def api_generate(request: GenerateRequest, _: str = Depends(require_api_key)):
    """
    Generate credentials for encoding/decoding.

    At least one of use_pin or use_rsa must be True.

    v3.2.0: Generates single passphrase (no daily rotation).
    Default increased to 4 words for better security.
    """
    if not (request.use_pin or request.use_rsa):
        raise HTTPException(400, "Must enable at least one of use_pin or use_rsa")

    if request.rsa_bits not in VALID_RSA_SIZES:
        raise HTTPException(400, f"rsa_bits must be one of {VALID_RSA_SIZES}")

    try:
        generated = generate_credentials(
            use_pin=request.use_pin,
            use_rsa=request.use_rsa,
            pin_length=request.pin_length,
            rsa_bits=request.rsa_bits,
            passphrase_words=request.words_per_passphrase,
        )

        # Per-factor entropy figures plus the combined total.
        entropy_report = {
            "passphrase": generated.passphrase_entropy,
            "pin": generated.pin_entropy,
            "rsa": generated.rsa_entropy,
            "total": generated.total_entropy,
        }
        return GenerateResponse(
            passphrase=generated.passphrase,
            pin=generated.pin,
            rsa_key_pem=generated.rsa_key_pem,
            entropy=entropy_report,
            phrases=None,  # Legacy field removed
        )
    except Exception as e:
        raise HTTPException(500, str(e))


# ============================================================================
# HELPER FUNCTION FOR DCT PARAMETERS
# ============================================================================


def _get_dct_params(embed_mode: str, dct_output_format: str, dct_color_mode: str) -> dict:
    """
    Build the DCT-only keyword arguments for encode().

    Returns an empty dict unless embed_mode is "dct"; in DCT mode returns the
    output format and color mode under the keyword names encode() is called with.
    """
    if embed_mode == "dct":
        return {
            "dct_output_format": dct_output_format,
            "dct_color_mode": dct_color_mode,
        }
    return {}
def _get_output_info(
    embed_mode: str, dct_output_format: str, dct_color_mode: str
) -> tuple[str, str, str]:
    """
    Describe the stego output for the response.

    Returns (output_format, color_mode, mime_type). Outside DCT mode the
    DCT arguments are ignored and the result is always PNG / color.
    """
    if embed_mode != "dct":
        return "png", "color", "image/png"

    mime_type = "image/jpeg" if dct_output_format == "jpeg" else "image/png"
    return dct_output_format, dct_color_mode, mime_type


# ============================================================================
# ROUTES - ENCODE (JSON)
# ============================================================================


@app.post("/encode", response_model=EncodeResponse)
async def api_encode(request: EncodeRequest, _: str = Depends(require_api_key)):
    """
    Encode a text message into an image.

    Images must be base64-encoded. Returns base64-encoded stego image.

    v4.0.0: Added channel_key parameter for deployment isolation.
    v3.2.0: No date_str parameter needed - encode anytime!
    """
    # Validate mode
    if request.embed_mode == "dct" and not has_dct_support():
        raise HTTPException(400, "DCT mode requires scipy. Install with: pip install scipy")

    # Resolve channel key
    resolved_channel_key = _resolve_channel_key(request.channel_key)

    try:
        ref_photo = base64.b64decode(request.reference_photo_base64)
        carrier = base64.b64decode(request.carrier_image_base64)
        rsa_key = base64.b64decode(request.rsa_key_base64) if request.rsa_key_base64 else None

        # Common encode() arguments, extended with the DCT options in DCT mode.
        encode_kwargs = {
            "message": request.message,
            "reference_photo": ref_photo,
            "carrier_image": carrier,
            "passphrase": request.passphrase,
            "pin": request.pin,
            "rsa_key_data": rsa_key,
            "rsa_password": request.rsa_password,
            "embed_mode": request.embed_mode,
            "channel_key": resolved_channel_key,
            **_get_dct_params(
                request.embed_mode, request.dct_output_format, request.dct_color_mode
            ),
        }

        # v4.2.0: Run CPU-bound encode in thread pool
        result = await run_in_thread(encode, **encode_kwargs)

        stego_b64 = base64.b64encode(result.stego_image).decode("utf-8")

        # The MIME type is not part of the JSON response.
        output_format, color_mode, _mime = _get_output_info(
            request.embed_mode, request.dct_output_format, request.dct_color_mode
        )

        # Get channel info for response
        channel_mode, channel_fingerprint = _get_channel_info(resolved_channel_key)

        return EncodeResponse(
            stego_image_base64=stego_b64,
            filename=result.filename,
            capacity_used_percent=result.capacity_percent,
            embed_mode=request.embed_mode,
            output_format=output_format,
            color_mode=color_mode,
            channel_mode=channel_mode,
            channel_fingerprint=channel_fingerprint,
            date_used=None,
            day_of_week=None,
        )

    except (CapacityError, StegasooError) as e:
        # Library-reported problems are 400s; anything else is a 500.
        raise HTTPException(400, str(e))
    except Exception as e:
        raise HTTPException(500, str(e))
@app.post("/encode/file", response_model=EncodeResponse)
async def api_encode_file(request: EncodeFileRequest, _: str = Depends(require_api_key)):
    """
    Encode a file into an image (JSON with base64).

    File data must be base64-encoded.

    v4.0.0: Added channel_key parameter for deployment isolation.
    v3.2.0: No date_str parameter needed - encode anytime!
    """
    # Validate mode
    if request.embed_mode == "dct" and not has_dct_support():
        raise HTTPException(400, "DCT mode requires scipy. Install with: pip install scipy")

    # Resolve channel key
    resolved_channel_key = _resolve_channel_key(request.channel_key)

    try:
        file_data = base64.b64decode(request.file_data_base64)
        ref_photo = base64.b64decode(request.reference_photo_base64)
        carrier = base64.b64decode(request.carrier_image_base64)
        rsa_key = base64.b64decode(request.rsa_key_base64) if request.rsa_key_base64 else None

        # The file goes through encode()'s "message" argument, wrapped in a
        # FilePayload that carries its name and MIME type.
        payload = FilePayload(
            data=file_data, filename=request.filename, mime_type=request.mime_type
        )

        # Common encode() arguments, extended with the DCT options in DCT mode.
        encode_kwargs = {
            "message": payload,
            "reference_photo": ref_photo,
            "carrier_image": carrier,
            "passphrase": request.passphrase,
            "pin": request.pin,
            "rsa_key_data": rsa_key,
            "rsa_password": request.rsa_password,
            "embed_mode": request.embed_mode,
            "channel_key": resolved_channel_key,
            **_get_dct_params(
                request.embed_mode, request.dct_output_format, request.dct_color_mode
            ),
        }

        # v4.2.0: Run CPU-bound encode in thread pool
        result = await run_in_thread(encode, **encode_kwargs)

        stego_b64 = base64.b64encode(result.stego_image).decode("utf-8")

        # The MIME type is not part of the JSON response.
        output_format, color_mode, _mime = _get_output_info(
            request.embed_mode, request.dct_output_format, request.dct_color_mode
        )

        # Get channel info for response
        channel_mode, channel_fingerprint = _get_channel_info(resolved_channel_key)

        return EncodeResponse(
            stego_image_base64=stego_b64,
            filename=result.filename,
            capacity_used_percent=result.capacity_percent,
            embed_mode=request.embed_mode,
            output_format=output_format,
            color_mode=color_mode,
            channel_mode=channel_mode,
            channel_fingerprint=channel_fingerprint,
            date_used=None,
            day_of_week=None,
        )

    except (CapacityError, StegasooError) as e:
        # Library-reported problems are 400s; anything else is a 500.
        raise HTTPException(400, str(e))
    except Exception as e:
        raise HTTPException(500, str(e))
StegasooError as e: + raise HTTPException(400, str(e)) + except Exception as e: + raise HTTPException(500, str(e)) + + +# ============================================================================ +# ROUTES - DECODE (JSON) +# ============================================================================ + + +@app.post("/decode", response_model=DecodeResponse) +async def api_decode(request: DecodeRequest, _: str = Depends(require_api_key)): + """ + Decode a message or file from a stego image. + + Returns payload_type to indicate if result is text or file. + + v4.0.0: Added channel_key parameter - must match encoding key. + v3.2.0: No date_str parameter needed - decode anytime! + """ + # Validate mode + if request.embed_mode == "dct" and not has_dct_support(): + raise HTTPException(400, "DCT mode requires scipy. Install with: pip install scipy") + + # Resolve channel key + resolved_channel_key = _resolve_channel_key(request.channel_key) + + try: + stego = base64.b64decode(request.stego_image_base64) + ref_photo = base64.b64decode(request.reference_photo_base64) + rsa_key = base64.b64decode(request.rsa_key_base64) if request.rsa_key_base64 else None + + # v4.2.0: Run CPU-bound decode in thread pool + result = await run_in_thread( + decode, + stego_image=stego, + reference_photo=ref_photo, + passphrase=request.passphrase, + pin=request.pin, + rsa_key_data=rsa_key, + rsa_password=request.rsa_password, + embed_mode=request.embed_mode, + channel_key=resolved_channel_key, + ) + + if result.is_file: + return DecodeResponse( + payload_type="file", + file_data_base64=base64.b64encode(result.file_data).decode("utf-8"), + filename=result.filename, + mime_type=result.mime_type, + ) + else: + return DecodeResponse(payload_type="text", message=result.message) + + except DecryptionError as e: + # Provide helpful error message for channel key issues + error_msg = str(e) + if "channel key" in error_msg.lower(): + raise HTTPException(401, error_msg) + raise HTTPException(401, 
@app.post("/encode/multipart")
async def api_encode_multipart(
    _: str = Depends(require_api_key),
    passphrase: str = Form(..., description="Passphrase (v3.2.0: renamed from day_phrase)"),
    reference_photo: UploadFile = File(...),
    carrier: UploadFile = File(...),
    message: str = Form(""),
    payload_file: UploadFile | None = File(None),
    pin: str = Form(""),
    rsa_key: UploadFile | None = File(None),
    rsa_key_qr: UploadFile | None = File(None),
    rsa_password: str = Form(""),
    # Channel key (v4.0.0)
    channel_key: str = Form(
        "auto", description="Channel key: 'auto'=server config, 'none'=public, 'XXXX-...'=explicit"
    ),
    embed_mode: str = Form("lsb"),
    dct_output_format: str = Form("png"),
    dct_color_mode: str = Form("grayscale"),
):
    """
    Encode using multipart form data (file uploads).

    Provide either 'message' (text) or 'payload_file' (binary file).
    RSA key can be provided as 'rsa_key' (.pem file) or 'rsa_key_qr' (QR code image).
    Returns the stego image directly with metadata headers.

    v4.0.0: Added channel_key parameter for deployment isolation.
            Use 'auto' for server config, 'none' for public mode.
    v3.2.0: No date_str parameter needed - encode anytime!
    """
    # --- Validate form options before reading any upload ---
    if embed_mode not in ("lsb", "dct"):
        raise HTTPException(400, "embed_mode must be 'lsb' or 'dct'")
    if embed_mode == "dct" and not has_dct_support():
        raise HTTPException(400, "DCT mode requires scipy. Install with: pip install scipy")
    if dct_output_format not in ("png", "jpeg"):
        raise HTTPException(400, "dct_output_format must be 'png' or 'jpeg'")
    if dct_color_mode not in ("grayscale", "color"):
        raise HTTPException(400, "dct_color_mode must be 'grayscale' or 'color'")

    # --- Channel key (v4.0.0) ---
    # "auto" -> None (server config), "none" -> "" (public), anything else is
    # handed to _resolve_channel_key as an explicit key.
    channel_choice = channel_key.lower()
    if channel_choice == "auto":
        resolved_channel_key = None
    elif channel_choice == "none":
        resolved_channel_key = ""
    else:
        resolved_channel_key = _resolve_channel_key(channel_key)

    try:
        ref_data = await reference_photo.read()
        carrier_data = await carrier.read()

        # --- RSA key: a .pem upload takes precedence over a QR code image ---
        rsa_key_data = None
        key_came_from_qr = False
        if rsa_key and rsa_key.filename:
            rsa_key_data = await rsa_key.read()
        elif rsa_key_qr and rsa_key_qr.filename:
            if not HAS_QR_READ:
                raise HTTPException(
                    501, "QR code reading not available. Install pyzbar and libzbar."
                )
            key_pem = extract_key_from_qr(await rsa_key_qr.read())
            if not key_pem:
                raise HTTPException(400, "Could not extract RSA key from QR code image")
            rsa_key_data = key_pem.encode("utf-8")
            key_came_from_qr = True

        # QR code keys are never password-protected; an empty password means "none".
        if key_came_from_qr:
            effective_password = None
        else:
            effective_password = rsa_password or None

        # --- Payload: an uploaded file wins over a text message ---
        if payload_file and payload_file.filename:
            payload = FilePayload(
                data=await payload_file.read(),
                filename=payload_file.filename,
                mime_type=payload_file.content_type,
            )
        elif message:
            payload = message
        else:
            raise HTTPException(400, "Must provide either 'message' or 'payload_file'")

        # v4.2.0: Run CPU-bound encode in thread pool
        result = await run_in_thread(
            encode,
            message=payload,
            reference_photo=ref_data,
            carrier_image=carrier_data,
            passphrase=passphrase,
            pin=pin,
            rsa_key_data=rsa_key_data,
            rsa_password=effective_password,
            embed_mode=embed_mode,
            channel_key=resolved_channel_key,
            **_get_dct_params(embed_mode, dct_output_format, dct_color_mode),
        )

        output_format, color_mode, mime_type = _get_output_info(
            embed_mode, dct_output_format, dct_color_mode
        )
        channel_mode, channel_fingerprint = _get_channel_info(resolved_channel_key)

        # Metadata travels in response headers because the body is the raw image.
        headers = {
            "Content-Disposition": f"attachment; filename={result.filename}",
            "X-Stegasoo-Capacity-Percent": f"{result.capacity_percent:.1f}",
            "X-Stegasoo-Embed-Mode": embed_mode,
            "X-Stegasoo-Output-Format": output_format,
            "X-Stegasoo-Color-Mode": color_mode,
            "X-Stegasoo-Channel-Mode": channel_mode,
            "X-Stegasoo-Version": __version__,
        }
        if channel_fingerprint:
            headers["X-Stegasoo-Channel-Fingerprint"] = channel_fingerprint

        return Response(content=result.stego_image, media_type=mime_type, headers=headers)

    except CapacityError as e:
        raise HTTPException(400, str(e))
    except StegasooError as e:
        raise HTTPException(400, str(e))
    except HTTPException:
        # Let the 400/501 responses raised above pass through unchanged.
        raise
    except Exception as e:
        raise HTTPException(500, str(e))
Install with: pip install scipy") + + # Resolve channel key (v4.0.0) + if channel_key.lower() == "auto": + resolved_channel_key = None # Auto mode + elif channel_key.lower() == "none": + resolved_channel_key = "" # Public mode + else: + resolved_channel_key = _resolve_channel_key(channel_key) + + try: + ref_data = await reference_photo.read() + stego_data = await stego_image.read() + + # Handle RSA key from .pem file or QR code image + rsa_key_data = None + rsa_key_from_qr = False + + if rsa_key and rsa_key.filename: + rsa_key_data = await rsa_key.read() + elif rsa_key_qr and rsa_key_qr.filename: + if not HAS_QR_READ: + raise HTTPException( + 501, "QR code reading not available. Install pyzbar and libzbar." + ) + qr_image_data = await rsa_key_qr.read() + key_pem = extract_key_from_qr(qr_image_data) + if not key_pem: + raise HTTPException(400, "Could not extract RSA key from QR code image") + rsa_key_data = key_pem.encode("utf-8") + rsa_key_from_qr = True + + # QR code keys are never password-protected + effective_password = None if rsa_key_from_qr else (rsa_password if rsa_password else None) + + # v4.2.0: Run CPU-bound decode in thread pool + result = await run_in_thread( + decode, + stego_image=stego_data, + reference_photo=ref_data, + passphrase=passphrase, + pin=pin, + rsa_key_data=rsa_key_data, + rsa_password=effective_password, + embed_mode=embed_mode, + channel_key=resolved_channel_key, + ) + + if result.is_file: + return DecodeResponse( + payload_type="file", + file_data_base64=base64.b64encode(result.file_data).decode("utf-8"), + filename=result.filename, + mime_type=result.mime_type, + ) + else: + return DecodeResponse(payload_type="text", message=result.message) + + except DecryptionError as e: + error_msg = str(e) + if "channel key" in error_msg.lower(): + raise HTTPException(401, error_msg) + raise HTTPException(401, "Decryption failed. 
@app.post("/image/info", response_model=ImageInfoResponse)
async def api_image_info(
    _: str = Depends(require_api_key),
    image: UploadFile = File(...),
    include_modes: bool = Query(True, description="Include capacity by mode (v3.0+)"),
):
    """
    Get information about an image's capacity.

    Optionally includes capacity for both LSB and DCT modes.
    """
    try:
        image_data = await image.read()

        validation = validate_image(image_data, check_size=False)
        if not validation.is_valid:
            raise HTTPException(400, validation.error_message)

        # The top-level capacity figures always describe LSB mode.
        lsb_bytes = calculate_capacity_by_mode(image_data, "lsb")
        details = validation.details
        response = ImageInfoResponse(
            width=details["width"],
            height=details["height"],
            pixels=details["pixels"],
            capacity_bytes=lsb_bytes,
            capacity_kb=lsb_bytes // 1024,
        )

        if not include_modes:
            return response

        # Per-mode breakdown, taken from compare_modes().
        comparison = compare_modes(image_data)
        lsb_mode = ModeCapacity(
            capacity_bytes=comparison["lsb"]["capacity_bytes"],
            capacity_kb=round(comparison["lsb"]["capacity_kb"], 1),
            available=True,
            output_format=comparison["lsb"]["output"],
        )
        dct_mode = ModeCapacity(
            capacity_bytes=comparison["dct"]["capacity_bytes"],
            capacity_kb=round(comparison["dct"]["capacity_kb"], 1),
            available=comparison["dct"]["available"],
            output_format="PNG/JPEG (grayscale or color)",
        )
        response.modes = {"lsb": lsb_mode, "dct": dct_mode}
        return response

    except HTTPException:
        # Keep the 400 raised for an invalid image instead of wrapping it in a 500.
        raise
    except Exception as e:
        raise HTTPException(500, str(e))
def _require_audio():
    """Raise HTTP 501 unless HAS_AUDIO_SUPPORT is truthy."""
    if HAS_AUDIO_SUPPORT:
        return
    raise HTTPException(
        501, "Audio steganography not available. Install with: pip install stegasoo[audio]"
    )
@app.post("/audio/encode/file", response_model=AudioEncodeResponse)
async def api_audio_encode_file(request: AudioEncodeFileRequest, _: str = Depends(require_api_key)):
    """
    Encode a file into audio (JSON with base64).

    v4.3.0: New endpoint for audio steganography.
    """
    # Error mapping for this endpoint:
    #   501 - audio support not available (raised by _require_audio)
    #   400 - undecodable base64 input, CapacityError or StegasooError
    #   500 - any other exception
    _require_audio()

    resolved_channel_key = _resolve_channel_key(request.channel_key)

    # Malformed base64 is a client mistake. Decode outside the main try block so
    # the catch-all handler below cannot report it as a 500. binascii.Error, which
    # b64decode raises on bad padding, is a ValueError subclass.
    try:
        file_data = base64.b64decode(request.file_data_base64)
        ref_photo = base64.b64decode(request.reference_photo_base64)
        carrier = base64.b64decode(request.carrier_audio_base64)
        rsa_key = base64.b64decode(request.rsa_key_base64) if request.rsa_key_base64 else None
    except ValueError as e:
        raise HTTPException(400, f"Invalid base64 data: {e}") from e

    try:
        payload = FilePayload(
            data=file_data, filename=request.filename, mime_type=request.mime_type
        )

        # Run the encode via run_in_thread, like the other encode endpoints.
        stego_audio, stats = await run_in_thread(
            encode_audio,
            message=payload,
            reference_photo=ref_photo,
            carrier_audio=carrier,
            passphrase=request.passphrase,
            pin=request.pin,
            rsa_key_data=rsa_key,
            rsa_password=request.rsa_password,
            embed_mode=request.embed_mode,
            channel_key=resolved_channel_key,
            chip_tier=request.chip_tier,
        )

        stego_b64 = base64.b64encode(stego_audio).decode("utf-8")
        channel_mode, channel_fingerprint = _get_channel_info(resolved_channel_key)

        return AudioEncodeResponse(
            stego_audio_base64=stego_b64,
            embed_mode=stats.embed_mode,
            stats={
                "samples_modified": stats.samples_modified,
                "total_samples": stats.total_samples,
                # capacity_used is scaled by 100 here to report a percentage
                "capacity_used": round(stats.capacity_used * 100, 1),
                "bytes_embedded": stats.bytes_embedded,
                "sample_rate": stats.sample_rate,
                "channels": stats.channels,
                "duration_seconds": round(stats.duration_seconds, 2),
            },
            channel_mode=channel_mode,
            channel_fingerprint=channel_fingerprint,
        )

    except CapacityError as e:
        raise HTTPException(400, str(e))
    except StegasooError as e:
        raise HTTPException(400, str(e))
    except Exception as e:
        raise HTTPException(500, str(e))
+ """ + _require_audio() + + if embed_mode not in ("audio_lsb", "audio_spread"): + raise HTTPException(400, "embed_mode must be 'audio_lsb' or 'audio_spread'") + + # Resolve channel key + if channel_key.lower() == "auto": + resolved_channel_key = None + elif channel_key.lower() == "none": + resolved_channel_key = "" + else: + resolved_channel_key = _resolve_channel_key(channel_key) + + try: + ref_data = await reference_photo.read() + carrier_data = await carrier.read() + + rsa_key_data = None + if rsa_key and rsa_key.filename: + rsa_key_data = await rsa_key.read() + + effective_password = rsa_password if rsa_password else None + + # Determine payload + if payload_file and payload_file.filename: + file_data = await payload_file.read() + payload = FilePayload( + data=file_data, filename=payload_file.filename, mime_type=payload_file.content_type + ) + elif message: + payload = message + else: + raise HTTPException(400, "Must provide either 'message' or 'payload_file'") + + stego_audio, stats = await run_in_thread( + encode_audio, + message=payload, + reference_photo=ref_data, + carrier_audio=carrier_data, + passphrase=passphrase, + pin=pin, + rsa_key_data=rsa_key_data, + rsa_password=effective_password, + embed_mode=embed_mode, + channel_key=resolved_channel_key, + chip_tier=chip_tier, + ) + + channel_mode, channel_fingerprint = _get_channel_info(resolved_channel_key) + + headers = { + "Content-Disposition": "attachment; filename=stego_audio.wav", + "X-Stegasoo-Embed-Mode": stats.embed_mode, + "X-Stegasoo-Capacity-Percent": f"{stats.capacity_used * 100:.1f}", + "X-Stegasoo-Samples-Modified": str(stats.samples_modified), + "X-Stegasoo-Duration": f"{stats.duration_seconds:.2f}", + "X-Stegasoo-Channel-Mode": channel_mode, + "X-Stegasoo-Version": __version__, + } + if channel_fingerprint: + headers["X-Stegasoo-Channel-Fingerprint"] = channel_fingerprint + + return Response( + content=stego_audio, + media_type="audio/wav", + headers=headers, + ) + + except CapacityError as 
@app.post("/audio/decode", response_model=DecodeResponse)
async def api_audio_decode(request: AudioDecodeRequest, _: str = Depends(require_api_key)):
    """
    Decode a message or file from stego audio.

    Returns payload_type to indicate if result is text or file.

    v4.3.0: New endpoint for audio steganography.
    """
    # Error mapping for this endpoint:
    #   501 - audio support not available (raised by _require_audio)
    #   400 - undecodable base64 input or StegasooError
    #   401 - DecryptionError
    #   500 - any other exception
    _require_audio()

    resolved_channel_key = _resolve_channel_key(request.channel_key)

    # Malformed base64 is a client mistake. Decode outside the main try block so
    # the catch-all handler below cannot report it as a 500. binascii.Error, which
    # b64decode raises on bad padding, is a ValueError subclass.
    try:
        stego = base64.b64decode(request.stego_audio_base64)
        ref_photo = base64.b64decode(request.reference_photo_base64)
        rsa_key = base64.b64decode(request.rsa_key_base64) if request.rsa_key_base64 else None
    except ValueError as e:
        raise HTTPException(400, f"Invalid base64 data: {e}") from e

    try:
        result = await run_in_thread(
            decode_audio,
            stego_audio=stego,
            reference_photo=ref_photo,
            passphrase=request.passphrase,
            pin=request.pin,
            rsa_key_data=rsa_key,
            rsa_password=request.rsa_password,
            embed_mode=request.embed_mode,
            channel_key=resolved_channel_key,
        )

        if result.is_file:
            return DecodeResponse(
                payload_type="file",
                file_data_base64=base64.b64encode(result.file_data).decode("utf-8"),
                filename=result.filename,
                mime_type=result.mime_type,
            )
        else:
            return DecodeResponse(payload_type="text", message=result.message)

    except DecryptionError as e:
        # Only channel-key messages are passed through verbatim; every other
        # decryption failure gets the same generic text.
        error_msg = str(e)
        if "channel key" in error_msg.lower():
            raise HTTPException(401, error_msg)
        raise HTTPException(401, "Decryption failed. Check credentials.")
    except StegasooError as e:
        raise HTTPException(400, str(e))
    except Exception as e:
        raise HTTPException(500, str(e))
@app.post("/audio/info", response_model=AudioInfoResponse)
async def api_audio_info(
    _: str = Depends(require_api_key),
    audio: UploadFile = File(...),
):
    """
    Get audio file metadata and embedding capacity.

    v4.3.0: New endpoint for audio steganography.
    """
    _require_audio()

    try:
        audio_bytes = await audio.read()

        metadata = await run_in_thread(get_audio_info, audio_bytes)
        lsb_bytes = await run_in_thread(calculate_audio_lsb_capacity, audio_bytes)

        # Spread capacity is best effort: if it cannot be computed for this
        # file, report 0 rather than failing the whole request.
        spread_bytes = 0
        try:
            spread_report = await run_in_thread(calculate_audio_spread_capacity, audio_bytes)
            spread_bytes = spread_report.usable_capacity_bytes
        except Exception:
            pass

        return AudioInfoResponse(
            sample_rate=metadata.sample_rate,
            channels=metadata.channels,
            duration_seconds=round(metadata.duration_seconds, 2),
            num_samples=metadata.num_samples,
            format=metadata.format,
            bit_depth=metadata.bit_depth,
            bitrate=metadata.bitrate,
            capacity_lsb=lsb_bytes,
            capacity_spread=spread_bytes,
        )

    except StegasooError as e:
        raise HTTPException(400, str(e))
    except Exception as e:
        raise HTTPException(500, str(e))
@app.exception_handler(StegasooError)
async def stegasoo_error_handler(request, exc):
    """Turn a StegasooError into a 400 JSON response with its class name and message."""
    body = {"error": type(exc).__name__, "detail": str(exc)}
    return JSONResponse(status_code=400, content=body)
def _save_keys(data: dict, location: str = "user") -> None:
    """
    Save API keys to config file.

    Args:
        data: JSON-serialisable key store to write.
        location: "project" for the project config; any other value selects
            the user config (see _get_keys_file).

    The file is created with owner-only permissions (0o600) from the start.
    Creating it with open() and tightening it afterwards would leave a window
    in which it carries whatever permissions the process umask allows.
    """
    keys_file = _get_keys_file(location)
    keys_file.parent.mkdir(parents=True, exist_ok=True)

    # The mode argument only applies when os.open creates the file.
    fd = os.open(keys_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        json.dump(data, f, indent=2)

    # A file that already existed keeps its old mode, so still enforce
    # owner read/write only afterwards.
    os.chmod(keys_file, 0o600)
def remove_api_key(name: str, location: str = "user") -> bool:
    """
    Delete the stored API key entry with the given name.

    Returns:
        True if an entry was dropped and the store rewritten, False if no
        entry had that name (nothing is written in that case).
    """
    store = _load_keys(location)
    existing = store["keys"]
    remaining = [entry for entry in existing if entry["name"] != name]

    # Filtering can only shrink the list, so equal lengths mean nothing matched.
    if len(remaining) == len(existing):
        return False

    store["keys"] = remaining
    _save_keys(store, location)
    return True
def is_auth_enabled() -> bool:
    """
    Check if API key authentication is enabled.

    The project config is consulted first, then the user config. A location
    only counts when its keys file actually exists: _load_keys() substitutes
    {"keys": [], "enabled": True} for a missing file, so honouring that
    default would make the project location always answer True, and neither
    the user config nor the key-based fallback would be reached.

    Returns:
        The "enabled" value from the first existing config that defines one
        (an existing but unreadable file counts as enabled, because
        _load_keys() falls back to its default); otherwise True if at least
        one key hash is configured, else False.
    """
    # Check project config first, then user config
    for location in ("project", "user"):
        if not _get_keys_file(location).exists():
            continue  # no file here: say nothing instead of defaulting to True
        data = _load_keys(location)
        if "enabled" in data:
            return data["enabled"]

    # Default: enabled if any keys exist
    return bool(get_all_key_hashes())
async def optional_api_key(api_key: str | None = Security(API_KEY_HEADER)) -> str | None:
    """
    FastAPI dependency for endpoints that work with or without a key.

    Never raises on a missing or wrong key.

    Returns:
        ``api_key`` when one was supplied and ``validate_api_key`` accepts
        it; ``None`` when no key was supplied or the key is rejected.
    """
    if not api_key:
        return None
    return api_key if validate_api_key(api_key) else None
+ +Supports: +- 16-bit PCM (int16 samples) +- 24-bit PCM (int32 samples from soundfile) +- Float audio (converted to int16 before embedding) +- 1 or 2 bits per sample embedding depth +- Mono and multi-channel audio (flattened for embedding) +""" + +import io +import struct + +import numpy as np +import soundfile as sf +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms + +from .constants import ( + AUDIO_MAGIC_LSB, + EMBED_MODE_AUDIO_LSB, +) +from .debug import debug +from .exceptions import AudioCapacityError, AudioError +from .models import AudioEmbedStats +from .steganography import ENCRYPTION_OVERHEAD + +# Progress reporting interval — write every N samples +PROGRESS_INTERVAL = 5000 + + +# ============================================================================= +# PROGRESS REPORTING +# ============================================================================= + + +def _write_progress(progress_file: str | None, current: int, total: int, phase: str = "embedding"): + """Write progress to file for frontend polling.""" + if progress_file is None: + return + try: + import json + + with open(progress_file, "w") as f: + json.dump( + { + "current": current, + "total": total, + "percent": round((current / total) * 100, 1) if total > 0 else 0, + "phase": phase, + }, + f, + ) + except Exception: + pass # Don't let progress writing break encoding + + +# ============================================================================= +# CAPACITY +# ============================================================================= + + +def calculate_audio_lsb_capacity( + audio_data: bytes, + bits_per_sample: int = 1, +) -> int: + """ + Calculate the maximum bytes that can be embedded in a WAV/FLAC file via LSB. 
@debug.time
def generate_sample_indices(key: bytes, num_samples: int, num_needed: int) -> list[int]:
    """
    Generate pseudo-random sample indices using ChaCha20 as a CSPRNG.

    Produces a deterministic sequence of unique sample indices so that
    the same key always yields the same embedding locations.

    The random stream is the ChaCha20 keystream for ``key`` with an all-zero
    16-byte nonce (obtained by encrypting zero bytes), consumed as big-endian
    32-bit words. Because the nonce is fixed, the output depends only on
    ``key``, ``num_samples`` and ``num_needed``.

    Two selection strategies exist and they produce DIFFERENT orderings:

    * ``num_needed >= num_samples // 2``: Fisher-Yates shuffle of all
      indices, then the first ``num_needed`` entries are returned. The
      shuffle itself does not depend on ``num_needed``.
    * otherwise: words are reduced modulo ``num_samples`` and taken in
      stream order, skipping values already chosen.

    Consequently a short request is a prefix of a longer request only when
    both requests fall under the same strategy.

    Args:
        key: 32-byte key for the ChaCha20 cipher.
        num_samples: Total number of samples in the carrier audio.
        num_needed: How many unique sample indices are required.

    Returns:
        List of ``num_needed`` unique indices in [0, num_samples).

    Raises:
        AssertionError (via debug.validate): On invalid arguments.
        NOTE(review): the exception type comes from the original docstring;
        ``debug.validate`` is defined elsewhere — confirm.
    """
    debug.validate(len(key) == 32, f"Sample key must be 32 bytes, got {len(key)}")
    debug.validate(num_samples > 0, f"Number of samples must be positive, got {num_samples}")
    debug.validate(num_needed > 0, f"Number needed must be positive, got {num_needed}")
    debug.validate(
        num_needed <= num_samples,
        f"Cannot select {num_needed} samples from {num_samples} available",
    )

    debug.print(f"Generating {num_needed} sample indices from {num_samples} total samples")

    # Strategy 1: Full Fisher-Yates shuffle when we need many indices
    if num_needed >= num_samples // 2:
        debug.print(f"Using full shuffle (needed {num_needed}/{num_samples} samples)")
        nonce = b"\x00" * 16
        cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
        encryptor = cipher.encryptor()

        indices = list(range(num_samples))
        # One 4-byte word per sample, generated up front (encrypting zeros
        # returns the raw keystream).
        random_bytes = encryptor.update(b"\x00" * (num_samples * 4))

        # Walk from the last position down to 1; step i consumes word number
        # (num_samples - 1 - i) and swaps position i with a position in [0, i].
        for i in range(num_samples - 1, 0, -1):
            j_bytes = random_bytes[(num_samples - 1 - i) * 4 : (num_samples - i) * 4]
            j = int.from_bytes(j_bytes, "big") % (i + 1)
            indices[i], indices[j] = indices[j], indices[i]

        selected = indices[:num_needed]
        debug.print(f"Generated {len(selected)} indices via shuffle")
        return selected

    # Strategy 2: Direct sampling for lower utilisation
    debug.print(f"Using optimized selection (needed {num_needed}/{num_samples} samples)")
    selected: list[int] = []
    used: set[int] = set()  # O(1) duplicate check alongside the ordered list

    nonce = b"\x00" * 16
    cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
    encryptor = cipher.encryptor()

    # Pre-generate 2x bytes to handle expected collisions
    bytes_needed = (num_needed * 2) * 4
    random_bytes = encryptor.update(b"\x00" * bytes_needed)

    byte_offset = 0
    collisions = 0
    # NOTE: the strict "<" means the final 4-byte word of the pre-generated
    # buffer is never consumed here. This is part of the index sequence that
    # embedding and extraction both rely on, so it must not be "fixed".
    while len(selected) < num_needed and byte_offset < len(random_bytes) - 4:
        idx = int.from_bytes(random_bytes[byte_offset : byte_offset + 4], "big") % num_samples
        byte_offset += 4

        if idx not in used:
            used.add(idx)
            selected.append(idx)
        else:
            collisions += 1

    # Edge case: ran out of pre-generated bytes (very high collision rate)
    if len(selected) < num_needed:
        debug.print(f"Need {num_needed - len(selected)} more indices, generating...")
        extra_needed = num_needed - len(selected)
        # Bounded retry: at most 2 * extra_needed further words are drawn from
        # the same encryptor, i.e. the keystream continues after the buffer.
        for _ in range(extra_needed * 2):
            extra_bytes = encryptor.update(b"\x00" * 4)
            idx = int.from_bytes(extra_bytes, "big") % num_samples
            if idx not in used:
                used.add(idx)
                selected.append(idx)
                if len(selected) == num_needed:
                    break

    debug.print(f"Generated {len(selected)} indices with {collisions} collisions")
    # The bounded retry above can still come up short; that is reported here.
    debug.validate(
        len(selected) == num_needed,
        f"Failed to generate enough indices: {len(selected)}/{num_needed}",
    )
    return selected
+ bits_per_sample: LSBs to use per sample (1 or 2). + progress_file: Optional path for progress JSON (frontend polling). + + Returns: + Tuple of (stego WAV bytes, AudioEmbedStats). + + Raises: + AudioCapacityError: If the payload is too large for the carrier. + AudioError: On any other embedding failure. + """ + debug.print(f"Audio LSB embedding {len(data)} bytes") + debug.data(sample_key, "Sample key for embedding") + debug.validate( + bits_per_sample in (1, 2), f"bits_per_sample must be 1 or 2, got {bits_per_sample}" + ) + debug.validate(len(sample_key) == 32, f"Sample key must be 32 bytes, got {len(sample_key)}") + + try: + # 1. Read carrier audio + samples, samplerate = sf.read(io.BytesIO(carrier_audio), dtype="int16", always_2d=True) + # samples shape: (num_frames, channels) + original_shape = samples.shape + channels = original_shape[1] + duration = original_shape[0] / samplerate + + debug.print( + f"Carrier audio: {samplerate} Hz, {channels} ch, " + f"{original_shape[0]} frames, {duration:.2f}s" + ) + + # Flatten to 1D for embedding + flat_samples = samples.flatten().copy() + num_samples = len(flat_samples) + + # 2. Prepend magic + length prefix + header = AUDIO_MAGIC_LSB + struct.pack(">I", len(data)) + payload = header + data + debug.print( + f"Payload with header: {len(payload)} bytes (magic 4 + len 4 + data {len(data)})" + ) + + # 3. Check capacity + max_bytes = (num_samples * bits_per_sample) // 8 + if len(payload) > max_bytes: + debug.print(f"Capacity error: need {len(payload)}, have {max_bytes}") + raise AudioCapacityError(len(payload), max_bytes) + + debug.print( + f"Capacity usage: {len(payload)}/{max_bytes} bytes " + f"({len(payload) / max_bytes * 100:.1f}%)" + ) + + # 4. Convert payload to binary string + binary_data = "".join(format(b, "08b") for b in payload) + samples_needed = (len(binary_data) + bits_per_sample - 1) // bits_per_sample + + debug.print(f"Need {samples_needed} samples to embed {len(binary_data)} bits") + + # 5. 
Generate pseudo-random sample indices + selected_indices = generate_sample_indices(sample_key, num_samples, samples_needed) + + # 6. Modify LSBs of selected samples + lsb_mask = (1 << bits_per_sample) - 1 + bit_idx = 0 + modified_count = 0 + total_to_process = len(selected_indices) + + # Initial progress + if progress_file: + _write_progress(progress_file, 5, 100, "embedding") + + for progress_idx, sample_idx in enumerate(selected_indices): + if bit_idx >= len(binary_data): + break + + bits = binary_data[bit_idx : bit_idx + bits_per_sample].ljust(bits_per_sample, "0") + bit_val = int(bits, 2) + + sample_val = flat_samples[sample_idx] + # Work in unsigned 16-bit space to avoid overflow + unsigned_val = int(sample_val) & 0xFFFF + new_unsigned = (unsigned_val & ~lsb_mask) | bit_val + # Convert back to signed int16 + new_val = np.int16(new_unsigned if new_unsigned < 32768 else new_unsigned - 65536) + + if sample_val != new_val: + flat_samples[sample_idx] = new_val + modified_count += 1 + + bit_idx += bits_per_sample + + # Report progress periodically + if progress_file and progress_idx % PROGRESS_INTERVAL == 0: + _write_progress(progress_file, progress_idx, total_to_process, "embedding") + + # Final progress before save + if progress_file: + _write_progress(progress_file, total_to_process, total_to_process, "saving") + + debug.print(f"Modified {modified_count} samples (out of {samples_needed} selected)") + + # 7. 
@debug.time
def extract_from_audio_lsb(
    audio_data: bytes,
    sample_key: bytes,
    bits_per_sample: int = 1,
    progress_file: str | None = None,
) -> bytes | None:
    """
    Extract hidden data from audio using LSB steganography.

    Reads the stego audio as int16, regenerates the pseudo-random sample
    indices from ``sample_key``, reads ``bits_per_sample`` low bits from each
    selected sample (most significant of those bits first) and rebuilds the
    payload. The first 8 bytes are a header: 4 bytes ``AUDIO_MAGIC_LSB``
    followed by a 4-byte big-endian payload length.

    Header lookup is attempted in two index orders. ``generate_sample_indices``
    uses direct sampling for short requests and a full shuffle once a request
    reaches ``num_samples // 2``; the two orders differ. The header request is
    short, so a payload that was large enough to be embedded in shuffle order
    would never be found by the short request alone. When the magic is not
    found in direct-sampling order, the header is therefore re-read from the
    start of the full shuffle. That retry builds the complete shuffle, so
    audio without a matching payload (wrong key, no data) costs one full
    shuffle before ``None`` is returned.

    Args:
        audio_data: Raw bytes of the stego WAV file.
        sample_key: 32-byte key (must match the one used for embedding).
        bits_per_sample: LSBs per sample (must match embedding).
        progress_file: Optional path for progress JSON.

    Returns:
        Extracted payload bytes (without magic/length prefix), or ``None``
        if extraction fails (wrong key, no data, corrupted). Any exception
        raised inside the extraction is logged via ``debug.exception`` and
        also results in ``None``.
    """
    debug.print(f"Audio LSB extracting from {len(audio_data)} byte audio")
    debug.data(sample_key, "Sample key for extraction")
    debug.validate(
        bits_per_sample in (1, 2), f"bits_per_sample must be 1 or 2, got {bits_per_sample}"
    )

    try:
        # 1. Read audio
        samples, samplerate = sf.read(io.BytesIO(audio_data), dtype="int16", always_2d=True)
        flat_samples = samples.flatten()
        num_samples = len(flat_samples)

        debug.print(f"Audio: {samplerate} Hz, {samples.shape[1]} ch, {num_samples} total samples")

        lsb_mask = (1 << bits_per_sample) - 1
        bit_format = f"0{bits_per_sample}b"

        def sample_bits(sample_idx: int) -> str:
            """Return the low ``bits_per_sample`` bits of one sample, MSB first."""
            return format(int(flat_samples[sample_idx]) & lsb_mask, bit_format)

        # 2. Extract initial samples to find magic bytes + length (8 bytes = 64 bits)
        header_bits_needed = 64  # 4 bytes magic + 4 bytes length
        header_samples_needed = (header_bits_needed + bits_per_sample - 1) // bits_per_sample + 10

        if header_samples_needed > num_samples:
            debug.print("Audio too small to contain header")
            return None

        # Requests of at least this many indices are served by the full shuffle.
        shuffle_threshold = num_samples // 2

        initial_indices = generate_sample_indices(sample_key, num_samples, header_samples_needed)
        binary_data = "".join(sample_bits(idx) for idx in initial_indices)

        # Retry in shuffle order when the short request used direct sampling
        # and did not find the magic (see docstring).
        shuffled_indices = None
        if (
            len(binary_data) >= 64
            and int(binary_data[:32], 2).to_bytes(4, "big") != AUDIO_MAGIC_LSB
            and header_samples_needed < shuffle_threshold
        ):
            debug.print("Magic not found in direct-sampling order, retrying in shuffle order")
            shuffled_indices = generate_sample_indices(sample_key, num_samples, num_samples)
            binary_data = "".join(
                sample_bits(idx) for idx in shuffled_indices[:header_samples_needed]
            )

        # 3. Verify magic bytes
        if len(binary_data) < 64:
            debug.print(f"Not enough bits for header: {len(binary_data)}/64")
            return None

        magic_bits = binary_data[:32]
        magic_bytes = int(magic_bits, 2).to_bytes(4, "big")

        if magic_bytes != AUDIO_MAGIC_LSB:
            debug.print(f"Magic mismatch: got {magic_bytes!r}, expected {AUDIO_MAGIC_LSB!r}")
            return None

        debug.print("Magic bytes verified: AUDL")

        # 4. Parse length
        length_bits = binary_data[32:64]
        data_length = struct.unpack(">I", int(length_bits, 2).to_bytes(4, "big"))[0]
        debug.print(f"Extracted length: {data_length} bytes")

        # Sanity check length
        max_possible = (num_samples * bits_per_sample) // 8 - 8  # minus header
        if data_length > max_possible or data_length < 1:
            debug.print(f"Invalid data length: {data_length} (max possible: {max_possible})")
            return None

        # 5. Extract full payload
        total_bits = (8 + data_length) * 8  # header (8 bytes) + payload
        total_samples_needed = (total_bits + bits_per_sample - 1) // bits_per_sample

        if total_samples_needed > num_samples:
            debug.print(f"Need {total_samples_needed} samples but only {num_samples} available")
            return None

        debug.print(f"Need {total_samples_needed} samples to extract {data_length} bytes")

        if shuffled_indices is not None:
            # A header found in shuffle order is only genuine if the payload
            # was big enough for embedding to have used the shuffle as well.
            if total_samples_needed < shuffle_threshold:
                debug.print("Header found in shuffle order but length implies direct sampling")
                return None
            selected_indices = shuffled_indices[:total_samples_needed]
        else:
            selected_indices = generate_sample_indices(
                sample_key, num_samples, total_samples_needed
            )

        # Initial progress
        if progress_file:
            _write_progress(progress_file, 5, 100, "extracting")

        # Collect per-sample bit strings and join once instead of growing a
        # string one character at a time.
        bit_chunks = []
        for progress_idx, sample_idx in enumerate(selected_indices):
            bit_chunks.append(sample_bits(sample_idx))

            if progress_file and progress_idx % PROGRESS_INTERVAL == 0:
                _write_progress(progress_file, progress_idx, total_samples_needed, "extracting")

        binary_data = "".join(bit_chunks)

        if progress_file:
            _write_progress(progress_file, total_samples_needed, total_samples_needed, "extracting")

        # Skip the 8-byte header (magic + length) = 64 bits
        data_bits = binary_data[64 : 64 + (data_length * 8)]

        if len(data_bits) < data_length * 8:
            debug.print(f"Insufficient bits: {len(data_bits)} < {data_length * 8}")
            return None

        # Convert bits back to bytes (exactly data_length * 8 bits at this point)
        data_bytes = int(data_bits, 2).to_bytes(data_length, "big")

        debug.print(f"Audio LSB successfully extracted {len(data_bytes)} bytes")
        return data_bytes

    except Exception as e:
        debug.exception(e, "extract_from_audio_lsb")
        return None
def transcode_to_wav(audio_data: bytes) -> bytes:
    """Transcode any supported audio format to WAV PCM format.

    Routing by detected format:

    * WAV whose soundfile subtype is PCM_16, PCM_24, PCM_32, FLOAT or DOUBLE:
      read as int16 with soundfile and re-written as PCM_16 WAV (the input
      is always re-encoded, never returned unchanged).
    * WAV with any other subtype: handed to pydub/ffmpeg.
    * FLAC: read as int16 with soundfile and written as PCM_16 WAV.
    * Everything else (MP3, OGG, AAC, M4A): pydub/ffmpeg.

    Args:
        audio_data: Raw audio file bytes in any supported format.

    Returns:
        WAV file bytes. The soundfile paths produce 16-bit PCM at the
        source sample rate.

    Raises:
        AudioTranscodeError: If transcoding fails. The triggering exception
            is attached as ``__cause__``.
        UnsupportedAudioFormatError: If the format cannot be detected.
    """
    fmt = detect_audio_format(audio_data)
    logger.info("transcode_to_wav: input format=%s, size=%d bytes", fmt, len(audio_data))

    if fmt == "unknown":
        raise UnsupportedAudioFormatError(
            "Cannot detect audio format. Supported: WAV, FLAC, MP3, OGG, AAC, M4A."
        )

    # WAV files: inspect with soundfile; PCM/float subtypes are normalised here
    if fmt == "wav":
        try:
            import soundfile as sf

            buf = io.BytesIO(audio_data)
            info = sf.info(buf)
            if info.subtype in ("PCM_16", "PCM_24", "PCM_32", "FLOAT", "DOUBLE"):
                # Re-encode to ensure consistent PCM_16 output
                buf.seek(0)
                data, samplerate = sf.read(buf, dtype="int16")
                out = io.BytesIO()
                sf.write(out, data, samplerate, format="WAV", subtype="PCM_16")
                return out.getvalue()
            # Any other WAV subtype deliberately falls through to pydub below.
        except ImportError as e:
            raise AudioTranscodeError("soundfile package is required for WAV processing") from e
        except Exception as e:
            raise AudioTranscodeError(f"Failed to process WAV: {e}") from e

    # FLAC: use soundfile (fast, no ffmpeg)
    if fmt == "flac":
        try:
            import soundfile as sf

            buf = io.BytesIO(audio_data)
            data, samplerate = sf.read(buf, dtype="int16")
            out = io.BytesIO()
            sf.write(out, data, samplerate, format="WAV", subtype="PCM_16")
            return out.getvalue()
        except ImportError as e:
            raise AudioTranscodeError("soundfile package is required for FLAC processing") from e
        except Exception as e:
            raise AudioTranscodeError(f"Failed to transcode FLAC to WAV: {e}") from e

    # Lossy formats (MP3, OGG, AAC, M4A) and non-PCM WAV: use pydub + ffmpeg
    return _transcode_with_pydub(audio_data, fmt, "wav")
+ + Raises: + AudioTranscodeError: If transcoding fails or pydub/ffmpeg unavailable. + """ + fmt = detect_audio_format(audio_data) + + if fmt == "unknown": + raise UnsupportedAudioFormatError( + "Cannot detect audio format. Supported: WAV, FLAC, MP3, OGG, AAC, M4A." + ) + + try: + from pydub import AudioSegment + except ImportError: + raise AudioTranscodeError( + "pydub package is required for MP3 transcoding. Install with: pip install pydub" + ) + + if not has_ffmpeg_support(): + raise AudioTranscodeError( + "ffmpeg is required for MP3 transcoding. Install ffmpeg on your system." + ) + + try: + # Map our format names to pydub format names + pydub_fmt = _pydub_format(fmt) + buf = io.BytesIO(audio_data) + audio = AudioSegment.from_file(buf, format=pydub_fmt) + + out = io.BytesIO() + audio.export(out, format="mp3", bitrate=bitrate) + return out.getvalue() + except Exception as e: + raise AudioTranscodeError(f"Failed to transcode to MP3: {e}") + + +def _transcode_with_pydub(audio_data: bytes, src_fmt: str, dst_fmt: str) -> bytes: + """Transcode audio using pydub (requires ffmpeg). + + Args: + audio_data: Raw audio bytes. + src_fmt: Source format string (our naming). + dst_fmt: Destination format string ("wav" or "mp3"). + + Returns: + Transcoded audio bytes. + + Raises: + AudioTranscodeError: If transcoding fails. + """ + try: + from pydub import AudioSegment + except ImportError: + raise AudioTranscodeError( + "pydub package is required for audio transcoding. Install with: pip install pydub" + ) + + if not has_ffmpeg_support(): + raise AudioTranscodeError( + "ffmpeg is required for audio transcoding. Install ffmpeg on your system." 
def _pydub_format(fmt: str) -> str:
    """Translate an internal format name into the name passed to pydub.

    Every name in the table currently maps to itself, and a name that is
    not in the table is returned unchanged.

    Args:
        fmt: Our internal format name.

    Returns:
        pydub-compatible format string.
    """
    known = {name: name for name in ("wav", "flac", "mp3", "ogg", "aac", "m4a")}
    try:
        return known[fmt]
    except KeyError:
        return fmt
+ + Returns: + AudioInfo with metadata. + """ + try: + import soundfile as sf + except ImportError: + raise AudioTranscodeError( + "soundfile package is required. Install with: pip install soundfile" + ) + + try: + buf = io.BytesIO(audio_data) + info = sf.info(buf) + + # Determine bit depth from subtype + bit_depth = _bit_depth_from_subtype(info.subtype) + + return AudioInfo( + sample_rate=info.samplerate, + channels=info.channels, + duration_seconds=info.duration, + num_samples=info.frames, + format=fmt, + bitrate=None, + bit_depth=bit_depth, + ) + except Exception as e: + raise AudioTranscodeError(f"Failed to read {fmt.upper()} metadata: {e}") + + +def _bit_depth_from_subtype(subtype: str) -> int | None: + """Determine bit depth from soundfile subtype string. + + Args: + subtype: Soundfile subtype (e.g., "PCM_16", "PCM_24", "FLOAT"). + + Returns: + Bit depth as integer, or None if unknown. + """ + subtype_map = { + "PCM_S8": 8, + "PCM_U8": 8, + "PCM_16": 16, + "PCM_24": 24, + "PCM_32": 32, + "FLOAT": 32, + "DOUBLE": 64, + } + return subtype_map.get(subtype) + + +def _get_info_pydub(audio_data: bytes, fmt: str) -> AudioInfo: + """Extract audio info using pydub (lossy formats). + + Args: + audio_data: Raw audio bytes. + fmt: Format string ("mp3", "ogg", "aac", "m4a"). + + Returns: + AudioInfo with metadata. + """ + try: + from pydub import AudioSegment + except ImportError: + raise AudioTranscodeError( + "pydub package is required for audio metadata. Install with: pip install pydub" + ) + + if not has_ffmpeg_support(): + raise AudioTranscodeError( + "ffmpeg is required for audio metadata extraction. Install ffmpeg on your system." 
def validate_audio(
    audio_data: bytes,
    name: str = "Audio",
    check_duration: bool = True,
) -> ValidationResult:
    """Validate audio data for steganography.

    Checks run in this order and the first failure is returned:

    1. data is non-empty
    2. size does not exceed MAX_AUDIO_FILE_SIZE
    3. format is recognised by ``detect_audio_format``
    4. metadata can be read by ``get_audio_info``
    5. duration does not exceed MAX_AUDIO_DURATION (only if ``check_duration``)
    6. sample rate lies within MIN_AUDIO_SAMPLE_RATE..MAX_AUDIO_SAMPLE_RATE

    Args:
        audio_data: Raw audio file bytes.
        name: Descriptive name used in error messages (default: "Audio").
        check_duration: Whether to enforce the duration limit (default: True).

    Returns:
        ``ValidationResult.error(...)`` describing the first failed check, or
        ``ValidationResult.ok(...)`` carrying sample_rate, channels, duration,
        num_samples, format, bitrate and bit_depth.
    """
    bytes_per_mb = 1024 * 1024

    if not audio_data:
        return ValidationResult.error(f"{name} is required")

    if len(audio_data) > MAX_AUDIO_FILE_SIZE:
        size_mb = len(audio_data) / bytes_per_mb
        max_mb = MAX_AUDIO_FILE_SIZE / bytes_per_mb
        return ValidationResult.error(
            f"{name} too large ({size_mb:.1f} MB). Maximum: {max_mb:.0f} MB"
        )

    # Container must be identifiable from its magic bytes.
    if detect_audio_format(audio_data) == "unknown":
        return ValidationResult.error(
            f"Could not detect {name} format. Supported formats: WAV, FLAC, MP3, OGG, AAC, M4A."
        )

    # Any failure while reading metadata is reported, never raised.
    try:
        meta = get_audio_info(audio_data)
    except (AudioTranscodeError, UnsupportedAudioFormatError, Exception) as exc:
        return ValidationResult.error(f"Could not read {name}: {exc}")

    if check_duration and meta.duration_seconds > MAX_AUDIO_DURATION:
        return ValidationResult.error(
            f"{name} too long ({meta.duration_seconds:.1f}s). "
            f"Maximum: {MAX_AUDIO_DURATION}s ({MAX_AUDIO_DURATION // 60} minutes)"
        )

    rate = meta.sample_rate
    if rate < MIN_AUDIO_SAMPLE_RATE:
        return ValidationResult.error(
            f"{name} sample rate too low ({rate} Hz). Minimum: {MIN_AUDIO_SAMPLE_RATE} Hz"
        )
    if rate > MAX_AUDIO_SAMPLE_RATE:
        return ValidationResult.error(
            f"{name} sample rate too high ({rate} Hz). Maximum: {MAX_AUDIO_SAMPLE_RATE} Hz"
        )

    return ValidationResult.ok(
        sample_rate=meta.sample_rate,
        channels=meta.channels,
        duration=meta.duration_seconds,
        num_samples=meta.num_samples,
        format=meta.format,
        bitrate=meta.bitrate,
        bit_depth=meta.bit_depth,
    )
+ + Args: + mode: Embedding mode to validate (e.g., "audio_lsb", "audio_mdct", "audio_auto"). + + Returns: + ValidationResult with mode in details on success. + """ + valid_modes = VALID_AUDIO_EMBED_MODES | {EMBED_MODE_AUDIO_AUTO} + if mode not in valid_modes: + return ValidationResult.error( + f"Invalid audio embed_mode: '{mode}'. " + f"Valid options: {', '.join(sorted(valid_modes))}" + ) + return ValidationResult.ok(mode=mode) diff --git a/src/soosef/stegasoo/backends/__init__.py b/src/soosef/stegasoo/backends/__init__.py new file mode 100644 index 0000000..85a3b72 --- /dev/null +++ b/src/soosef/stegasoo/backends/__init__.py @@ -0,0 +1,31 @@ +""" +Stegasoo embedding backends. + +Provides a typed plugin interface for all embedding algorithms. +Backends register with the module-level ``registry`` on import. + +Usage:: + + from stegasoo.backends import registry + + backend = registry.get("lsb") + stego, stats = backend.embed(data, carrier, key) +""" + +from .dct import DCTBackend +from .lsb import LSBBackend +from .protocol import EmbeddingBackend +from .registry import BackendNotFoundError, BackendRegistry, registry + +# Auto-register built-in backends +registry.register(LSBBackend()) +registry.register(DCTBackend()) + +__all__ = [ + "EmbeddingBackend", + "BackendRegistry", + "BackendNotFoundError", + "registry", + "LSBBackend", + "DCTBackend", +] diff --git a/src/soosef/stegasoo/backends/dct.py b/src/soosef/stegasoo/backends/dct.py new file mode 100644 index 0000000..a36477b --- /dev/null +++ b/src/soosef/stegasoo/backends/dct.py @@ -0,0 +1,69 @@ +""" +DCT (Discrete Cosine Transform) image embedding backend. + +Wraps the existing frequency-domain DCT functions in dct_steganography.py. 
+""" + +from __future__ import annotations + +from typing import Any + + +class DCTBackend: + """Frequency-domain DCT embedding for JPEG-resilient steganography.""" + + @property + def mode(self) -> str: + return "dct" + + @property + def carrier_type(self) -> str: + return "image" + + def is_available(self) -> bool: + from ..dct_steganography import HAS_SCIPY + + return HAS_SCIPY + + def embed( + self, + data: bytes, + carrier: bytes, + key: bytes, + *, + progress_file: str | None = None, + **options: Any, + ) -> tuple[bytes, Any]: + from ..dct_steganography import embed_in_dct + + output_format = options.get("dct_output_format", "png") + color_mode = options.get("dct_color_mode", "color") + quant_step = options.get("quant_step") + jpeg_quality = options.get("jpeg_quality") + max_dimension = options.get("max_dimension") + return embed_in_dct( + data, carrier, key, output_format, color_mode, progress_file, + quant_step=quant_step, jpeg_quality=jpeg_quality, max_dimension=max_dimension, + ) + + def extract( + self, + carrier: bytes, + key: bytes, + *, + progress_file: str | None = None, + **options: Any, + ) -> bytes | None: + from ..dct_steganography import extract_from_dct + + quant_step = options.get("quant_step") + try: + return extract_from_dct(carrier, key, progress_file, quant_step=quant_step) + except Exception: + return None + + def calculate_capacity(self, carrier: bytes, **options: Any) -> int: + from ..dct_steganography import calculate_dct_capacity + + info = calculate_dct_capacity(carrier) + return info.usable_capacity_bytes diff --git a/src/soosef/stegasoo/backends/lsb.py b/src/soosef/stegasoo/backends/lsb.py new file mode 100644 index 0000000..c385c54 --- /dev/null +++ b/src/soosef/stegasoo/backends/lsb.py @@ -0,0 +1,63 @@ +""" +LSB (Least Significant Bit) image embedding backend. + +Wraps the existing spatial-domain LSB functions in steganography.py. 
+""" + +from __future__ import annotations + +from typing import Any + + +class LSBBackend: + """Spatial-domain LSB embedding for lossless image formats.""" + + @property + def mode(self) -> str: + return "lsb" + + @property + def carrier_type(self) -> str: + return "image" + + def is_available(self) -> bool: + return True # Only needs Pillow, which is always present + + def embed( + self, + data: bytes, + carrier: bytes, + key: bytes, + *, + progress_file: str | None = None, + **options: Any, + ) -> tuple[bytes, Any]: + from ..steganography import _embed_lsb + + bits_per_channel = options.get("bits_per_channel", 1) + output_format = options.get("output_format", None) + stego_bytes, stats, ext = _embed_lsb( + data, carrier, key, bits_per_channel, output_format, progress_file + ) + # Attach output extension to stats for callers that need it + stats.output_extension = ext # type: ignore[attr-defined] + return stego_bytes, stats + + def extract( + self, + carrier: bytes, + key: bytes, + *, + progress_file: str | None = None, + **options: Any, + ) -> bytes | None: + from ..steganography import _extract_lsb + + bits_per_channel = options.get("bits_per_channel", 1) + return _extract_lsb(carrier, key, bits_per_channel) + + def calculate_capacity(self, carrier: bytes, **options: Any) -> int: + from ..steganography import calculate_capacity + + bits_per_channel = options.get("bits_per_channel", 1) + return calculate_capacity(carrier, bits_per_channel) diff --git a/src/soosef/stegasoo/backends/protocol.py b/src/soosef/stegasoo/backends/protocol.py new file mode 100644 index 0000000..21cf60d --- /dev/null +++ b/src/soosef/stegasoo/backends/protocol.py @@ -0,0 +1,91 @@ +""" +Embedding backend protocol definition. + +All embedding backends (LSB, DCT, audio, video, etc.) implement this protocol, +enabling registry-based dispatch instead of if/elif chains. 
+""" + +from __future__ import annotations + +from typing import Any, Protocol, runtime_checkable + + +@runtime_checkable +class EmbeddingBackend(Protocol): + """Protocol that all embedding backends must satisfy. + + Each backend handles a specific embedding mode (e.g. 'lsb', 'dct', + 'audio_lsb', 'audio_spread') for a specific carrier type ('image', + 'audio', 'video'). + """ + + @property + def mode(self) -> str: + """The embedding mode identifier (e.g. 'lsb', 'dct').""" + ... + + @property + def carrier_type(self) -> str: + """The carrier media type: 'image', 'audio', or 'video'.""" + ... + + def is_available(self) -> bool: + """Whether this backend's dependencies are installed.""" + ... + + def embed( + self, + data: bytes, + carrier: bytes, + key: bytes, + *, + progress_file: str | None = None, + **options: Any, + ) -> tuple[bytes, Any]: + """Embed data into a carrier. + + Args: + data: Encrypted payload bytes. + carrier: Raw carrier file bytes (image, audio, etc.). + key: Derived key for pixel/sample selection. + progress_file: Optional progress file path. + **options: Backend-specific options (bits_per_channel, + output_format, color_mode, chip_tier, etc.). + + Returns: + Tuple of (stego carrier bytes, embed stats). + """ + ... + + def extract( + self, + carrier: bytes, + key: bytes, + *, + progress_file: str | None = None, + **options: Any, + ) -> bytes | None: + """Extract data from a carrier. + + Args: + carrier: Stego carrier file bytes. + key: Derived key for pixel/sample selection. + progress_file: Optional progress file path. + **options: Backend-specific options. + + Returns: + Extracted payload bytes, or None if no payload found. + """ + ... + + def calculate_capacity(self, carrier: bytes, **options: Any) -> int: + """Calculate maximum embeddable payload size in bytes. + + Args: + carrier: Raw carrier file bytes. + **options: Backend-specific options (e.g. bits_per_channel). + + Returns: + Maximum payload capacity in bytes. + """ + ... 
class BackendRegistry:
    """Lookup table from embedding-mode string to backend instance."""

    def __init__(self) -> None:
        # Keyed by ``backend.mode``; at most one backend per mode.
        self._backends: dict[str, EmbeddingBackend] = {}

    def register(self, backend: EmbeddingBackend) -> None:
        """Store ``backend`` under its ``mode``, replacing any earlier entry."""
        self._backends[backend.mode] = backend

    def get(self, mode: str) -> EmbeddingBackend:
        """Return the backend registered for ``mode``.

        Raises:
            BackendNotFoundError: If nothing is registered for ``mode``; the
                message lists the registered modes, or ``(none)``.
        """
        if mode in self._backends:
            return self._backends[mode]

        known = ", ".join(sorted(self._backends)) or "(none)"
        raise BackendNotFoundError(
            f"No backend registered for mode '{mode}'. Available: {known}"
        )

    def has(self, mode: str) -> bool:
        """Tell whether some backend is registered for ``mode``."""
        return mode in self._backends

    def available_modes(self, carrier_type: str | None = None) -> list[str]:
        """Return the sorted modes whose backend reports ``is_available()``.

        Args:
            carrier_type: When given, keep only backends whose
                ``carrier_type`` equals it.
        """
        usable: list[str] = []
        for mode, backend in self._backends.items():
            # Availability is queried first, for every registered backend.
            if not backend.is_available():
                continue
            if carrier_type is None or backend.carrier_type == carrier_type:
                usable.append(mode)
        usable.sort()
        return usable

    def all_modes(self, carrier_type: str | None = None) -> list[str]:
        """Return every registered mode, sorted, available or not.

        Args:
            carrier_type: When given, keep only backends whose
                ``carrier_type`` equals it.
        """
        if carrier_type is None:
            return sorted(self._backends)
        return sorted(
            mode
            for mode, backend in self._backends.items()
            if backend.carrier_type == carrier_type
        )
self.start_time and self.end_time: + return self.end_time - self.start_time + return None + + def to_dict(self) -> dict: + """Convert to dictionary for JSON serialization.""" + return { + "input_path": str(self.input_path), + "output_path": str(self.output_path) if self.output_path else None, + "status": self.status.value, + "error": self.error, + "duration_seconds": self.duration, + "input_size": self.input_size, + "output_size": self.output_size, + "message": self.message, + } + + +@dataclass +class BatchCredentials: + """ + Credentials for batch encode/decode operations (v3.2.0). + + Provides a structured way to pass authentication factors + for batch processing instead of using plain dicts. + + Changes in v3.2.0: + - Renamed day_phrase → passphrase + - Removed date_str (no longer used in cryptographic operations) + + Example: + creds = BatchCredentials( + reference_photo=ref_bytes, + passphrase="apple forest thunder mountain", + pin="123456" + ) + result = processor.batch_encode(images, creds, message="secret") + """ + + reference_photo: bytes + passphrase: str # v3.2.0: renamed from day_phrase + pin: str = "" + rsa_key_data: bytes | None = None + rsa_password: str | None = None + + def to_dict(self) -> dict: + """Convert to dictionary for API compatibility.""" + return { + "reference_photo": self.reference_photo, + "passphrase": self.passphrase, + "pin": self.pin, + "rsa_key_data": self.rsa_key_data, + "rsa_password": self.rsa_password, + } + + @classmethod + def from_dict(cls, data: dict) -> "BatchCredentials": + """ + Create BatchCredentials from a dictionary. + + Handles both v3.2.0 format (passphrase) and legacy formats (day_phrase, phrase). 
+ """ + # Handle legacy 'day_phrase' and 'phrase' keys + passphrase = data.get("passphrase") or data.get("day_phrase") or data.get("phrase", "") + + return cls( + reference_photo=data["reference_photo"], + passphrase=passphrase, + pin=data.get("pin", ""), + rsa_key_data=data.get("rsa_key_data"), + rsa_password=data.get("rsa_password"), + ) + + +@dataclass +class BatchResult: + """Summary of a batch operation.""" + + operation: str + total: int = 0 + succeeded: int = 0 + failed: int = 0 + skipped: int = 0 + start_time: float = field(default_factory=time.time) + end_time: float | None = None + items: list[BatchItem] = field(default_factory=list) + + @property + def duration(self) -> float | None: + """Total batch duration in seconds.""" + if self.end_time: + return self.end_time - self.start_time + return None + + def to_dict(self) -> dict: + """Convert to dictionary for JSON serialization.""" + return { + "operation": self.operation, + "summary": { + "total": self.total, + "succeeded": self.succeeded, + "failed": self.failed, + "skipped": self.skipped, + "duration_seconds": self.duration, + }, + "items": [item.to_dict() for item in self.items], + } + + def to_json(self, indent: int = 2) -> str: + """Serialize to JSON string.""" + return json.dumps(self.to_dict(), indent=indent) + + +# Type alias for progress callback +ProgressCallback = Callable[[int, int, BatchItem], None] + + +class BatchProcessor: + """ + Handles batch encoding/decoding operations (v3.2.0). 
+ + Usage: + processor = BatchProcessor(max_workers=4) + + # Batch encode with BatchCredentials + creds = BatchCredentials( + reference_photo=ref_bytes, + passphrase="apple forest thunder mountain", + pin="123456" + ) + result = processor.batch_encode( + images=['img1.png', 'img2.png'], + message="Secret message", + output_dir="./encoded/", + credentials=creds, + ) + + # Batch encode with dict credentials + result = processor.batch_encode( + images=['img1.png', 'img2.png'], + message="Secret message", + credentials={ + "reference_photo": ref_bytes, + "passphrase": "apple forest thunder mountain", + "pin": "123456" + }, + ) + + # Batch decode + result = processor.batch_decode( + images=['encoded1.png', 'encoded2.png'], + credentials=creds, + ) + """ + + def __init__(self, max_workers: int = 4): + """ + Initialize batch processor. + + Args: + max_workers: Maximum parallel workers (default 4) + """ + self.max_workers = max_workers + self._lock = threading.Lock() + + def find_images( + self, + paths: list[str | Path], + recursive: bool = False, + ) -> Iterator[Path]: + """ + Find all valid image files from paths. + + Args: + paths: List of files or directories + recursive: Search directories recursively + + Yields: + Path objects for each valid image + """ + for path in paths: + path = Path(path) + + if path.is_file(): + if self._is_valid_image(path): + yield path + + elif path.is_dir(): + pattern = "**/*" if recursive else "*" + for file_path in path.glob(pattern): + if file_path.is_file() and self._is_valid_image(file_path): + yield file_path + + def _is_valid_image(self, path: Path) -> bool: + """Check if path is a valid image file.""" + return path.suffix.lower().lstrip(".") in ALLOWED_IMAGE_EXTENSIONS + + def _normalize_credentials( + self, credentials: dict | BatchCredentials | None + ) -> BatchCredentials: + """ + Normalize credentials to BatchCredentials object. + + Handles both dict and BatchCredentials input, and legacy 'day_phrase' key. 
def batch_encode(
    self,
    images: list[str | Path],
    message: str | None = None,
    file_payload: Path | None = None,
    output_dir: Path | None = None,
    output_suffix: str = "_encoded",
    credentials: dict | BatchCredentials | None = None,
    compress: bool = True,
    recursive: bool = False,
    progress_callback: ProgressCallback | None = None,
    encode_func: Callable = None,
) -> BatchResult:
    """
    Embed one payload into every carrier image found under ``images``.

    Each carrier becomes a BatchItem whose output is written to
    ``<stem><output_suffix>.png``, either in ``output_dir`` or next to the
    input. Items are processed through ``_execute_batch``; a failure while
    encoding one item is recorded on that item (status FAILED plus the error
    text) instead of being raised.

    Args:
        images: List of image paths or directories
        message: Text message to encode (mutually exclusive with file_payload)
        file_payload: File to embed (mutually exclusive with message)
        output_dir: Output directory (default: same as input); created if missing
        output_suffix: Suffix appended to the input stem for output files
        credentials: BatchCredentials or dict with 'passphrase', 'pin', etc.
        compress: Enable compression (forwarded to the encode step)
        recursive: Search directories recursively
        progress_callback: Called for each item: callback(current, total, item)
        encode_func: Custom encode function (for integration); called with
            keyword arguments image_path, output_path, message, file_payload,
            credentials (as a dict) and compress

    Returns:
        BatchResult with operation summary

    Raises:
        ValueError: If neither message nor file_payload is given, or if the
            credentials cannot be normalized.
    """
    if message is None and file_payload is None:
        raise ValueError("Either message or file_payload must be provided")

    creds = self._normalize_credentials(credentials)

    result = BatchResult(operation="encode")
    carriers = list(self.find_images(images, recursive))
    result.total = len(carriers)

    if output_dir:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

    # One BatchItem per carrier. NOTE(review): two carriers sharing a stem map
    # to the same output file when output_dir is set - confirm this is intended.
    for carrier in carriers:
        destination_dir = output_dir if output_dir else carrier.parent
        result.items.append(
            BatchItem(
                input_path=carrier,
                output_path=destination_dir / f"{carrier.stem}{output_suffix}.png",
                input_size=carrier.stat().st_size if carrier.exists() else 0,
            )
        )

    def process_encode(item: BatchItem) -> BatchItem:
        """Encode a single item, recording timing, status and any error."""
        item.status = BatchStatus.PROCESSING
        item.start_time = time.time()

        try:
            if encode_func:
                # Caller-supplied encoder takes precedence over the built-in one.
                encode_func(
                    image_path=item.input_path,
                    output_path=item.output_path,
                    message=message,
                    file_payload=file_payload,
                    credentials=creds.to_dict(),
                    compress=compress,
                )
            else:
                self._do_encode(item, message, file_payload, creds, compress)

            item.status = BatchStatus.SUCCESS
            written = item.output_path
            item.output_size = written.stat().st_size if written and written.exists() else 0
            item.message = f"Encoded to {item.output_path.name}"
        except Exception as e:
            item.status = BatchStatus.FAILED
            item.error = str(e)

        item.end_time = time.time()
        return item

    self._execute_batch(result, process_encode, progress_callback)

    return result
def _execute_batch(
    self,
    result: BatchResult,
    process_func: Callable[[BatchItem], BatchItem],
    progress_callback: ProgressCallback | None = None,
) -> None:
    """Run ``process_func`` over ``result.items`` on a thread pool.

    Uses at most ``self.max_workers`` workers. As each item finishes, the
    matching counter on ``result`` (succeeded / failed / skipped) is bumped
    under ``self._lock`` and ``progress_callback``, if given, is called as
    ``callback(finished_so_far, result.total, item)``. ``result.end_time`` is
    stamped once the pool has shut down.
    """
    # Item status -> name of the BatchResult counter it increments.
    counters = (
        (BatchStatus.SUCCESS, "succeeded"),
        (BatchStatus.FAILED, "failed"),
        (BatchStatus.SKIPPED, "skipped"),
    )

    with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
        pending = [pool.submit(process_func, entry) for entry in result.items]

        for finished_count, done in enumerate(as_completed(pending), start=1):
            item = done.result()

            with self._lock:
                for status, counter in counters:
                    if item.status == status:
                        setattr(result, counter, getattr(result, counter) + 1)
                        break

            if progress_callback:
                progress_callback(finished_count, result.total, item)

    result.end_time = time.time()
def _do_decode(
    self,
    item: BatchItem,
    creds: BatchCredentials,
) -> str:
    """
    Perform actual decoding using stegasoo.decode.

    Override this method to customize decoding behavior.

    Args:
        item: Batch item; ``input_path`` is read as the stego image and
            ``output_path`` (a directory, may be None) receives file payloads.
        creds: Credentials forwarded to ``decode``.

    Returns:
        The decoded text (``""`` if empty) for text payloads. For file
        payloads, ``"File extracted: <name>"`` when the file was written to
        ``item.output_path``, otherwise ``"[File: <name>]"``. If the decode
        module cannot be imported, the result of ``_mock_decode``.
    """
    from pathlib import PureWindowsPath

    # Only a failed import selects the mock fallback. An ImportError raised
    # while decoding must surface as a failure, not as a fake decoded message.
    try:
        from .decode import decode
    except ImportError:
        # Fallback to mock if stegasoo.decode not available
        return self._mock_decode(item, creds)

    # Read stego image
    stego_image = item.input_path.read_bytes()

    result = decode(
        stego_image=stego_image,
        reference_photo=creds.reference_photo,
        passphrase=creds.passphrase,
        pin=creds.pin,
        rsa_key_data=creds.rsa_key_data,
        rsa_password=creds.rsa_password,
    )

    if result.is_text:
        return result.message or ""

    # File payload - save it
    if item.output_path and result.file_data:
        fallback_name = "extracted_file"
        # The filename comes from the decoded payload, so it is untrusted:
        # keep only the final component (both "/" and "\\" count as
        # separators, drive prefixes are dropped) so that values such as
        # "../../x" or "/etc/x" cannot escape the output directory.
        safe_name = PureWindowsPath(result.filename or fallback_name).name
        if safe_name in ("", ".", ".."):
            safe_name = fallback_name

        output_file = item.output_path / safe_name
        output_file.write_bytes(result.file_data)
        return f"File extracted: {safe_name}"

    return f"[File: {result.filename or 'binary data'}]"
def _get_image_warnings(img, path: Path) -> list[str]:
    """Collect human-readable warnings about an opened image.

    Args:
        img: Image object; its ``format``, ``size`` and ``mode`` attributes
            are read.
        path: Path of the image (accepted but not used here).

    Returns:
        Zero or more warnings, in this order: format not in
        ``LOSSLESS_FORMATS``, pixel count above ``MAX_IMAGE_PIXELS``, mode
        other than RGB/RGBA.
    """
    from .constants import LOSSLESS_FORMATS, MAX_IMAGE_PIXELS

    def _issues():
        # Each message is only built when its condition actually holds.
        if img.format not in LOSSLESS_FORMATS:
            yield f"Lossy format ({img.format}) - quality will degrade on re-save"

        width, height = img.size[0], img.size[1]
        if width * height > MAX_IMAGE_PIXELS:
            yield f"Image exceeds {MAX_IMAGE_PIXELS:,} pixel limit"

        if img.mode not in ("RGB", "RGBA"):
            yield f"Non-RGB mode ({img.mode}) - will be converted"

    return list(_issues())
{result.succeeded}") + print(f"Failed: {result.failed}") + print(f"Skipped: {result.skipped}") + if result.duration: + print(f"Duration: {result.duration:.2f}s") + + if verbose or result.failed > 0: + print(f"\n{'─'*60}") + for item in result.items: + status_icon = { + BatchStatus.SUCCESS: "✓", + BatchStatus.FAILED: "✗", + BatchStatus.SKIPPED: "○", + BatchStatus.PENDING: "…", + BatchStatus.PROCESSING: "⟳", + }.get(item.status, "?") + + print(f"{status_icon} {item.input_path.name}") + if item.error: + print(f" Error: {item.error}") + elif item.message and verbose: + print(f" {item.message}") diff --git a/src/soosef/stegasoo/channel.py b/src/soosef/stegasoo/channel.py new file mode 100644 index 0000000..24836ee --- /dev/null +++ b/src/soosef/stegasoo/channel.py @@ -0,0 +1,649 @@ +""" +Channel Key Management for Stegasoo (v4.0.0) + +A channel key ties encode/decode operations to a specific deployment or group. +Messages encoded with one channel key can only be decoded by systems with the +same channel key configured. + +Use cases: +- Organization deployment: IT sets a company-wide channel key +- Friend groups: Share a channel key for private communication +- Air-gapped systems: Generate unique key per installation +- Public instances: No channel key = compatible with any instance without a channel key + +Storage priority: +1. Environment variable: STEGASOO_CHANNEL_KEY +2. Config file: ~/.stegasoo/channel.key or ./config/channel.key +3. 
None (public mode - compatible with any instance without a channel key) + +INTEGRATION STATUS (v4.0.0): +- ✅ get_channel_key_hash() integrated into derive_hybrid_key() in crypto.py +- ✅ get_channel_key_hash() integrated into derive_pixel_key() in crypto.py +- ✅ channel_key parameter added to encode() and decode() functions +- ✅ Header flags indicate whether message was encoded with channel key +- ✅ Helpful error messages for channel key mismatches +""" + +import hashlib +import os +import re +import secrets +from pathlib import Path + +from .debug import debug + +# Channel key format: 8 groups of 4 alphanumeric chars (32 chars total) +# Example: ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456 +CHANNEL_KEY_PATTERN = re.compile(r"^[A-Z0-9]{4}(-[A-Z0-9]{4}){7}$") +CHANNEL_KEY_LENGTH = 32 # Characters (excluding dashes) +CHANNEL_KEY_FORMATTED_LENGTH = 39 # With dashes + +# Environment variable name +CHANNEL_KEY_ENV_VAR = "STEGASOO_CHANNEL_KEY" + +# Config locations (in priority order) +CONFIG_LOCATIONS = [ + Path("./config/channel.key"), # Project config + Path.home() / ".stegasoo" / "channel.key", # User config +] + +# Encrypted config marker +ENCRYPTED_PREFIX = "ENC:" + + +def _get_machine_key() -> bytes: + """ + Get a machine-specific key for encrypting stored channel keys. + + Uses /etc/machine-id on Linux, falls back to hostname hash. + This ties the encrypted key to this specific machine. + """ + machine_id = None + + # Try Linux machine-id + try: + machine_id = Path("/etc/machine-id").read_text().strip() + except (OSError, FileNotFoundError): + pass + + # Fallback to hostname + if not machine_id: + import socket + + machine_id = socket.gethostname() + + # Hash to get consistent 32 bytes + return hashlib.sha256(machine_id.encode()).digest() + + +def _encrypt_for_storage(plaintext: str) -> str: + """ + Encrypt a channel key for storage using machine-specific key. + + Returns ENC: prefixed base64 string. 
def _decrypt_from_storage(stored: str) -> str | None:
    """
    Recover a channel key from its stored representation.

    Values without the ``ENCRYPTED_PREFIX`` marker are treated as legacy
    plaintext and returned unchanged. Marked values are base64-decoded and
    XORed with the machine key (repeated as needed).

    Returns:
        The recovered key text, or None if anything goes wrong while decoding
        a marked value.
    """
    import base64

    if not stored.startswith(ENCRYPTED_PREFIX):
        return stored

    try:
        blob = base64.b64decode(stored.removeprefix(ENCRYPTED_PREFIX))
        machine_key = _get_machine_key()
        key_len = len(machine_key)

        # XOR is its own inverse, so this mirrors the storage-side transform.
        plain = bytes(byte ^ machine_key[pos % key_len] for pos, byte in enumerate(blob))
        return plain.decode()
    except Exception:
        # Best effort: any decoding failure is reported as "no key".
        return None
def validate_channel_key(key: str) -> bool:
    """
    Report whether a string is a well-formed channel key.

    The key is first normalised with format_channel_key() and the result is
    then matched against CHANNEL_KEY_PATTERN.

    Args:
        key: Channel key to validate (with or without dashes)

    Returns:
        True if valid format, False otherwise (including empty input)

    Example:
        >>> validate_channel_key("ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456")
        True
        >>> validate_channel_key("invalid")
        False
    """
    if not key:
        return False

    try:
        canonical = format_channel_key(key)
    except ValueError:
        # Wrong length or illegal characters
        return False

    return CHANNEL_KEY_PATTERN.match(canonical) is not None
def set_channel_key(key: str, location: str = "project") -> Path:
    """
    Save a channel key to config file.

    The key is normalised with format_channel_key(), obfuscated with
    _encrypt_for_storage() and written as a single line. The parent
    directory is created when missing.

    The file is created (or re-permissioned) as owner read/write only
    *before* the key is written, so the secret never sits on disk with
    the process's default permissions.

    Args:
        key: Channel key to save (will be formatted)
        location: 'user' for ~/.stegasoo/; any other value (default
            'project') selects ./config/

    Returns:
        Path where key was saved

    Raises:
        ValueError: If key format is invalid
        OSError: If the directory or file cannot be written

    Example:
        >>> path = set_channel_key("ABCD1234EFGH5678IJKL9012MNOP3456")
        >>> print(path)
        config/channel.key
    """
    formatted = format_channel_key(key)

    if location == "user":
        config_path = Path.home() / ".stegasoo" / "channel.key"
    else:
        config_path = Path("./config/channel.key")

    # Create directory if needed
    config_path.parent.mkdir(parents=True, exist_ok=True)

    # Obfuscate (tied to this machine's identity)
    encrypted = _encrypt_for_storage(formatted)

    # Lock the file down first: touch() creates it with 0o600 when it does
    # not exist yet, chmod() tightens a pre-existing file. Both are best
    # effort because Windows doesn't support chmod the same way.
    try:
        config_path.touch(mode=0o600, exist_ok=True)
        config_path.chmod(0o600)
    except (OSError, AttributeError):
        pass

    # Only now write the secret into the already-restricted file
    config_path.write_text(encrypted + "\n")

    debug.print(f"Channel key saved to {config_path}")
    return config_path
def get_channel_fingerprint(key: str | None = None) -> str | None:
    """
    Build a masked, display-safe form of a channel key.

    Only the first and the last 4-character group stay visible; the six
    groups in between are replaced by bullets.

    Args:
        key: Channel key (if None, reads from config)

    Returns:
        Fingerprint like "ABCD-••••-••••-••••-••••-••••-••••-3456", or None
        when no key is given and none is configured

    Example:
        >>> print(get_channel_fingerprint())
        ABCD-••••-••••-••••-••••-••••-••••-3456
    """
    resolved = get_channel_key() if key is None else key
    if not resolved:
        return None

    groups = format_channel_key(resolved).split("-")
    first, last = groups[0], groups[-1]

    # Keep the outer groups, hide everything in between
    hidden = ["••••"] * 6
    return "-".join([first, *hidden, last])
def resolve_channel_key(
    value: str | None = None,
    *,
    file_path: str | Path | None = None,
    no_channel: bool = False,
) -> str | None:
    """
    Resolve a channel key from user input (unified for all frontends).

    This consolidates channel key resolution logic used by CLI, API, and WebUI.
    Sources are considered in this order: ``no_channel``, ``file_path``,
    then ``value``.

    Debug output never contains a raw key: an explicit ``value`` is logged
    as ``<redacted>`` and resolved keys are logged by fingerprint only.

    Args:
        value: Input value:
            - 'auto' or None: Use server-configured key
            - 'none' or '': Public mode (no channel key)
            - explicit key: Validate and use
        file_path: Path to file containing channel key
        no_channel: If True, return "" for public mode (overrides value)

    Returns:
        None: Use server-configured key (auto mode)
        "": Public mode (no channel key)
        str: Explicit valid channel key, in dashed canonical form

    Raises:
        ValueError: If key format is invalid
        FileNotFoundError: If file_path doesn't exist

    Example:
        >>> resolve_channel_key("auto")  # -> None
        >>> resolve_channel_key("none")  # -> ""
        >>> resolve_channel_key(no_channel=True)  # -> ""
        >>> resolve_channel_key("ABCD-1234-...")  # -> "ABCD-1234-..."
        >>> resolve_channel_key(file_path="key.txt")  # reads from file
    """
    # Only the reserved keywords are safe to echo; anything else may be a
    # full channel key and must not end up in debug logs.
    if isinstance(value, str) and value.lower() not in ("", "auto", "none"):
        shown_value = "<redacted>"
    else:
        shown_value = value
    debug.print(
        f"resolve_channel_key: value={shown_value}, file_path={file_path}, no_channel={no_channel}"
    )

    # no_channel flag takes precedence
    if no_channel:
        debug.print("resolve_channel_key: public mode (no_channel=True)")
        return ""

    # Read from file if provided
    if file_path:
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"Channel key file not found: {file_path}")
        key = path.read_text().strip()
        if not validate_channel_key(key):
            raise ValueError(f"Invalid channel key format in file: {file_path}")
        debug.print(f"resolve_channel_key: from file -> {get_channel_fingerprint(key)}")
        return format_channel_key(key)

    # Handle value string
    if value is None or value.lower() == "auto":
        debug.print("resolve_channel_key: auto mode (server config)")
        return None

    if value == "" or value.lower() == "none":
        debug.print("resolve_channel_key: public mode (explicit none)")
        return ""

    # Explicit key - validate
    if validate_channel_key(value):
        formatted = format_channel_key(value)
        debug.print(f"resolve_channel_key: explicit key -> {get_channel_fingerprint(formatted)}")
        return formatted

    raise ValueError(
        "Invalid channel key format. Expected: XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX\n"
        "Generate a new key with: stegasoo channel generate"
    )
+ cmd = sys.argv[1].lower() + + if cmd == "generate": + key = generate_channel_key() + print("Generated channel key:") + print(f" {key}") + print() + save = input("Save to config? [y/N]: ").strip().lower() + if save == "y": + path = set_channel_key(key) + print(f"Saved to: {path}") + + elif cmd == "set": + if len(sys.argv) < 3: + print("Usage: python -m stegasoo.channel set ") + sys.exit(1) + + try: + key = sys.argv[2] + formatted = format_channel_key(key) + path = set_channel_key(formatted) + print(f"Channel key set: {get_channel_fingerprint(formatted)}") + print(f"Saved to: {path}") + except ValueError as e: + print(f"Error: {e}") + sys.exit(1) + + elif cmd == "show": + status = get_channel_status() + if status["configured"]: + print(f"Channel key: {status['key']}") + print(f"Source: {status['source']}") + else: + print("No channel key configured") + + elif cmd == "clear": + deleted = clear_channel_key("all") + if deleted: + print(f"Removed channel key from: {', '.join(str(p) for p in deleted)}") + else: + print("No channel key files found") + + elif cmd == "status": + print_status() + + else: + print(f"Unknown command: {cmd}") + sys.exit(1) diff --git a/src/soosef/stegasoo/cli.py b/src/soosef/stegasoo/cli.py new file mode 100644 index 0000000..00fa7d8 --- /dev/null +++ b/src/soosef/stegasoo/cli.py @@ -0,0 +1,2982 @@ +""" +Stegasoo CLI Module (v3.2.0) + +A proper CLI architecture using Click. 
This module demonstrates several +important patterns for building production-quality command-line tools: + +PATTERN: COMMAND GROUPS +======================= +Click's @group decorator creates a hierarchy of commands: + + stegasoo <- Main entry point + ├── encode <- Simple commands at root level + ├── decode + ├── generate + ├── info + ├── batch/ <- Group for related commands + │ ├── encode + │ ├── decode + │ └── check + ├── channel/ <- Another group + │ ├── generate + │ ├── show + │ ├── status + │ ├── qr + │ └── clear + ├── tools/ <- Utility group + │ ├── capacity + │ ├── strip + │ ├── peek + │ └── exif + └── admin/ <- Administration group + ├── recover + └── generate-key + +PATTERN: JSON OUTPUT MODE +========================= +Every command supports --json for machine-readable output. The pattern: + + @click.pass_context + def my_command(ctx, ...): + if ctx.obj.get("json"): + click.echo(json.dumps(result, indent=2)) + else: + # Human-readable output with colors/formatting + click.echo(f"✓ Success: {result}") + +This makes the CLI scriptable - you can pipe to jq, use in shell scripts, etc. 
+ +PATTERN: SENSITIVE INPUT +======================== +Passwords/secrets use Click's secure prompts: + + @click.option("--passphrase", prompt=True, hide_input=True, + confirmation_prompt=True, help="Passphrase") + +- prompt=True: Asks if not provided +- hide_input=True: No echo (like sudo) +- confirmation_prompt=True: "Repeat for confirmation" + +PATTERN: DRY-RUN MODE +===================== +For destructive or slow operations, --dry-run shows what WOULD happen: + + if dry_run: + click.echo(f"Would encode to {output}") + return + +Changes in v3.2.0: +- Updated to use DEFAULT_PASSPHRASE_WORDS (consistency with v3.2.0 naming) +- Updated help text to use 'passphrase' terminology +""" + +import json +from pathlib import Path + +import click + +from .batch import ( + BatchProcessor, + batch_capacity_check, + print_batch_result, +) +from .constants import ( + DEFAULT_PASSPHRASE_WORDS, # v3.2.0: renamed from DEFAULT_PHRASE_WORDS + DEFAULT_PIN_LENGTH, + MAX_FILE_PAYLOAD_SIZE, + MAX_MESSAGE_SIZE, + __version__, +) + +# Click context settings - these apply to all commands +# help_option_names lets users use either -h or --help +CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) + + +# ============================================================================= +# ROOT GROUP - The main entry point +# ============================================================================= +# +# @click.group() creates a command group. The function becomes both: +# 1. A callable that sets up shared state (ctx.obj) +# 2. A container for subcommands via @cli.command() decorators +# +# The context object (ctx.obj) is passed down to all subcommands. +# We use it to share the --json flag across the entire CLI. 
# Root command group: stores the global --json flag on the shared context
# object and switches on debug logging when --debug is given.
@click.group(context_settings=CONTEXT_SETTINGS)
@click.version_option(__version__, "-v", "--version")
@click.option("--json", "json_output", is_flag=True, help="Output results as JSON")
@click.option("--debug", "debug_mode", is_flag=True, help="Enable debug logging to stderr")
@click.pass_context
def cli(ctx, json_output, debug_mode):
    """
    Stegasoo - Steganography with hybrid authentication.

    Hide messages in images using PIN + passphrase security.
    """
    # Make sure a dict exists on the context before subcommands read it
    ctx.ensure_object(dict)
    ctx.obj.update(json=json_output)

    if not debug_mode:
        return

    # Imported lazily so the debug module is only touched on request
    from .debug import debug

    debug.enable(True)
# encode: embed a text message or a file into a carrier image.
#
# Flow: validate payload options -> measure payload and carrier capacity ->
# (--dry-run: report and stop) -> pick output path/format -> call the library
# encoder -> optionally pre-verify platform survival -> write the result.
# Exits with status 1 when encoding fails.
#
# NOTE: documentation lives in comments here because the docstring is the
# user-visible Click help text.
@cli.command()
@click.argument("carrier", type=click.Path(exists=True))
@click.option(
    "-r",
    "--reference",
    required=True,
    type=click.Path(exists=True),
    help="Reference photo (shared secret)",
)
@click.option("-m", "--message", help="Message to encode")
@click.option(
    "-f",
    "--file",
    "file_payload",
    type=click.Path(exists=True),
    help="File to embed instead of message",
)
@click.option("-o", "--output", type=click.Path(), help="Output image path")
@click.option(
    "--passphrase",
    prompt=True,
    hide_input=True,
    confirmation_prompt=True,
    help="Passphrase (recommend 4+ words)",
)
@click.option("--pin", prompt=True, hide_input=True, confirmation_prompt=True, help="PIN code")
@click.option("--dry-run", is_flag=True, help="Show capacity usage without encoding")
@click.option(
    "--platform",
    type=click.Choice(["telegram", "discord", "signal", "whatsapp"], case_sensitive=False),
    help="DCT preset for social media platform (implies DCT+JPEG mode)",
)
@click.option("--verify/--no-verify", default=True, help="Pre-verify payload survives platform recompression")
@click.pass_context
def encode(ctx, carrier, reference, message, file_payload, output, passphrase, pin, dry_run, platform, verify):
    """
    Encode a message or file into an image.

    Examples:

        stegasoo encode photo.png -r ref.jpg -m "Secret message" --passphrase --pin

        stegasoo encode photo.png -r ref.jpg -f secret.pdf -o encoded.png
    """
    from PIL import Image

    from .encode import encode as stegasoo_encode
    from .encode import encode_file as stegasoo_encode_file

    if not message and not file_payload:
        raise click.UsageError("Either --message or --file is required")

    # --file wins when both are given; say so instead of silently dropping
    # the message. Goes to stderr so --json output stays parseable.
    if message and file_payload:
        click.echo("Warning: both --message and --file given; --message is ignored", err=True)

    # Calculate payload size
    if file_payload:
        payload_size = Path(file_payload).stat().st_size
        payload_type = "file"
    else:
        payload_size = len(message.encode("utf-8"))
        payload_type = "text"

    # Get image capacity (3 LSBs per pixel minus the fixed 69-byte overhead).
    # Clamped at zero: carriers smaller than the overhead have no capacity.
    with Image.open(carrier) as img:
        width, height = img.size
        capacity_bytes = max(0, (width * height * 3 // 8) - 69)  # v3.2.0: corrected overhead

    if dry_run:
        result = {
            "carrier": carrier,
            "reference": reference,
            "dimensions": f"{width}x{height}",
            "capacity_bytes": capacity_bytes,
            "payload_type": payload_type,
            "payload_size": payload_size,
            # Guard against division by zero on carriers with no capacity
            "usage_percent": (
                round(payload_size / capacity_bytes * 100, 1) if capacity_bytes > 0 else 0
            ),
            "fits": payload_size < capacity_bytes,
        }

        if ctx.obj.get("json"):
            click.echo(json.dumps(result, indent=2))
        else:
            click.echo(f"Carrier: {carrier} ({width}x{height})")
            click.echo(f"Reference: {reference}")
            click.echo(f"Capacity: {capacity_bytes:,} bytes ({capacity_bytes//1024} KB)")
            click.echo(f"Payload: {payload_size:,} bytes ({payload_type})")
            click.echo(f"Usage: {result['usage_percent']}%")
            click.echo(f"Status: {'✓ Fits' if result['fits'] else '✗ Too large'}")
        return

    # Read input files
    with open(reference, "rb") as f:
        reference_data = f.read()
    with open(carrier, "rb") as f:
        carrier_data = f.read()

    # Determine output path and format
    # Default to JPEG for JPEG carriers (preserves DCT mode benefits)
    carrier_ext = Path(carrier).suffix.lower()
    if not output:
        if carrier_ext in (".jpg", ".jpeg"):
            output = f"{Path(carrier).stem}_encoded.jpg"
        else:
            output = f"{Path(carrier).stem}_encoded.png"

    # Detect output format from extension
    output_ext = Path(output).suffix.lower()
    use_dct = output_ext in (".jpg", ".jpeg")

    from .steganography import EMBED_MODE_DCT, EMBED_MODE_LSB

    # Platform preset overrides
    preset = None
    if platform:
        from .platform_presets import get_preset

        preset = get_preset(platform)
        use_dct = True  # Platform mode implies DCT+JPEG
        if output_ext not in (".jpg", ".jpeg"):
            output = str(Path(output).with_suffix(".jpg"))
            # Informational notice: stderr, so it never corrupts --json output
            click.echo(f" Platform mode: output changed to {output}", err=True)

    try:
        encode_kwargs = {
            "reference_photo": reference_data,
            "carrier_image": carrier_data,
            "passphrase": passphrase,
            "pin": pin,
            "embed_mode": EMBED_MODE_DCT if use_dct else EMBED_MODE_LSB,
            "dct_output_format": "jpeg" if use_dct else "png",
        }

        if preset:
            encode_kwargs["platform"] = platform

        if file_payload:
            result = stegasoo_encode_file(filepath=file_payload, **encode_kwargs)
        else:
            result = stegasoo_encode(message=message, **encode_kwargs)

        # Pre-verify survival if platform mode
        if preset and verify:
            from .crypto import derive_pixel_key
            from .platform_presets import pre_verify_survival

            pixel_key = derive_pixel_key(reference_data, passphrase, pin)
            survived = pre_verify_survival(result.stego_image, pixel_key, preset)
            if not survived:
                click.echo(
                    f" ⚠ Warning: Payload may not survive {preset.name} recompression. "
                    "Try a larger carrier image or shorter message.",
                    err=True,
                )

        # Write output
        with open(output, "wb") as f:
            f.write(result.stego_image)

        if ctx.obj.get("json"):
            click.echo(
                json.dumps(
                    {
                        "status": "success",
                        "carrier": carrier,
                        "reference": reference,
                        "output": output,
                        "payload_type": payload_type,
                    },
                    indent=2,
                )
            )
        else:
            click.echo(f"✓ Encoded {payload_type} to {output}")
            click.echo(f" Reference: {reference}")

    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero
        if ctx.obj.get("json"):
            click.echo(json.dumps({"status": "error", "error": str(e)}, indent=2))
        else:
            click.echo(f"✗ Encoding failed: {e}", err=True)
        raise SystemExit(1)


# decode: extract a text message or a file from a stego image.
#
# File payloads are written into the -o/--output directory (current directory
# when omitted) under the file name stored in the payload. That name comes
# from the decoded image, i.e. from whoever produced it, so it is reduced to
# its final path component before use. Exits with status 1 on failure.
@cli.command()
@click.argument("image", type=click.Path(exists=True))
@click.option(
    "-r",
    "--reference",
    required=True,
    type=click.Path(exists=True),
    help="Reference photo (shared secret)",
)
@click.option("--passphrase", prompt=True, hide_input=True, help="Passphrase")
@click.option("--pin", prompt=True, hide_input=True, help="PIN code")
@click.option("-o", "--output", type=click.Path(), help="Output path for file payloads")
@click.option(
    "--platform",
    type=click.Choice(["telegram", "discord", "signal", "whatsapp"], case_sensitive=False),
    help="Platform preset (must match encoding platform)",
)
@click.pass_context
def decode(ctx, image, reference, passphrase, pin, output, platform):
    """
    Decode a message or file from an image.

    Examples:

        stegasoo decode encoded.png -r ref.jpg --passphrase --pin

        stegasoo decode encoded.png -r ref.jpg --platform telegram
    """
    from .decode import decode as stegasoo_decode

    # Read input files
    with open(image, "rb") as f:
        stego_data = f.read()
    with open(reference, "rb") as f:
        reference_data = f.read()

    # Resolve platform preset for DCT decoding
    decode_kwargs = {}
    if platform:
        from .platform_presets import get_preset

        # Result is not needed here; the lookup is kept so an unusable
        # platform name still surfaces before decoding starts.
        # NOTE(review): presumably get_preset raises on unknown names - confirm.
        get_preset(platform)
        decode_kwargs["platform"] = platform

    try:
        result = stegasoo_decode(
            stego_image=stego_data,
            reference_photo=reference_data,
            passphrase=passphrase,
            pin=pin,
            **decode_kwargs,
        )

        if result.is_file:
            # File payload. The embedded name is untrusted: keep only the
            # final component so "../x", "/abs/x" or "C:\\x" cannot escape
            # the output directory.
            filename = Path(str(result.filename or "decoded_file").replace("\\", "/")).name
            if filename in ("", ".", ".."):
                filename = "decoded_file"
            output_path = Path(output) / filename if output else Path(filename)

            # Ensure output directory exists
            output_path.parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, "wb") as f:
                f.write(result.file_data)

            if ctx.obj.get("json"):
                click.echo(
                    json.dumps(
                        {
                            "status": "success",
                            "image": image,
                            "reference": reference,
                            "payload_type": "file",
                            "filename": filename,
                            "output": str(output_path),
                            "size": len(result.file_data),
                        },
                        indent=2,
                    )
                )
            else:
                click.echo(f"✓ Extracted file: {output_path}")
                click.echo(f" Size: {len(result.file_data):,} bytes")
        else:
            # Text message
            if ctx.obj.get("json"):
                click.echo(
                    json.dumps(
                        {
                            "status": "success",
                            "image": image,
                            "reference": reference,
                            "payload_type": "text",
                            "message": result.message,
                        },
                        indent=2,
                    )
                )
            else:
                click.echo(f"Decoded from {image}:")
                click.echo(result.message)

    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero
        if ctx.obj.get("json"):
            click.echo(json.dumps({"status": "error", "error": str(e)}, indent=2))
        else:
            click.echo(f"✗ Decoding failed: {e}", err=True)
        raise SystemExit(1)
(v4.3.0) +# ============================================================================= + + +@cli.command("audio-encode") +@click.argument("carrier", type=click.Path(exists=True)) +@click.option( + "-r", + "--reference", + required=True, + type=click.Path(exists=True), + help="Reference photo (shared secret)", +) +@click.option("-m", "--message", help="Message to encode") +@click.option( + "-f", + "--file", + "file_payload", + type=click.Path(exists=True), + help="File to embed instead of message", +) +@click.option("-o", "--output", type=click.Path(), help="Output audio path") +@click.option( + "--mode", + "embed_mode", + default="audio_lsb", + type=click.Choice(["audio_lsb", "audio_spread"]), + help="Embedding mode", +) +@click.option( + "--passphrase", + prompt=True, + hide_input=True, + confirmation_prompt=True, + help="Passphrase (recommend 4+ words)", +) +@click.option("--pin", prompt=True, hide_input=True, confirmation_prompt=True, help="PIN code") +@click.option( + "--rsa-key", + type=click.Path(exists=True), + help="RSA private key PEM file", +) +@click.option("--rsa-password", default=None, help="Password for encrypted RSA key") +@click.option("--channel-key", default=None, help="Channel key for deployment isolation") +@click.option( + "--chip-tier", + "chip_tier", + default=None, + type=click.Choice(["lossless", "high", "low"]), + help="Spread spectrum chip tier (lossless=256, high=512, low=1024). Only for audio_spread.", +) +@click.option("--dry-run", is_flag=True, help="Show capacity usage without encoding") +@click.pass_context +def audio_encode( + ctx, + carrier, + reference, + message, + file_payload, + output, + embed_mode, + passphrase, + pin, + rsa_key, + rsa_password, + channel_key, + chip_tier, + dry_run, +): + """ + Encode a message or file into an audio carrier. 
+ + Examples: + + stegasoo audio-encode carrier.wav -r ref.jpg -m "Secret" --mode audio_lsb + + stegasoo audio-encode carrier.wav -r ref.jpg -f secret.pdf --mode audio_spread + + stegasoo audio-encode carrier.wav -r ref.jpg -m "Secret" --dry-run + """ + from .constants import AUDIO_ENABLED + + if not AUDIO_ENABLED: + raise click.UsageError( + "Audio support is disabled. Install audio extras (pip install stegasoo[audio]) " + "or set STEGASOO_AUDIO=1 to force enable." + ) + + from .audio_steganography import calculate_audio_lsb_capacity + from .encode import encode_audio + from .models import FilePayload + from .spread_steganography import calculate_audio_spread_capacity + + if not message and not file_payload: + raise click.UsageError("Either --message or --file is required") + + # Read RSA key if provided + rsa_key_data = None + if rsa_key: + with open(rsa_key, "rb") as f: + rsa_key_data = f.read() + + # Calculate payload size + if file_payload: + payload_size = Path(file_payload).stat().st_size + payload_type = "file" + else: + payload_size = len(message.encode("utf-8")) + payload_type = "text" + + # Read input files + with open(reference, "rb") as f: + reference_data = f.read() + with open(carrier, "rb") as f: + carrier_data = f.read() + + if dry_run: + try: + from .audio_utils import get_audio_info + + info = get_audio_info(carrier_data) + lsb_capacity = calculate_audio_lsb_capacity(carrier_data) + spread_capacity = calculate_audio_spread_capacity(carrier_data) + + if embed_mode == "audio_lsb": + capacity = lsb_capacity + else: + capacity = spread_capacity.usable_capacity_bytes + + result = { + "carrier": carrier, + "reference": reference, + "format": info.format, + "sample_rate": info.sample_rate, + "channels": info.channels, + "duration_seconds": round(info.duration_seconds, 2), + "embed_mode": embed_mode, + "capacity_bytes": capacity, + "lsb_capacity_bytes": lsb_capacity, + "spread_capacity_bytes": spread_capacity.usable_capacity_bytes, + "payload_type": 
payload_type, + "payload_size": payload_size, + "usage_percent": round(payload_size / capacity * 100, 1) if capacity > 0 else 0, + "fits": payload_size < capacity, + } + + if ctx.obj.get("json"): + click.echo(json.dumps(result, indent=2)) + else: + click.echo( + f"Carrier: {carrier} ({info.format}, {info.sample_rate}Hz, {info.channels}ch)" + ) + click.echo(f"Duration: {info.duration_seconds:.1f}s") + click.echo(f"Reference: {reference}") + click.echo(f"Mode: {embed_mode}") + click.echo(f"LSB capacity: {lsb_capacity:,} bytes ({lsb_capacity // 1024} KB)") + click.echo(f"Spread capacity: {spread_capacity.usable_capacity_bytes:,} bytes") + click.echo(f"Payload: {payload_size:,} bytes ({payload_type})") + click.echo(f"Usage: {result['usage_percent']}%") + click.echo(f"Status: {'✓ Fits' if result['fits'] else '✗ Too large'}") + except Exception as e: + if ctx.obj.get("json"): + click.echo(json.dumps({"status": "error", "error": str(e)}, indent=2)) + else: + click.echo(f"✗ Capacity check failed: {e}", err=True) + raise SystemExit(1) + return + + # Determine output path + if not output: + output = f"{Path(carrier).stem}_encoded.wav" + + try: + if file_payload: + payload = FilePayload.from_file(file_payload) + else: + payload = message + + # Resolve chip tier name to integer + resolved_chip_tier = None + if chip_tier is not None: + from .constants import AUDIO_SS_CHIP_TIER_NAMES + + resolved_chip_tier = AUDIO_SS_CHIP_TIER_NAMES.get(chip_tier) + + stego_audio, stats = encode_audio( + message=payload, + reference_photo=reference_data, + carrier_audio=carrier_data, + passphrase=passphrase, + pin=pin, + rsa_key_data=rsa_key_data, + rsa_password=rsa_password, + embed_mode=embed_mode, + channel_key=channel_key, + chip_tier=resolved_chip_tier, + ) + + with open(output, "wb") as f: + f.write(stego_audio) + + if ctx.obj.get("json"): + click.echo( + json.dumps( + { + "status": "success", + "carrier": carrier, + "reference": reference, + "output": output, + "mode": stats.embed_mode, + 
@cli.command("audio-decode")
@click.argument("audio", type=click.Path(exists=True))
@click.option(
    "-r",
    "--reference",
    required=True,
    type=click.Path(exists=True),
    help="Reference photo (shared secret)",
)
@click.option(
    "--mode",
    "embed_mode",
    default="audio_auto",
    type=click.Choice(["audio_auto", "audio_lsb", "audio_spread"]),
    help="Embedding mode (auto-detect by default)",
)
@click.option("--passphrase", prompt=True, hide_input=True, help="Passphrase")
@click.option("--pin", prompt=True, hide_input=True, help="PIN code")
@click.option(
    "--rsa-key",
    type=click.Path(exists=True),
    help="RSA private key PEM file",
)
@click.option("--rsa-password", default=None, help="Password for encrypted RSA key")
@click.option("--channel-key", default=None, help="Channel key for deployment isolation")
@click.option("-o", "--output", type=click.Path(), help="Output path for file payloads")
@click.pass_context
def audio_decode(
    ctx, audio, reference, embed_mode, passphrase, pin, rsa_key, rsa_password, channel_key, output
):
    """
    Decode a message or file from stego audio.

    Examples:

        stegasoo audio-decode stego.wav -r ref.jpg

        stegasoo audio-decode stego.wav -r ref.jpg --mode audio_lsb -o ./extracted/
    """
    # The docstring above doubles as the command's --help text; keep it user-facing.
    #
    # Flow: read the carrier, reference photo and optional RSA key from disk,
    # call decode_audio(), then either write the recovered file payload or print
    # the recovered text. ctx.obj["json"] switches every message to JSON.
    # Any failure while decoding or writing is reported and ends the process
    # with SystemExit(1); a disabled audio backend raises click.UsageError.
    from .constants import AUDIO_ENABLED

    if not AUDIO_ENABLED:
        raise click.UsageError(
            "Audio support is disabled. Install audio extras (pip install stegasoo[audio]) "
            "or set STEGASOO_AUDIO=1 to force enable."
        )

    from .decode import decode_audio

    # Read RSA key if provided
    rsa_key_data = Path(rsa_key).read_bytes() if rsa_key else None

    audio_data = Path(audio).read_bytes()
    reference_data = Path(reference).read_bytes()

    try:
        result = decode_audio(
            stego_audio=audio_data,
            reference_photo=reference_data,
            passphrase=passphrase,
            pin=pin,
            rsa_key_data=rsa_key_data,
            rsa_password=rsa_password,
            embed_mode=embed_mode,
            channel_key=channel_key,
        )

        if result.is_file:
            # The filename travels inside the decoded payload, i.e. it is chosen
            # by whoever created the stego file. Keep only its final component
            # so "../../x" or "/abs/path" cannot escape the output directory
            # (joining a Path with an absolute path discards the left side).
            raw_name = str(result.filename or "decoded_file")
            filename = Path(raw_name.replace("\\", "/")).name
            if filename in ("", ".", ".."):
                filename = "decoded_file"

            output_path = Path(output) / filename if output else Path(filename)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_bytes(result.file_data)

            if ctx.obj.get("json"):
                click.echo(
                    json.dumps(
                        {
                            "status": "success",
                            "audio": audio,
                            "payload_type": "file",
                            "filename": filename,
                            "output": str(output_path),
                            "size": len(result.file_data),
                        },
                        indent=2,
                    )
                )
            else:
                click.echo(f"✓ Extracted file: {output_path}")
                click.echo(f" Size: {len(result.file_data):,} bytes")
        else:
            if ctx.obj.get("json"):
                click.echo(
                    json.dumps(
                        {
                            "status": "success",
                            "audio": audio,
                            "payload_type": "text",
                            "message": result.message,
                        },
                        indent=2,
                    )
                )
            else:
                click.echo(f"Decoded from {audio}:")
                click.echo(result.message)

    except Exception as e:
        # Top-level CLI boundary: report the failure in the active output
        # format and exit non-zero instead of printing a traceback.
        if ctx.obj.get("json"):
            click.echo(json.dumps({"status": "error", "error": str(e)}, indent=2))
        else:
            click.echo(f"✗ Audio decoding failed: {e}", err=True)
        raise SystemExit(1)
+ + Examples: + + stegasoo audio-info carrier.wav + + stegasoo --json audio-info carrier.wav + """ + from .constants import AUDIO_ENABLED + + if not AUDIO_ENABLED: + raise click.UsageError( + "Audio support is disabled. Install audio extras (pip install stegasoo[audio]) " + "or set STEGASOO_AUDIO=1 to force enable." + ) + + from .audio_steganography import calculate_audio_lsb_capacity + from .audio_utils import get_audio_info + from .spread_steganography import calculate_audio_spread_capacity + + with open(audio, "rb") as f: + audio_data = f.read() + + try: + info = get_audio_info(audio_data) + lsb_capacity = calculate_audio_lsb_capacity(audio_data) + + # Calculate spread capacity at each chip tier + spread_tiers = {} + for tier_name, tier_val in [("lossless", 0), ("high", 1), ("low", 2)]: + cap = calculate_audio_spread_capacity(audio_data, chip_tier=tier_val) + spread_tiers[tier_name] = { + "bytes": cap.usable_capacity_bytes, + "kb": round(cap.usable_capacity_bytes / 1024, 1), + "chip_length": cap.chip_length, + "embeddable_channels": cap.embeddable_channels, + } + + result = { + "file": audio, + "format": info.format, + "sample_rate": info.sample_rate, + "channels": info.channels, + "duration_seconds": round(info.duration_seconds, 2), + "num_samples": info.num_samples, + "bit_depth": info.bit_depth, + "file_size": len(audio_data), + "capacity": { + "audio_lsb": { + "bytes": lsb_capacity, + "kb": round(lsb_capacity / 1024, 1), + }, + "audio_spread": spread_tiers, + }, + } + + if ctx.obj.get("json"): + click.echo(json.dumps(result, indent=2)) + else: + click.echo(f"File: {audio}") + click.echo(f"Format: {info.format}") + click.echo(f"Sample rate: {info.sample_rate} Hz") + click.echo(f"Channels: {info.channels}") + click.echo(f"Duration: {info.duration_seconds:.1f}s") + click.echo(f"Samples: {info.num_samples:,}") + if info.bit_depth: + click.echo(f"Bit depth: {info.bit_depth}-bit") + click.echo(f"File size: {len(audio_data):,} bytes") + click.echo() + 
click.echo("Steganographic capacity:") + click.echo(f" LSB: {lsb_capacity:,} bytes ({lsb_capacity // 1024} KB)") + for tier_name in ("lossless", "high", "low"): + t = spread_tiers[tier_name] + click.echo( + f" Spread ({tier_name:>8}, chip={t['chip_length']}): " + f"{t['bytes']:,} bytes ({t['kb']} KB)" + ) + + except Exception as e: + if ctx.obj.get("json"): + click.echo(json.dumps({"status": "error", "error": str(e)}, indent=2)) + else: + click.echo(f"✗ Audio info failed: {e}", err=True) + raise SystemExit(1) + + +# ============================================================================= +# VIDEO COMMANDS (v4.4.0) +# ============================================================================= + + +@cli.command("video-encode") +@click.argument("carrier", type=click.Path(exists=True)) +@click.option( + "-r", + "--reference", + required=True, + type=click.Path(exists=True), + help="Reference photo (shared secret)", +) +@click.option("-m", "--message", help="Message to encode") +@click.option( + "-f", + "--file", + "file_payload", + type=click.Path(exists=True), + help="File to embed instead of message", +) +@click.option("-o", "--output", type=click.Path(), help="Output video path") +@click.option( + "--passphrase", + prompt=True, + hide_input=True, + confirmation_prompt=True, + help="Passphrase (recommend 4+ words)", +) +@click.option("--pin", prompt=True, hide_input=True, confirmation_prompt=True, help="PIN code") +@click.option( + "--rsa-key", + type=click.Path(exists=True), + help="RSA private key PEM file", +) +@click.option("--rsa-password", default=None, help="Password for encrypted RSA key") +@click.option("--channel-key", default=None, help="Channel key for deployment isolation") +@click.option("--dry-run", is_flag=True, help="Show capacity usage without encoding") +@click.pass_context +def video_encode( + ctx, + carrier, + reference, + message, + file_payload, + output, + passphrase, + pin, + rsa_key, + rsa_password, + channel_key, + dry_run, +): + """ 
+ Encode a message or file into a video carrier. + + Output is MKV format with FFV1 lossless codec to preserve embedded data. + + Examples: + + stegasoo video-encode carrier.mp4 -r ref.jpg -m "Secret" + + stegasoo video-encode carrier.mp4 -r ref.jpg -f secret.pdf -o stego.mkv + + stegasoo video-encode carrier.mp4 -r ref.jpg -m "Secret" --dry-run + """ + from .constants import VIDEO_ENABLED + + if not VIDEO_ENABLED: + raise click.UsageError( + "Video support is disabled. Install ffmpeg and audio extras, " + "or set STEGASOO_VIDEO=1 to force enable." + ) + + from .encode import encode_video + from .models import FilePayload + from .video_utils import calculate_video_capacity, get_video_info + + if not message and not file_payload: + raise click.UsageError("Either --message or --file is required") + + # Read RSA key if provided + rsa_key_data = None + if rsa_key: + with open(rsa_key, "rb") as f: + rsa_key_data = f.read() + + # Calculate payload size + if file_payload: + payload_size = Path(file_payload).stat().st_size + payload_type = "file" + else: + payload_size = len(message.encode("utf-8")) + payload_type = "text" + + # Read input files + with open(reference, "rb") as f: + reference_data = f.read() + with open(carrier, "rb") as f: + carrier_data = f.read() + + if dry_run: + try: + info = get_video_info(carrier_data) + capacity_info = calculate_video_capacity(carrier_data) + + result = { + "carrier": carrier, + "reference": reference, + "format": info.format, + "codec": info.codec, + "resolution": f"{info.width}x{info.height}", + "fps": round(info.fps, 2), + "duration_seconds": round(info.duration_seconds, 2), + "total_frames": info.total_frames, + "i_frames": info.i_frame_count, + "capacity_bytes": capacity_info.usable_capacity_bytes, + "capacity_kb": round(capacity_info.usable_capacity_bytes / 1024, 1), + "payload_type": payload_type, + "payload_size": payload_size, + "usage_percent": round( + payload_size / capacity_info.usable_capacity_bytes * 100, 1 + ) + if 
capacity_info.usable_capacity_bytes > 0 + else 0, + "fits": payload_size < capacity_info.usable_capacity_bytes, + } + + if ctx.obj.get("json"): + click.echo(json.dumps(result, indent=2)) + else: + click.echo(f"Carrier: {carrier} ({info.format}, {info.codec})") + click.echo(f"Resolution: {info.width}x{info.height} @ {info.fps:.2f} fps") + click.echo(f"Duration: {info.duration_seconds:.1f}s") + click.echo(f"Frames: {info.total_frames} total, {info.i_frame_count} I-frames") + click.echo(f"Reference: {reference}") + click.echo( + f"Capacity: {capacity_info.usable_capacity_bytes:,} bytes " + f"({capacity_info.usable_capacity_bytes // 1024} KB)" + ) + click.echo(f"Payload: {payload_size:,} bytes ({payload_type})") + click.echo(f"Usage: {result['usage_percent']}%") + click.echo(f"Status: {'✓ Fits' if result['fits'] else '✗ Too large'}") + click.echo() + click.echo("Note: Output will be MKV format with FFV1 lossless codec") + except Exception as e: + if ctx.obj.get("json"): + click.echo(json.dumps({"status": "error", "error": str(e)}, indent=2)) + else: + click.echo(f"✗ Capacity check failed: {e}", err=True) + raise SystemExit(1) + return + + # Determine output path + if not output: + output = f"{Path(carrier).stem}_encoded.mkv" + + try: + if file_payload: + payload = FilePayload.from_file(file_payload) + else: + payload = message + + stego_video, stats = encode_video( + message=payload, + reference_photo=reference_data, + carrier_video=carrier_data, + passphrase=passphrase, + pin=pin, + rsa_key_data=rsa_key_data, + rsa_password=rsa_password, + channel_key=channel_key, + ) + + with open(output, "wb") as f: + f.write(stego_video) + + if ctx.obj.get("json"): + click.echo( + json.dumps( + { + "status": "success", + "carrier": carrier, + "reference": reference, + "output": output, + "codec": stats.codec, + "frames_modified": stats.frames_modified, + "duration_seconds": round(stats.duration_seconds, 2), + "capacity_used": round(stats.capacity_used * 100, 1), + }, + indent=2, + 
@cli.command("video-decode")
@click.argument("video", type=click.Path(exists=True))
@click.option(
    "-r",
    "--reference",
    required=True,
    type=click.Path(exists=True),
    help="Reference photo (shared secret)",
)
@click.option("--passphrase", prompt=True, hide_input=True, help="Passphrase")
@click.option("--pin", prompt=True, hide_input=True, help="PIN code")
@click.option(
    "--rsa-key",
    type=click.Path(exists=True),
    help="RSA private key PEM file",
)
@click.option("--rsa-password", default=None, help="Password for encrypted RSA key")
@click.option("--channel-key", default=None, help="Channel key for deployment isolation")
@click.option("-o", "--output", type=click.Path(), help="Output path for file payloads")
@click.pass_context
def video_decode(
    ctx, video, reference, passphrase, pin, rsa_key, rsa_password, channel_key, output
):
    """
    Decode a message or file from stego video.

    Examples:

        stegasoo video-decode stego.mkv -r ref.jpg

        stegasoo video-decode stego.mkv -r ref.jpg -o ./extracted/
    """
    # The docstring above doubles as the command's --help text; keep it user-facing.
    #
    # Flow: read the stego video, reference photo and optional RSA key from
    # disk, call decode_video(), then either write the recovered file payload
    # or print the recovered text. ctx.obj["json"] switches output to JSON.
    # Any failure while decoding or writing is reported and ends the process
    # with SystemExit(1); a disabled video backend raises click.UsageError.
    from .constants import VIDEO_ENABLED

    if not VIDEO_ENABLED:
        raise click.UsageError(
            "Video support is disabled. Install ffmpeg and audio extras, "
            "or set STEGASOO_VIDEO=1 to force enable."
        )

    from .decode import decode_video

    # Read RSA key if provided
    rsa_key_data = Path(rsa_key).read_bytes() if rsa_key else None

    video_data = Path(video).read_bytes()
    reference_data = Path(reference).read_bytes()

    try:
        result = decode_video(
            stego_video=video_data,
            reference_photo=reference_data,
            passphrase=passphrase,
            pin=pin,
            rsa_key_data=rsa_key_data,
            rsa_password=rsa_password,
            channel_key=channel_key,
        )

        if result.is_file:
            # The filename travels inside the decoded payload, i.e. it is chosen
            # by whoever created the stego file. Keep only its final component
            # so "../../x" or "/abs/path" cannot escape the output directory
            # (joining a Path with an absolute path discards the left side).
            raw_name = str(result.filename or "decoded_file")
            filename = Path(raw_name.replace("\\", "/")).name
            if filename in ("", ".", ".."):
                filename = "decoded_file"

            output_path = Path(output) / filename if output else Path(filename)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_bytes(result.file_data)

            if ctx.obj.get("json"):
                click.echo(
                    json.dumps(
                        {
                            "status": "success",
                            "video": video,
                            "payload_type": "file",
                            "filename": filename,
                            "output": str(output_path),
                            "size": len(result.file_data),
                        },
                        indent=2,
                    )
                )
            else:
                click.echo(f"✓ Extracted file: {output_path}")
                click.echo(f" Size: {len(result.file_data):,} bytes")
        else:
            if ctx.obj.get("json"):
                click.echo(
                    json.dumps(
                        {
                            "status": "success",
                            "video": video,
                            "payload_type": "text",
                            "message": result.message,
                        },
                        indent=2,
                    )
                )
            else:
                click.echo(f"Decoded from {video}:")
                click.echo(result.message)

    except Exception as e:
        # Top-level CLI boundary: report the failure in the active output
        # format and exit non-zero instead of printing a traceback.
        if ctx.obj.get("json"):
            click.echo(json.dumps({"status": "error", "error": str(e)}, indent=2))
        else:
            click.echo(f"✗ Video decoding failed: {e}", err=True)
        raise SystemExit(1)
+ + Examples: + + stegasoo video-info carrier.mp4 + + stegasoo --json video-info carrier.mp4 + """ + from .constants import VIDEO_ENABLED + + if not VIDEO_ENABLED: + raise click.UsageError( + "Video support is disabled. Install ffmpeg and audio extras, " + "or set STEGASOO_VIDEO=1 to force enable." + ) + + from .video_utils import calculate_video_capacity, get_video_info + + with open(video, "rb") as f: + video_data = f.read() + + try: + info = get_video_info(video_data) + capacity_info = calculate_video_capacity(video_data) + + result = { + "file": video, + "format": info.format, + "codec": info.codec, + "resolution": { + "width": info.width, + "height": info.height, + }, + "fps": round(info.fps, 2), + "duration_seconds": round(info.duration_seconds, 2), + "total_frames": info.total_frames, + "i_frame_count": info.i_frame_count, + "bitrate": info.bitrate, + "file_size": len(video_data), + "capacity": { + "bytes": capacity_info.usable_capacity_bytes, + "kb": round(capacity_info.usable_capacity_bytes / 1024, 1), + "mb": round(capacity_info.usable_capacity_bytes / (1024 * 1024), 2), + }, + } + + if ctx.obj.get("json"): + click.echo(json.dumps(result, indent=2)) + else: + click.echo(f"File: {video}") + click.echo(f"Format: {info.format}") + click.echo(f"Codec: {info.codec}") + click.echo(f"Resolution: {info.width}x{info.height}") + click.echo(f"Frame rate: {info.fps:.2f} fps") + click.echo(f"Duration: {info.duration_seconds:.1f}s") + click.echo(f"Total frames: {info.total_frames:,}") + click.echo(f"I-frames (keyframes): {info.i_frame_count:,}") + if info.bitrate: + click.echo(f"Bitrate: {info.bitrate // 1000} kbps") + click.echo(f"File size: {len(video_data):,} bytes") + click.echo() + click.echo("Steganographic capacity (LSB, I-frames only):") + click.echo( + f" {capacity_info.usable_capacity_bytes:,} bytes " + f"({capacity_info.usable_capacity_bytes // 1024} KB)" + ) + click.echo() + click.echo("Note: Output will be MKV format with FFV1 lossless codec") + + except 
Exception as e: + if ctx.obj.get("json"): + click.echo(json.dumps({"status": "error", "error": str(e)}, indent=2)) + else: + click.echo(f"✗ Video info failed: {e}", err=True) + raise SystemExit(1) + + +# ============================================================================= +# BATCH COMMANDS +# ============================================================================= +# +# Batch processing demonstrates: +# +# 1. SUBGROUPS +# @cli.group() creates a nested command group: +# stegasoo batch encode *.png +# stegasoo batch decode *.png +# stegasoo batch check *.png +# +# 2. VARIADIC ARGUMENTS +# nargs=-1 accepts multiple arguments: +# @click.argument("images", nargs=-1, required=True) +# This lets users do: `stegasoo batch encode img1.png img2.png img3.png` +# Or with shell expansion: `stegasoo batch encode *.png` +# +# 3. PROGRESS CALLBACKS +# We pass a callback to the BatchProcessor for real-time updates: +# +# def progress(current, total, item): +# click.echo(f"[{current}/{total}] {item.input_path.name}") +# +# processor.batch_encode(..., progress_callback=progress) +# +# 4. PARALLEL PROCESSING +# --jobs/-j controls worker count. Default is 4 for good balance between +# speed and memory usage. Each worker loads images into memory. 
@cli.group()
def batch():
    """Batch operations on multiple images."""


@batch.command("encode")
@click.argument("images", nargs=-1, required=True, type=click.Path(exists=True))
@click.option("-m", "--message", help="Message to encode in all images")
@click.option(
    "-f",
    "--file",
    "file_payload",
    type=click.Path(exists=True),
    help="File to embed in all images",
)
@click.option(
    "-o",
    "--output-dir",
    type=click.Path(),
    help="Output directory (default: same as input)",
)
@click.option("--suffix", default="_encoded", help="Output filename suffix")
@click.option(
    "--passphrase",
    prompt=True,
    hide_input=True,
    confirmation_prompt=True,
    help="Passphrase (recommend 4+ words)",
)
@click.option("--pin", prompt=True, hide_input=True, confirmation_prompt=True, help="PIN code")
@click.option("-r", "--recursive", is_flag=True, help="Search directories recursively")
@click.option("-j", "--jobs", default=4, help="Parallel workers (default: 4)")
@click.option("-v", "--verbose", is_flag=True, help="Show detailed output")
@click.pass_context
def batch_encode(
    ctx,
    images,
    message,
    file_payload,
    output_dir,
    suffix,
    passphrase,
    pin,
    recursive,
    jobs,
    verbose,
):
    """
    Encode message into multiple images.

    Examples:

        stegasoo batch encode *.png -m "Secret" --passphrase --pin

        stegasoo batch encode ./photos/ -r -o ./encoded/
    """
    # A payload is mandatory: either inline text or a file to embed.
    if not (message or file_payload):
        raise click.UsageError("Either --message or --file is required")

    json_mode = ctx.obj.get("json")

    def progress(current, total, item):
        # Per-image status line; only handed to the processor in text mode.
        mark = "✓" if item.status.value == "success" else "✗"
        click.echo(f"[{current}/{total}] {mark} {item.input_path.name}")

    payload_path = Path(file_payload) if file_payload else None
    target_dir = Path(output_dir) if output_dir else None

    result = BatchProcessor(max_workers=jobs).batch_encode(
        images=list(images),
        message=message,
        file_payload=payload_path,
        output_dir=target_dir,
        output_suffix=suffix,
        # v3.2.0: the credential key is 'passphrase', not 'phrase'.
        credentials={"passphrase": passphrase, "pin": pin},
        recursive=recursive,
        progress_callback=None if json_mode else progress,
    )

    if json_mode:
        click.echo(result.to_json())
        return
    print_batch_result(result, verbose)
@batch.command("check")
@click.argument("images", nargs=-1, required=True, type=click.Path(exists=True))
@click.option("-r", "--recursive", is_flag=True, help="Search directories recursively")
@click.pass_context
def batch_check(ctx, images, recursive):
    """
    Check capacity of multiple images.

    Examples:

        stegasoo batch check *.png

        stegasoo batch check ./photos/ -r
    """
    # The docstring above doubles as the command's --help text.
    #
    # batch_capacity_check() returns one dict per image. Rows carrying an
    # "error" key are shown as failures; the others are read for "path",
    # "dimensions", "capacity_kb", "valid" and optional "warnings".
    results = batch_capacity_check(list(images), recursive)

    if ctx.obj.get("json"):
        click.echo(json.dumps(results, indent=2))
        return

    def display_name(path):
        # Clip long names so they stay inside the 40-character first column.
        name = Path(path).name
        return name[:35] + "..." if len(name) > 38 else name

    click.echo(f"{'Image':<40} {'Size':<12} {'Capacity':<12} {'Status'}")
    click.echo("─" * 80)

    for item in results:
        name = display_name(item["path"])

        if "error" in item:
            click.echo(f"{name:<40} {'ERROR':<12} {'':<12} {item['error']}")
            continue

        status = "✓" if item["valid"] else "⚠"
        warnings = ", ".join(item.get("warnings", []))
        # Build the capacity cell first and pad it on its own. Calling
        # .ljust() on the implicitly concatenated row literal pads the whole
        # row prefix (already wider than 12), which left this column unpadded.
        capacity = f"{item['capacity_kb']:,} KB"

        # !s keeps the width spec valid whatever type "dimensions" has.
        click.echo(f"{name:<40} {item['dimensions']!s:<12} {capacity:<12} {status} {warnings}")
@cli.command()
@click.option("--full", is_flag=True, help="Show full system information (Pi stats)")
@click.pass_context
def info(ctx, full):
    """Show version, features, and system information."""
    # Every probe below is best-effort: a missing tool, file or permission
    # leaves the corresponding value at its default instead of failing the
    # command. Output is JSON when ctx.obj["json"] is set, otherwise a short
    # ANSI-coloured summary.
    import shutil
    import subprocess

    # Check for DCT support (both flags exported by dct_steganography must be set)
    try:
        from .dct_steganography import HAS_JPEGLIB, HAS_SCIPY

        has_dct = HAS_SCIPY and HAS_JPEGLIB
    except ImportError:
        has_dct = False

    # Check service status
    service_status = "unknown"
    service_url = None
    try:
        result = subprocess.run(
            ["systemctl", "is-active", "stegasoo"],
            capture_output=True,
            text=True,
            timeout=2,
        )
        # An empty answer carries no information; keep the "unknown" default.
        service_status = result.stdout.strip() or "unknown"
        if service_status == "active":
            # Try to get URL from service environment
            env_result = subprocess.run(
                ["systemctl", "show", "stegasoo", "--property=Environment"],
                capture_output=True,
                text=True,
                timeout=2,
            )
            protocol = "https" if "HTTPS_ENABLED=true" in env_result.stdout else "http"
            # Get IP (first address reported by `hostname -I`)
            ip_result = subprocess.run(
                ["hostname", "-I"],
                capture_output=True,
                text=True,
                timeout=2,
            )
            addresses = ip_result.stdout.split()
            ip = addresses[0] if addresses else "localhost"
            service_url = f"{protocol}://{ip}"
    except (subprocess.TimeoutExpired, OSError, IndexError):
        # OSError covers a missing binary (FileNotFoundError) as well as
        # permission problems when spawning it.
        pass

    # Check channel key
    channel_fingerprint = None
    channel_source = None
    try:
        from .channel import get_channel_fingerprint, get_channel_key, get_channel_status

        key = get_channel_key()
        if key:
            channel_fingerprint = get_channel_fingerprint(key)
            status = get_channel_status()
            channel_source = status.get("source")
    except ImportError:
        pass

    # System info (Pi-specific)
    cpu_freq = None
    cpu_temp = None
    disk_free = None
    uptime = None

    if full:
        try:
            # CPU frequency: value is divided by 1000 and shown as MHz
            with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq") as f:
                cpu_freq = int(f.read().strip()) // 1000
        except (OSError, ValueError):
            pass

        try:
            # CPU temp: value is divided by 1000 and shown as Celsius
            with open("/sys/class/thermal/thermal_zone0/temp") as f:
                cpu_temp = int(f.read().strip()) / 1000
        except (OSError, ValueError):
            pass

        try:
            # Disk free in GB. shutil.disk_usage works on every platform,
            # whereas os.statvfs does not exist on Windows.
            disk_free = shutil.disk_usage("/").free / (1024**3)
        except OSError:
            pass

        try:
            # Uptime: first field of /proc/uptime is seconds since boot
            with open("/proc/uptime") as f:
                uptime_secs = float(f.read().split()[0])
            days = int(uptime_secs // 86400)
            hours = int((uptime_secs % 86400) // 3600)
            uptime = f"{days}d {hours}h" if days else f"{hours}h"
        except (OSError, ValueError, IndexError):
            pass

    info_data = {
        "version": __version__,
        "service": service_status,
        "url": service_url,
        "dct_support": has_dct,
        "channel": (
            {
                "fingerprint": channel_fingerprint,
                "source": channel_source,
            }
            if channel_fingerprint
            else None
        ),
        "limits": {
            "max_message_bytes": MAX_MESSAGE_SIZE,
            "max_file_payload_bytes": MAX_FILE_PAYLOAD_SIZE,
        },
        "system": (
            {
                "cpu_mhz": cpu_freq,
                "temp_c": cpu_temp,
                # `is not None` so a completely full disk reports 0.0, not null.
                "disk_free_gb": round(disk_free, 1) if disk_free is not None else None,
                "uptime": uptime,
            }
            if full
            else None
        ),
    }

    if ctx.obj.get("json"):
        click.echo(json.dumps(info_data, indent=2))
        return

    # Fastfetch-style output
    click.echo(f"\033[1mSTEGASOO\033[0m v{__version__}")
    click.echo("─" * 36)

    # Service status
    if service_status == "active":
        click.echo(" Service: \033[32m● running\033[0m")
        if service_url:
            click.echo(f" URL: {service_url}")
    elif service_status == "inactive":
        click.echo(" Service: \033[31m○ stopped\033[0m")
    else:
        click.echo(f" Service: \033[33m? {service_status}\033[0m")

    # Channel: only the first and last four fingerprint characters are shown
    if channel_fingerprint:
        masked = f"{channel_fingerprint[:4]}••••••••{channel_fingerprint[-4:]}"
        click.echo(f" Channel: {masked}")
    else:
        click.echo(" Channel: public")

    # DCT
    dct_status = "\033[32m✓ enabled\033[0m" if has_dct else "\033[31m✗ disabled\033[0m"
    click.echo(f" DCT: {dct_status}")

    # System info (if --full and at least one probe produced a value)
    system_values = (cpu_freq, cpu_temp, disk_free, uptime)
    if full and any(value is not None for value in system_values):
        click.echo("─" * 36)
        if cpu_freq is not None:
            click.echo(f" CPU: {cpu_freq} MHz")
        if cpu_temp is not None:
            # green below 60, yellow below 75, red otherwise
            temp_color = (
                "\033[32m" if cpu_temp < 60 else "\033[33m" if cpu_temp < 75 else "\033[31m"
            )
            click.echo(f" Temp: {temp_color}{cpu_temp:.1f}°C\033[0m")
        if uptime:
            click.echo(f" Uptime: {uptime}")
        if disk_free is not None:
            click.echo(f" Disk: {disk_free:.1f} GB free")
@channel.command("show")
@click.option("--key", "explicit_key", help="Show this key instead of configured one")
@click.pass_context
def channel_show(ctx, explicit_key):
    """
    Show the current channel key.

    Examples:

        stegasoo channel show

        stegasoo channel show --key "ABCD-1234-..."
    """
    from .channel import format_channel_key, get_channel_status, validate_channel_key

    json_mode = ctx.obj.get("json")

    if not explicit_key:
        # No --key given: fall back to whatever the channel module reports.
        status = get_channel_status()
        if not status["configured"]:
            if json_mode:
                click.echo(json.dumps({"configured": False, "mode": "public"}))
            else:
                click.echo("No channel key configured (public mode)")
            return
        # NOTE(review): assumes the status dict carries a "key" entry; its
        # shape is not visible from this command - confirm against .channel.
        key, source = status["key"], status["source"]
    elif validate_channel_key(explicit_key):
        key, source = format_channel_key(explicit_key), "command line"
    else:
        click.echo("Error: Invalid channel key format", err=True)
        raise SystemExit(1)

    if json_mode:
        click.echo(json.dumps({"channel_key": key, "source": source}))
        return
    click.echo(f"Channel key: {key}")
    click.echo(f"Source: {source}")


@channel.command("status")
@click.pass_context
def channel_status(ctx):
    """
    Show channel key status and configuration.

    Examples:

        stegasoo channel status

        stegasoo --json channel status
    """
    from .channel import get_channel_status

    status = get_channel_status()

    # JSON mode dumps the status dict untouched.
    if ctx.obj.get("json"):
        click.echo(json.dumps(status, indent=2))
        return

    click.echo(f"Mode: {status['mode'].upper()}")
    if not status["configured"]:
        # Nothing configured: point the user at the setup command.
        for line in ("No channel key configured", None, "To set up a channel key:"):
            click.echo(line)
        click.echo(" stegasoo channel generate --save")
        return

    click.echo(f"Fingerprint: {status['fingerprint']}")
    click.echo(f"Source: {status['source']}")
+ + Examples: + + stegasoo channel qr + + stegasoo channel qr -o channel-key.png + + stegasoo channel qr --format png -o - > key.png + """ + import sys + + from .channel import format_channel_key, get_channel_key, validate_channel_key + + # Get the key to display + if explicit_key: + if not validate_channel_key(explicit_key): + click.echo("Error: Invalid channel key format", err=True) + raise SystemExit(1) + key = format_channel_key(explicit_key) + else: + key = get_channel_key() + if not key: + click.echo("Error: No channel key configured", err=True) + click.echo("Generate one with: stegasoo channel generate", err=True) + raise SystemExit(1) + + # Import qrcode + try: + import qrcode + except ImportError: + click.echo("Error: qrcode library not installed", err=True) + click.echo("Install with: pip install qrcode[pil]", err=True) + raise SystemExit(1) + + # Determine output mode + if output: + output_format = "png" # Force PNG when output file specified + + if output_format == "png": + # Generate PNG QR code (requires Pillow) + try: + import PIL # noqa: F401 - check Pillow is available + except ImportError: + click.echo("Error: PIL/Pillow not installed for PNG output", err=True) + click.echo("Install with: pip install Pillow", err=True) + raise SystemExit(1) + + qr = qrcode.QRCode( + version=1, + error_correction=qrcode.constants.ERROR_CORRECT_M, + box_size=10, + border=4, + ) + qr.add_data(key) + qr.make(fit=True) + img = qr.make_image(fill_color="black", back_color="white") + + if output == "-": + # Write to stdout + img.save(sys.stdout.buffer, format="PNG") + elif output: + # Write to file + img.save(output) + click.echo(f"Saved QR code to: {output}", err=True) + else: + # No output specified but PNG format requested - error + click.echo("Error: PNG format requires -o/--output", err=True) + raise SystemExit(1) + + else: + # ASCII output to terminal + qr = qrcode.QRCode( + version=1, + error_correction=qrcode.constants.ERROR_CORRECT_M, + box_size=1, + border=2, + 
) + qr.add_data(key) + qr.make(fit=True) + + click.echo() + click.echo(f"Channel Key: {key}") + click.echo() + qr.print_ascii(invert=True) + click.echo() + click.echo("Scan this QR code to share the channel key.") + + +@channel.command("clear") +@click.option("--project", is_flag=True, help="Only clear project config") +@click.option("--user", is_flag=True, help="Only clear user config") +@click.pass_context +def channel_clear(ctx, project, user): + """ + Remove channel key configuration. + + Examples: + + stegasoo channel clear + + stegasoo channel clear --project + + stegasoo channel clear --user + """ + from .channel import clear_channel_key + + if project and user: + location = "all" + elif project: + location = "project" + elif user: + location = "user" + else: + location = "all" + + deleted = clear_channel_key(location) + + if ctx.obj.get("json"): + click.echo(json.dumps({"deleted": [str(p) for p in deleted]})) + else: + if deleted: + click.echo(f"Removed channel key from: {', '.join(str(p) for p in deleted)}") + else: + click.echo("No channel key files found") + + +# ============================================================================= +# TOOLS COMMANDS +# ============================================================================= + + +@cli.group() +@click.pass_context +def tools(ctx): + """Image security tools.""" + pass + + +@tools.command("capacity") +@click.argument("image", type=click.Path(exists=True)) +@click.option("--json", "as_json", is_flag=True, help="Output as JSON") +def tools_capacity(image, as_json): + """Show steganography capacity for an image. 
+ + Example: + + stegasoo tools capacity photo.jpg + """ + from .dct_steganography import estimate_capacity_comparison + + with open(image, "rb") as f: + image_data = f.read() + + result = estimate_capacity_comparison(image_data) + result["filename"] = Path(image).name + result["megapixels"] = round((result["width"] * result["height"]) / 1_000_000, 2) + + if as_json: + click.echo(json.dumps(result, indent=2)) + else: + click.echo(f"\n {result['filename']}") + click.echo(f" {'─' * 40}") + click.echo(f" Dimensions: {result['width']} × {result['height']}") + click.echo(f" Megapixels: {result['megapixels']} MP") + click.echo(f" {'─' * 40}") + click.echo(f" LSB Capacity: {result['lsb']['capacity_kb']:.1f} KB") + if result["dct"]["available"]: + click.echo(f" DCT Capacity: {result['dct']['capacity_kb']:.1f} KB") + else: + click.echo(" DCT Capacity: N/A (scipy required)") + click.echo() + + +@tools.command("strip") +@click.argument("image", type=click.Path(exists=True)) +@click.option("-o", "--output", type=click.Path(), help="Output file (default: _clean.png)") +@click.option( + "--format", "fmt", type=click.Choice(["png", "bmp"]), default="png", help="Output format" +) +def tools_strip(image, output, fmt): + """Strip EXIF/metadata from an image. + + Example: + + stegasoo tools strip photo.jpg + stegasoo tools strip photo.jpg -o clean.png + """ + from .utils import strip_image_metadata + + with open(image, "rb") as f: + image_data = f.read() + + clean_data = strip_image_metadata(image_data, output_format=fmt.upper()) + + if not output: + stem = Path(image).stem + output = f"{stem}_clean.{fmt}" + + with open(output, "wb") as f: + f.write(clean_data) + + click.echo(f"Saved clean image to: {output}") + + +@tools.command("peek") +@click.argument("image", type=click.Path(exists=True)) +@click.option("--json", "as_json", is_flag=True, help="Output as JSON") +def tools_peek(image, as_json): + """Check if image contains Stegasoo hidden data. 
+ + Example: + + stegasoo tools peek suspicious.jpg + """ + from .steganography import peek_image + + with open(image, "rb") as f: + image_data = f.read() + + result = peek_image(image_data) + result["filename"] = Path(image).name + + if as_json: + click.echo(json.dumps(result)) + else: + if result["has_stegasoo"]: + click.echo(f"\n ✓ Stegasoo data detected in {result['filename']}") + click.echo(f" Mode: {result['mode'].upper()}") + else: + click.echo(f"\n ✗ No Stegasoo header found in {result['filename']}") + click.echo() + + +@tools.command("exif") +@click.argument("image", type=click.Path(exists=True)) +@click.option("--clear", is_flag=True, help="Remove all EXIF metadata") +@click.option("--set", "set_fields", multiple=True, help="Set EXIF field (e.g. --set Artist=John)") +@click.option("-o", "--output", type=click.Path(), help="Output file (required for modifications)") +@click.option("--json", "as_json", is_flag=True, help="Output as JSON") +def tools_exif(image, clear, set_fields, output, as_json): + """View or edit EXIF metadata. 
+ + Examples: + + stegasoo tools exif photo.jpg + + stegasoo tools exif photo.jpg --clear -o clean.jpg + + stegasoo tools exif photo.jpg --set Artist="John Doe" -o updated.jpg + """ + from .utils import read_image_exif, strip_image_metadata, write_image_exif + + with open(image, "rb") as f: + image_data = f.read() + + # View mode (no modifications) + if not clear and not set_fields: + exif = read_image_exif(image_data) + + if as_json: + click.echo(json.dumps(exif, indent=2, default=str)) + else: + click.echo(f"\n EXIF Metadata: {Path(image).name}") + click.echo(f" {'─' * 45}") + if not exif: + click.echo(" No EXIF metadata found") + else: + for key, value in sorted(exif.items()): + # Skip complex nested structures for display + if isinstance(value, dict): + click.echo(f" {key}: [complex data]") + elif isinstance(value, list): + click.echo(f" {key}: {value}") + else: + # Truncate long values + str_val = str(value) + if len(str_val) > 50: + str_val = str_val[:47] + "..." + click.echo(f" {key}: {str_val}") + click.echo() + return + + # Modification mode - require output file + if not output: + raise click.UsageError("Output file required for modifications (use -o/--output)") + + if clear: + # Strip all metadata + clean_data = strip_image_metadata(image_data, output_format="JPEG") + with open(output, "wb") as f: + f.write(clean_data) + click.echo(f"Cleared EXIF metadata, saved to: {output}") + elif set_fields: + # Parse field=value pairs + updates = {} + for field in set_fields: + if "=" not in field: + raise click.UsageError(f"Invalid format: {field} (use Field=Value)") + key, val = field.split("=", 1) + updates[key.strip()] = val.strip() + + try: + updated_data = write_image_exif(image_data, updates) + with open(output, "wb") as f: + f.write(updated_data) + click.echo(f"Updated {len(updates)} EXIF field(s), saved to: {output}") + except ValueError as e: + raise click.UsageError(str(e)) + + +@tools.command("compress") +@click.argument("image", 
type=click.Path(exists=True)) +@click.option("-q", "--quality", type=int, default=75, help="JPEG quality (1-100, default: 75)") +@click.option( + "-o", "--output", type=click.Path(), help="Output file (default: _q.jpg)" +) +def tools_compress(image, quality, output): + """Compress a JPEG image. + + DCT steganography survives JPEG compression! Use this to reduce file size + while preserving hidden data. + + Examples: + + stegasoo tools compress photo.jpg -q 60 + stegasoo tools compress photo.jpg -q 80 -o smaller.jpg + """ + import io + + from PIL import Image + + if not 1 <= quality <= 100: + raise click.UsageError("Quality must be between 1 and 100") + + with open(image, "rb") as f: + image_data = f.read() + + img = Image.open(io.BytesIO(image_data)) + + # Convert to RGB if needed (JPEG doesn't support alpha) + if img.mode in ("RGBA", "P"): + img = img.convert("RGB") + + buffer = io.BytesIO() + img.save(buffer, format="JPEG", quality=quality) + compressed_data = buffer.getvalue() + + if not output: + stem = Path(image).stem + output = f"{stem}_q{quality}.jpg" + + with open(output, "wb") as f: + f.write(compressed_data) + + orig_size = len(image_data) + new_size = len(compressed_data) + reduction = (1 - new_size / orig_size) * 100 + + click.echo(f"Compressed to: {output}") + click.echo(f" Original: {orig_size:,} bytes") + click.echo(f" Compressed: {new_size:,} bytes ({reduction:.1f}% smaller)") + + +@tools.command("rotate") +@click.argument("image", type=click.Path(exists=True)) +@click.option( + "-r", "--rotation", type=click.Choice(["90", "180", "270"]), help="Rotation degrees clockwise" +) +@click.option("--flip-h", is_flag=True, help="Flip horizontally") +@click.option("--flip-v", is_flag=True, help="Flip vertically") +@click.option("-o", "--output", type=click.Path(), help="Output file") +def tools_rotate(image, rotation, flip_h, flip_v, output): + """Rotate and/or flip an image. + + For JPEGs, uses lossless jpegtran rotation which preserves DCT steganography. 
+ For other formats, uses PIL (re-encodes the image). + + Examples: + + stegasoo tools rotate photo.jpg -r 90 + stegasoo tools rotate photo.jpg -r 180 --flip-h -o rotated.jpg + """ + import io + import shutil + + from PIL import Image + + with open(image, "rb") as f: + image_data = f.read() + + # Must have rotation or flip + if not rotation and not flip_h and not flip_v: + raise click.UsageError("Must specify at least one of -r/--rotation, --flip-h, or --flip-v") + + img = Image.open(io.BytesIO(image_data)) + is_jpeg = img.format == "JPEG" + img.close() + + rotation_deg = int(rotation) if rotation else 0 + + # For JPEGs, use lossless jpegtran + if is_jpeg and shutil.which("jpegtran"): + from .dct_steganography import _jpegtran_rotate + + result_data = image_data + + # Apply rotation + if rotation_deg in (90, 180, 270): + result_data = _jpegtran_rotate(result_data, rotation_deg) + + # Apply flips using jpegtran + if flip_h or flip_v: + import os + import subprocess + import tempfile + + for flip_type in (["horizontal"] if flip_h else []) + (["vertical"] if flip_v else []): + with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f: + f.write(result_data) + input_path = f.name + output_path = tempfile.mktemp(suffix=".jpg") + try: + subprocess.run( + [ + "jpegtran", + "-flip", + flip_type, + "-copy", + "all", + "-outfile", + output_path, + input_path, + ], + capture_output=True, + timeout=30, + check=True, + ) + with open(output_path, "rb") as f: + result_data = f.read() + finally: + for p in [input_path, output_path]: + try: + os.unlink(p) + except OSError: + pass + + ext = "jpg" + click.echo(" (Used lossless jpegtran - DCT stego preserved)") + else: + # Use PIL for non-JPEGs + img = Image.open(io.BytesIO(image_data)) + + # PIL rotation is counter-clockwise, we want clockwise + if rotation_deg: + pil_rotation = {90: 270, 180: 180, 270: 90}[rotation_deg] + img = img.rotate(pil_rotation, expand=True) + + if flip_h: + img = img.transpose(Image.FLIP_LEFT_RIGHT) 
+ if flip_v: + img = img.transpose(Image.FLIP_TOP_BOTTOM) + + buffer = io.BytesIO() + img.save(buffer, format="PNG") + result_data = buffer.getvalue() + ext = "png" + + if not output: + stem = Path(image).stem + suffix = "rotated" if rotation_deg else "flipped" + output = f"{stem}_{suffix}.{ext}" + + with open(output, "wb") as f: + f.write(result_data) + + click.echo(f"Saved to: {output}") + + +@tools.command("convert") +@click.argument("image", type=click.Path(exists=True)) +@click.option( + "-f", + "--format", + "fmt", + type=click.Choice(["png", "jpg", "bmp", "webp"]), + required=True, + help="Output format", +) +@click.option( + "-q", "--quality", type=int, default=95, help="Quality for lossy formats (default: 95)" +) +@click.option("-o", "--output", type=click.Path(), help="Output file") +def tools_convert(image, fmt, quality, output): + """Convert image to a different format. + + Examples: + + stegasoo tools convert photo.png -f jpg + stegasoo tools convert photo.jpg -f png -o lossless.png + """ + import io + + from PIL import Image + + with open(image, "rb") as f: + image_data = f.read() + + img = Image.open(io.BytesIO(image_data)) + + # Handle format-specific conversions + save_format = {"jpg": "JPEG", "png": "PNG", "bmp": "BMP", "webp": "WEBP"}[fmt] + + if save_format == "JPEG" and img.mode in ("RGBA", "P"): + img = img.convert("RGB") + + buffer = io.BytesIO() + if save_format in ("JPEG", "WEBP"): + img.save(buffer, format=save_format, quality=quality) + else: + img.save(buffer, format=save_format) + + result_data = buffer.getvalue() + + if not output: + stem = Path(image).stem + output = f"{stem}.{fmt}" + + with open(output, "wb") as f: + f.write(result_data) + + click.echo(f"Converted to: {output}") + + +# ============================================================================= +# STEGANALYSIS COMMANDS +# ============================================================================= + + +@cli.command() +@click.argument("image", 
type=click.Path(exists=True)) +@click.option("--json", "as_json", is_flag=True, help="Output as JSON") +@click.option( + "--mode", + type=click.Choice(["lsb", "auto"]), + default="lsb", + help="Analysis mode (default: lsb)", +) +def check(image, as_json, mode): + """Analyze an image for steganographic detectability. + + Runs chi-square and RS (Regular-Singular) statistical tests to estimate + how detectable any hidden data might be. Outputs a risk level. + + Examples: + + stegasoo check carrier.png + + stegasoo check stego.png --json + + stegasoo check suspicious.bmp --mode lsb + """ + from .steganalysis import check_image + + with open(image, "rb") as f: + image_data = f.read() + + result = check_image(image_data, mode=mode) + result["filename"] = Path(image).name + + if as_json: + click.echo(json.dumps(result, indent=2)) + else: + risk = result["risk"] + risk_colors = {"low": "green", "medium": "yellow", "high": "red"} + risk_display = click.style(risk.upper(), fg=risk_colors.get(risk, "white"), bold=True) + + click.echo(f"\n Steganalysis: {result['filename']}") + click.echo(f" Image: {result['width']}x{result['height']}, {result['channels']} channels") + click.echo(f" Detectability risk: {risk_display}") + + click.echo("\n Chi-square (p-values):") + for ch, p in result["chi_square"].items(): + indicator = "!" if p < 0.05 else " " + click.echo(f" {indicator} {ch}: {p:.6f}") + + click.echo("\n RS embedding estimate:") + for ch, est in result["rs"].items(): + indicator = "!" 
if est > 0.1 else " " + click.echo(f" {indicator} {ch}: {est:.4f} ({est * 100:.1f}%)") + + click.echo() + + +# ============================================================================= +# ADMIN COMMANDS (Web UI administration) +# ============================================================================= + + +@cli.group() +@click.pass_context +def admin(ctx): + """Web UI administration commands.""" + pass + + +@admin.command("recover") +@click.option( + "--db", + "db_path", + type=click.Path(exists=True), + help="Path to stegasoo.db (default: frontends/web/instance/stegasoo.db)", +) +@click.option( + "--password", prompt=True, hide_input=True, confirmation_prompt=True, help="New admin password" +) +def admin_recover(db_path, password): + """Reset admin password using recovery key. + + Allows password reset for Web UI admin account when locked out. + Requires the recovery key that was saved during setup. + + Example: + + stegasoo admin recover --db /path/to/stegasoo.db + """ + import sqlite3 + + from argon2 import PasswordHasher + + from .recovery import verify_recovery_key + + # Try default paths if not specified + if not db_path: + candidates = [ + Path("frontends/web/instance/stegasoo.db"), + Path("instance/stegasoo.db"), + Path("/app/instance/stegasoo.db"), + ] + for candidate in candidates: + if candidate.exists(): + db_path = str(candidate) + break + + if not db_path or not Path(db_path).exists(): + raise click.UsageError("Database not found. Use --db to specify path to stegasoo.db") + + click.echo(f"Database: {db_path}") + + # Connect and check for recovery key + db = sqlite3.connect(db_path) + db.row_factory = sqlite3.Row + + # Get recovery key hash from app_settings + cursor = db.execute("SELECT value FROM app_settings WHERE key = 'recovery_key_hash'") + row = cursor.fetchone() + + if not row: + db.close() + raise click.ClickException( + "No recovery key configured for this instance. " "Password reset is not possible." 
+ ) + + stored_hash = row["value"] + + # Prompt for recovery key + recovery_key = click.prompt( + "Enter your recovery key", + hide_input=False, # Recovery keys are meant to be visible + ) + + # Verify recovery key + if not verify_recovery_key(recovery_key, stored_hash): + db.close() + raise click.ClickException("Invalid recovery key") + + # Validate password + if len(password) < 8: + db.close() + raise click.UsageError("Password must be at least 8 characters") + + # Hash new password with same settings as web UI + ph = PasswordHasher( + time_cost=3, + memory_cost=65536, # 64MB + parallelism=4, + hash_len=32, + salt_len=16, + ) + new_hash = ph.hash(password) + + # Find and update admin user + admin = db.execute( + "SELECT id, username FROM users WHERE role = 'admin' ORDER BY id LIMIT 1" + ).fetchone() + + if not admin: + db.close() + raise click.ClickException("No admin user found in database") + + db.execute( + "UPDATE users SET password_hash = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?", + (new_hash, admin["id"]), + ) + db.commit() + db.close() + + click.echo(f"\nPassword reset successfully for admin '{admin['username']}'") + click.echo("You can now login to the Web UI with your new password.") + + +@admin.command("generate-key") +@click.option("--qr", "show_qr", is_flag=True, help="Show QR code in terminal (if supported)") +def admin_generate_key(show_qr): + """Generate a new recovery key (for reference only). + + This generates a new random recovery key and displays it. + To actually set the recovery key, use the Web UI. 
+ + Example: + + stegasoo admin generate-key + stegasoo admin generate-key --qr + """ + from .recovery import generate_recovery_key, get_recovery_fingerprint + + key = generate_recovery_key() + + click.echo("\nNew Recovery Key:") + click.echo("─" * 50) + click.echo(f" {key}") + click.echo("─" * 50) + click.echo(f"Fingerprint: {get_recovery_fingerprint(key)}") + + if show_qr: + try: + import qrcode + + qr = qrcode.QRCode(box_size=1, border=1) + qr.add_data(key) + qr.make() + click.echo("\nQR Code:") + qr.print_ascii(invert=True) + except ImportError: + click.echo("\n(qrcode library not installed for terminal QR)") + + click.echo("\nNote: Save this key securely. To set it in the Web UI,") + click.echo("go to Account > Recovery Key > Regenerate") + + +# ============================================================================= +# API COMMANDS (REST API management) +# ============================================================================= + + +def _setup_frontends_path(): + """Add frontends directory to sys.path for importing API/web modules.""" + import sys + + # Try multiple possible locations + possible_paths = [ + # Development: stegasoo/frontends + Path(__file__).parent.parent.parent / "frontends", + # Installed package: site-packages/frontends + Path(__file__).parent.parent / "frontends", + ] + + for path in possible_paths: + if path.exists() and str(path) not in sys.path: + sys.path.insert(0, str(path)) + return True + + return False + + +@cli.group() +@click.pass_context +def api(ctx): + """REST API management commands.""" + pass + + +@api.group("keys") +def api_keys(): + """Manage API keys for authentication.""" + pass + + +@api_keys.command("list") +@click.option( + "--location", + type=click.Choice(["user", "project", "all"]), + default="all", + help="Config location to list keys from", +) +def api_keys_list(location): + """List configured API keys. + + Shows key names and creation dates (not actual keys). 
+ + Examples: + + stegasoo api keys list + stegasoo api keys list --location user + """ + _setup_frontends_path() + + try: + from api.auth import get_api_key_status, list_api_keys + except ImportError: + raise click.ClickException("API frontend not available") + + status = get_api_key_status() + + click.echo(f"\nAPI Key Authentication: {'Enabled' if status['enabled'] else 'Disabled'}") + click.echo(f"Total keys: {status['total_keys']}") + click.echo(f"Environment variable: {'Set' if status['env_configured'] else 'Not set'}") + + locations = ["user", "project"] if location == "all" else [location] + + for loc in locations: + keys = list_api_keys(loc) + click.echo(f"\n{loc.title()} keys ({len(keys)}):") + if keys: + for k in keys: + click.echo(f" - {k['name']} (created: {k['created'][:10]})") + else: + click.echo(" (none)") + + +@api_keys.command("create") +@click.argument("name") +@click.option( + "--location", + type=click.Choice(["user", "project"]), + default="user", + help="Where to store the key", +) +def api_keys_create(name, location): + """Create a new API key. + + The key is shown ONCE and cannot be retrieved again. + Save it immediately! + + Examples: + + stegasoo api keys create laptop + stegasoo api keys create automation --location project + """ + _setup_frontends_path() + + try: + from api.auth import add_api_key + except ImportError: + raise click.ClickException("API frontend not available") + + try: + key = add_api_key(name, location) + click.echo(f"\nAPI Key created: {name}") + click.echo("─" * 60) + click.echo(f" {key}") + click.echo("─" * 60) + click.echo("\nSave this key NOW! 
It cannot be retrieved again.") + click.echo(f"Stored in: {location} config") + except ValueError as e: + raise click.ClickException(str(e)) + + +@api_keys.command("delete") +@click.argument("name") +@click.option( + "--location", type=click.Choice(["user", "project"]), default="user", help="Config location" +) +def api_keys_delete(name, location): + """Delete an API key by name. + + Examples: + + stegasoo api keys delete laptop + stegasoo api keys delete automation --location project + """ + _setup_frontends_path() + + try: + from api.auth import remove_api_key + except ImportError: + raise click.ClickException("API frontend not available") + + if remove_api_key(name, location): + click.echo(f"Deleted API key: {name}") + else: + raise click.ClickException(f"Key '{name}' not found in {location} config") + + +@api.group("tls") +def api_tls(): + """Manage TLS certificates for HTTPS.""" + pass + + +@api_tls.command("generate") +@click.option("--hostname", default="localhost", help="Server hostname for certificate") +@click.option("--days", default=365, help="Certificate validity in days") +@click.option( + "--output", "-o", type=click.Path(), help="Output directory (default: ~/.stegasoo/certs)" +) +def api_tls_generate(hostname, days, output): + """Generate self-signed TLS certificate. 
+ + Creates a certificate valid for: + - The specified hostname + - localhost / 127.0.0.1 + - hostname.local (for mDNS) + - All detected local network IPs + + Examples: + + stegasoo api tls generate + stegasoo api tls generate --hostname myserver --days 730 + stegasoo api tls generate -o /etc/stegasoo/certs + """ + _setup_frontends_path() + + try: + from web.ssl_utils import generate_self_signed_cert, get_cert_paths + except ImportError: + raise click.ClickException("Web frontend not available (ssl_utils required)") + + if output: + base_dir = Path(output) + else: + base_dir = Path.home() / ".stegasoo" + + click.echo(f"Generating TLS certificate for: {hostname}") + click.echo(f"Validity: {days} days") + + cert_path, key_path = generate_self_signed_cert(base_dir, hostname, days) + + click.echo(f"\nCertificate: {cert_path}") + click.echo(f"Private Key: {key_path}") + click.echo("\nTo use with the API:") + click.echo(f" uvicorn main:app --ssl-certfile {cert_path} --ssl-keyfile {key_path}") + + +@api_tls.command("info") +@click.option( + "--cert", + "-c", + type=click.Path(exists=True), + help="Certificate file (default: ~/.stegasoo/certs/server.crt)", +) +def api_tls_info(cert): + """Show information about a TLS certificate. + + Examples: + + stegasoo api tls info + stegasoo api tls info --cert /path/to/server.crt + """ + from cryptography import x509 + + if not cert: + cert = Path.home() / ".stegasoo" / "certs" / "server.crt" + if not cert.exists(): + raise click.ClickException( + f"No certificate found at {cert}. 
Generate one with: stegasoo api tls generate" + ) + + cert_data = Path(cert).read_bytes() + certificate = x509.load_pem_x509_certificate(cert_data) + + click.echo(f"\nCertificate: {cert}") + click.echo("─" * 50) + click.echo(f"Subject: {certificate.subject.rfc4514_string()}") + click.echo(f"Issuer: {certificate.issuer.rfc4514_string()}") + click.echo(f"Serial: {certificate.serial_number}") + click.echo(f"Valid from: {certificate.not_valid_before_utc}") + click.echo(f"Valid until: {certificate.not_valid_after_utc}") + + # Check expiry + import datetime + + now = datetime.datetime.now(datetime.UTC) + if certificate.not_valid_after_utc < now: + click.echo("\nStatus: EXPIRED") + elif certificate.not_valid_after_utc < now + datetime.timedelta(days=30): + days_left = (certificate.not_valid_after_utc - now).days + click.echo(f"\nStatus: Expires in {days_left} days (consider renewal)") + else: + days_left = (certificate.not_valid_after_utc - now).days + click.echo(f"\nStatus: Valid ({days_left} days remaining)") + + # Show SANs + try: + san_ext = certificate.extensions.get_extension_for_class(x509.SubjectAlternativeName) + click.echo("\nSubject Alternative Names:") + for name in san_ext.value: + click.echo(f" - {name.value}") + except x509.ExtensionNotFound: + pass + + +@api.command("serve") +@click.option("--host", default="127.0.0.1", help="Host to bind to") +@click.option("--port", default=8000, help="Port to bind to") +@click.option("--ssl/--no-ssl", default=True, help="Enable/disable TLS") +@click.option("--cert", type=click.Path(exists=True), help="TLS certificate file") +@click.option("--key", type=click.Path(exists=True), help="TLS private key file") +@click.option("--reload", "do_reload", is_flag=True, help="Enable auto-reload for development") +def api_serve(host, port, ssl, cert, key, do_reload): + """Start the REST API server. + + By default starts with TLS using certificates from ~/.stegasoo/certs/. + If no certificates exist, they are generated automatically. 
+ + Examples: + + stegasoo api serve + stegasoo api serve --host 0.0.0.0 --port 8443 + stegasoo api serve --no-ssl + stegasoo api serve --cert /path/to/cert.pem --key /path/to/key.pem + """ + _setup_frontends_path() + + # Determine cert paths + if ssl: + if cert and key: + cert_path, key_path = cert, key + else: + try: + from web.ssl_utils import ensure_certs + + base_dir = Path.home() / ".stegasoo" + cert_path, key_path = ensure_certs( + base_dir, host if host != "0.0.0.0" else "localhost" + ) + except ImportError: + raise click.ClickException("ssl_utils not available") + + click.echo(f"Starting API server with TLS on https://{host}:{port}") + click.echo(f"Certificate: {cert_path}") + else: + cert_path = key_path = None + click.echo(f"Starting API server on http://{host}:{port}") + click.echo("WARNING: TLS disabled - connections are not encrypted!") + + # Import and run uvicorn + try: + import uvicorn + except ImportError: + raise click.ClickException("uvicorn not installed. Install with: pip install uvicorn") + + uvicorn_kwargs = { + "app": "api.main:app", + "host": host, + "port": port, + "reload": do_reload, + } + + if ssl and cert_path and key_path: + uvicorn_kwargs["ssl_certfile"] = str(cert_path) + uvicorn_kwargs["ssl_keyfile"] = str(key_path) + + uvicorn.run(**uvicorn_kwargs) + + +def main(): + """Entry point for CLI.""" + cli(obj={}) + + +if __name__ == "__main__": + main() diff --git a/src/soosef/stegasoo/compression.py b/src/soosef/stegasoo/compression.py new file mode 100644 index 0000000..252bcb1 --- /dev/null +++ b/src/soosef/stegasoo/compression.py @@ -0,0 +1,252 @@ +""" +Stegasoo Compression Module + +Provides transparent compression/decompression for payloads before encryption. +Supports multiple algorithms with automatic detection on decompression. 
+""" + +import struct +import zlib +from enum import IntEnum + +from .debug import get_logger + +logger = get_logger(__name__) + +# Optional LZ4 support (faster, slightly worse ratio) +try: + import lz4.frame + + HAS_LZ4 = True +except ImportError: + HAS_LZ4 = False + +# Optional ZSTD support (best ratio, fast) +try: + import zstandard as zstd + + HAS_ZSTD = True +except ImportError: + HAS_ZSTD = False + + +class CompressionAlgorithm(IntEnum): + """Supported compression algorithms.""" + + NONE = 0 + ZLIB = 1 + LZ4 = 2 + ZSTD = 3 # v4.2.0: Best ratio, fast compression + + +# Magic bytes for compressed payloads +COMPRESSION_MAGIC = b"\x00CMP" + +# Minimum size to bother compressing (small data often expands) +MIN_COMPRESS_SIZE = 64 + +# Compression level for zlib (1-9, higher = better ratio but slower) +ZLIB_LEVEL = 6 + + +class CompressionError(Exception): + """Raised when compression/decompression fails.""" + + pass + + +def compress(data: bytes, algorithm: CompressionAlgorithm = CompressionAlgorithm.ZLIB) -> bytes: + """ + Compress data with specified algorithm. 
+ + Format: MAGIC (4) + ALGORITHM (1) + ORIGINAL_SIZE (4) + COMPRESSED_DATA + + Args: + data: Raw bytes to compress + algorithm: Compression algorithm to use + + Returns: + Compressed data with header, or original data if compression didn't help + """ + if len(data) < MIN_COMPRESS_SIZE: + # Too small to benefit from compression + return _wrap_uncompressed(data) + + if algorithm == CompressionAlgorithm.NONE: + return _wrap_uncompressed(data) + + elif algorithm == CompressionAlgorithm.ZLIB: + compressed = zlib.compress(data, level=ZLIB_LEVEL) + + elif algorithm == CompressionAlgorithm.LZ4: + if not HAS_LZ4: + # Fall back to zlib if LZ4 not available + compressed = zlib.compress(data, level=ZLIB_LEVEL) + algorithm = CompressionAlgorithm.ZLIB + else: + compressed = lz4.frame.compress(data) + + elif algorithm == CompressionAlgorithm.ZSTD: + if not HAS_ZSTD: + # Fall back to zlib if ZSTD not available + compressed = zlib.compress(data, level=ZLIB_LEVEL) + algorithm = CompressionAlgorithm.ZLIB + else: + cctx = zstd.ZstdCompressor(level=19) # High compression level + compressed = cctx.compress(data) + else: + raise CompressionError(f"Unknown compression algorithm: {algorithm}") + + # Only use compression if it actually reduced size + if len(compressed) >= len(data): + return _wrap_uncompressed(data) + + # Build header: MAGIC + algorithm + original_size + compressed_data + header = COMPRESSION_MAGIC + struct.pack(" bytes: + """ + Decompress data, auto-detecting algorithm from header. 
+ + Args: + data: Potentially compressed data + + Returns: + Decompressed data (or original if not compressed) + """ + # Check for compression magic + if not data.startswith(COMPRESSION_MAGIC): + # Not compressed by us, return as-is + return data + + if len(data) < 9: # MAGIC(4) + ALGO(1) + SIZE(4) + raise CompressionError("Truncated compression header") + + # Parse header + algorithm = CompressionAlgorithm(data[4]) + original_size = struct.unpack(" bytes: + """Wrap uncompressed data with header for consistency.""" + header = COMPRESSION_MAGIC + struct.pack(" float: + """ + Calculate compression ratio. + + Returns: + Ratio where < 1.0 means compression helped, > 1.0 means it expanded + """ + if len(original) == 0: + return 1.0 + return len(compressed) / len(original) + + +def estimate_compressed_size( + data: bytes, algorithm: CompressionAlgorithm = CompressionAlgorithm.ZLIB +) -> int: + """ + Estimate compressed size without full compression. + Uses sampling for large data. + + Args: + data: Data to estimate + algorithm: Algorithm to estimate for + + Returns: + Estimated compressed size in bytes + """ + if len(data) < MIN_COMPRESS_SIZE: + return len(data) + 9 # Header overhead + + # For small data, just compress it + if len(data) < 10000: + compressed = compress(data, algorithm) + return len(compressed) + + # For large data, sample and extrapolate + sample_size = 8192 + sample = data[:sample_size] + + if algorithm == CompressionAlgorithm.ZLIB: + compressed_sample = zlib.compress(sample, level=ZLIB_LEVEL) + elif algorithm == CompressionAlgorithm.LZ4 and HAS_LZ4: + compressed_sample = lz4.frame.compress(sample) + elif algorithm == CompressionAlgorithm.ZSTD and HAS_ZSTD: + cctx = zstd.ZstdCompressor(level=19) + compressed_sample = cctx.compress(sample) + else: + compressed_sample = zlib.compress(sample, level=ZLIB_LEVEL) + + ratio = len(compressed_sample) / len(sample) + estimated = int(len(data) * ratio) + 9 # Add header + + return estimated + + +def 
get_available_algorithms() -> list[CompressionAlgorithm]: + """Get list of available compression algorithms.""" + algorithms = [CompressionAlgorithm.NONE, CompressionAlgorithm.ZLIB] + if HAS_LZ4: + algorithms.append(CompressionAlgorithm.LZ4) + if HAS_ZSTD: + algorithms.append(CompressionAlgorithm.ZSTD) + return algorithms + + +def get_best_algorithm() -> CompressionAlgorithm: + """Get the best available compression algorithm (prefer ZSTD > ZLIB > LZ4).""" + if HAS_ZSTD: + return CompressionAlgorithm.ZSTD + return CompressionAlgorithm.ZLIB + + +def algorithm_name(algo: CompressionAlgorithm) -> str: + """Get human-readable algorithm name.""" + names = { + CompressionAlgorithm.NONE: "None", + CompressionAlgorithm.ZLIB: "Zlib (deflate)", + CompressionAlgorithm.LZ4: "LZ4 (fast)", + CompressionAlgorithm.ZSTD: "Zstd (best)", + } + return names.get(algo, "Unknown") diff --git a/src/soosef/stegasoo/constants.py b/src/soosef/stegasoo/constants.py new file mode 100644 index 0000000..9b63f4d --- /dev/null +++ b/src/soosef/stegasoo/constants.py @@ -0,0 +1,455 @@ +""" +Stegasoo Constants and Configuration (v4.2.0 - Performance & Compression) + +Central location for all magic numbers, limits, and crypto parameters. +All version numbers, limits, and configuration values should be defined here. 
+ +CHANGES in v4.2.0: +- Added zstd compression for QR codes (better ratio than zlib) +- RSA key size capped at 3072 bits (4096 too large for QR codes) +- Progress bar improvements for encode/decode operations +- File auto-expire increased to 10 minutes + +CHANGES in v4.0.2: +- Added Web UI authentication with SQLite3 user storage +- Added optional HTTPS with auto-generated self-signed certificates +- UI improvements for QR preview panels and PIN/channel columns + +BREAKING CHANGES in v4.0.0: +- Added channel key support for deployment/group isolation +- FORMAT_VERSION bumped to 5 (adds flags byte to header) +- Header size increased by 1 byte for flags + +BREAKING CHANGES in v3.2.0: +- Removed date dependency from cryptographic operations +- Renamed day_phrase → passphrase throughout codebase +""" + +import importlib.resources +from pathlib import Path + +# ============================================================================ +# VERSION +# ============================================================================ + +__version__ = "4.2.1" + +# ============================================================================ +# FILE FORMAT +# ============================================================================ + +MAGIC_HEADER = b"\x89ST3" + +# FORMAT VERSION HISTORY: +# Version 1-3: Date-dependent encryption (v3.0.x - v3.1.x) +# Version 4: Date-independent encryption (v3.2.0) +# Version 5: Channel key support (v4.0.0) - adds flags byte to header +# Version 6: HKDF per-message key derivation (v4.4.0) - adds message nonce to header +FORMAT_VERSION = 6 +FORMAT_VERSION_LEGACY = 5 # For backward-compatible decryption + +# Payload type markers +PAYLOAD_TEXT = 0x01 +PAYLOAD_FILE = 0x02 + +# ============================================================================ +# CRYPTO PARAMETERS +# ============================================================================ + +SALT_SIZE = 32 +IV_SIZE = 12 +TAG_SIZE = 16 + +# Argon2 parameters (memory-hard KDF) 
+ARGON2_TIME_COST = 4 +ARGON2_MEMORY_COST = 256 * 1024 # 256 MB +ARGON2_PARALLELISM = 4 + +# PBKDF2 fallback parameters +PBKDF2_ITERATIONS = 600000 + +# HKDF per-message key derivation (v4.4.0 / FORMAT_VERSION 6) +MESSAGE_NONCE_SIZE = 16 # 128-bit random nonce per message +HKDF_INFO_ENCRYPT = b"stegasoo-v6-encrypt" # HKDF info for encryption key +HKDF_INFO_PIXEL = b"stegasoo-v6-pixel" # HKDF info for pixel selection key (reserved) + +# ============================================================================ +# INPUT LIMITS +# ============================================================================ + +MAX_IMAGE_PIXELS = 24_000_000 # ~24 megapixels +MIN_IMAGE_PIXELS = 256 * 256 # Minimum viable image size + +MAX_MESSAGE_SIZE = 250_000 # 250 KB (text messages) +MAX_MESSAGE_CHARS = 250_000 # Alias for clarity in templates +MIN_MESSAGE_LENGTH = 1 # Minimum message length +MAX_MESSAGE_LENGTH = MAX_MESSAGE_SIZE # Alias for consistency + +MAX_PAYLOAD_SIZE = MAX_MESSAGE_SIZE # Maximum payload size (alias) +MAX_FILENAME_LENGTH = 255 # Max filename length to store + +# File size limits +MAX_FILE_SIZE = 30 * 1024 * 1024 # 30MB total file size +MAX_FILE_PAYLOAD_SIZE = 2 * 1024 * 1024 # 2MB payload +MAX_UPLOAD_SIZE = 30 * 1024 * 1024 # 30MB max upload (Flask) + +# PIN configuration +MIN_PIN_LENGTH = 6 +MAX_PIN_LENGTH = 9 +DEFAULT_PIN_LENGTH = 6 + +# Passphrase configuration (v3.2.0: renamed from PHRASE to PASSPHRASE) +# Increased defaults to compensate for removed date entropy (~33 bits) +MIN_PASSPHRASE_WORDS = 3 +MAX_PASSPHRASE_WORDS = 12 +DEFAULT_PASSPHRASE_WORDS = 4 # Increased from 3 (was DEFAULT_PHRASE_WORDS) +RECOMMENDED_PASSPHRASE_WORDS = 4 # Best practice guideline + +# Legacy aliases for backward compatibility during transition +MIN_PHRASE_WORDS = MIN_PASSPHRASE_WORDS +MAX_PHRASE_WORDS = MAX_PASSPHRASE_WORDS +DEFAULT_PHRASE_WORDS = DEFAULT_PASSPHRASE_WORDS + +# RSA configuration +MIN_RSA_BITS = 2048 +VALID_RSA_SIZES = (2048, 3072) # 4096 removed - too large for 
QR codes +DEFAULT_RSA_BITS = 2048 + +MIN_KEY_PASSWORD_LENGTH = 8 + +# ============================================================================ +# WEB/API CONFIGURATION +# ============================================================================ + +# Temporary file storage +TEMP_FILE_EXPIRY = 600 # 10 minutes in seconds +TEMP_FILE_EXPIRY_MINUTES = 10 + +# Thumbnail settings +THUMBNAIL_SIZE = (250, 250) # Maximum dimensions for thumbnails +THUMBNAIL_QUALITY = 85 + +# QR Code limits +QR_MAX_BINARY = 2900 # Safe limit for binary data in QR +QR_CROP_PADDING_PERCENT = 0.1 # Default padding when cropping QR codes +QR_CROP_MIN_PADDING_PX = 10 # Minimum padding in pixels + +# ============================================================================ +# FILE TYPES +# ============================================================================ + +ALLOWED_IMAGE_EXTENSIONS = {"png", "jpg", "jpeg", "bmp", "gif"} +ALLOWED_KEY_EXTENSIONS = {"pem", "key"} + +# Lossless image formats (safe for steganography) +LOSSLESS_FORMATS = {"PNG", "BMP", "TIFF"} + +# Supported image formats for steganography +SUPPORTED_IMAGE_FORMATS = LOSSLESS_FORMATS + +# ============================================================================ +# DAYS (kept for organizational/UI purposes, not crypto) +# ============================================================================ + +DAY_NAMES = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday") + +# ============================================================================ +# COMPRESSION +# ============================================================================ + +# Minimum payload size to attempt compression (smaller often expands) +MIN_COMPRESS_SIZE = 64 + +# Zlib compression level (1-9, higher = better ratio, slower) +ZLIB_COMPRESSION_LEVEL = 6 + +# Compression header magic bytes +COMPRESSION_MAGIC = b"\x00CMP" + +# ============================================================================ +# BATCH PROCESSING 
+# ============================================================================ + +# Default parallel workers for batch operations +BATCH_DEFAULT_WORKERS = 4 + +# Maximum parallel workers +BATCH_MAX_WORKERS = 16 + +# Output filename suffix for batch encode +BATCH_OUTPUT_SUFFIX = "_encoded" + +# ============================================================================ +# DATA FILES +# ============================================================================ + + +def get_data_dir() -> Path: + """Get the data directory path. + + Checks locations in order: + 1. Package data (installed via pip/wheel) using importlib.resources + 2. Development layout (src/stegasoo -> project root/data) + 3. Docker container (/app/data) + 4. Current working directory fallbacks + """ + # Try package data first (works when installed via pip) + try: + pkg_data = importlib.resources.files("soosef.stegasoo.data") + # Check if the package data directory exists and has our files + if (pkg_data / "bip39-words.txt").is_file(): + # Return as Path - importlib.resources.files returns a Traversable + return Path(str(pkg_data)) + except (ModuleNotFoundError, TypeError): + pass + + # Fallback to file-based locations + # From src/stegasoo/constants.py: + # .parent = src/stegasoo/ + # .parent.parent = src/ + # .parent.parent.parent = project root (where data/ lives) + candidates = [ + Path(__file__).parent / "data", # Installed package (stegasoo/data/) + Path(__file__).parent.parent.parent / "data", # Development: src/stegasoo -> project root + Path("/app/data"), # Docker + Path.cwd() / "data", # Current directory + Path.cwd().parent / "data", # One level up from cwd + Path.cwd().parent.parent / "data", # Two levels up from cwd + ] + + for path in candidates: + if path.exists(): + return path + + # Default to package data path for clearer error messages + return Path(__file__).parent / "data" + + +def get_bip39_words() -> list[str]: + """Load BIP-39 wordlist.""" + wordlist_path = get_data_dir() / 
"bip39-words.txt" + + if not wordlist_path.exists(): + raise FileNotFoundError( + f"BIP-39 wordlist not found at {wordlist_path}. " + "Please ensure bip39-words.txt is in the data directory." + ) + + with open(wordlist_path) as f: + return [line.strip() for line in f if line.strip()] + + +# Lazy-loaded wordlist +_bip39_words: list[str] | None = None + + +def get_wordlist() -> list[str]: + """Get the BIP-39 wordlist (cached).""" + global _bip39_words + if _bip39_words is None: + _bip39_words = get_bip39_words() + return _bip39_words + + +# ============================================================================= +# STEGANALYSIS (v4.4.0) +# ============================================================================= + +# Chi-square p-value threshold: HIGH p-value = equalized PoV pairs = suspicious +STEGANALYSIS_CHI_SUSPICIOUS_THRESHOLD = 0.95 # p > 0.95 → pairs suspiciously equalized + +# RS embedding rate thresholds (primary metric): higher = more likely embedded +STEGANALYSIS_RS_HIGH_THRESHOLD = 0.3 # > 30% estimated embedding → high risk +STEGANALYSIS_RS_MEDIUM_THRESHOLD = 0.1 # > 10% estimated embedding → medium risk + +# ============================================================================= +# DCT STEGANOGRAPHY (v3.0+) +# ============================================================================= + +# Embedding modes +EMBED_MODE_LSB = "lsb" # Spatial LSB embedding (default, original mode) +EMBED_MODE_DCT = "dct" # DCT domain embedding (new in v3.0) +EMBED_MODE_AUTO = "auto" # Auto-detect on decode + +# DCT-specific constants +DCT_MAGIC_HEADER = b"\x89DCT" # Magic header for DCT mode +DCT_FORMAT_VERSION = 1 +DCT_STEP_SIZE = 8 # QIM quantization step + +# Recovery key obfuscation - FIXED value for admin recovery QR codes +# SHA256("\x89ST3\x89DCT") - hardcoded so it never changes even if headers are added +# Used to XOR recovery keys in QR codes so they scan as gibberish +RECOVERY_OBFUSCATION_KEY = bytes.fromhex( + "d6c70bce27780db942562550e9fe1459" 
"9dfdb8421f5acc79696b05db4e7afbd2" +) # 32 bytes + +# Valid embedding modes +VALID_EMBED_MODES = {EMBED_MODE_LSB, EMBED_MODE_DCT} + +# Capacity estimation constants +LSB_BYTES_PER_PIXEL = 3 / 8 # 3 bits per pixel (RGB, 1 bit per channel) / 8 bits per byte +DCT_BYTES_PER_PIXEL = 0.125 # Approximate for DCT mode (varies by implementation) + + +def detect_stego_mode(encrypted_data: bytes) -> str: + """ + Detect embedding mode from encrypted payload header. + + Args: + encrypted_data: First few bytes of extracted payload + + Returns: + 'lsb' or 'dct' or 'unknown' + """ + if len(encrypted_data) < 4: + return "unknown" + + header = encrypted_data[:4] + + if header == b"\x89ST3": + return EMBED_MODE_LSB + elif header == b"\x89DCT": + return EMBED_MODE_DCT + else: + return "unknown" + + +# ============================================================================= +# FEATURE TOGGLES (v4.3.1) +# ============================================================================= +# Environment variables to enable/disable optional feature families. +# Values: "auto" (default — detect dependencies), "1"/"true" (force on), +# "0"/"false" (force off even if deps are installed). +# Pi builds or minimal installs can set STEGASOO_AUDIO=0 to stay image-only. + +import os as _os + + +def _parse_feature_toggle(env_var: str, default: str = "auto") -> str | bool: + """Parse a feature toggle env var. 
Returns 'auto', True, or False.""" + val = _os.environ.get(env_var, default).strip().lower() + if val in ("1", "true", "yes", "on"): + return True + if val in ("0", "false", "no", "off"): + return False + return "auto" + + +def _check_audio_deps() -> bool: + """Check if audio dependencies (soundfile, numpy) are importable.""" + try: + import numpy # noqa: F401 + import soundfile # noqa: F401 + + return True + except ImportError: + return False + + +def _check_video_deps() -> bool: + """Check if video dependencies (ffmpeg binary + audio deps) are available.""" + import shutil + + if not _check_audio_deps(): + return False + return shutil.which("ffmpeg") is not None + + +def _resolve_feature(toggle: str | bool, dep_check: callable) -> bool: + """Resolve a feature toggle to a final bool.""" + if toggle is True: + if not dep_check(): + raise ImportError( + f"Feature force-enabled but required dependencies are missing. " + f"Install the relevant extras (e.g. pip install stegasoo[audio])." + ) + return True + if toggle is False: + return False + # auto + return dep_check() + + +_audio_toggle = _parse_feature_toggle("STEGASOO_AUDIO") +_video_toggle = _parse_feature_toggle("STEGASOO_VIDEO") + +AUDIO_ENABLED: bool = _resolve_feature(_audio_toggle, _check_audio_deps) +VIDEO_ENABLED: bool = _resolve_feature(_video_toggle, _check_video_deps) + + +# ============================================================================= +# AUDIO STEGANOGRAPHY (v4.3.0) +# ============================================================================= + +# Audio embedding modes +EMBED_MODE_AUDIO_LSB = "audio_lsb" +EMBED_MODE_AUDIO_SPREAD = "audio_spread" +EMBED_MODE_AUDIO_AUTO = "audio_auto" +VALID_AUDIO_EMBED_MODES = {EMBED_MODE_AUDIO_LSB, EMBED_MODE_AUDIO_SPREAD} + +# Audio magic bytes (for format detection in stego audio) +AUDIO_MAGIC_LSB = b"AUDL" +AUDIO_MAGIC_SPREAD = b"AUDS" + +# Audio input limits +MAX_AUDIO_DURATION = 600 # 10 minutes +MAX_AUDIO_FILE_SIZE = 100 * 1024 * 1024 # 100 MB 
+MIN_AUDIO_SAMPLE_RATE = 8000 # G.729 level +MAX_AUDIO_SAMPLE_RATE = 192000 # Studio quality +ALLOWED_AUDIO_EXTENSIONS = {"wav", "flac", "mp3", "ogg", "opus", "aac", "m4a", "wma"} + +# Spread spectrum parameters +AUDIO_SS_CHIP_LENGTH = 1024 # Samples per chip (spreading factor) — legacy/default +AUDIO_SS_AMPLITUDE = 0.05 # Per-sample embedding strength (~-26dB, masked by audio) +AUDIO_SS_RS_NSYM = 32 # Reed-Solomon parity symbols + +# Spread spectrum v2: per-channel hybrid embedding (v4.4.0) +AUDIO_SS_HEADER_VERSION = 2 # v2 header format identifier + +# Chip tier system — trade capacity for robustness +AUDIO_SS_CHIP_TIER_LOSSLESS = 0 # 256 chips — lossless carriers (FLAC/WAV/ALAC) +AUDIO_SS_CHIP_TIER_HIGH_LOSSY = 1 # 512 chips — high-rate lossy (AAC 256k+) +AUDIO_SS_CHIP_TIER_LOW_LOSSY = 2 # 1024 chips — low-rate lossy (AAC 128k, Opus) +AUDIO_SS_DEFAULT_CHIP_TIER = 2 # Most robust, backward compatible +AUDIO_SS_CHIP_LENGTHS = {0: 256, 1: 512, 2: 1024} + +# Chip tier name mapping (for CLI/UI) +AUDIO_SS_CHIP_TIER_NAMES = { + "lossless": AUDIO_SS_CHIP_TIER_LOSSLESS, + "high": AUDIO_SS_CHIP_TIER_HIGH_LOSSY, + "low": AUDIO_SS_CHIP_TIER_LOW_LOSSY, +} + +# LFE channel skipping — LFE is bandlimited to ~120Hz, terrible carrier +AUDIO_LFE_CHANNEL_INDEX = 3 # Standard WAV/WAVEFORMATEXTENSIBLE ordering +AUDIO_LFE_MIN_CHANNELS = 6 # Only skip LFE for 5.1+ layouts + +# Echo hiding parameters +AUDIO_ECHO_DELAY_0 = 50 # Echo delay for bit 0 (samples at 44.1kHz ~ 1.1ms) +AUDIO_ECHO_DELAY_1 = 100 # Echo delay for bit 1 (samples at 44.1kHz ~ 2.3ms) +AUDIO_ECHO_AMPLITUDE = 0.3 # Echo strength (relative to original) +AUDIO_ECHO_WINDOW_SIZE = 8192 # Window size for echo embedding + + +# ============================================================================= +# VIDEO STEGANOGRAPHY (v4.4.0) +# ============================================================================= + +# Video embedding modes +EMBED_MODE_VIDEO_LSB = "video_lsb" +EMBED_MODE_VIDEO_AUTO = "video_auto" 
+VALID_VIDEO_EMBED_MODES = {EMBED_MODE_VIDEO_LSB} + +# Video magic bytes (for format detection in stego video) +VIDEO_MAGIC_LSB = b"VIDL" + +# Video input limits +MAX_VIDEO_FILE_SIZE = 4 * 1024 * 1024 * 1024 # 4 GB +MAX_VIDEO_DURATION = 3600 # 1 hour in seconds +MIN_VIDEO_RESOLUTION = (64, 64) +MAX_VIDEO_RESOLUTION = (7680, 4320) # 8K UHD +ALLOWED_VIDEO_EXTENSIONS = {"mp4", "mkv", "webm", "avi", "mov"} + +# Video output settings +VIDEO_OUTPUT_CODEC = "ffv1" # FFV1 lossless codec +VIDEO_OUTPUT_CONTAINER = "mkv" # MKV container for FFV1 diff --git a/src/soosef/stegasoo/crypto.py b/src/soosef/stegasoo/crypto.py new file mode 100644 index 0000000..8f473a9 --- /dev/null +++ b/src/soosef/stegasoo/crypto.py @@ -0,0 +1,836 @@ +""" +Stegasoo Cryptographic Functions (v4.0.0 - Channel Key Support) + +This is the crypto layer - where we turn plaintext into indecipherable noise. + +The security model is multi-factor: +┌────────────────────────────────────────────────────────────────────┐ +│ SOMETHING YOU HAVE SOMETHING YOU KNOW │ +│ ├─ Reference photo ├─ Passphrase (4+ BIP-39 words) │ +│ └─ RSA private key (opt) └─ PIN (6-9 digits) │ +│ │ +│ DEPLOYMENT BINDING │ +│ └─ Channel key (ties messages to a specific server/group) │ +└────────────────────────────────────────────────────────────────────┘ + +All factors get mixed together through Argon2id (memory-hard KDF) to derive +the actual encryption key. Miss any factor = wrong key = garbage output. 
+ +Encryption: AES-256-GCM (authenticated encryption - tamper = detection) +KDF: Argon2id (256MB RAM, 4 iterations) or PBKDF2 fallback (600K iterations) + +v4.0.0: Added channel key for server/group isolation +v3.2.0: Removed date dependency (was cute but annoying in practice) +""" + +import hashlib +import io +import secrets +import struct + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives import hashes as _hashes +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +from cryptography.hazmat.primitives.kdf.hkdf import HKDFExpand +from PIL import Image + +from .constants import ( + ARGON2_MEMORY_COST, + ARGON2_PARALLELISM, + ARGON2_TIME_COST, + FORMAT_VERSION, + FORMAT_VERSION_LEGACY, + HKDF_INFO_ENCRYPT, + IV_SIZE, + MAGIC_HEADER, + MAX_FILENAME_LENGTH, + MESSAGE_NONCE_SIZE, + PAYLOAD_FILE, + PAYLOAD_TEXT, + PBKDF2_ITERATIONS, + SALT_SIZE, + TAG_SIZE, +) +from .debug import get_logger +from .exceptions import DecryptionError, EncryptionError, InvalidHeaderError, KeyDerivationError +from .models import DecodeResult, FilePayload + +logger = get_logger(__name__) + +# Check for Argon2 availability +try: + from argon2.low_level import Type, hash_secret_raw + + HAS_ARGON2 = True +except ImportError: + HAS_ARGON2 = False + from cryptography.hazmat.primitives import hashes + from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC + + + +# ============================================================================= +# CHANNEL KEY RESOLUTION +# ============================================================================= + +# Sentinel value for "use auto-detected channel key" +CHANNEL_KEY_AUTO = "auto" + + +def _resolve_channel_key(channel_key: str | bool | None) -> bytes | None: + """ + Resolve channel key parameter to actual key hash. 
+ + Args: + channel_key: Channel key parameter with these behaviors: + - None or "auto": Use server's configured key (from env/config) + - str (valid key): Use this specific key + - "" or False: Explicitly use NO channel key (public mode) + + Returns: + 32-byte channel key hash, or None for public mode + """ + # Explicit public mode + if channel_key == "" or channel_key is False: + return None + + # Auto-detect from environment/config + if channel_key is None or channel_key == CHANNEL_KEY_AUTO: + from .channel import get_channel_key_hash + + return get_channel_key_hash() + + # Explicit key provided - validate and hash it + if isinstance(channel_key, str): + from .channel import format_channel_key, validate_channel_key + + if not validate_channel_key(channel_key): + raise ValueError(f"Invalid channel key format: {channel_key}") + formatted = format_channel_key(channel_key) + return hashlib.sha256(formatted.encode("utf-8")).digest() + + raise ValueError(f"Invalid channel_key type: {type(channel_key)}") + + +# ============================================================================= +# CORE CRYPTO FUNCTIONS +# ============================================================================= +# +# The "reference photo as a key" concept is one of Stegasoo's unique features. +# Most steganography tools just use a password. We add the photo as a +# "something you have" factor - like a hardware token, but it's a cat picture. + + +def hash_photo(image_data: bytes) -> bytes: + """ + Compute deterministic hash of photo pixel content. + + This is the magic sauce that turns your cat photo into a cryptographic key. + + Why pixels and not the file hash? + - File metadata changes (EXIF stripped, resaved) = different file hash + - But pixel content stays the same + - We hash the RGB values directly, so format conversions don't matter + + The double-hash with prefix is belt-and-suspenders mixing. Probably + overkill, but hey, it's crypto - paranoia is a feature. 
+ + Args: + image_data: Raw image file bytes (any format PIL can read) + + Returns: + 32-byte SHA-256 hash of pixel content + """ + # Convert to RGB to normalize (RGBA, grayscale, etc. all become RGB) + img: Image.Image = Image.open(io.BytesIO(image_data)).convert("RGB") + pixels = img.tobytes() + + # Double-hash: SHA256(SHA256(pixels) + first 1KB of pixels) + # The prefix adds image-specific data to prevent length-extension shenanigans + h = hashlib.sha256(pixels).digest() + h = hashlib.sha256(h + pixels[:1024]).digest() + return h + + +def derive_hybrid_key( + photo_data: bytes, + passphrase: str, + salt: bytes, + pin: str = "", + rsa_key_data: bytes | None = None, + channel_key: str | bool | None = None, +) -> bytes: + """ + Derive encryption key from multiple factors. + + This is the heart of Stegasoo's security model. We take all the things + you need to prove you're authorized (photo, passphrase, PIN, etc.) and + blend them together into one 32-byte key. + + The flow: + ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ + │ Photo hash │ + │ passphrase │ + │ PIN + RSA │ + salt + └─────────────┘ └─────────────┘ └─────────────┘ + │ │ │ + └────────────────┴────────────────┘ + │ + ▼ + ┌─────────────────┐ + │ Argon2id │ <- Memory-hard KDF + │ 256MB / 4 iter │ <- Makes brute force expensive + └─────────────────┘ + │ + ▼ + 32-byte AES key + + Why Argon2id? 
+ - Memory-hard: attackers can't just throw GPUs at it + - 256MB RAM per attempt = expensive at scale + - Winner of the Password Hashing Competition (2015) + - "id" variant resists both side-channel and GPU attacks + + Fallback: PBKDF2-SHA512 with 600K iterations (for systems without argon2) + + Args: + photo_data: Reference photo bytes + passphrase: Shared passphrase (recommend 4+ words from BIP-39) + salt: Random salt for this message (32 bytes) + pin: Optional static PIN + rsa_key_data: Optional RSA key bytes + channel_key: Channel key parameter: + - None or "auto": Use configured key + - str: Use this specific key + - "" or False: No channel key (public mode) + + Returns: + 32-byte derived key (ready for AES-256) + + Raises: + KeyDerivationError: If key derivation fails + """ + try: + photo_hash = hash_photo(photo_data) + logger.debug( + "derive_hybrid_key: photo_hash=%s, pin=%s, rsa=%s, channel=%s, salt=%d bytes", + photo_hash[:4].hex(), + "set" if pin else "none", + "set" if rsa_key_data else "none", + ( + "explicit" + if isinstance(channel_key, str) and channel_key + else "auto" if channel_key is None else "none" + ), + len(salt), + ) + + # Resolve channel key (server-specific binding) + channel_hash = _resolve_channel_key(channel_key) + + # Build key material by concatenating all factors + # Passphrase is lowercased to be forgiving of case differences + key_material = photo_hash + passphrase.lower().encode() + pin.encode() + salt + + # Add RSA key hash if provided (another "something you have") + if rsa_key_data: + key_material += hashlib.sha256(rsa_key_data).digest() + + # Add channel key hash if configured (v4.0.0 - deployment binding) + if channel_hash: + key_material += channel_hash + + logger.debug("Key material: %d bytes", len(key_material)) + + # Run it all through the KDF + if HAS_ARGON2: + logger.debug( + "KDF: Argon2id (memory=%dKB, time=%d, parallel=%d)", + ARGON2_MEMORY_COST, + ARGON2_TIME_COST, + ARGON2_PARALLELISM, + ) + # Argon2id: the good 
stuff + key = hash_secret_raw( + secret=key_material, + salt=salt[:32], + time_cost=ARGON2_TIME_COST, # 4 iterations + memory_cost=ARGON2_MEMORY_COST, # 256 MB RAM + parallelism=ARGON2_PARALLELISM, # 4 threads + hash_len=32, + type=Type.ID, # Hybrid mode: resists side-channel AND GPU attacks + ) + else: + logger.warning( + "KDF: PBKDF2 fallback (%d iterations) - argon2 not available", PBKDF2_ITERATIONS + ) + # PBKDF2 fallback for systems without argon2-cffi + # 600K iterations is slow but not memory-hard + kdf = PBKDF2HMAC( + algorithm=hashes.SHA512(), + length=32, + salt=salt, + iterations=PBKDF2_ITERATIONS, + backend=default_backend(), + ) + key = kdf.derive(key_material) + + logger.debug("KDF complete, derived %d-byte key", len(key)) + return key + + except Exception as e: + raise KeyDerivationError(f"Failed to derive key: {e}") from e + + +def derive_pixel_key( + photo_data: bytes, + passphrase: str, + pin: str = "", + rsa_key_data: bytes | None = None, + channel_key: str | bool | None = None, +) -> bytes: + """ + Derive key for pseudo-random pixel selection. + + This key determines which pixels are used for embedding, + making the message location unpredictable without the correct inputs. 
+ + Args: + photo_data: Reference photo bytes + passphrase: Shared passphrase + pin: Optional static PIN + rsa_key_data: Optional RSA key bytes + channel_key: Channel key parameter (see derive_hybrid_key) + + Returns: + 32-byte key for pixel selection + """ + photo_hash = hash_photo(photo_data) + + # Resolve channel key + channel_hash = _resolve_channel_key(channel_key) + + material = photo_hash + passphrase.lower().encode() + pin.encode() + + if rsa_key_data: + material += hashlib.sha256(rsa_key_data).digest() + + # Add channel key hash if configured (v4.0.0) + if channel_hash: + material += channel_hash + + return hashlib.sha256(material + b"pixel_selection").digest() + + +def derive_message_key(root_key: bytes, nonce: bytes) -> bytes: + """ + Derive a per-message encryption key via HKDF-Expand. + + Each message gets a unique encryption key even with identical credentials, + because the nonce is random per message. This provides key diversification: + compromising the ciphertext of one message doesn't help with another. + + Args: + root_key: 32-byte root key from Argon2id/PBKDF2 + nonce: 16-byte random nonce (unique per message) + + Returns: + 32-byte per-message encryption key + """ + hkdf = HKDFExpand( + algorithm=_hashes.SHA256(), + length=32, + info=HKDF_INFO_ENCRYPT + nonce, + backend=default_backend(), + ) + return hkdf.derive(root_key) + + +def _pack_payload( + content: str | bytes | FilePayload, +) -> tuple[bytes, int]: + """ + Pack payload with type marker and metadata. 
+ + Format for text: + [type:1][data] + + Format for file: + [type:1][filename_len:2][filename][mime_len:2][mime][data] + + Args: + content: Text string, raw bytes, or FilePayload + + Returns: + Tuple of (packed bytes, payload type) + """ + if isinstance(content, str): + # Text message + data = content.encode("utf-8") + return bytes([PAYLOAD_TEXT]) + data, PAYLOAD_TEXT + + elif isinstance(content, FilePayload): + # File with metadata + filename = content.filename[:MAX_FILENAME_LENGTH].encode("utf-8") + mime = (content.mime_type or "")[:100].encode("utf-8") + + packed = ( + bytes([PAYLOAD_FILE]) + + struct.pack(">H", len(filename)) + + filename + + struct.pack(">H", len(mime)) + + mime + + content.data + ) + return packed, PAYLOAD_FILE + + else: + # Raw bytes - treat as file with no name + packed = ( + bytes([PAYLOAD_FILE]) + + struct.pack(">H", 0) # No filename + + struct.pack(">H", 0) # No mime + + content + ) + return packed, PAYLOAD_FILE + + +def _unpack_payload(data: bytes) -> DecodeResult: + """ + Unpack payload and extract content with metadata. 
+ + Args: + data: Packed payload bytes + + Returns: + DecodeResult with appropriate content + """ + if len(data) < 1: + raise DecryptionError("Empty payload") + + payload_type = data[0] + + if payload_type == PAYLOAD_TEXT: + # Text message + text = data[1:].decode("utf-8") + return DecodeResult(payload_type="text", message=text) + + elif payload_type == PAYLOAD_FILE: + # File with metadata + offset = 1 + + # Read filename + filename_len = struct.unpack(">H", data[offset : offset + 2])[0] + offset += 2 + filename = data[offset : offset + filename_len].decode("utf-8") if filename_len else None + offset += filename_len + + # Read mime type + mime_len = struct.unpack(">H", data[offset : offset + 2])[0] + offset += 2 + mime_type = data[offset : offset + mime_len].decode("utf-8") if mime_len else None + offset += mime_len + + # Rest is file data + file_data = data[offset:] + + return DecodeResult( + payload_type="file", file_data=file_data, filename=filename, mime_type=mime_type + ) + + else: + # Unknown type - try to decode as text (backward compatibility) + try: + text = data.decode("utf-8") + return DecodeResult(payload_type="text", message=text) + except UnicodeDecodeError: + return DecodeResult(payload_type="file", file_data=data) + + +# ============================================================================= +# HEADER FLAGS (v4.0.0) +# ============================================================================= +# +# The flags byte tells us about the message without decrypting it. +# Currently just one flag, but the byte gives us room for 8. + +FLAG_CHANNEL_KEY = 0x01 # Bit 0: Message was encoded with a channel key +# Future flags could include: compression, file attachment, etc. + + +def encrypt_message( + message: str | bytes | FilePayload, + photo_data: bytes, + passphrase: str, + pin: str = "", + rsa_key_data: bytes | None = None, + channel_key: str | bool | None = None, +) -> bytes: + """ + Encrypt message or file using AES-256-GCM. 
+ + This is where plaintext becomes ciphertext. We use AES-256-GCM which is: + - AES: The standard, used by everyone from banks to governments + - 256-bit key: Enough entropy to survive until the heat death of the universe + - GCM mode: Authenticated encryption - if anyone tampers, decryption fails + + The output format (v4.0.0): + ┌──────────────────────────────────────────────────────────────────────┐ + │ \x89ST3 │ 05 │ flags │ salt (32B) │ iv (12B) │ tag (16B) │ ··· │ + │ magic │ver │ │ │ │ │cipher│ + └──────────────────────────────────────────────────────────────────────┘ + + Why the random padding at the end? + - Message length can reveal information (traffic analysis) + - We add 64-319 random bytes and round to 256-byte boundary + - All messages look roughly the same size + + Args: + message: Message string, raw bytes, or FilePayload to encrypt + photo_data: Reference photo bytes (your "key photo") + passphrase: Shared passphrase (recommend 4+ words from BIP-39) + pin: Optional static PIN for additional security + rsa_key_data: Optional RSA key bytes (another "something you have") + channel_key: Channel key parameter: + - None or "auto": Use server's configured key + - str: Use this specific key + - "" or False: No channel key (public mode) + + Returns: + Encrypted message bytes ready for embedding + + Raises: + EncryptionError: If encryption fails (shouldn't happen with valid inputs) + """ + try: + salt = secrets.token_bytes(SALT_SIZE) + root_key = derive_hybrid_key(photo_data, passphrase, salt, pin, rsa_key_data, channel_key) + + # v6: Per-message key via HKDF — each message gets a unique encryption key + message_nonce = secrets.token_bytes(MESSAGE_NONCE_SIZE) + key = derive_message_key(root_key, message_nonce) + + iv = secrets.token_bytes(IV_SIZE) + + # Determine flags + flags = 0 + channel_hash = _resolve_channel_key(channel_key) + if channel_hash: + flags |= FLAG_CHANNEL_KEY + + # Pack payload with type marker + packed_payload, _ = _pack_payload(message) 
+ + logger.debug( + "encrypt_message: packed_payload=%d bytes, flags=0x%02x, format_version=%d", + len(packed_payload), + flags, + FORMAT_VERSION, + ) + + # Random padding to hide message length + padding_len = secrets.randbelow(256) + 64 + padded_len = ((len(packed_payload) + padding_len + 255) // 256) * 256 + padding_needed = padded_len - len(packed_payload) + padding = secrets.token_bytes(padding_needed - 4) + struct.pack(">I", len(packed_payload)) + padded_message = packed_payload + padding + + logger.debug( + "Padded message: %d bytes (payload + %d padding)", len(padded_message), padding_needed + ) + + # Build header for AAD (v6: includes nonce in authenticated data) + header = MAGIC_HEADER + bytes([FORMAT_VERSION, flags]) + + # Encrypt with AES-256-GCM + cipher = Cipher(algorithms.AES(key), modes.GCM(iv), backend=default_backend()) + encryptor = cipher.encryptor() + encryptor.authenticate_additional_data(header + message_nonce) + ciphertext = encryptor.update(padded_message) + encryptor.finalize() + + total_size = ( + len(header) + + MESSAGE_NONCE_SIZE + + len(salt) + + len(iv) + + len(encryptor.tag) + + len(ciphertext) + ) + logger.debug( + "Encrypted output: %d bytes (header=%d, nonce=%d, salt=%d, iv=%d, tag=%d, ct=%d)", + total_size, + len(header), + MESSAGE_NONCE_SIZE, + len(salt), + len(iv), + len(encryptor.tag), + len(ciphertext), + ) + + # v6: [magic|version|flags|nonce|salt|iv|tag|ciphertext] + return header + message_nonce + salt + iv + encryptor.tag + ciphertext + + except Exception as e: + logger.error("Encryption failed: %s", e) + raise EncryptionError(f"Encryption failed: {e}") from e + + +def parse_header(encrypted_data: bytes) -> dict | None: + """ + Parse the header from encrypted data. + + Supports both v5 (legacy) and v6 (HKDF) header formats. 
+ + v5: [magic:4][ver:1][flags:1][salt:32][iv:12][tag:16][ciphertext] (66+ bytes) + v6: [magic:4][ver:1][flags:1][nonce:16][salt:32][iv:12][tag:16][ciphertext] (82+ bytes) + + Args: + encrypted_data: Raw encrypted bytes + + Returns: + Dict with version, salt, iv, tag, ciphertext, flags, and optionally + message_nonce (v6). Returns None if invalid. + """ + # Min v5 size: 4+1+1+32+12+16 = 66 bytes + if len(encrypted_data) < 66 or encrypted_data[:4] != MAGIC_HEADER: + return None + + try: + version = encrypted_data[4] + + if version == FORMAT_VERSION: + # v6: has message nonce + if len(encrypted_data) < 82: + return None + flags = encrypted_data[5] + offset = 6 + message_nonce = encrypted_data[offset : offset + MESSAGE_NONCE_SIZE] + offset += MESSAGE_NONCE_SIZE + salt = encrypted_data[offset : offset + SALT_SIZE] + offset += SALT_SIZE + iv = encrypted_data[offset : offset + IV_SIZE] + offset += IV_SIZE + tag = encrypted_data[offset : offset + TAG_SIZE] + offset += TAG_SIZE + ciphertext = encrypted_data[offset:] + + return { + "version": version, + "flags": flags, + "has_channel_key": bool(flags & FLAG_CHANNEL_KEY), + "message_nonce": message_nonce, + "salt": salt, + "iv": iv, + "tag": tag, + "ciphertext": ciphertext, + } + + elif version == FORMAT_VERSION_LEGACY: + # v5: no nonce + flags = encrypted_data[5] + offset = 6 + salt = encrypted_data[offset : offset + SALT_SIZE] + offset += SALT_SIZE + iv = encrypted_data[offset : offset + IV_SIZE] + offset += IV_SIZE + tag = encrypted_data[offset : offset + TAG_SIZE] + offset += TAG_SIZE + ciphertext = encrypted_data[offset:] + + return { + "version": version, + "flags": flags, + "has_channel_key": bool(flags & FLAG_CHANNEL_KEY), + "message_nonce": None, + "salt": salt, + "iv": iv, + "tag": tag, + "ciphertext": ciphertext, + } + + else: + return None + + except Exception: + return None + + +def decrypt_message( + encrypted_data: bytes, + photo_data: bytes, + passphrase: str, + pin: str = "", + rsa_key_data: bytes | None = 
None, + channel_key: str | bool | None = None, +) -> DecodeResult: + """ + Decrypt message (v4.0.0 - with channel key support). + + Args: + encrypted_data: Encrypted message bytes + photo_data: Reference photo bytes + passphrase: Shared passphrase + pin: Optional static PIN + rsa_key_data: Optional RSA key bytes + channel_key: Channel key parameter (see encrypt_message) + + Returns: + DecodeResult with decrypted content + + Raises: + InvalidHeaderError: If data doesn't have valid Stegasoo header + DecryptionError: If decryption fails (wrong credentials) + """ + logger.debug("decrypt_message: %d bytes of encrypted data", len(encrypted_data)) + + header = parse_header(encrypted_data) + if not header: + logger.error("Invalid or missing Stegasoo header in %d bytes", len(encrypted_data)) + raise InvalidHeaderError("Invalid or missing Stegasoo header") + + logger.debug( + "Header: version=%d, flags=0x%02x, has_channel_key=%s, ciphertext=%d bytes", + header["version"], + header["flags"], + header["has_channel_key"], + len(header["ciphertext"]), + ) + + # Check for channel key mismatch and provide helpful error + channel_hash = _resolve_channel_key(channel_key) + has_configured_key = channel_hash is not None + message_has_key = header["has_channel_key"] + + try: + root_key = derive_hybrid_key( + photo_data, passphrase, header["salt"], pin, rsa_key_data, channel_key + ) + + version = header["version"] + message_nonce = header["message_nonce"] + + if version == FORMAT_VERSION and message_nonce is not None: + # v6: Derive per-message key via HKDF + key = derive_message_key(root_key, message_nonce) + aad_header = MAGIC_HEADER + bytes([FORMAT_VERSION, header["flags"]]) + message_nonce + else: + # v5 (legacy): Root key used directly + key = root_key + aad_header = MAGIC_HEADER + bytes([FORMAT_VERSION_LEGACY, header["flags"]]) + + cipher = Cipher( + algorithms.AES(key), modes.GCM(header["iv"], header["tag"]), backend=default_backend() + ) + decryptor = cipher.decryptor() + 
decryptor.authenticate_additional_data(aad_header) + + padded_plaintext = decryptor.update(header["ciphertext"]) + decryptor.finalize() + original_length = struct.unpack(">I", padded_plaintext[-4:])[0] + + logger.debug( + "Decrypted %d bytes, original payload length: %d", + len(padded_plaintext), + original_length, + ) + + payload_data = padded_plaintext[:original_length] + result = _unpack_payload(payload_data) + + logger.debug("Decryption successful: %s (v%d)", result.payload_type, version) + return result + + except Exception as e: + # Provide more helpful error message for channel key issues + if message_has_key and not has_configured_key: + raise DecryptionError( + "Decryption failed. This message was encoded with a channel key, " + "but no channel key is configured. Provide the correct channel key." + ) from e + elif not message_has_key and has_configured_key: + raise DecryptionError( + "Decryption failed. This message was encoded without a channel key, " + "but you have one configured. Try with channel_key='' for public mode." + ) from e + else: + raise DecryptionError( + "Decryption failed. Check your passphrase, PIN, RSA key, " + "reference photo, and channel key." + ) from e + + +def decrypt_message_text( + encrypted_data: bytes, + photo_data: bytes, + passphrase: str, + pin: str = "", + rsa_key_data: bytes | None = None, + channel_key: str | bool | None = None, +) -> str: + """ + Decrypt message and return as text string. + + For backward compatibility - returns text content or raises error for files. 
+ + Args: + encrypted_data: Encrypted message bytes + photo_data: Reference photo bytes + passphrase: Shared passphrase + pin: Optional static PIN + rsa_key_data: Optional RSA key bytes + channel_key: Channel key parameter + + Returns: + Decrypted message string + + Raises: + DecryptionError: If decryption fails or content is a file + """ + result = decrypt_message(encrypted_data, photo_data, passphrase, pin, rsa_key_data, channel_key) + + if result.is_file: + if result.file_data: + # Try to decode as text + try: + return result.file_data.decode("utf-8") + except UnicodeDecodeError: + raise DecryptionError( + f"Content is a binary file ({result.filename or 'unnamed'}), not text" + ) + return "" + + return result.message or "" + + +def has_argon2() -> bool: + """Check if Argon2 is available.""" + return HAS_ARGON2 + + +# ============================================================================= +# CHANNEL KEY UTILITIES (exposed for convenience) +# ============================================================================= + + +def get_active_channel_key() -> str | None: + """ + Get the currently configured channel key (if any). + + Returns: + Formatted channel key string, or None if not configured + """ + from .channel import get_channel_key + + return get_channel_key() + + +def get_channel_fingerprint(key: str | None = None) -> str | None: + """ + Get a display-safe fingerprint of a channel key. 
+ + Args: + key: Channel key (if None, uses configured key) + + Returns: + Masked key like "ABCD-••••-••••-••••-••••-••••-••••-3456" or None + """ + from .channel import get_channel_fingerprint as _get_fingerprint + + return _get_fingerprint(key) diff --git a/src/soosef/stegasoo/data/__init__.py b/src/soosef/stegasoo/data/__init__.py new file mode 100644 index 0000000..8d76b1d --- /dev/null +++ b/src/soosef/stegasoo/data/__init__.py @@ -0,0 +1 @@ +# Package data directory for stegasoo diff --git a/src/soosef/stegasoo/data/bip39-words.txt b/src/soosef/stegasoo/data/bip39-words.txt new file mode 100644 index 0000000..942040e --- /dev/null +++ b/src/soosef/stegasoo/data/bip39-words.txt @@ -0,0 +1,2048 @@ +abandon +ability +able +about +above +absent +absorb +abstract +absurd +abuse +access +accident +account +accuse +achieve +acid +acoustic +acquire +across +act +action +actor +actress +actual +adapt +add +addict +address +adjust +admit +adult +advance +advice +aerobic +affair +afford +afraid +again +age +agent +agree +ahead +aim +air +airport +aisle +alarm +album +alcohol +alert +alien +all +alley +allow +almost +alone +alpha +already +also +alter +always +amateur +amazing +among +amount +amused +analyst +anchor +ancient +anger +angle +angry +animal +ankle +announce +annual +another +answer +antenna +antique +anxiety +any +apart +apology +appear +apple +approve +april +arch +arctic +area +arena +argue +arm +armed +armor +army +around +arrange +arrest +arrive +arrow +art +artefact +artist +artwork +ask +aspect +assault +asset +assist +assume +asthma +athlete +atom +attack +attend +attitude +attract +auction +audit +august +aunt +author +auto +autumn +average +avocado +avoid +awake +aware +away +awesome +awful +awkward +axis +baby +bachelor +bacon +badge +bag +balance +balcony +ball +bamboo +banana +banner +bar +barely +bargain +barrel +base +basic +basket +battle +beach +bean +beauty +because +become +beef +before +begin +behave +behind +believe +below +belt +bench 
+benefit +best +betray +better +between +beyond +bicycle +bid +bike +bind +biology +bird +birth +bitter +black +blade +blame +blanket +blast +bleak +bless +blind +blood +blossom +blouse +blue +blur +blush +board +boat +body +boil +bomb +bone +bonus +book +boost +border +boring +borrow +boss +bottom +bounce +box +boy +bracket +brain +brand +brass +brave +bread +breeze +brick +bridge +brief +bright +bring +brisk +broccoli +broken +bronze +broom +brother +brown +brush +bubble +buddy +budget +buffalo +build +bulb +bulk +bullet +bundle +bunker +burden +burger +burst +bus +business +busy +butter +buyer +buzz +cabbage +cabin +cable +cactus +cage +cake +call +calm +camera +camp +can +canal +cancel +candy +cannon +canoe +canvas +canyon +capable +capital +captain +car +carbon +card +cargo +carpet +carry +cart +case +cash +casino +castle +casual +cat +catalog +catch +category +cattle +caught +cause +caution +cave +ceiling +celery +cement +census +century +cereal +certain +chair +chalk +champion +change +chaos +chapter +charge +chase +chat +cheap +check +cheese +chef +cherry +chest +chicken +chief +child +chimney +choice +choose +chronic +chuckle +chunk +churn +cigar +cinnamon +circle +citizen +city +civil +claim +clap +clarify +claw +clay +clean +clerk +clever +click +client +cliff +climb +clinic +clip +clock +clog +close +cloth +cloud +clown +club +clump +cluster +clutch +coach +coast +coconut +code +coffee +coil +coin +collect +color +column +combine +come +comfort +comic +common +company +concert +conduct +confirm +congress +connect +consider +control +convince +cook +cool +copper +copy +coral +core +corn +correct +cost +cotton +couch +country +couple +course +cousin +cover +coyote +crack +cradle +craft +cram +crane +crash +crater +crawl +crazy +cream +credit +creek +crew +cricket +crime +crisp +critic +crop +cross +crouch +crowd +crucial +cruel +cruise +crumble +crunch +crush +cry +crystal +cube +culture +cup +cupboard +curious +current +curtain +curve +cushion +custom 
+cute +cycle +dad +damage +damp +dance +danger +daring +dash +daughter +dawn +day +deal +debate +debris +decade +december +decide +decline +decorate +decrease +deer +defense +define +defy +degree +delay +deliver +demand +demise +denial +dentist +deny +depart +depend +deposit +depth +deputy +derive +describe +desert +design +desk +despair +destroy +detail +detect +develop +device +devote +diagram +dial +diamond +diary +dice +diesel +diet +differ +digital +dignity +dilemma +dinner +dinosaur +direct +dirt +disagree +discover +disease +dish +dismiss +disorder +display +distance +divert +divide +divorce +dizzy +doctor +document +dog +doll +dolphin +domain +donate +donkey +donor +door +dose +double +dove +draft +dragon +drama +drastic +draw +dream +dress +drift +drill +drink +drip +drive +drop +drum +dry +duck +dumb +dune +during +dust +dutch +duty +dwarf +dynamic +eager +eagle +early +earn +earth +easily +east +easy +echo +ecology +economy +edge +edit +educate +effort +egg +eight +either +elbow +elder +electric +elegant +element +elephant +elevator +elite +else +embark +embody +embrace +emerge +emotion +employ +empower +empty +enable +enact +end +endless +endorse +enemy +energy +enforce +engage +engine +enhance +enjoy +enlist +enough +enrich +enroll +ensure +enter +entire +entry +envelope +episode +equal +equip +era +erase +erode +erosion +error +erupt +escape +essay +essence +estate +eternal +ethics +evidence +evil +evoke +evolve +exact +example +excess +exchange +excite +exclude +excuse +execute +exercise +exhaust +exhibit +exile +exist +exit +exotic +expand +expect +expire +explain +expose +express +extend +extra +eye +eyebrow +fabric +face +faculty +fade +faint +faith +fall +false +fame +family +famous +fan +fancy +fantasy +farm +fashion +fat +fatal +father +fatigue +fault +favorite +feature +february +federal +fee +feed +feel +female +fence +festival +fetch +fever +few +fiber +fiction +field +figure +file +film +filter +final +find +fine +finger +finish +fire +firm 
+first +fiscal +fish +fit +fitness +fix +flag +flame +flash +flat +flavor +flee +flight +flip +float +flock +floor +flower +fluid +flush +fly +foam +focus +fog +foil +fold +follow +food +foot +force +forest +forget +fork +fortune +forum +forward +fossil +foster +found +fox +fragile +frame +frequent +fresh +friend +fringe +frog +front +frost +frown +frozen +fruit +fuel +fun +funny +furnace +fury +future +gadget +gain +galaxy +gallery +game +gap +garage +garbage +garden +garlic +garment +gas +gasp +gate +gather +gauge +gaze +general +genius +genre +gentle +genuine +gesture +ghost +giant +gift +giggle +ginger +giraffe +girl +give +glad +glance +glare +glass +glide +glimpse +globe +gloom +glory +glove +glow +glue +goat +goddess +gold +good +goose +gorilla +gospel +gossip +govern +gown +grab +grace +grain +grant +grape +grass +gravity +great +green +grid +grief +grit +grocery +group +grow +grunt +guard +guess +guide +guilt +guitar +gun +gym +habit +hair +half +hammer +hamster +hand +happy +harbor +hard +harsh +harvest +hat +have +hawk +hazard +head +health +heart +heavy +hedgehog +height +hello +helmet +help +hen +hero +hidden +high +hill +hint +hip +hire +history +hobby +hockey +hold +hole +holiday +hollow +home +honey +hood +hope +horn +horror +horse +hospital +host +hotel +hour +hover +hub +huge +human +humble +humor +hundred +hungry +hunt +hurdle +hurry +hurt +husband +hybrid +ice +icon +idea +identify +idle +ignore +ill +illegal +illness +image +imitate +immense +immune +impact +impose +improve +impulse +inch +include +income +increase +index +indicate +indoor +industry +infant +inflict +inform +inhale +inherit +initial +inject +injury +inmate +inner +innocent +input +inquiry +insane +insect +inside +inspire +install +intact +interest +into +invest +invite +involve +iron +island +isolate +issue +item +ivory +jacket +jaguar +jar +jazz +jealous +jeans +jelly +jewel +job +join +joke +journey +joy +judge +juice +jump +jungle +junior +junk +just +kangaroo +keen +keep 
+ketchup +key +kick +kid +kidney +kind +kingdom +kiss +kit +kitchen +kite +kitten +kiwi +knee +knife +knock +know +lab +label +labor +ladder +lady +lake +lamp +language +laptop +large +later +latin +laugh +laundry +lava +law +lawn +lawsuit +layer +lazy +leader +leaf +learn +leave +lecture +left +leg +legal +legend +leisure +lemon +lend +length +lens +leopard +lesson +letter +level +liar +liberty +library +license +life +lift +light +like +limb +limit +link +lion +liquid +list +little +live +lizard +load +loan +lobster +local +lock +logic +lonely +long +loop +lottery +loud +lounge +love +loyal +lucky +luggage +lumber +lunar +lunch +luxury +lyrics +machine +mad +magic +magnet +maid +mail +main +major +make +mammal +man +manage +mandate +mango +mansion +manual +maple +marble +march +margin +marine +market +marriage +mask +mass +master +match +material +math +matrix +matter +maximum +maze +meadow +mean +measure +meat +mechanic +medal +media +melody +melt +member +memory +mention +menu +mercy +merge +merit +merry +mesh +message +metal +method +middle +midnight +milk +million +mimic +mind +minimum +minor +minute +miracle +mirror +misery +miss +mistake +mix +mixed +mixture +mobile +model +modify +mom +moment +monitor +monkey +monster +month +moon +moral +more +morning +mosquito +mother +motion +motor +mountain +mouse +move +movie +much +muffin +mule +multiply +muscle +museum +mushroom +music +must +mutual +myself +mystery +myth +naive +name +napkin +narrow +nasty +nation +nature +near +neck +need +negative +neglect +neither +nephew +nerve +nest +net +network +neutral +never +news +next +nice +night +noble +noise +nominee +noodle +normal +north +nose +notable +note +nothing +notice +novel +now +nuclear +number +nurse +nut +oak +obey +object +oblige +obscure +observe +obtain +obvious +occur +ocean +october +odor +off +offer +office +often +oil +okay +old +olive +olympic +omit +once +one +onion +online +only +open +opera +opinion +oppose +option +orange +orbit +orchard 
+order +ordinary +organ +orient +original +orphan +ostrich +other +outdoor +outer +output +outside +oval +oven +over +own +owner +oxygen +oyster +ozone +pact +paddle +page +pair +palace +palm +panda +panel +panic +panther +paper +parade +parent +park +parrot +party +pass +patch +path +patient +patrol +pattern +pause +pave +payment +peace +peanut +pear +peasant +pelican +pen +penalty +pencil +people +pepper +perfect +permit +person +pet +phone +photo +phrase +physical +piano +picnic +picture +piece +pig +pigeon +pill +pilot +pink +pioneer +pipe +pistol +pitch +pizza +place +planet +plastic +plate +play +please +pledge +pluck +plug +plunge +poem +poet +point +polar +pole +police +pond +pony +pool +popular +portion +position +possible +post +potato +pottery +poverty +powder +power +practice +praise +predict +prefer +prepare +present +pretty +prevent +price +pride +primary +print +priority +prison +private +prize +problem +process +produce +profit +program +project +promote +proof +property +prosper +protect +proud +provide +public +pudding +pull +pulp +pulse +pumpkin +punch +pupil +puppy +purchase +purity +purpose +purse +push +put +puzzle +pyramid +quality +quantum +quarter +question +quick +quit +quiz +quote +rabbit +raccoon +race +rack +radar +radio +rail +rain +raise +rally +ramp +ranch +random +range +rapid +rare +rate +rather +raven +raw +razor +ready +real +reason +rebel +rebuild +recall +receive +recipe +record +recycle +reduce +reflect +reform +refuse +region +regret +regular +reject +relax +release +relief +rely +remain +remember +remind +remove +render +renew +rent +reopen +repair +repeat +replace +report +require +rescue +resemble +resist +resource +response +result +retire +retreat +return +reunion +reveal +review +reward +rhythm +rib +ribbon +rice +rich +ride +ridge +rifle +right +rigid +ring +riot +ripple +risk +ritual +rival +river +road +roast +robot +robust +rocket +romance +roof +rookie +room +rose +rotate +rough +round +route +royal +rubber +rude 
+rug +rule +run +runway +rural +sad +saddle +sadness +safe +sail +salad +salmon +salon +salt +salute +same +sample +sand +satisfy +satoshi +sauce +sausage +save +say +scale +scan +scare +scatter +scene +scheme +school +science +scissors +scorpion +scout +scrap +screen +script +scrub +sea +search +season +seat +second +secret +section +security +seed +seek +segment +select +sell +seminar +senior +sense +sentence +series +service +session +settle +setup +seven +shadow +shaft +shallow +share +shed +shell +sheriff +shield +shift +shine +ship +shiver +shock +shoe +shoot +shop +short +shoulder +shove +shrimp +shrug +shuffle +shy +sibling +sick +side +siege +sight +sign +silent +silk +silly +silver +similar +simple +since +sing +siren +sister +situate +six +size +skate +sketch +ski +skill +skin +skirt +skull +slab +slam +sleep +slender +slice +slide +slight +slim +slogan +slot +slow +slush +small +smart +smile +smoke +smooth +snack +snake +snap +sniff +snow +soap +soccer +social +sock +soda +soft +solar +soldier +solid +solution +solve +someone +song +soon +sorry +sort +soul +sound +soup +source +south +space +spare +spatial +spawn +speak +special +speed +spell +spend +sphere +spice +spider +spike +spin +spirit +split +spoil +sponsor +spoon +sport +spot +spray +spread +spring +spy +square +squeeze +squirrel +stable +stadium +staff +stage +stairs +stamp +stand +start +state +stay +steak +steel +stem +step +stereo +stick +still +sting +stock +stomach +stone +stool +story +stove +strategy +street +strike +strong +struggle +student +stuff +stumble +style +subject +submit +subway +success +such +sudden +suffer +sugar +suggest +suit +summer +sun +sunny +sunset +super +supply +supreme +sure +surface +surge +surprise +surround +survey +suspect +sustain +swallow +swamp +swap +swarm +swear +sweet +swift +swim +swing +switch +sword +symbol +symptom +syrup +system +table +tackle +tag +tail +talent +talk +tank +tape +target +task +taste +tattoo +taxi +teach +team +tell +ten +tenant 
+tennis +tent +term +test +text +thank +that +theme +then +theory +there +they +thing +this +thought +three +thrive +throw +thumb +thunder +ticket +tide +tiger +tilt +timber +time +tiny +tip +tired +tissue +title +toast +tobacco +today +toddler +toe +together +toilet +token +tomato +tomorrow +tone +tongue +tonight +tool +tooth +top +topic +topple +torch +tornado +tortoise +toss +total +tourist +toward +tower +town +toy +track +trade +traffic +tragic +train +transfer +trap +trash +travel +tray +treat +tree +trend +trial +tribe +trick +trigger +trim +trip +trophy +trouble +truck +true +truly +trumpet +trust +truth +try +tube +tuition +tumble +tuna +tunnel +turkey +turn +turtle +twelve +twenty +twice +twin +twist +two +type +typical +ugly +umbrella +unable +unaware +uncle +uncover +under +undo +unfair +unfold +unhappy +uniform +unique +unit +universe +unknown +unlock +until +unusual +unveil +update +upgrade +uphold +upon +upper +upset +urban +urge +usage +use +used +useful +useless +usual +utility +vacant +vacuum +vague +valid +valley +valve +van +vanish +vapor +various +vast +vault +vehicle +velvet +vendor +venture +venue +verb +verify +version +very +vessel +veteran +viable +vibrant +vicious +victory +video +view +village +vintage +violin +virtual +virus +visa +visit +visual +vital +vivid +vocal +voice +void +volcano +volume +vote +voyage +wage +wagon +wait +walk +wall +walnut +want +warfare +warm +warrior +wash +wasp +waste +water +wave +way +wealth +weapon +wear +weasel +weather +web +wedding +weekend +weird +welcome +west +wet +whale +what +wheat +wheel +when +where +whip +whisper +wide +width +wife +wild +will +win +window +wine +wing +wink +winner +winter +wire +wisdom +wise +wish +witness +wolf +woman +wonder +wood +wool +word +work +world +worry +worth +wrap +wreck +wrestle +wrist +write +wrong +yard +year +yellow +you +young +youth +zebra +zero +zone +zoo diff --git a/src/soosef/stegasoo/dct_steganography.py b/src/soosef/stegasoo/dct_steganography.py new 
file mode 100644 index 0000000..afba8e8 --- /dev/null +++ b/src/soosef/stegasoo/dct_steganography.py @@ -0,0 +1,1856 @@ +""" +DCT Domain Steganography Module (v4.1.0) + +The fancy pants mode. Instead of hiding bits in pixel values (LSB mode), +we hide them in the *frequency domain* - specifically in the Discrete Cosine +Transform coefficients that JPEG compression uses internally. + +Why is this cool? +- Survives some image processing that would destroy LSB data +- Works with JPEG without the usual "save destroys everything" problem +- Uses the same math that JPEG itself uses - we're hiding in plain sight + +Two approaches depending on what you want: +1. PNG output: We do our own DCT math via scipy (works on any image) +2. JPEG output: We use jpeglib to directly modify the coefficients (chef's kiss) + +v4.1.0 - The "please stop corrupting my data" release: +- Reed-Solomon error correction (can fix up to 16 byte errors per chunk) +- Majority voting on headers (store 3 copies, take the winner) +- Because some image regions are just... problematic + +v3.2.0-patch2 - The "scipy why are you like this" release: +- Chunked processing because scipy's FFT was corrupting memory on big images +- Process blocks one at a time with fresh arrays +- Yes, it's slower. No, I don't care. Correctness > speed. 
+ +Requires: scipy (PNG mode), optionally jpeglib (JPEG mode), reedsolo (error correction) +""" + +import gc +import hashlib +import io +import struct +from dataclasses import dataclass +from enum import Enum + +import numpy as np +from PIL import Image, ImageOps + +# Check for scipy availability (for PNG/DCT mode) +# Prefer scipy.fft (newer, more stable) over scipy.fftpack +try: + from scipy.fft import dct, dctn, idct, idctn + + HAS_SCIPY = True +except ImportError: + try: + from scipy.fftpack import dct, dctn, idct, idctn + + HAS_SCIPY = True + except ImportError: + HAS_SCIPY = False + dct = None + idct = None + dctn = None + idctn = None + +# Check for jpeglib availability (for proper JPEG mode) +try: + import jpeglib + + HAS_JPEGLIB = True +except ImportError: + HAS_JPEGLIB = False + jpeglib = None + +# Import custom exceptions +from .exceptions import InvalidMagicBytesError +from .exceptions import ReedSolomonError as StegasooRSError + +# Progress reporting interval (write every N blocks) +PROGRESS_INTERVAL = 50 + + +def _write_progress(progress_file: str | None, current: int, total: int, phase: str = "embedding"): + """Write progress to file for frontend polling.""" + if progress_file is None: + return + try: + import json + + with open(progress_file, "w") as f: + json.dump( + { + "current": current, + "total": total, + "percent": round((current / total) * 100, 1) if total > 0 else 0, + "phase": phase, + }, + f, + ) + except Exception: + pass # Don't let progress writing break encoding + + +# ============================================================================ +# CONSTANTS +# ============================================================================ + +# JPEG uses 8x8 blocks for DCT - this is baked into the standard +BLOCK_SIZE = 8 + +# The zig-zag order of DCT coefficients. JPEG stores them this way because +# the human eye is more sensitive to low frequencies (top-left corner) +# than high frequencies (bottom-right). 
After quantization, most high-freq +# coefficients become zero, so zig-zag gives great compression. +# +# Visual of an 8x8 DCT block with zig-zag numbering: +# +# DC 1 5 6 14 15 27 28 <- Low frequency (smooth gradients) +# 2 4 7 13 16 26 29 42 +# 3 8 12 17 25 30 41 43 +# 9 11 18 24 31 40 44 53 +# 10 19 23 32 39 45 52 54 +# 20 22 33 38 46 51 55 60 +# 21 34 37 47 50 56 59 61 +# 35 36 48 49 57 58 62 63 <- High frequency (fine detail/noise) +# +# Position (0,0) is the DC coefficient - the average brightness of the block. +# We NEVER touch DC because changing it causes visible brightness shifts. +EMBED_POSITIONS = [ + (0, 1), # 1st AC coefficient + (1, 0), # 2nd AC coefficient + (2, 0), # ... and so on in zig-zag order + (1, 1), + (0, 2), + (0, 3), + (1, 2), + (2, 1), + (3, 0), + (4, 0), + (3, 1), + (2, 2), + (1, 3), + (0, 4), + (0, 5), + (1, 4), + (2, 3), + (3, 2), + (4, 1), + (5, 0), + (5, 1), + (4, 2), + (3, 3), + (2, 4), + (1, 5), + (0, 6), + (0, 7), + (1, 6), + (2, 5), + (3, 4), + (4, 3), + (5, 2), + (6, 1), + (7, 0), +] + +# We use positions 4-20 (mid-frequency range). Here's the reasoning: +# - Positions 0-3: Too low frequency, changes are visible as color shifts +# - Positions 4-20: Sweet spot - carries enough energy to survive, not visible +# - Positions 21+: High frequency, often quantized to zero, unreliable +DEFAULT_EMBED_POSITIONS = EMBED_POSITIONS[4:20] + +# Quantization step for QIM (Quantization Index Modulation). +# This is how we actually embed bits: we round the coefficient to a grid +# and then nudge it based on whether we want a 0 or 1. +# Bigger step = more robust to noise, but more visible. 25 is a good balance. 
+QUANT_STEP = 25 + +# Magic bytes so we can identify our own images +DCT_MAGIC = b"DCTS" # scipy DCT mode marker +JPEGLIB_MAGIC = b"JPGS" # jpeglib native JPEG mode marker +HEADER_SIZE = 10 # Magic (4) + version (1) + flags (1) + length (4) + +OUTPUT_FORMAT_PNG = "png" +OUTPUT_FORMAT_JPEG = "jpeg" +JPEG_OUTPUT_QUALITY = 95 # High quality but not 100 (100 causes issues, see below) + +# For jpeglib mode: we only embed in coefficients with magnitude >= 2 +# Coefficients of 0 or 1 are usually quantized noise - unreliable +JPEGLIB_MIN_COEF_MAGNITUDE = 2 + +# We embed in the Y (luminance) channel only - it has the most capacity +# Cb/Cr are often subsampled 4:2:0 anyway +JPEGLIB_EMBED_CHANNEL = 0 + +# Header flags +FLAG_COLOR_MODE = 0x01 # Set if we preserved color (YCbCr mode) +FLAG_RS_PROTECTED = 0x02 # Set if Reed-Solomon protected (v4.1.0+) + +# Reed-Solomon settings - the "please don't lose my data" system +# 32 parity symbols per chunk means we can correct up to 16 byte errors +# Math: RS(255, 223) where 255-223=32 parity bytes, corrects floor(32/2)=16 +RS_NSYM = 32 + +# We store the payload length 3 times and take majority vote +# Because if the length is wrong, everything is wrong +RS_LENGTH_HEADER_SIZE = 8 # 4 bytes raw length + 4 bytes RS-encoded length +RS_LENGTH_COPIES = 3 # Store 3 copies, need 2 to agree +RS_LENGTH_PREFIX_SIZE = RS_LENGTH_HEADER_SIZE * RS_LENGTH_COPIES # 24 bytes total + +# Chunking for large images - scipy's FFT gets memory-corrupty on huge arrays +MAX_CHUNK_HEIGHT = 512 # Process in strips to keep memory sane + +# Fun bug: JPEGs saved with quality=100 have quantization tables full of 1s +# This makes the DCT coefficients HUGE and jpeglib crashes spectacularly +# Solution: detect and re-save at quality 95 first +JPEGLIB_NORMALIZE_QUALITY = 95 +JPEGLIB_MAX_QUANT_VALUE_THRESHOLD = 1 # All 1s in quant table = bad news + + +# ============================================================================ +# DATA CLASSES +# 
============================================================================ + + +class DCTOutputFormat(Enum): + PNG = "png" + JPEG = "jpeg" + + +@dataclass +class DCTEmbedStats: + blocks_used: int + blocks_available: int + bits_embedded: int + capacity_bits: int + usage_percent: float + image_width: int + image_height: int + output_format: str + jpeg_native: bool = False + color_mode: str = "grayscale" + + +@dataclass +class DCTCapacityInfo: + width: int + height: int + blocks_x: int + blocks_y: int + total_blocks: int + bits_per_block: int + total_capacity_bits: int + total_capacity_bytes: int + usable_capacity_bytes: int + + +# ============================================================================ +# AVAILABILITY CHECKS +# ============================================================================ + + +def _check_scipy(): + if not HAS_SCIPY: + raise ImportError("DCT steganography requires scipy. Install with: pip install scipy") + + +def has_dct_support() -> bool: + return HAS_SCIPY + + +def has_jpeglib_support() -> bool: + return HAS_JPEGLIB + + +# ============================================================================ +# REED-SOLOMON ERROR CORRECTION +# ============================================================================ +# +# Why do we need this? DCT embedding isn't perfect. Some image regions are +# problematic - flat areas, high compression, edge cases. Bits can flip. +# +# Reed-Solomon is the same error correction used in CDs, DVDs, QR codes, and +# deep space communications. If it's good enough for Voyager, it's good enough +# for hiding cat pictures in other cat pictures. +# +# How it works (simplified): +# 1. Take your data bytes +# 2. Add extra "parity" bytes calculated from the data +# 3. If some bytes get corrupted, the math lets you reconstruct them +# 4. RS(255, 223) means: 255 byte blocks, 223 data + 32 parity +# 5. Can correct up to 16 corrupted bytes per block (floor(32/2)) +# +# The tradeoff: ~14% overhead (32/223). 
def _rs_decode(data: bytes) -> bytes:
    """
    Strip Reed-Solomon parity from ``data``, repairing errors on the way.

    Args:
        data: RS-encoded bytes as produced by ``_rs_encode``.

    Returns:
        The decoded message bytes. When reedsolo is not installed the
        input is returned untouched (it was never encoded either).

    Raises:
        StegasooRSError: The codec reported more corruption than it can
            correct.
    """
    if not HAS_REEDSOLO:
        return data

    codec = RSCodec(RS_NSYM)
    try:
        # decode() yields (message, message+ecc, errata positions); only the
        # repaired message is needed here.
        message, _message_with_ecc, _errata_positions = codec.decode(data)
    except ReedSolomonError as exc:
        # Too many errors - the image got mangled beyond repair
        raise StegasooRSError(f"Image corrupted beyond repair: {exc}") from exc
    return bytes(message)
def _safe_dct2(block: np.ndarray) -> np.ndarray:
    """
    Forward 2D DCT of one BLOCK_SIZE x BLOCK_SIZE block (spatial -> frequency).

    The 2D transform is separable, so it is computed as a 1D orthonormal
    DCT over every column followed by a 1D orthonormal DCT over every row
    of that intermediate result. Each 1D slice is copied into a fresh
    float64 array before being handed to scipy - deliberately paranoid,
    to keep scipy away from shared buffers.

    Args:
        block: Pixel block; it is copied, never modified.

    Returns:
        A new float64 array of DCT coefficients.
    """
    source = np.array(block, dtype=np.float64, copy=True, order="C")

    # Pass 1: transform down each column
    after_columns = np.zeros_like(source, dtype=np.float64, order="C")
    for idx in range(BLOCK_SIZE):
        column = np.array(source[:, idx], dtype=np.float64, copy=True)
        after_columns[:, idx] = dct(column, norm="ortho")

    # Pass 2: transform along each row of the column-transformed data
    coefficients = np.zeros_like(after_columns, dtype=np.float64, order="C")
    for idx in range(BLOCK_SIZE):
        line = np.array(after_columns[idx, :], dtype=np.float64, copy=True)
        coefficients[idx, :] = dct(line, norm="ortho")

    return coefficients
def _apply_exif_orientation(image_data: bytes) -> bytes:
    """
    Apply EXIF orientation to image and return corrected bytes.

    Portrait photos from cameras often have EXIF orientation metadata that
    tells viewers to rotate the image for display, while the raw pixel data
    is stored in landscape orientation. This function bakes that rotation
    into the pixel data so the output matches what users expect.

    Args:
        image_data: Encoded image bytes (any format Pillow can open).

    Returns:
        ``image_data`` itself, untouched, when the image carries no EXIF
        orientation that requires a transform. Otherwise freshly encoded
        bytes: JPEG (quality 95) when the input was JPEG, PNG for every
        other format.
    """
    exif_orientation_tag = 0x0112  # standard EXIF "Orientation" tag id

    with Image.open(io.BytesIO(image_data)) as img:
        original_format = img.format or "JPEG"

        # ImageOps.exif_transpose() hands back a *copy* even when nothing
        # needs rotating, so an identity check against ``img`` can never
        # detect the no-op case. Inspect the tag directly instead; values
        # 2..8 are the only ones that imply a flip/rotation. This avoids a
        # pointless (and, for JPEG, lossy) re-encode of every carrier.
        orientation = img.getexif().get(exif_orientation_tag, 1)
        if orientation not in range(2, 9):
            return image_data

        # Rotates/flips pixels to match the tag and drops the tag itself
        corrected = ImageOps.exif_transpose(img)

    try:
        output = io.BytesIO()
        if original_format == "JPEG":
            # JPEG has no alpha channel or palette support
            if corrected.mode in ("RGBA", "P"):
                converted = corrected.convert("RGB")
                corrected.close()
                corrected = converted
            corrected.save(output, format="JPEG", quality=95)
        else:
            corrected.save(output, format="PNG")
    finally:
        corrected.close()

    return output.getvalue()
def _embed_bit_in_coeff(coef: float, bit: int, quant_step: int = QUANT_STEP) -> float:
    """
    Hide one bit in a DCT coefficient with QIM (Quantization Index Modulation).

    The coefficient is snapped to a grid of multiples of ``quant_step``;
    the parity of the grid index carries the bit (even = 0, odd = 1). If
    the nearest grid point has the wrong parity we move one grid step,
    upwards when the coefficient sits at or above that grid point and
    downwards otherwise.

    Example (quant_step=25):
    - coef=73, bit=1 -> nearest index 3 (75) is already odd  -> 75.0
    - coef=73, bit=0 -> index 3 is odd, 73 < 75 so step down -> 50.0

    Args:
        coef: Original coefficient value.
        bit: Bit to embed (0 or 1).
        quant_step: Grid spacing.

    Returns:
        The coefficient moved onto a grid point of the requested parity.
    """
    grid_index = round(coef / quant_step)
    parity = grid_index % 2

    # Only an even index that must become odd, or an odd index that must
    # become even, gets nudged; everything else stays on its nearest point.
    if (parity, bit) in ((0, 1), (1, 0)):
        step = 1 if coef >= grid_index * quant_step else -1
        grid_index += step

    return float(grid_index * quant_step)
+ """ + quantized = round(coef / quant_step) + return int(quantized % 2) + + +def _generate_block_order(num_blocks: int, seed: bytes) -> list: + """ + Generate a pseudo-random order for processing blocks. + + This is crucial for security - if we just went left-to-right, top-to-bottom, + anyone could find the message by checking blocks in order. Instead, we + use a keyed shuffle so only someone with the same seed can find the data. + + The seed comes from the crypto layer (derived from passphrase + photo + pin), + so the block order is effectively part of the encryption. + """ + # Use SHA-256 to expand the seed into randomness + hash_bytes = hashlib.sha256(seed).digest() + # Seed numpy's RNG (we use RandomState for reproducibility across versions) + rng = np.random.RandomState(int.from_bytes(hash_bytes[:4], "big")) + # Fisher-Yates shuffle + order = list(range(num_blocks)) + rng.shuffle(order) + return order + + +def _save_stego_image(image: np.ndarray, output_format: str = OUTPUT_FORMAT_PNG) -> bytes: + clipped = np.clip(image, 0, 255).astype(np.uint8) + img = Image.fromarray(clipped, mode="L") + buffer = io.BytesIO() + if output_format == OUTPUT_FORMAT_JPEG: + img.save(buffer, format="JPEG", quality=JPEG_OUTPUT_QUALITY, subsampling=0, optimize=True) + else: + img.save(buffer, format="PNG", optimize=True) + return buffer.getvalue() + + +def _save_color_image(rgb_array: np.ndarray, output_format: str = OUTPUT_FORMAT_PNG) -> bytes: + clipped = np.clip(rgb_array, 0, 255).astype(np.uint8) + img = Image.fromarray(clipped, mode="RGB") + buffer = io.BytesIO() + if output_format == OUTPUT_FORMAT_JPEG: + img.save(buffer, format="JPEG", quality=JPEG_OUTPUT_QUALITY, subsampling=0, optimize=True) + else: + img.save(buffer, format="PNG", optimize=True) + return buffer.getvalue() + + +def _rgb_to_ycbcr(rgb: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Convert RGB to YCbCr color space. + + YCbCr separates brightness (Y) from color (Cb=blue-ish, Cr=red-ish). 
+ This is what JPEG uses internally, and it's great for us because: + - Human eyes are WAY more sensitive to brightness than color + - We can hide data in Y without it being as visible + - Cb/Cr are often subsampled (4:2:0) so Y has more capacity anyway + + The coefficients here are from ITU-R BT.601 - the standard for video. + + Uses float32 to reduce memory usage (~50% savings vs float64). + """ + # Use float32 - sufficient precision for 8-bit images, halves memory + R = rgb[:, :, 0].astype(np.float32) + G = rgb[:, :, 1].astype(np.float32) + B = rgb[:, :, 2].astype(np.float32) + + # Y = luminance (brightness). Green contributes most because eyes are most sensitive to it. + Y = np.array(0.299 * R + 0.587 * G + 0.114 * B, dtype=np.float32, copy=True, order="C") + # Cb = blue-difference chroma (centered at 128) + Cb = np.array( + 128 - 0.168736 * R - 0.331264 * G + 0.5 * B, dtype=np.float32, copy=True, order="C" + ) + # Cr = red-difference chroma (centered at 128) + Cr = np.array( + 128 + 0.5 * R - 0.418688 * G - 0.081312 * B, dtype=np.float32, copy=True, order="C" + ) + + return Y, Cb, Cr + + +def _ycbcr_to_rgb(Y: np.ndarray, Cb: np.ndarray, Cr: np.ndarray) -> np.ndarray: + """ + Convert YCbCr back to RGB. + + After embedding in the Y channel, we need to reconstruct RGB for display. + The Cb/Cr channels are unchanged - we only touched luminance. 
+ """ + # Use float32 for memory efficiency + R = Y + 1.402 * (Cr - 128) + G = Y - 0.344136 * (Cb - 128) - 0.714136 * (Cr - 128) + B = Y + 1.772 * (Cb - 128) + + rgb = np.zeros((Y.shape[0], Y.shape[1], 3), dtype=np.float32, order="C") + rgb[:, :, 0] = R + rgb[:, :, 1] = G + rgb[:, :, 2] = B + return rgb + + +def _create_header(data_length: int, flags: int = 0) -> bytes: + return struct.pack(">4sBBI", DCT_MAGIC, 1, flags, data_length) + + +def _parse_header(header_bits: list) -> tuple[int, int, int]: + if len(header_bits) < HEADER_SIZE * 8: + raise ValueError("Insufficient header data") + + header_bytes = bytes( + [ + sum(header_bits[i * 8 : (i + 1) * 8][j] << (7 - j) for j in range(8)) + for i in range(HEADER_SIZE) + ] + ) + + magic, version, flags, length = struct.unpack(">4sBBI", header_bytes) + + if magic != DCT_MAGIC: + raise InvalidMagicBytesError("Not a Stegasoo image or wrong mode (try LSB instead of DCT)") + + return version, flags, length + + +# ============================================================================ +# JPEGLIB HELPERS +# ============================================================================ + + +def _jpeglib_bytes_to_file(data: bytes, suffix: str = ".jpg") -> str: + import os + import tempfile + + fd, path = tempfile.mkstemp(suffix=suffix) + try: + os.write(fd, data) + finally: + os.close(fd) + return path + + +def _jpeglib_get_usable_positions(coef_array: np.ndarray) -> list: + positions = [] + h, w = coef_array.shape + for row in range(h): + for col in range(w): + if (row % BLOCK_SIZE == 0) and (col % BLOCK_SIZE == 0): + continue + if abs(coef_array[row, col]) >= JPEGLIB_MIN_COEF_MAGNITUDE: + positions.append((row, col)) + return positions + + +def _jpeglib_generate_order(num_positions: int, seed: bytes) -> list: + hash_bytes = hashlib.sha256(seed + b"jpeg_coef_order").digest() + rng = np.random.RandomState(int.from_bytes(hash_bytes[:4], "big")) + order = list(range(num_positions)) + rng.shuffle(order) + return order + + +def 
def calculate_dct_capacity(image_data: bytes) -> DCTCapacityInfo:
    """
    Calculate DCT embedding capacity of an image.

    Only the image dimensions are read; no DCT work is performed. Partial
    blocks at the right/bottom edge are not counted.

    Args:
        image_data: Encoded carrier image bytes.

    Returns:
        DCTCapacityInfo with raw capacity (bits/bytes) and
        ``usable_capacity_bytes``, the largest payload that still fits
        once the header and, when reedsolo is available, the
        Reed-Solomon overhead are subtracted.

    Raises:
        ImportError: scipy is not installed.
    """
    _check_scipy()

    # Just get dimensions; the context manager closes the file even on error
    with Image.open(io.BytesIO(image_data)) as img:
        width, height = img.size

    blocks_x = width // BLOCK_SIZE
    blocks_y = height // BLOCK_SIZE
    total_blocks = blocks_x * blocks_y

    bits_per_block = len(DEFAULT_EMBED_POSITIONS)
    total_bits = total_blocks * bits_per_block
    total_bytes = total_bits // 8

    if HAS_REEDSOLO:
        # Embedded layout: [length prefix x3] + RS(header + data).
        # RS parity is NOT a fixed cost: every chunk of up to
        # (255 - RS_NSYM) message bytes gains RS_NSYM parity bytes, so the
        # overhead grows with the payload. Treating it as a constant
        # over-reports capacity for anything beyond one chunk, and the
        # embedder would then run out of blocks.
        rs_codeword_size = 255
        rs_chunk_data = rs_codeword_size - RS_NSYM
        safety_margin = 20  # slack carried over from the previous estimate

        rs_budget = max(0, total_bytes - RS_LENGTH_PREFIX_SIZE - safety_margin)
        full_chunks, tail = divmod(rs_budget, rs_codeword_size)
        # A trailing partial chunk still pays the full RS_NSYM parity cost
        max_raw_payload = full_chunks * rs_chunk_data + max(0, tail - RS_NSYM)
        usable_bytes = max(0, max_raw_payload - HEADER_SIZE)
    else:
        usable_bytes = max(0, total_bytes - HEADER_SIZE)

    return DCTCapacityInfo(
        width=width,
        height=height,
        blocks_x=blocks_x,
        blocks_y=blocks_y,
        total_blocks=total_blocks,
        bits_per_block=bits_per_block,
        total_capacity_bits=total_bits,
        total_capacity_bytes=total_bytes,
        usable_capacity_bytes=usable_bytes,
    )
+ """ + if output_format not in (OUTPUT_FORMAT_PNG, OUTPUT_FORMAT_JPEG): + raise ValueError(f"Invalid output format: {output_format}") + + if color_mode not in ("color", "grayscale"): + color_mode = "color" + + qs = quant_step if quant_step is not None else QUANT_STEP + + # Apply EXIF orientation to carrier image before embedding + carrier_image = _apply_exif_orientation(carrier_image) + + # Resize if max_dimension specified (for platform presets) + if max_dimension is not None: + img_check = Image.open(io.BytesIO(carrier_image)) + w, h = img_check.size + if max(w, h) > max_dimension: + scale = max_dimension / max(w, h) + new_size = (int(w * scale), int(h * scale)) + img_check = img_check.resize(new_size, Image.LANCZOS) + buf = io.BytesIO() + img_check.save(buf, format="PNG") + carrier_image = buf.getvalue() + img_check.close() + + if output_format == OUTPUT_FORMAT_JPEG and HAS_JPEGLIB: + return _embed_jpeglib(data, carrier_image, seed, color_mode, progress_file) + + _check_scipy() + return _embed_scipy_dct_safe( + data, carrier_image, seed, output_format, color_mode, progress_file, quant_step=qs + ) + + +def _embed_scipy_dct_safe( + data: bytes, + carrier_image: bytes, + seed: bytes, + output_format: str, + color_mode: str = "color", + progress_file: str | None = None, + quant_step: int = QUANT_STEP, +) -> tuple[bytes, DCTEmbedStats]: + """ + Embed using scipy DCT with safe memory handling. + + Uses row-by-row 1D DCT operations instead of 2D arrays to avoid + scipy memory corruption issues with large images. 
+ """ + capacity_info = calculate_dct_capacity(carrier_image) + + if len(data) > capacity_info.usable_capacity_bytes: + raise ValueError( + f"Data too large ({len(data)} bytes) for carrier " + f"(capacity: {capacity_info.usable_capacity_bytes} bytes)" + ) + + # Load image + img = Image.open(io.BytesIO(carrier_image)) + width, height = img.size + + flags = FLAG_COLOR_MODE if color_mode == "color" else 0 + + # Build raw payload (header + data) + header = _create_header(len(data), flags) + raw_payload = header + data + + # Apply Reed-Solomon error correction to entire payload if available + if HAS_REEDSOLO: + rs_payload = _rs_encode(raw_payload) + # Format: [length_header x 3 for majority voting] + [RS-encoded payload] + # Each length_header is 8 bytes: 4 for raw_payload_length + 4 for rs_payload_length + length_header = struct.pack(">II", len(raw_payload), len(rs_payload)) + length_prefix = length_header * RS_LENGTH_COPIES # Repeat 3 times + payload = length_prefix + rs_payload + else: + payload = raw_payload + bits = [] + for byte in payload: + for i in range(7, -1, -1): + bits.append((byte >> i) & 1) + + # Generate block order + num_blocks = capacity_info.total_blocks + block_order = _generate_block_order(num_blocks, seed) + blocks_x = width // BLOCK_SIZE + + if color_mode == "color" and img.mode in ("RGB", "RGBA"): + if img.mode == "RGBA": + img = img.convert("RGB") + + # Process color image (float32 for memory efficiency) + rgb = np.array(img, dtype=np.float32, copy=True, order="C") + img.close() + + Y, Cb, Cr = _rgb_to_ycbcr(rgb) + del rgb + gc.collect() + + Y_padded, original_size = _pad_to_blocks(Y) + del Y + gc.collect() + + # Embed in Y channel + Y_embedded = _embed_in_channel_safe( + Y_padded, bits, block_order, blocks_x, progress_file, quant_step=quant_step + ) + del Y_padded + gc.collect() + + Y_result = _unpad_image(Y_embedded, original_size) + del Y_embedded + gc.collect() + + result_rgb = _ycbcr_to_rgb(Y_result, Cb, Cr) + del Y_result, Cb, Cr + 
def _embed_in_channel_safe(
    channel: np.ndarray,
    bits: list,
    block_order: list,
    blocks_x: int,
    progress_file: str | None = None,
    quant_step: int = QUANT_STEP,
) -> np.ndarray:
    """
    Embed bits into one image channel using batched block DCT and QIM.

    Blocks are visited in ``block_order``; each block carries up to
    ``len(DEFAULT_EMBED_POSITIONS)`` bits. Blocks are transformed in
    batches with ``dctn``/``idctn`` rather than one at a time.

    Args:
        channel: 2D channel, already padded to a multiple of BLOCK_SIZE.
            It is copied; the input array is not modified.
        bits: Sequence of 0/1 values to embed.
        block_order: Block indices (row-major numbering) in visiting order.
        blocks_x: Number of blocks per row, used to map a block index to
            its pixel offset.
        progress_file: Optional path for progress snapshots.
        quant_step: QIM grid spacing, applied to every embedded bit.

    Returns:
        A new float32 array containing the channel with the bits embedded.

    Raises:
        ValueError: ``bits`` needs more blocks than ``block_order`` offers.
            The payload is never silently truncated.
    """
    # Create result with explicit new memory (float32 for memory efficiency)
    result = np.array(channel, dtype=np.float32, copy=True, order="C")

    # Pre-compute embed positions as numpy indices
    embed_rows = np.array([pos[0] for pos in DEFAULT_EMBED_POSITIONS])
    embed_cols = np.array([pos[1] for pos in DEFAULT_EMBED_POSITIONS])
    bits_per_block = len(DEFAULT_EMBED_POSITIONS)

    total_bits = len(bits)
    blocks_needed = (total_bits + bits_per_block - 1) // bits_per_block
    if blocks_needed > len(block_order):
        # Embedding only a prefix would produce an image that cannot be
        # decoded, so fail loudly instead.
        raise ValueError(
            f"Payload needs {blocks_needed} blocks but carrier only has {len(block_order)}"
        )
    blocks_to_process = blocks_needed

    # Initial progress write - signals prep is done, embedding starting
    if progress_file:
        _write_progress(progress_file, 5, 100, "embedding")

    BATCH_SIZE = 500
    bit_idx = 0
    block_idx = 0

    while block_idx < blocks_to_process and bit_idx < total_bits:
        batch_end = min(block_idx + BATCH_SIZE, blocks_to_process)
        batch_order = block_order[block_idx:batch_end]
        batch_count = len(batch_order)

        # Gather this batch's blocks into one (batch, 8, 8) array
        blocks = np.zeros((batch_count, BLOCK_SIZE, BLOCK_SIZE), dtype=np.float32)
        block_positions = []
        for i, block_num in enumerate(batch_order):
            by = (block_num // blocks_x) * BLOCK_SIZE
            bx = (block_num % blocks_x) * BLOCK_SIZE
            blocks[i] = result[by : by + BLOCK_SIZE, bx : bx + BLOCK_SIZE]
            block_positions.append((by, bx))

        # 2D DCT over the last two axes of every block at once
        dct_blocks = dctn(blocks, axes=(1, 2), norm="ortho")

        for i in range(batch_count):
            if bit_idx >= total_bits:
                break

            block_bits = bits[bit_idx : bit_idx + bits_per_block]
            num_bits = len(block_bits)

            # Full and partial (final) blocks share one code path, so the
            # caller's quant_step is honoured for every bit. The last block
            # simply uses the first num_bits embed positions.
            rows = embed_rows[:num_bits]
            cols = embed_cols[:num_bits]
            coeffs = dct_blocks[i, rows, cols]
            bit_array = np.asarray(block_bits)

            # QIM: snap to the grid; parity of the grid index encodes the bit
            quantized = np.round(coeffs / quant_step).astype(int)
            needs_adjust = (quantized % 2) != bit_array
            # Wrong parity -> move one step towards the side the original
            # coefficient lies on (same rule as _embed_bit_in_coeff), which
            # keeps the distortion as small as possible.
            direction = np.where(coeffs >= quantized * quant_step, 1, -1)
            quantized = np.where(needs_adjust, quantized + direction, quantized)
            dct_blocks[i, rows, cols] = quantized * quant_step

            bit_idx += num_bits

        # Back to the pixel domain for the whole batch
        modified_blocks = idctn(dct_blocks, axes=(1, 2), norm="ortho")

        for i, (by, bx) in enumerate(block_positions):
            result[by : by + BLOCK_SIZE, bx : bx + BLOCK_SIZE] = modified_blocks[i]

        # Drop batch arrays before allocating the next batch
        del blocks, dct_blocks, modified_blocks
        block_idx = batch_end

        # Report progress periodically
        if progress_file and block_idx % PROGRESS_INTERVAL == 0:
            _write_progress(progress_file, block_idx, blocks_to_process, "embedding")

    # Final progress update
    if progress_file:
        _write_progress(progress_file, blocks_to_process, blocks_to_process, "finalizing")

    # Force garbage collection
    gc.collect()

    return result
+ + Args: + image_data: Raw JPEG bytes + + Returns: + Normalized JPEG bytes (may be unchanged if already safe) + """ + img = Image.open(io.BytesIO(image_data)) + + # Only process JPEGs + if img.format != "JPEG": + img.close() + return image_data + + # Check quantization tables + needs_normalization = False + if hasattr(img, "quantization") and img.quantization: + for table_id, table in img.quantization.items(): + # If all values in any table are <= threshold, normalize + if max(table) <= JPEGLIB_MAX_QUANT_VALUE_THRESHOLD: + needs_normalization = True + break + + if not needs_normalization: + img.close() + return image_data + + # Re-save at safe quality level + if img.mode != "RGB": + img = img.convert("RGB") + + buffer = io.BytesIO() + img.save(buffer, format="JPEG", quality=JPEGLIB_NORMALIZE_QUALITY, subsampling=0) + img.close() + + return buffer.getvalue() + + +def _embed_jpeglib( + data: bytes, + carrier_image: bytes, + seed: bytes, + color_mode: str = "color", + progress_file: str | None = None, +) -> tuple[bytes, DCTEmbedStats]: + """Embed using jpeglib for proper JPEG coefficient modification.""" + import os + import tempfile + + # Normalize JPEG to avoid crashes with quality=100 images + carrier_image = _normalize_jpeg_for_jpeglib(carrier_image) + + img = Image.open(io.BytesIO(carrier_image)) + width, height = img.size + + if img.format != "JPEG": + buffer = io.BytesIO() + if img.mode != "RGB": + img = img.convert("RGB") + img.save(buffer, format="JPEG", quality=95, subsampling=0) + carrier_image = buffer.getvalue() + img.close() + + input_path = _jpeglib_bytes_to_file(carrier_image, suffix=".jpg") + output_path = tempfile.mktemp(suffix=".jpg") + + flags = FLAG_COLOR_MODE if color_mode == "color" else 0 + + try: + jpeg = jpeglib.to_jpegio(jpeglib.read_dct(input_path)) + coef_array = jpeg.coef_arrays[JPEGLIB_EMBED_CHANNEL] + + all_positions = _jpeglib_get_usable_positions(coef_array) + order = _jpeglib_generate_order(len(all_positions), seed) + + # Build raw 
payload (header + data) + header = _jpeglib_create_header(len(data), flags) + raw_payload = header + data + + # Apply Reed-Solomon error correction to entire payload if available + if HAS_REEDSOLO: + rs_payload = _rs_encode(raw_payload) + # Format: [length_header x 3 for majority voting] + [RS-encoded payload] + length_header = struct.pack(">II", len(raw_payload), len(rs_payload)) + length_prefix = length_header * RS_LENGTH_COPIES + payload = length_prefix + rs_payload + else: + payload = raw_payload + + bits = [] + for byte in payload: + for i in range(7, -1, -1): + bits.append((byte >> i) & 1) + + if len(bits) > len(all_positions): + raise ValueError( + f"Payload too large: {len(bits)} bits, " + f"only {len(all_positions)} usable coefficients" + ) + + coefs_used = 0 + total_bits = len(bits) + progress_interval = max(total_bits // 20, 100) # Report ~20 times or every 100 bits + + # Initial progress write - signals prep is done, embedding starting + if progress_file: + _write_progress(progress_file, 5, 100, "embedding") + + for bit_idx, pos_idx in enumerate(order): + if bit_idx >= len(bits): + break + + row, col = all_positions[pos_idx] + coef = coef_array[row, col] + + if (coef & 1) != bits[bit_idx]: + if coef > 0: + coef_array[row, col] = coef - 1 if (coef & 1) else coef + 1 + else: + coef_array[row, col] = coef + 1 if (coef & 1) else coef - 1 + + coefs_used += 1 + + # Report progress periodically + if progress_file and bit_idx % progress_interval == 0: + _write_progress(progress_file, bit_idx, total_bits, "embedding") + + # Final progress before save + if progress_file: + _write_progress(progress_file, total_bits, total_bits, "saving") + + jpeg.write(output_path) + + with open(output_path, "rb") as f: + stego_bytes = f.read() + + stats = DCTEmbedStats( + blocks_used=coefs_used // 63, + blocks_available=len(all_positions) // 63, + bits_embedded=len(bits), + capacity_bits=len(all_positions), + usage_percent=(len(bits) / len(all_positions)) * 100 if all_positions 
def _jpegtran_rotate(image_data: bytes, rotation: int) -> bytes:
    """
    Losslessly rotate a JPEG using jpegtran.

    This preserves DCT coefficients by rearranging blocks rather than
    re-encoding. Essential for rotating stego images without destroying
    the hidden data.

    Args:
        image_data: JPEG image bytes
        rotation: Degrees clockwise (90, 180, or 270)

    Returns:
        Rotated JPEG bytes with DCT coefficients preserved

    Raises:
        ValueError: If rotation is not 90, 180 or 270.
        RuntimeError: If jpegtran exits with a non-zero status.
        subprocess.TimeoutExpired: If jpegtran runs longer than 30 seconds.
    """
    import os
    import shutil
    import subprocess
    import tempfile

    if rotation not in (90, 180, 270):
        raise ValueError(f"Invalid rotation: {rotation}")

    # Both files live in a private directory created by mkdtemp(). This avoids
    # tempfile.mktemp(), whose returned name can be claimed by another process
    # before jpegtran creates the output file.
    workdir = tempfile.mkdtemp(prefix="jpegtran-")
    try:
        input_path = os.path.join(workdir, "input.jpg")
        output_path = os.path.join(workdir, "output.jpg")

        with open(input_path, "wb") as f:
            f.write(image_data)

        # jpegtran -rotate 90|180|270 -copy all
        # -copy all: preserve all metadata
        # NOTE: Don't use -trim as it drops edge blocks and destroys stego data
        # NOTE: Don't use -perfect as it fails on images with non-MCU-aligned edges
        result = subprocess.run(
            [
                "jpegtran",
                "-rotate",
                str(rotation),
                "-copy",
                "all",
                "-outfile",
                output_path,
                input_path,
            ],
            capture_output=True,
            timeout=30,
        )

        if result.returncode != 0:
            # errors="replace": a non-UTF-8 diagnostic must not mask the real failure
            raise RuntimeError(f"jpegtran failed: {result.stderr.decode(errors='replace')}")

        with open(output_path, "rb") as f:
            return f.read()
    finally:
        # Best-effort cleanup, matching the original's ignore-on-failure unlink
        shutil.rmtree(workdir, ignore_errors=True)
image by 90, 180, or 270 degrees and return as bytes. + + For JPEGs with lossless=True (default), uses jpegtran to preserve DCT + coefficients. This is essential for rotating stego images. + + For PNGs or when jpegtran is unavailable, uses PIL (which re-encodes + but PNGs are lossless anyway). + """ + img = Image.open(io.BytesIO(image_data)) + original_format = img.format or "PNG" + img.close() + + # Use jpegtran for lossless JPEG rotation + if lossless and original_format == "JPEG" and _jpegtran_available(): + return _jpegtran_rotate(image_data, rotation) + + # Fallback to PIL for PNGs or when jpegtran unavailable + img = Image.open(io.BytesIO(image_data)) + + # PIL rotation is counter-clockwise, we want clockwise + # 90 CW = 270 CCW, 180 = 180, 270 CW = 90 CCW + pil_rotation = {90: 270, 180: 180, 270: 90}[rotation] + rotated = img.rotate(pil_rotation, expand=True) + + output = io.BytesIO() + # Save in original format if possible, fallback to PNG + save_format = original_format if original_format in ("JPEG", "PNG") else "PNG" + if save_format == "JPEG": + rotated.save(output, format="JPEG", quality=95) + else: + rotated.save(output, format="PNG") + output.seek(0) + return output.getvalue() + + +def _quick_validate_dct_header(image_data: bytes, seed: bytes) -> bool: + """ + Quick validation that only extracts enough DCT data to check magic bytes. + Returns True if header looks valid, False otherwise. + + This is much faster than full extraction - only processes first ~8 blocks. 
def extract_from_dct(
    stego_image: bytes,
    seed: bytes,
    progress_file: str | None = None,
    quant_step: int | None = None,
) -> bytes:
    """
    Extract data from DCT stego image.

    If extraction fails with InvalidMagicBytesError, automatically tries
    90°, 180°, and 270° rotations to handle images that were rotated after
    encoding (e.g., by external tools or EXIF orientation changes).

    Uses quick header validation to skip obviously invalid rotations.

    Args:
        stego_image: Stego image bytes
        seed: Seed passed through to block/coefficient ordering
        progress_file: Optional path handed to the extractors for progress output
        quant_step: Quantization step for the scipy path; QUANT_STEP when None

    Returns:
        The extracted payload bytes.

    Raises:
        InvalidMagicBytesError: If no candidate rotation yields a valid payload.
    """
    qs = quant_step if quant_step is not None else QUANT_STEP

    def _log_rotation_success(rotation: int) -> None:
        """Emit a debug note when decoding only worked after rotating."""
        if rotation == 0:
            return
        try:
            # Import the Debug singleton explicitly: `from . import debug` can
            # resolve to the submodule, which has no print() attribute.
            from .debug import debug

            debug.print(f"DCT decode succeeded after {rotation}° rotation")
        except Exception:
            pass  # Don't let debug logging break extraction

    # Rotate once per angle and reuse the results for both the quick check
    # and the fallback below.
    candidates = [
        (rotation, stego_image if rotation == 0 else _rotate_image_bytes(stego_image, rotation))
        for rotation in (0, 90, 180, 270)
    ]

    # Phase 1: Quick validation to find candidate rotations
    valid_rotations = [
        candidate for candidate in candidates if _quick_validate_dct_header(candidate[1], seed)
    ]

    # If no rotations pass quick check, try all anyway (fallback) - quick
    # validation might have failed due to scipy vs jpeglib differences or
    # other edge cases
    if not valid_rotations:
        valid_rotations = candidates

    # Phase 2: Full extraction on valid candidates
    last_error = None
    for rotation, image_to_decode in valid_rotations:
        try:
            with Image.open(io.BytesIO(image_to_decode)) as img:
                fmt = img.format

            if fmt == "JPEG" and HAS_JPEGLIB:
                try:
                    result = _extract_jpeglib(image_to_decode, seed, progress_file)
                except (ValueError, InvalidMagicBytesError) as e:
                    # Only a magic-bytes failure is remembered for the final raise
                    if isinstance(e, InvalidMagicBytesError):
                        last_error = e
                    continue
                _log_rotation_success(rotation)
                return result

            _check_scipy()
            result = _extract_scipy_dct_safe(image_to_decode, seed, progress_file, quant_step=qs)
            _log_rotation_success(rotation)
            return result

        except InvalidMagicBytesError as e:
            last_error = e
            continue

    # All rotations failed
    raise last_error or InvalidMagicBytesError("Not a Stegasoo image (tried all rotations)")
parallelization benefit + BATCH_SIZE = 500 + all_bits = [] + + # Pre-compute embed positions as numpy indices for vectorized access + embed_rows = np.array([pos[0] for pos in DEFAULT_EMBED_POSITIONS]) + embed_cols = np.array([pos[1] for pos in DEFAULT_EMBED_POSITIONS]) + + # Progress reporting interval - report frequently for responsive UI + PROGRESS_INTERVAL = 500 # Report every N blocks (matches BATCH_SIZE) + + block_idx = 0 + while block_idx < len(block_order): + # Determine batch size (may be smaller at end) + batch_end = min(block_idx + BATCH_SIZE, len(block_order)) + batch_order = block_order[block_idx:batch_end] + batch_count = len(batch_order) + + # Extract blocks into 3D array (batch_count, 8, 8) - float32 for memory efficiency + blocks = np.zeros((batch_count, BLOCK_SIZE, BLOCK_SIZE), dtype=np.float32) + for i, block_num in enumerate(batch_order): + by = (block_num // blocks_x) * BLOCK_SIZE + bx = (block_num % blocks_x) * BLOCK_SIZE + blocks[i] = padded[by : by + BLOCK_SIZE, bx : bx + BLOCK_SIZE] + + # Vectorized 2D DCT on all blocks at once (~10-15x faster than sequential) + dct_blocks = dctn(blocks, axes=(1, 2), norm="ortho") + + # Extract bits from embed positions (vectorized) + # Shape: (batch_count, num_positions) + coeffs = dct_blocks[:, embed_rows, embed_cols] + + # Quantize and extract bits (vectorized) + quantized = np.round(coeffs / quant_step).astype(int) + bits = (quantized % 2).flatten().tolist() + all_bits.extend(bits) + + del blocks, dct_blocks, coeffs, quantized + block_idx = batch_end + + # Report progress (scale to 25-70% range, RS decode gets 70-100%) + # Starts at 25% because decode.py writes 25% before calling extraction + if progress_file and block_idx % PROGRESS_INTERVAL < BATCH_SIZE: + extract_pct = 25 + int(45 * block_idx / num_blocks) + _write_progress(progress_file, extract_pct, 100, "extracting") + + # Check if we have enough bits (early exit) + if len(all_bits) >= HEADER_SIZE * 8: + try: + _, flags, data_length = 
_parse_header(all_bits[: HEADER_SIZE * 8]) + total_needed = (HEADER_SIZE + data_length) * 8 + if len(all_bits) >= total_needed: + break + except (ValueError, InvalidMagicBytesError): + pass # RS-protected format has length prefix first, not magic bytes + + del padded + gc.collect() + + # Extraction done, RS decode starts at 70% + _write_progress(progress_file, 70, 100, "decoding") + + # Try RS-protected format first (has 24-byte length prefix: 3 copies of 8-byte header) + if HAS_REEDSOLO and len(all_bits) >= RS_LENGTH_PREFIX_SIZE * 8: + # Extract length prefix (24 bytes: 3 copies of 8-byte header for majority voting) + length_prefix_bits = all_bits[: RS_LENGTH_PREFIX_SIZE * 8] + length_prefix_bytes = bytes( + [ + sum(length_prefix_bits[i * 8 : (i + 1) * 8][j] << (7 - j) for j in range(8)) + for i in range(RS_LENGTH_PREFIX_SIZE) + ] + ) + + # Extract 3 copies and use majority voting + copies = [] + for i in range(RS_LENGTH_COPIES): + start = i * RS_LENGTH_HEADER_SIZE + end = start + RS_LENGTH_HEADER_SIZE + copies.append(length_prefix_bytes[start:end]) + + # Count occurrences of each unique copy + from collections import Counter + + counter = Counter(copies) + best_header, count = counter.most_common(1)[0] + + # Only proceed if we have at least 2 matching copies (majority) + if count >= 2: + raw_payload_length, rs_encoded_length = struct.unpack(">II", best_header) + else: + # No majority - try first copy as fallback + raw_payload_length, rs_encoded_length = struct.unpack(">II", copies[0]) + + # Sanity check: both lengths should be reasonable + max_reasonable = (len(all_bits) // 8) - RS_LENGTH_PREFIX_SIZE + if ( + raw_payload_length > 0 + and raw_payload_length <= max_reasonable + and rs_encoded_length > 0 + and rs_encoded_length <= max_reasonable + and rs_encoded_length >= raw_payload_length + ): + # This looks like RS-protected format + total_bits_needed = (RS_LENGTH_PREFIX_SIZE + rs_encoded_length) * 8 + + if len(all_bits) >= total_bits_needed: + rs_bits = 
def _extract_jpeglib(
    stego_image: bytes,
    seed: bytes,
    progress_file: str | None = None,
) -> bytes:
    """
    Extract using jpeglib for JPEG images.

    Tries the Reed-Solomon protected layout first (a length prefix stored as
    RS_LENGTH_COPIES copies for majority voting, followed by the RS-encoded
    payload) and falls back to the legacy layout (plain header + data).

    Args:
        stego_image: JPEG stego image bytes
        seed: Seed used to regenerate the coefficient visiting order
        progress_file: Optional path for progress reporting

    Returns:
        The embedded data bytes.

    Raises:
        ValueError: If the carrier holds fewer bits than the header or the
            declared data length requires (previously an uncaught IndexError).
        Whatever _jpeglib_parse_header raises for an invalid legacy header.
    """
    import os
    from collections import Counter

    # Progress starts at 25% (decode.py writes 20% for Argon2, 25% before extraction)

    # Normalize JPEG to avoid crashes with quality=100 images
    # (shouldn't happen with stego images, but be defensive)
    stego_image = _normalize_jpeg_for_jpeglib(stego_image)

    temp_path = _jpeglib_bytes_to_file(stego_image, suffix=".jpg")

    try:
        jpeg = jpeglib.to_jpegio(jpeglib.read_dct(temp_path))
        coef_array = jpeg.coef_arrays[JPEGLIB_EMBED_CHANNEL]

        all_positions = _jpeglib_get_usable_positions(coef_array)
        order = _jpeglib_generate_order(len(all_positions), seed)
        capacity_bits = len(all_positions)

        def read_bits(count: int) -> list[int]:
            """Return the LSBs of the first `count` coefficients in seeded order."""
            bits = []
            for pos_idx in order[:count]:
                row, col = all_positions[pos_idx]
                bits.append(int(coef_array[row, col]) & 1)
            return bits

        def pack_bits(bits: list[int], byte_count: int) -> bytes:
            """Pack MSB-first bits into `byte_count` bytes, rejecting short input."""
            if len(bits) < byte_count * 8:
                raise ValueError(
                    f"Not enough embedded bits: need {byte_count * 8}, have {len(bits)}"
                )
            return bytes(
                sum(bit << (7 - j) for j, bit in enumerate(bits[i * 8 : (i + 1) * 8]))
                for i in range(byte_count)
            )

        _write_progress(progress_file, 30, 100, "extracting")

        # Try RS-protected format first (length prefix: copies for majority voting)
        prefix_bits = RS_LENGTH_PREFIX_SIZE * 8
        if HAS_REEDSOLO and capacity_bits >= prefix_bits:
            length_prefix_bytes = pack_bits(read_bits(prefix_bits), RS_LENGTH_PREFIX_SIZE)

            # Split the prefix into its copies and use majority voting
            copies = [
                length_prefix_bytes[i * RS_LENGTH_HEADER_SIZE : (i + 1) * RS_LENGTH_HEADER_SIZE]
                for i in range(RS_LENGTH_COPIES)
            ]
            best_header, count = Counter(copies).most_common(1)[0]

            # Without a majority, fall back to the first copy
            chosen_header = best_header if count >= 2 else copies[0]
            raw_payload_length, rs_encoded_length = struct.unpack(">II", chosen_header)

            # Sanity check: both lengths must fit the carrier and be consistent
            max_reasonable = (capacity_bits // 8) - RS_LENGTH_PREFIX_SIZE
            if (
                0 < raw_payload_length <= max_reasonable
                and 0 < rs_encoded_length <= max_reasonable
                and rs_encoded_length >= raw_payload_length
            ):
                total_bits_needed = (RS_LENGTH_PREFIX_SIZE + rs_encoded_length) * 8

                if capacity_bits >= total_bits_needed:
                    # Extract RS-encoded data (everything after the length prefix)
                    rs_bits = read_bits(total_bits_needed)[prefix_bits:]
                    rs_encoded = pack_bits(rs_bits, rs_encoded_length)

                    try:
                        _write_progress(progress_file, 75, 100, "decoding")
                        raw_payload = _rs_decode(rs_encoded)
                        _write_progress(progress_file, 95, 100, "decoding")
                        _, _, data_length = _jpeglib_parse_header(raw_payload[:HEADER_SIZE])
                        data = raw_payload[HEADER_SIZE : HEADER_SIZE + data_length]
                        _write_progress(progress_file, 100, 100, "complete")
                        return data
                    except (ValueError, struct.error):
                        pass  # Fall through to legacy format

        # Legacy format: header not protected by RS
        header_bytes = pack_bits(read_bits(HEADER_SIZE * 8), HEADER_SIZE)
        _, _, data_length = _jpeglib_parse_header(header_bytes)
        total_bits_needed = (HEADER_SIZE + data_length) * 8

        # A corrupt header can declare more data than the carrier holds;
        # pack_bits turns that into a ValueError the caller already handles.
        data_bits = read_bits(total_bits_needed)[HEADER_SIZE * 8 :]
        data = pack_bits(data_bits, data_length)

        _write_progress(progress_file, 100, 100, "complete")
        return data

    finally:
        try:
            os.unlink(temp_path)
        except OSError:
            pass
def _configure_from_env() -> bool:
    """Configure logging from environment variables.

    STEGASOO_DEBUG set to 1/true/yes (any letter case) forces DEBUG level.
    Otherwise STEGASOO_LOG_LEVEL is looked up in _LEVEL_MAP, also ignoring
    case. When neither yields a level, logging is left unconfigured.

    Returns:
        True if DEBUG-level logging was enabled, False otherwise.
    """
    # STEGASOO_DEBUG=1 is shorthand for DEBUG level. Lower-casing keeps this
    # check as case-insensitive as the STEGASOO_LOG_LEVEL lookup below.
    debug_flag = os.environ.get("STEGASOO_DEBUG", "").strip().lower()
    if debug_flag in ("1", "true", "yes"):
        _setup_logging(logging.DEBUG)
        return True

    level_name = os.environ.get("STEGASOO_LOG_LEVEL", "").strip().upper()
    level = _LEVEL_MAP.get(level_name)
    if level is not None:
        _setup_logging(level)
        return level == logging.DEBUG

    return False
def debug_exception(e: Exception, context: str = "") -> None:
    """Log exception with context for debugging.

    The exception type and message are logged at ERROR level. When DEBUG is
    enabled, the traceback attached to `e` is logged as well.

    Args:
        e: The exception to report.
        context: Free-form description of where the exception occurred.
    """
    logger.error("Exception in %s: %s: %s", context, type(e).__name__, e)
    if logger.isEnabledFor(logging.DEBUG):
        # Format the traceback carried by `e` itself. traceback.format_exc()
        # reports whatever exception is currently being handled, which is the
        # wrong one (or "NoneType: None") when called outside `except e`.
        logger.debug("".join(traceback.format_exception(type(e), e, e.__traceback__)))
def get_logger(name: str) -> logging.Logger:
    """Get a child logger under the stegasoo namespace.

    The name is re-rooted so the returned logger always descends from the
    'stegasoo' logger: a parent-package prefix is dropped
    ("soosef.stegasoo.decode" -> "stegasoo.decode"), and a name without a
    "stegasoo" component is prefixed ("plugin" -> "stegasoo.plugin").

    Usage in modules:
        from .debug import get_logger
        logger = get_logger(__name__)
        logger.debug("message")

    Args:
        name: Usually the calling module's __name__.

    Returns:
        A logging.Logger in the 'stegasoo' hierarchy.
    """
    root = "stegasoo"
    parts = name.split(".") if name else []
    if root in parts:
        # Keep everything from the "stegasoo" component onwards
        qualified = ".".join(parts[parts.index(root) :])
    elif parts:
        qualified = f"{root}.{name}"
    else:
        qualified = root
    return logging.getLogger(qualified)
disable validation assertions.""" + enable_assertions(enable) + + +# Create singleton instance +debug = Debug() diff --git a/src/soosef/stegasoo/decode.py b/src/soosef/stegasoo/decode.py new file mode 100644 index 0000000..115115e --- /dev/null +++ b/src/soosef/stegasoo/decode.py @@ -0,0 +1,492 @@ +""" +Stegasoo Decode Module (v4.0.0) + +High-level decoding functions for extracting messages and files from images. + +Changes in v4.0.0: +- Added channel_key parameter for deployment/group isolation +- Improved error messages for channel key mismatches +""" + +import json +from pathlib import Path + +from .constants import EMBED_MODE_AUTO +from .crypto import decrypt_message +from .debug import debug +from .exceptions import DecryptionError, ExtractionError +from .models import DecodeResult +from .steganography import extract_from_image +from .validation import ( + require_security_factors, + require_valid_image, + require_valid_pin, + require_valid_rsa_key, +) + + +def _write_progress(progress_file: str | None, current: int, total: int, phase: str) -> None: + """Write progress to file for UI polling.""" + if progress_file is None: + return + try: + with open(progress_file, "w") as f: + json.dump( + { + "current": current, + "total": total, + "percent": (current / total * 100) if total > 0 else 0, + "phase": phase, + }, + f, + ) + except OSError: + pass + + +def decode( + stego_image: bytes, + reference_photo: bytes, + passphrase: str, + pin: str = "", + rsa_key_data: bytes | None = None, + rsa_password: str | None = None, + embed_mode: str = EMBED_MODE_AUTO, + channel_key: str | bool | None = None, + progress_file: str | None = None, + platform: str | None = None, +) -> DecodeResult: + """ + Decode a message or file from a stego image. 
+ + Args: + stego_image: Stego image bytes + reference_photo: Shared reference photo bytes + passphrase: Shared passphrase used during encoding + pin: Optional static PIN (if used during encoding) + rsa_key_data: Optional RSA key bytes (if used during encoding) + rsa_password: Optional RSA key password + embed_mode: 'auto' (default), 'lsb', or 'dct' + progress_file: Optional path to write progress JSON for UI polling + channel_key: Channel key for deployment/group isolation: + - None or "auto": Use server's configured key + - str: Use this specific channel key + - "" or False: No channel key (public mode) + + Returns: + DecodeResult with message or file data + + Example: + >>> result = decode( + ... stego_image=stego_bytes, + ... reference_photo=ref_bytes, + ... passphrase="apple forest thunder mountain", + ... pin="123456" + ... ) + >>> if result.is_text: + ... print(result.message) + ... else: + ... with open(result.filename, 'wb') as f: + ... f.write(result.file_data) + + Example with explicit channel key: + >>> result = decode( + ... stego_image=stego_bytes, + ... reference_photo=ref_bytes, + ... passphrase="apple forest thunder mountain", + ... pin="123456", + ... channel_key="ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456" + ... 
) + """ + debug.print( + f"decode: passphrase length={len(passphrase.split())} words, " + f"mode={embed_mode}, " + f"channel_key={'explicit' if isinstance(channel_key, str) and channel_key else 'auto' if channel_key is None else 'none'}" + ) + + # Validate inputs + require_valid_image(stego_image, "Stego image") + require_valid_image(reference_photo, "Reference photo") + require_security_factors(pin, rsa_key_data) + + if pin: + require_valid_pin(pin) + if rsa_key_data: + require_valid_rsa_key(rsa_key_data, rsa_password) + + # Progress: starting key derivation (Argon2 - slow on Pi) + _write_progress(progress_file, 20, 100, "initializing") + + # Derive pixel/coefficient selection key (with channel key) + from .crypto import derive_pixel_key + + pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key) + + # Progress: key derivation done, starting extraction + _write_progress(progress_file, 25, 100, "extracting") + + # Resolve platform preset for DCT extraction + extract_kwargs = {} + if platform: + from .platform_presets import get_preset + + preset = get_preset(platform) + extract_kwargs["quant_step"] = preset.quant_step + + # Extract encrypted data + encrypted = extract_from_image( + stego_image, + pixel_key, + embed_mode=embed_mode, + progress_file=progress_file, + **extract_kwargs, + ) + + if not encrypted: + debug.print("No data extracted from image") + raise ExtractionError("Could not extract data. 
Check your credentials and image.") + + debug.print(f"Extracted {len(encrypted)} bytes from image") + + # Decrypt (with channel key) + result = decrypt_message(encrypted, reference_photo, passphrase, pin, rsa_key_data, channel_key) + + debug.print(f"Decryption successful: {result.payload_type}") + return result + + +def decode_file( + stego_image: bytes, + reference_photo: bytes, + passphrase: str, + output_path: Path | None = None, + pin: str = "", + rsa_key_data: bytes | None = None, + rsa_password: str | None = None, + embed_mode: str = EMBED_MODE_AUTO, + channel_key: str | bool | None = None, + progress_file: str | None = None, +) -> Path: + """ + Decode a file from a stego image and save it. + + Args: + stego_image: Stego image bytes + reference_photo: Shared reference photo bytes + passphrase: Shared passphrase + output_path: Optional output path (defaults to original filename) + pin: Optional static PIN + rsa_key_data: Optional RSA key bytes + rsa_password: Optional RSA key password + embed_mode: 'auto', 'lsb', or 'dct' + channel_key: Channel key parameter (see decode()) + progress_file: Optional path to write progress JSON for UI polling + + Returns: + Path where file was saved + + Raises: + DecryptionError: If payload is text, not a file + """ + result = decode( + stego_image, + reference_photo, + passphrase, + pin, + rsa_key_data, + rsa_password, + embed_mode, + channel_key, + progress_file, + ) + + if not result.is_file: + raise DecryptionError("Payload is a text message, not a file") + + if output_path is None: + output_path = Path(result.filename or "extracted_file") + else: + output_path = Path(output_path) + if output_path.is_dir(): + output_path = output_path / (result.filename or "extracted_file") + + # Write file + output_path.write_bytes(result.file_data or b"") + + debug.print(f"File saved to: {output_path}") + return output_path + + +def decode_text( + stego_image: bytes, + reference_photo: bytes, + passphrase: str, + pin: str = "", + 
def decode_audio(
    stego_audio: bytes,
    reference_photo: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    embed_mode: str = "audio_auto",
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
) -> DecodeResult:
    """
    Decode a message or file from stego audio.

    Non-WAV input is transcoded to WAV before extraction. In 'audio_auto'
    mode spread-spectrum extraction is attempted first and LSB extraction
    is used as the fallback.

    Args:
        stego_audio: Stego audio bytes
        reference_photo: Shared reference photo bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        rsa_password: Optional RSA key password
        embed_mode: 'audio_auto', 'audio_lsb', or 'audio_spread'
        channel_key: Channel key for deployment/group isolation
        progress_file: Optional path to write progress JSON

    Returns:
        DecodeResult with message or file data

    Raises:
        ExtractionError: If audio support is disabled or no data could be
            extracted from the audio.
        ValueError: If embed_mode is not a recognised audio mode.
    """
    from .constants import (
        AUDIO_ENABLED,
        EMBED_MODE_AUDIO_AUTO,
        EMBED_MODE_AUDIO_LSB,
        EMBED_MODE_AUDIO_SPREAD,
    )

    if not AUDIO_ENABLED:
        raise ExtractionError(
            "Audio support is disabled. Install audio extras (pip install stegasoo[audio]) "
            "or set STEGASOO_AUDIO=1 to force enable."
        )

    from .audio_utils import detect_audio_format, transcode_to_wav

    debug.print(
        f"decode_audio: mode={embed_mode}, " f"passphrase length={len(passphrase.split())} words"
    )

    # Validate inputs
    require_valid_image(reference_photo, "Reference photo")
    require_security_factors(pin, rsa_key_data)

    if pin:
        require_valid_pin(pin)
    if rsa_key_data:
        require_valid_rsa_key(rsa_key_data, rsa_password)

    # Detect format and transcode to WAV for processing
    audio_format = detect_audio_format(stego_audio)
    debug.print(f"Detected audio format: {audio_format}")

    wav_audio = stego_audio
    if audio_format != "wav":
        debug.print(f"Transcoding {audio_format} to WAV for extraction")
        wav_audio = transcode_to_wav(stego_audio)

    _write_progress(progress_file, 20, 100, "initializing")

    # Derive sample selection key
    from .crypto import derive_pixel_key

    pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key)

    _write_progress(progress_file, 25, 100, "extracting")

    encrypted = None

    if embed_mode == EMBED_MODE_AUDIO_AUTO:
        # Try modes in order: spread spectrum -> LSB
        try:
            from .spread_steganography import extract_from_audio_spread

            encrypted = extract_from_audio_spread(wav_audio, pixel_key)
            if encrypted:
                debug.print("Auto-detect: spread spectrum extraction succeeded")
        except Exception as exc:
            # The spread-spectrum probe is best-effort (the module may be
            # missing, or the audio may simply not carry spread data), so we
            # still fall back to LSB, but record why instead of hiding it.
            debug.print(
                "Auto-detect: spread spectrum extraction failed "
                f"({type(exc).__name__}: {exc}); falling back to LSB"
            )

        if not encrypted:
            from .audio_steganography import extract_from_audio_lsb

            encrypted = extract_from_audio_lsb(wav_audio, pixel_key)
            if encrypted:
                debug.print("Auto-detect: LSB extraction succeeded")

    elif embed_mode == EMBED_MODE_AUDIO_LSB:
        from .audio_steganography import extract_from_audio_lsb

        encrypted = extract_from_audio_lsb(wav_audio, pixel_key, progress_file=progress_file)

    elif embed_mode == EMBED_MODE_AUDIO_SPREAD:
        from .spread_steganography import extract_from_audio_spread

        encrypted = extract_from_audio_spread(wav_audio, pixel_key, progress_file=progress_file)
    else:
        raise ValueError(f"Invalid audio embed mode: {embed_mode}")

    if not encrypted:
        debug.print("No data extracted from audio")
        raise ExtractionError("Could not extract data from audio. Check your credentials.")

    debug.print(f"Extracted {len(encrypted)} bytes from audio")

    # Decrypt
    result = decrypt_message(encrypted, reference_photo, passphrase, pin, rsa_key_data, channel_key)

    debug.print(f"Decryption successful: {result.payload_type}")
    return result
+ + Args: + stego_video: Stego video bytes + reference_photo: Shared reference photo bytes + passphrase: Shared passphrase + pin: Optional static PIN + rsa_key_data: Optional RSA key bytes + rsa_password: Optional RSA key password + embed_mode: 'video_auto' or 'video_lsb' + channel_key: Channel key for deployment/group isolation + progress_file: Optional path to write progress JSON + + Returns: + DecodeResult with message or file data + """ + from .constants import ( + EMBED_MODE_VIDEO_AUTO, + EMBED_MODE_VIDEO_LSB, + VIDEO_ENABLED, + ) + + if not VIDEO_ENABLED: + raise ExtractionError( + "Video support is disabled. Install video extras and ffmpeg, " + "or set STEGASOO_VIDEO=1 to force enable." + ) + + from .video_utils import detect_video_format + + debug.print( + f"decode_video: mode={embed_mode}, " f"passphrase length={len(passphrase.split())} words" + ) + + # Validate inputs + require_valid_image(reference_photo, "Reference photo") + require_security_factors(pin, rsa_key_data) + + if pin: + require_valid_pin(pin) + if rsa_key_data: + require_valid_rsa_key(rsa_key_data, rsa_password) + + # Detect format + video_format = detect_video_format(stego_video) + debug.print(f"Detected video format: {video_format}") + + if video_format == "unknown": + raise ExtractionError("Could not detect video format.") + + _write_progress(progress_file, 20, 100, "initializing") + + # Derive pixel/frame selection key + from .crypto import derive_pixel_key + + pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key) + + _write_progress(progress_file, 25, 100, "extracting") + + encrypted = None + + if embed_mode == EMBED_MODE_VIDEO_AUTO or embed_mode == EMBED_MODE_VIDEO_LSB: + from .video_steganography import extract_from_video_lsb + + encrypted = extract_from_video_lsb(stego_video, pixel_key, progress_file=progress_file) + if encrypted: + debug.print("Video LSB extraction succeeded") + else: + raise ValueError(f"Invalid video embed mode: {embed_mode}") + 
+ if not encrypted: + debug.print("No data extracted from video") + raise ExtractionError("Could not extract data from video. Check your credentials.") + + debug.print(f"Extracted {len(encrypted)} bytes from video") + + # Decrypt + result = decrypt_message(encrypted, reference_photo, passphrase, pin, rsa_key_data, channel_key) + + debug.print(f"Decryption successful: {result.payload_type}") + return result diff --git a/src/soosef/stegasoo/encode.py b/src/soosef/stegasoo/encode.py new file mode 100644 index 0000000..234bc1d --- /dev/null +++ b/src/soosef/stegasoo/encode.py @@ -0,0 +1,478 @@ +""" +Stegasoo Encode Module (v4.0.0) + +High-level encoding functions for hiding messages and files in images. + +Changes in v4.0.0: +- Added channel_key parameter for deployment/group isolation + +Changes in v4.3.0: +- Added encode_audio() for audio steganography + +Changes in v4.4.0: +- Added encode_video() for video steganography +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +from .constants import EMBED_MODE_LSB +from .crypto import derive_pixel_key, encrypt_message +from .debug import debug +from .exceptions import AudioError, VideoError +from .models import EncodeResult, FilePayload +from .steganography import embed_in_image +from .utils import generate_filename +from .validation import ( + require_security_factors, + require_valid_image, + require_valid_payload, + require_valid_pin, + require_valid_rsa_key, +) + +if TYPE_CHECKING: + from .models import AudioEmbedStats, VideoEmbedStats + + +def encode( + message: str | bytes | FilePayload, + reference_photo: bytes, + carrier_image: bytes, + passphrase: str, + pin: str = "", + rsa_key_data: bytes | None = None, + rsa_password: str | None = None, + output_format: str | None = None, + embed_mode: str = EMBED_MODE_LSB, + dct_output_format: str = "png", + dct_color_mode: str = "color", + channel_key: str | bool | None = None, + progress_file: str | None = None, + 
platform: str | None = None, +) -> EncodeResult: + """ + Encode a message or file into an image. + + Args: + message: Text message, raw bytes, or FilePayload to hide + reference_photo: Shared reference photo bytes + carrier_image: Carrier image bytes + passphrase: Shared passphrase (recommend 4+ words) + pin: Optional static PIN + rsa_key_data: Optional RSA private key PEM bytes + rsa_password: Optional password for encrypted RSA key + output_format: Force output format ('PNG', 'BMP') - LSB mode only + embed_mode: 'lsb' (default) or 'dct' + dct_output_format: For DCT mode - 'png' or 'jpeg' + dct_color_mode: For DCT mode - 'grayscale' or 'color' + channel_key: Channel key for deployment/group isolation: + - None or "auto": Use server's configured key + - str: Use this specific channel key + - "" or False: No channel key (public mode) + + Returns: + EncodeResult with stego image and metadata + + Example: + >>> result = encode( + ... message="Secret message", + ... reference_photo=ref_bytes, + ... carrier_image=carrier_bytes, + ... passphrase="apple forest thunder mountain", + ... pin="123456" + ... ) + >>> with open('stego.png', 'wb') as f: + ... f.write(result.stego_image) + + Example with explicit channel key: + >>> result = encode( + ... message="Secret message", + ... reference_photo=ref_bytes, + ... carrier_image=carrier_bytes, + ... passphrase="apple forest thunder mountain", + ... pin="123456", + ... channel_key="ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456" + ... 
) + """ + debug.print( + f"encode: passphrase length={len(passphrase.split())} words, " + f"pin={'set' if pin else 'none'}, mode={embed_mode}, " + f"channel_key={'explicit' if isinstance(channel_key, str) and channel_key else 'auto' if channel_key is None else 'none'}" + ) + + # Validate inputs + require_valid_payload(message) + require_valid_image(reference_photo, "Reference photo") + require_valid_image(carrier_image, "Carrier image") + require_security_factors(pin, rsa_key_data) + + if pin: + require_valid_pin(pin) + if rsa_key_data: + require_valid_rsa_key(rsa_key_data, rsa_password) + + # Encrypt message (with channel key) + encrypted = encrypt_message( + message, reference_photo, passphrase, pin, rsa_key_data, channel_key + ) + + debug.print(f"Encrypted payload: {len(encrypted)} bytes") + + # Derive pixel/coefficient selection key (with channel key) + pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key) + + # Resolve platform preset for DCT encoding + platform_kwargs = {} + if platform: + from .platform_presets import get_preset + + preset = get_preset(platform) + platform_kwargs = { + "quant_step": preset.quant_step, + "max_dimension": preset.max_dimension, + "jpeg_quality": preset.jpeg_quality, + } + + # Embed in image + stego_data, stats, extension = embed_in_image( + encrypted, + carrier_image, + pixel_key, + output_format=output_format, + embed_mode=embed_mode, + dct_output_format=dct_output_format, + dct_color_mode=dct_color_mode, + progress_file=progress_file, + **platform_kwargs, + ) + + # Generate filename + filename = generate_filename(extension=extension) + + # Create result + if hasattr(stats, "pixels_modified"): + # LSB mode stats + return EncodeResult( + stego_image=stego_data, + filename=filename, + pixels_modified=stats.pixels_modified, + total_pixels=stats.total_pixels, + capacity_used=stats.capacity_used, + date_used=None, # No longer used in v3.2.0+ + ) + else: + # DCT mode stats + return EncodeResult( + 
def encode_file(
    filepath: str | Path,
    reference_photo: bytes,
    carrier_image: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    output_format: str | None = None,
    filename_override: str | None = None,
    embed_mode: str = EMBED_MODE_LSB,
    dct_output_format: str = "png",
    dct_color_mode: str = "color",
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
    platform: str | None = None,
) -> EncodeResult:
    """
    Encode a file into an image.

    Convenience wrapper that loads a file into a FilePayload and passes it
    to encode().

    Args:
        filepath: Path to file to embed
        reference_photo: Shared reference photo bytes
        carrier_image: Carrier image bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        rsa_password: Optional RSA key password
        output_format: Force output format - LSB only
        filename_override: Override stored filename
        embed_mode: 'lsb' or 'dct'
        dct_output_format: 'png' or 'jpeg'
        dct_color_mode: 'grayscale' or 'color'
        channel_key: Channel key parameter (see encode())
        progress_file: Optional path to write progress JSON, forwarded to encode()
        platform: Optional platform preset name, forwarded to encode()

    Returns:
        EncodeResult
    """
    payload = FilePayload.from_file(str(filepath), filename_override)

    return encode(
        message=payload,
        reference_photo=reference_photo,
        carrier_image=carrier_image,
        passphrase=passphrase,
        pin=pin,
        rsa_key_data=rsa_key_data,
        rsa_password=rsa_password,
        output_format=output_format,
        embed_mode=embed_mode,
        dct_output_format=dct_output_format,
        dct_color_mode=dct_color_mode,
        channel_key=channel_key,
        # Forwarded so file encodes can report progress and use platform
        # presets just like direct encode() calls.
        progress_file=progress_file,
        platform=platform,
    )
def encode_audio(
    message: str | bytes | FilePayload,
    reference_photo: bytes,
    carrier_audio: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    embed_mode: str = "audio_lsb",
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
    chip_tier: int | None = None,
) -> tuple[bytes, AudioEmbedStats]:
    """
    Encode a message or file into an audio carrier.

    Args:
        message: Text message, raw bytes, or FilePayload to hide
        reference_photo: Shared reference photo bytes
        carrier_audio: Carrier audio bytes (WAV, FLAC, MP3, etc.)
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA private key PEM bytes
        rsa_password: Optional password for encrypted RSA key
        embed_mode: 'audio_lsb' or 'audio_spread'
        channel_key: Channel key for deployment/group isolation
        progress_file: Optional path to write progress JSON
        chip_tier: Spread spectrum chip tier (0=lossless, 1=high_lossy, 2=low_lossy).
            Only used for audio_spread mode. Default None → uses constant default.

    Returns:
        Tuple of (stego audio bytes, AudioEmbedStats)

    Raises:
        AudioError: If audio support is disabled.
        ValueError: If embed_mode is not 'audio_lsb' or 'audio_spread'.
    """
    from .constants import AUDIO_ENABLED, EMBED_MODE_AUDIO_LSB, EMBED_MODE_AUDIO_SPREAD

    if not AUDIO_ENABLED:
        raise AudioError(
            "Audio support is disabled. Install audio extras (pip install stegasoo[audio]) "
            "or set STEGASOO_AUDIO=1 to force enable."
        )

    from .audio_utils import detect_audio_format, transcode_to_wav

    debug.print(
        f"encode_audio: mode={embed_mode}, "
        f"passphrase length={len(passphrase.split())} words, "
        f"pin={'set' if pin else 'none'}"
    )

    # Validate inputs
    require_valid_payload(message)
    require_valid_image(reference_photo, "Reference photo")
    require_security_factors(pin, rsa_key_data)

    if pin:
        require_valid_pin(pin)
    if rsa_key_data:
        require_valid_rsa_key(rsa_key_data, rsa_password)

    # Fail fast on an unknown mode, before paying for transcoding,
    # encryption and key derivation that would only be thrown away.
    if embed_mode not in (EMBED_MODE_AUDIO_LSB, EMBED_MODE_AUDIO_SPREAD):
        raise ValueError(f"Invalid audio embed mode: {embed_mode}")

    # Detect audio format and transcode to WAV if needed
    audio_format = detect_audio_format(carrier_audio)
    debug.print(f"Detected audio format: {audio_format}")

    if audio_format not in ("wav", "flac"):
        debug.print(f"Transcoding {audio_format} to WAV for embedding")
        carrier_audio = transcode_to_wav(carrier_audio)

    # Encrypt message
    encrypted = encrypt_message(
        message, reference_photo, passphrase, pin, rsa_key_data, channel_key
    )
    debug.print(f"Encrypted payload: {len(encrypted)} bytes")

    # Derive sample selection key
    pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key)

    # Embed based on mode (mode already validated above)
    if embed_mode == EMBED_MODE_AUDIO_LSB:
        from .audio_steganography import embed_in_audio_lsb

        stego_audio, stats = embed_in_audio_lsb(
            encrypted, carrier_audio, pixel_key, progress_file=progress_file
        )
    else:
        from .constants import AUDIO_SS_DEFAULT_CHIP_TIER
        from .spread_steganography import embed_in_audio_spread

        # An explicit tier of 0 is valid, so test against None rather than truthiness.
        tier = chip_tier if chip_tier is not None else AUDIO_SS_DEFAULT_CHIP_TIER
        stego_audio, stats = embed_in_audio_spread(
            encrypted, carrier_audio, pixel_key, chip_tier=tier, progress_file=progress_file
        )

    return stego_audio, stats
+ """ + from .constants import EMBED_MODE_VIDEO_LSB, VIDEO_ENABLED + + if not VIDEO_ENABLED: + raise VideoError( + "Video support is disabled. Install video extras and ffmpeg, " + "or set STEGASOO_VIDEO=1 to force enable." + ) + + from .video_utils import detect_video_format + + debug.print( + f"encode_video: mode={embed_mode}, " + f"passphrase length={len(passphrase.split())} words, " + f"pin={'set' if pin else 'none'}" + ) + + # Validate inputs + require_valid_payload(message) + require_valid_image(reference_photo, "Reference photo") + require_security_factors(pin, rsa_key_data) + + if pin: + require_valid_pin(pin) + if rsa_key_data: + require_valid_rsa_key(rsa_key_data, rsa_password) + + # Detect video format + video_format = detect_video_format(carrier_video) + debug.print(f"Detected video format: {video_format}") + + if video_format == "unknown": + raise VideoError("Could not detect video format. Supported: MP4, MKV, WebM, AVI, MOV.") + + # Encrypt message + encrypted = encrypt_message( + message, reference_photo, passphrase, pin, rsa_key_data, channel_key + ) + debug.print(f"Encrypted payload: {len(encrypted)} bytes") + + # Derive pixel/frame selection key + pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key) + + # Embed based on mode + if embed_mode == EMBED_MODE_VIDEO_LSB: + from .video_steganography import embed_in_video_lsb + + stego_video, stats = embed_in_video_lsb( + encrypted, carrier_video, pixel_key, progress_file=progress_file + ) + else: + raise ValueError(f"Invalid video embed mode: {embed_mode}") + + return stego_video, stats diff --git a/src/soosef/stegasoo/exceptions.py b/src/soosef/stegasoo/exceptions.py new file mode 100644 index 0000000..54b4905 --- /dev/null +++ b/src/soosef/stegasoo/exceptions.py @@ -0,0 +1,293 @@ +""" +Stegasoo Exceptions + +Custom exception classes for clear error handling across all frontends. 
+""" + + +class StegasooError(Exception): + """Base exception for all Stegasoo errors.""" + + pass + + +# ============================================================================ +# VALIDATION ERRORS +# ============================================================================ + + +class ValidationError(StegasooError): + """Base class for validation errors.""" + + pass + + +class PinValidationError(ValidationError): + """PIN validation failed.""" + + pass + + +class MessageValidationError(ValidationError): + """Message validation failed.""" + + pass + + +class ImageValidationError(ValidationError): + """Image validation failed.""" + + pass + + +class KeyValidationError(ValidationError): + """RSA key validation failed.""" + + pass + + +class SecurityFactorError(ValidationError): + """Security factor requirements not met.""" + + pass + + +# ============================================================================ +# CRYPTO ERRORS +# ============================================================================ + + +class CryptoError(StegasooError): + """Base class for cryptographic errors.""" + + pass + + +class EncryptionError(CryptoError): + """Encryption failed.""" + + pass + + +class DecryptionError(CryptoError): + """Decryption failed (wrong key, corrupted data, etc.).""" + + pass + + +class KeyDerivationError(CryptoError): + """Key derivation failed.""" + + pass + + +class KeyGenerationError(CryptoError): + """Key generation failed.""" + + pass + + +class KeyPasswordError(CryptoError): + """RSA key password is incorrect or missing.""" + + pass + + +# ============================================================================ +# STEGANOGRAPHY ERRORS +# ============================================================================ + + +class SteganographyError(StegasooError): + """Base class for steganography errors.""" + + pass + + +class CapacityError(SteganographyError): + """Carrier image too small for message.""" + + def __init__(self, needed: int, 
class FileNotFoundError(FileError):
    """Signals that a file Stegasoo requires could not be found.

    NOTE: this class has the same name as the builtin ``FileNotFoundError``
    and shadows it wherever this name is in scope. It derives from
    ``FileError``, not ``OSError``, so an ``except OSError`` handler will
    not catch it.
    """
class AudioCapacityError(CapacityError):
    """Raised when the audio carrier cannot hold the payload.

    Exposes the same ``needed`` and ``available`` attributes as
    CapacityError, but reports an audio-specific message.
    """

    def __init__(self, needed: int, available: int):
        text = f"Audio carrier too small. Need {needed:,} bytes, have {available:,} bytes capacity."
        # Initialise via SteganographyError so CapacityError's image-specific
        # message is not used.
        SteganographyError.__init__(self, text)
        self.needed = needed
        self.available = available
Need {needed:,} bytes, have {available:,} bytes capacity.", + ) + + +class VideoExtractionError(ExtractionError): + """Failed to extract hidden data from video.""" + + pass + + +class VideoTranscodeError(VideoError): + """Video transcoding failed.""" + + pass + + +class UnsupportedVideoFormatError(VideoError): + """Video format not supported.""" + + pass diff --git a/src/soosef/stegasoo/generate.py b/src/soosef/stegasoo/generate.py new file mode 100644 index 0000000..3c91f66 --- /dev/null +++ b/src/soosef/stegasoo/generate.py @@ -0,0 +1,167 @@ +""" +Stegasoo Generate Module (v3.2.0) + +Public API for generating credentials (PINs, passphrases, RSA keys). +""" + +from .constants import ( + DEFAULT_PASSPHRASE_WORDS, + DEFAULT_PIN_LENGTH, + DEFAULT_RSA_BITS, +) +from .debug import debug +from .keygen import ( + export_rsa_key_pem, + generate_phrase, + load_rsa_key, +) +from .keygen import ( + generate_pin as _generate_pin, +) +from .keygen import ( + generate_rsa_key as _generate_rsa_key, +) +from .models import Credentials + +# Re-export from keygen for convenience +__all__ = [ + "generate_pin", + "generate_passphrase", + "generate_rsa_key", + "generate_credentials", + "export_rsa_key_pem", + "load_rsa_key", +] + + +def generate_pin(length: int = DEFAULT_PIN_LENGTH) -> str: + """ + Generate a random PIN. + + PINs never start with zero for usability. + + Args: + length: PIN length (6-9 digits, default 6) + + Returns: + PIN string + + Example: + >>> pin = generate_pin() + >>> len(pin) + 6 + >>> pin[0] != '0' + True + """ + return _generate_pin(length) + + +def generate_passphrase(words: int = DEFAULT_PASSPHRASE_WORDS) -> str: + """ + Generate a random passphrase from BIP-39 wordlist. + + In v3.2.0, this generates a single passphrase (not daily phrases). + Default is 4 words for good security (increased from 3 in v3.1.0). 
def generate_credentials(
    use_pin: bool = True,
    use_rsa: bool = False,
    pin_length: int = DEFAULT_PIN_LENGTH,
    rsa_bits: int = DEFAULT_RSA_BITS,
    passphrase_words: int = DEFAULT_PASSPHRASE_WORDS,
    rsa_password: str | None = None,
) -> Credentials:
    """
    Build a full credential set: one passphrase plus a PIN and/or an RSA key.

    A passphrase is always generated. At least one of the two additional
    factors (PIN, RSA key) has to be requested.

    Args:
        use_pin: Generate a PIN when true
        use_rsa: Generate an RSA key when true
        pin_length: Length passed to the PIN generator
        rsa_bits: Key size passed to the RSA key generator
        passphrase_words: Number of words requested for the passphrase
        rsa_password: Optional password used to encrypt the exported PEM

    Returns:
        Credentials holding the passphrase and whichever factors were requested
        (``pin``, ``rsa_key_pem`` and ``rsa_bits`` stay None when not requested)

    Raises:
        ValueError: If neither PIN nor RSA is selected

    Example:
        >>> creds = generate_credentials(use_pin=True, use_rsa=False)
        >>> creds.rsa_key_pem is None
        True
    """
    if not (use_pin or use_rsa):
        raise ValueError("Must select at least one security factor (PIN or RSA key)")

    debug.print(
        f"Generating credentials: PIN={use_pin}, RSA={use_rsa}, "
        f"passphrase_words={passphrase_words}"
    )

    # Single passphrase first, then the optional factors in PIN -> RSA order.
    phrase = generate_phrase(passphrase_words)

    pin_value = None
    if use_pin:
        pin_value = _generate_pin(pin_length)

    pem_text = None
    stored_bits = None
    if use_rsa:
        private_key = _generate_rsa_key(rsa_bits)
        pem_text = export_rsa_key_pem(private_key, rsa_password).decode("utf-8")
        stored_bits = rsa_bits

    bundle = Credentials(
        passphrase=phrase,
        pin=pin_value,
        rsa_key_pem=pem_text,
        rsa_bits=stored_bits,
        words_per_passphrase=passphrase_words,
    )

    debug.print(f"Credentials generated: {bundle.total_entropy} bits total entropy")
    return bundle
def get_image_info(image_data: bytes) -> ImageInfo:
    """
    Get detailed information about an image.

    Args:
        image_data: Image file bytes

    Returns:
        ImageInfo with dimensions, format, mode, file size and capacity
        estimates. The DCT fields are None when DCT support is unavailable or
        the DCT capacity calculation fails.

    Example:
        >>> info = get_image_info(carrier_bytes)
        >>> print(f"{info.width}x{info.height}, {info.lsb_capacity_kb} KB capacity")
    """
    # Only header metadata is needed, so release the image handle right away.
    with Image.open(io.BytesIO(image_data)) as img:
        width, height = img.size
        format_str = img.format or "Unknown"
        mode = img.mode

    pixels = width * height

    # Calculate LSB capacity
    lsb_capacity = calculate_capacity(image_data, bits_per_channel=1)

    # Calculate DCT capacity if available (best effort: failures are logged
    # and reported as "no DCT capacity" rather than raised).
    dct_capacity = None
    if has_dct_support():
        try:
            from .dct_steganography import calculate_dct_capacity

            dct_info = calculate_dct_capacity(image_data)
            dct_capacity = dct_info.usable_capacity_bytes
        except Exception as e:
            debug.print(f"Could not calculate DCT capacity: {e}")

    # Test against None, not truthiness: a computed capacity of 0 bytes is a
    # real result and must not be reported as "unknown".
    dct_known = dct_capacity is not None

    info = ImageInfo(
        width=width,
        height=height,
        pixels=pixels,
        format=format_str,
        mode=mode,
        file_size=len(image_data),
        lsb_capacity_bytes=lsb_capacity,
        lsb_capacity_kb=lsb_capacity / 1024,
        dct_capacity_bytes=dct_capacity,
        dct_capacity_kb=dct_capacity / 1024 if dct_known else None,
    )

    dct_label = dct_capacity if dct_known else "N/A"
    debug.print(
        f"Image info: {width}x{height}, LSB={lsb_capacity} bytes, "
        f"DCT={dct_label} bytes"
    )

    return info
def validate_carrier_capacity(
    carrier_image: bytes,
    payload_size: int,
    embed_mode: str = EMBED_MODE_LSB,
) -> dict:
    """
    Report whether a payload of the given size fits into a carrier image.

    Args:
        carrier_image: Carrier image bytes
        payload_size: Size of payload in bytes
        embed_mode: Embedding mode forwarded to ``calculate_capacity_by_mode``

    Returns:
        Dict with the keys 'fits', 'capacity', 'payload_size',
        'estimated_size', 'usage_percent' (capped at 100.0), 'headroom'
        (negative when the payload does not fit) and 'mode'.
    """
    from .steganography import calculate_capacity_by_mode

    capacity = calculate_capacity_by_mode(carrier_image, embed_mode)["capacity_bytes"]

    # Flat 200-byte allowance approximating the encryption overhead.
    estimated_size = payload_size + 200

    if capacity > 0:
        usage_percent = estimated_size / capacity * 100
    else:
        # A carrier with no capacity is reported as fully used.
        usage_percent = 100.0

    return {
        "fits": estimated_size <= capacity,
        "capacity": capacity,
        "payload_size": payload_size,
        "estimated_size": estimated_size,
        "usage_percent": min(usage_percent, 100.0),
        "headroom": capacity - estimated_size,
        "mode": embed_mode,
    }
def generate_pin(length: int = DEFAULT_PIN_LENGTH) -> str:
    """
    Generate a random PIN.

    PINs never start with zero for usability: the first digit is drawn from
    1-9, the remaining digits from 0-9, all via the ``secrets`` module.

    Args:
        length: Requested PIN length. The value is first passed to
            ``debug.validate`` and then clamped into the range
            ``MIN_PIN_LENGTH``..``MAX_PIN_LENGTH``.

    Returns:
        PIN string

    Example:
        >>> generate_pin(6)
        "812345"
    """
    debug.validate(
        MIN_PIN_LENGTH <= length <= MAX_PIN_LENGTH,
        f"PIN length must be between {MIN_PIN_LENGTH} and {MAX_PIN_LENGTH}",
    )

    length = max(MIN_PIN_LENGTH, min(MAX_PIN_LENGTH, length))

    # First digit: 1-9 (no leading zero)
    first_digit = str(secrets.randbelow(9) + 1)

    # Remaining digits: 0-9
    rest = "".join(str(secrets.randbelow(10)) for _ in range(length - 1))

    pin = first_digit + rest
    # Never write the secret itself to the debug log; record only its length.
    debug.print(f"Generated PIN ({len(pin)} digits)")
    return pin
def generate_day_phrases(words_per_phrase: int = DEFAULT_PASSPHRASE_WORDS) -> dict[str, str]:
    """
    Produce one freshly generated phrase for every entry in ``DAY_NAMES``.

    DEPRECATED in v3.2.0: prefer generate_phrase(), which returns a single
    passphrase. Calling this function emits a DeprecationWarning.

    Args:
        words_per_phrase: Word count forwarded to generate_phrase() for each day

    Returns:
        Dict mapping day names to phrases

    Example:
        >>> generate_day_phrases(3)
        {'Monday': 'apple forest thunder', 'Tuesday': 'banana river lightning', ...}
    """
    import warnings

    warnings.warn(
        "generate_day_phrases() is deprecated in v3.2.0. "
        "Use generate_phrase() for single passphrase.",
        DeprecationWarning,
        stacklevel=2,  # attribute the warning to the caller, not this function
    )

    by_day: dict[str, str] = {}
    for day_name in DAY_NAMES:
        by_day[day_name] = generate_phrase(words_per_phrase)

    debug.print(f"Generated phrases for {len(by_day)} days")
    return by_day
def load_rsa_key(key_data: bytes, password: str | None = None) -> rsa.RSAPrivateKey:
    """
    Load RSA private key from PEM data.

    Args:
        key_data: PEM-encoded key bytes
        password: Password if key is encrypted (an empty string is treated
            the same as None)

    Returns:
        RSA private key object

    Raises:
        KeyPasswordError: If the key is encrypted and the password is missing
            or wrong, or if a password is supplied for an unencrypted key
        KeyGenerationError: If the data is not a valid PEM private key or the
            key is not an RSA key

    Example:
        >>> key = load_rsa_key(pem_data, "my_password")
    """
    debug.validate(key_data is not None and len(key_data) > 0, "Key data cannot be empty")

    try:
        pwd_bytes = password.encode() if password else None
        debug.print(f"Loading RSA key (encrypted: {bool(password)})")
        key: PrivateKeyTypes = load_pem_private_key(
            key_data, password=pwd_bytes, backend=default_backend()
        )
    except TypeError as e:
        # load_pem_private_key signals a password/encryption mismatch with
        # TypeError in both directions, so report the one that occurred.
        if not password:
            debug.print("RSA key is password-protected but no password provided")
            raise KeyPasswordError(
                "RSA key is password-protected. Please provide the password."
            ) from e
        debug.print("Password provided but RSA key is not password-protected")
        raise KeyPasswordError(
            "RSA key is not password-protected, but a password was provided."
        ) from e
    except ValueError as e:
        error_msg = str(e).lower()
        if "password" in error_msg or "encrypted" in error_msg:
            debug.print("Incorrect password for RSA key")
            raise KeyPasswordError("Incorrect password for RSA key.") from e
        debug.exception(e, "RSA key loading")
        raise KeyGenerationError(f"Invalid RSA key: {e}") from e
    except Exception as e:
        debug.exception(e, "RSA key loading")
        raise KeyGenerationError(f"Could not load RSA key: {e}") from e

    # Checked outside the try block so this error is raised as-is instead of
    # being caught and re-wrapped by the catch-all handler above.
    if not isinstance(key, rsa.RSAPrivateKey):
        raise KeyGenerationError(f"Expected RSA key, got {type(key).__name__}")

    debug.print(f"RSA key loaded: {key.key_size} bits")
    return key
def generate_credentials(
    use_pin: bool = True,
    use_rsa: bool = False,
    pin_length: int = DEFAULT_PIN_LENGTH,
    rsa_bits: int = DEFAULT_RSA_BITS,
    passphrase_words: int = DEFAULT_PASSPHRASE_WORDS,
    rsa_password: str | None = None,
) -> Credentials:
    """
    Create a credential set made of one passphrase plus a PIN and/or RSA key.

    v3.2.0: a single passphrase is generated instead of daily phrases.
    At least one of use_pin or use_rsa must be True.

    Args:
        use_pin: Whether to generate a PIN
        use_rsa: Whether to generate an RSA key
        pin_length: PIN length, used only when a PIN is generated
        rsa_bits: RSA key size, used only when a key is generated
        passphrase_words: Number of words requested for the passphrase
        rsa_password: Optional password for RSA key encryption

    Returns:
        Credentials object with passphrase, PIN, and/or RSA key

    Raises:
        ValueError: If neither PIN nor RSA is selected

    Example:
        >>> creds = generate_credentials(use_pin=True, use_rsa=False)
        >>> creds.passphrase
        "apple forest thunder mountain"
        >>> creds.pin
        "812345"
    """
    any_factor = use_pin or use_rsa
    debug.validate(any_factor, "Must select at least one security factor (PIN or RSA key)")

    # Explicit check in addition to debug.validate above.
    if not any_factor:
        raise ValueError("Must select at least one security factor (PIN or RSA key)")

    debug.print(
        f"Generating credentials: PIN={use_pin}, RSA={use_rsa}, "
        f"passphrase_words={passphrase_words}"
    )

    # Single passphrase (v3.2.0 - no daily rotation)
    secret_phrase = generate_phrase(passphrase_words)

    secret_pin = None
    if use_pin:
        secret_pin = generate_pin(pin_length)

    if use_rsa:
        private_key = generate_rsa_key(rsa_bits)
        pem_text = export_rsa_key_pem(private_key, rsa_password).decode("utf-8")
        key_bits = rsa_bits
    else:
        pem_text = None
        key_bits = None

    result = Credentials(
        passphrase=secret_phrase,
        pin=secret_pin,
        rsa_key_pem=pem_text,
        rsa_bits=key_bits,
        words_per_passphrase=passphrase_words,
    )

    debug.print(f"Credentials generated: {result.total_entropy} bits total entropy")
    return result
int = DEFAULT_RSA_BITS, + words_per_phrase: int = DEFAULT_PASSPHRASE_WORDS, +) -> dict: + """ + Generate credentials in legacy format (v3.1.0 style with daily phrases). + + DEPRECATED: Use generate_credentials() for v3.2.0 format. + + This function exists only for migration tools that need to work with + old-format credentials. + + Args: + use_pin: Whether to generate a PIN + use_rsa: Whether to generate an RSA key + pin_length: PIN length if generating + rsa_bits: RSA key size if generating + words_per_phrase: Words per daily phrase + + Returns: + Dict with 'phrases' (dict), 'pin', 'rsa_key_pem', etc. + """ + import warnings + + warnings.warn( + "generate_credentials_legacy() returns v3.1.0 format. " + "Use generate_credentials() for v3.2.0 format.", + DeprecationWarning, + stacklevel=2, + ) + + if not use_pin and not use_rsa: + raise ValueError("Must select at least one security factor (PIN or RSA key)") + + # Generate daily phrases (old format) + phrases = {day: generate_phrase(words_per_phrase) for day in DAY_NAMES} + + pin = generate_pin(pin_length) if use_pin else None + + rsa_key_pem = None + if use_rsa: + rsa_key_obj = generate_rsa_key(rsa_bits) + rsa_key_pem = export_rsa_key_pem(rsa_key_obj).decode("utf-8") + + return { + "phrases": phrases, + "pin": pin, + "rsa_key_pem": rsa_key_pem, + "rsa_bits": rsa_bits if use_rsa else None, + "words_per_phrase": words_per_phrase, + } diff --git a/src/soosef/stegasoo/models.py b/src/soosef/stegasoo/models.py new file mode 100644 index 0000000..3ee72b4 --- /dev/null +++ b/src/soosef/stegasoo/models.py @@ -0,0 +1,391 @@ +""" +Stegasoo Data Models (v3.2.0) + +Dataclasses for structured data exchange between modules and frontends. 
+ +Changes in v3.2.0: +- Renamed day_phrase → passphrase +- Credentials now uses single passphrase instead of day mapping +- Removed date_str from EncodeInput (date no longer used in crypto) +- Made date_used optional in EncodeResult (cosmetic only) +- Added ImageInfo, CapacityComparison, GenerateResult +""" + +from dataclasses import dataclass, field + + +@dataclass +class Credentials: + """ + Generated credentials for encoding/decoding. + + v3.2.0: Simplified to use single passphrase instead of daily rotation. + """ + + passphrase: str # Single passphrase (no daily rotation) + pin: str | None = None + rsa_key_pem: str | None = None + rsa_bits: int | None = None + words_per_passphrase: int = 4 # Increased from 3 in v3.1.0 + + # Optional: backup passphrases for multi-factor or rotation + backup_passphrases: list[str] | None = None + + @property + def passphrase_entropy(self) -> int: + """Entropy in bits from passphrase (~11 bits per BIP-39 word).""" + return self.words_per_passphrase * 11 + + @property + def pin_entropy(self) -> int: + """Entropy in bits from PIN (~3.32 bits per digit).""" + if self.pin: + return int(len(self.pin) * 3.32) + return 0 + + @property + def rsa_entropy(self) -> int: + """Effective entropy from RSA key.""" + if self.rsa_key_pem and self.rsa_bits: + return min(self.rsa_bits // 16, 128) + return 0 + + @property + def total_entropy(self) -> int: + """Total entropy in bits (excluding reference photo).""" + return self.passphrase_entropy + self.pin_entropy + self.rsa_entropy + + # Legacy property for compatibility + @property + def phrase_entropy(self) -> int: + """Alias for passphrase_entropy (backward compatibility).""" + return self.passphrase_entropy + + +@dataclass +class FilePayload: + """Represents a file to be embedded.""" + + data: bytes + filename: str + mime_type: str | None = None + + @property + def size(self) -> int: + return len(self.data) + + @classmethod + def from_file(cls, filepath: str, filename: str | None = None) -> 
"FilePayload": + """Create FilePayload from a file path.""" + import mimetypes + from pathlib import Path + + path = Path(filepath) + data = path.read_bytes() + name = filename or path.name + mime, _ = mimetypes.guess_type(name) + + return cls(data=data, filename=name, mime_type=mime) + + +@dataclass +class EncodeInput: + """ + Input parameters for encoding a message. + + v3.2.0: Removed date_str (date no longer used in crypto). + """ + + message: str | bytes | FilePayload # Text, raw bytes, or file + reference_photo: bytes + carrier_image: bytes + passphrase: str # Renamed from day_phrase + pin: str = "" + rsa_key_data: bytes | None = None + rsa_password: str | None = None + + +@dataclass +class EncodeResult: + """ + Result of encoding operation. + + v3.2.0: date_used is now optional/cosmetic (not used in crypto). + """ + + stego_image: bytes + filename: str + pixels_modified: int + total_pixels: int + capacity_used: float # 0.0 - 1.0 + date_used: str | None = None # Cosmetic only (for filename organization) + + @property + def capacity_percent(self) -> float: + """Capacity used as percentage.""" + return self.capacity_used * 100 + + +@dataclass +class DecodeInput: + """ + Input parameters for decoding a message. + + v3.2.0: Renamed day_phrase → passphrase, no date needed. + """ + + stego_image: bytes + reference_photo: bytes + passphrase: str # Renamed from day_phrase + pin: str = "" + rsa_key_data: bytes | None = None + rsa_password: str | None = None + + +@dataclass +class DecodeResult: + """ + Result of decoding operation. + + v3.2.0: date_encoded is always None (date removed from crypto). 
+ """ + + payload_type: str # 'text' or 'file' + message: str | None = None # For text payloads + file_data: bytes | None = None # For file payloads + filename: str | None = None # Original filename for file payloads + mime_type: str | None = None # MIME type hint + date_encoded: str | None = None # Always None in v3.2.0 (kept for compatibility) + + @property + def is_file(self) -> bool: + return self.payload_type == "file" + + @property + def is_text(self) -> bool: + return self.payload_type == "text" + + def get_content(self) -> str | bytes: + """Get the decoded content (text or bytes).""" + if self.is_text: + return self.message or "" + return self.file_data or b"" + + +@dataclass +class EmbedStats: + """Statistics from image embedding.""" + + pixels_modified: int + total_pixels: int + capacity_used: float + bytes_embedded: int + + @property + def modification_percent(self) -> float: + """Percentage of pixels modified.""" + return (self.pixels_modified / self.total_pixels) * 100 if self.total_pixels > 0 else 0 + + +@dataclass +class KeyInfo: + """Information about an RSA key.""" + + key_size: int + is_encrypted: bool + pem_data: bytes + + +@dataclass +class ValidationResult: + """Result of input validation.""" + + is_valid: bool + error_message: str = "" + details: dict = field(default_factory=dict) + warning: str | None = None # v3.2.0: Added for passphrase length warnings + + @classmethod + def ok(cls, warning: str | None = None, **details) -> "ValidationResult": + """Create a successful validation result.""" + result = cls(is_valid=True, details=details) + if warning: + result.warning = warning + return result + + @classmethod + def error(cls, message: str, **details) -> "ValidationResult": + """Create a failed validation result.""" + return cls(is_valid=False, error_message=message, details=details) + + +# ============================================================================= +# NEW MODELS FOR V3.2.0 PUBLIC API +# 
============================================================================= + + +@dataclass +class ImageInfo: + """Information about an image for steganography.""" + + width: int + height: int + pixels: int + format: str + mode: str + file_size: int + lsb_capacity_bytes: int + lsb_capacity_kb: float + dct_capacity_bytes: int | None = None + dct_capacity_kb: float | None = None + + +@dataclass +class CapacityComparison: + """Comparison of embedding capacity between modes.""" + + image_width: int + image_height: int + lsb_available: bool + lsb_bytes: int + lsb_kb: float + lsb_output_format: str + dct_available: bool + dct_bytes: int | None = None + dct_kb: float | None = None + dct_output_formats: list[str] | None = None + dct_ratio_vs_lsb: float | None = None + + +@dataclass +class GenerateResult: + """Result of credential generation.""" + + passphrase: str + pin: str | None = None + rsa_key_pem: str | None = None + passphrase_words: int = 4 + passphrase_entropy: int = 0 + pin_entropy: int = 0 + rsa_entropy: int = 0 + total_entropy: int = 0 + + def __str__(self) -> str: + lines = [ + "Generated Credentials:", + f" Passphrase: {self.passphrase}", + ] + if self.pin: + lines.append(f" PIN: {self.pin}") + if self.rsa_key_pem: + lines.append(f" RSA Key: {len(self.rsa_key_pem)} bytes PEM") + lines.append(f" Total Entropy: {self.total_entropy} bits") + return "\n".join(lines) + + +# ============================================================================= +# AUDIO STEGANOGRAPHY MODELS (v4.3.0) +# ============================================================================= + + +@dataclass +class AudioEmbedStats: + """Statistics from audio embedding.""" + + samples_modified: int + total_samples: int + capacity_used: float # 0.0 - 1.0 + bytes_embedded: int + sample_rate: int + channels: int + duration_seconds: float + embed_mode: str # "audio_lsb" or "audio_spread" + chip_tier: int | None = None # v4.4.0: spread spectrum chip tier (0/1/2) + chip_length: int | None = 
None # v4.4.0: samples per chip + embeddable_channels: int | None = None # v4.4.0: channels used (excl. LFE) + + @property + def modification_percent(self) -> float: + """Percentage of samples modified.""" + return (self.samples_modified / self.total_samples) * 100 if self.total_samples > 0 else 0 + + +@dataclass +class AudioInfo: + """Information about an audio file.""" + + sample_rate: int + channels: int + duration_seconds: float + num_samples: int + format: str # "wav", "flac", "mp3", etc. + bitrate: int | None = None # For lossy formats + bit_depth: int | None = None # For lossless formats + + +@dataclass +class AudioCapacityInfo: + """Capacity information for audio steganography.""" + + total_samples: int + usable_capacity_bytes: int + embed_mode: str + sample_rate: int + duration_seconds: float + chip_tier: int | None = None # v4.4.0: spread spectrum chip tier + chip_length: int | None = None # v4.4.0: samples per chip + embeddable_channels: int | None = None # v4.4.0: channels used (excl. 
@dataclass
class VideoEmbedStats:
    """Figures describing one video embedding run."""

    frames_modified: int
    total_frames: int
    capacity_used: float  # 0.0 - 1.0
    bytes_embedded: int
    width: int
    height: int
    fps: float
    duration_seconds: float
    embed_mode: str  # "video_lsb"
    codec: str  # Output codec (e.g., "ffv1")

    @property
    def modification_percent(self) -> float:
        """Share of frames that were modified, as a percentage (0 when there are no frames)."""
        if self.total_frames > 0:
            return (self.frames_modified / self.total_frames) * 100
        return 0
@dataclass(frozen=True)
class PlatformPreset:
    """Tuned DCT parameters for a specific platform.

    Instances are immutable (frozen dataclass).
    """

    name: str  # Display name of the platform (e.g. "Telegram")
    jpeg_quality: int  # Platform's recompression quality
    # Max width/height before platform resizes; pre_verify_survival() scales
    # the longer side down to this value when simulating an upload.
    max_dimension: int
    quant_step: int  # QIM quantization step (higher = more robust)
    # embed_start / embed_end are slice bounds into EMBED_POSITIONS
    # (get_embed_positions() returns EMBED_POSITIONS[embed_start:embed_end]),
    # so embed_end is exclusive.
    embed_start: int  # Start index into EMBED_POSITIONS (skip low-freq)
    embed_end: int  # End index into EMBED_POSITIONS (skip high-freq)
    # JPEG quality used by pre_verify_survival() when re-saving the image to
    # simulate the platform's recompression.
    recompress_quality: int
    notes: str = ""  # Free-form remarks about the platform's behavior
def pre_verify_survival(
    stego_image: bytes,
    seed: bytes,
    preset: PlatformPreset,
) -> bool:
    """Verify that a payload survives simulated platform recompression.

    The stego image is optionally downscaled so its longest side equals
    ``preset.max_dimension``, converted to RGB, re-saved as JPEG at
    ``preset.recompress_quality`` and then passed to ``extract_from_dct``.

    Args:
        stego_image: The stego JPEG image bytes (already encoded).
        seed: The same seed used for encoding.
        preset: Platform preset to simulate.

    Returns:
        True if extraction from the recompressed image yields a non-empty
        result; False if extraction returns nothing or raises.

    Note:
        Errors while decoding or re-saving the image itself are not
        swallowed here; only extraction failures map to False.
    """
    import io

    from PIL import Image

    from .dct_steganography import extract_from_dct

    buf = io.BytesIO()
    # The context manager releases the decoder even if resize/save raises.
    with Image.open(io.BytesIO(stego_image)) as source:
        frame = source

        # Resize if over max dimension
        w, h = frame.size
        longest = max(w, h)
        if longest > preset.max_dimension:
            scale = preset.max_dimension / longest
            # Clamp to 1px: for extreme aspect ratios int() truncates the
            # short side to 0, which resize() rejects with ValueError.
            new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
            frame = frame.resize(new_size, Image.LANCZOS)

        # Recompress at platform quality (JPEG has no alpha/palette modes)
        if frame.mode != "RGB":
            frame = frame.convert("RGB")
        frame.save(buf, format="JPEG", quality=preset.recompress_quality)

    recompressed = buf.getvalue()

    # Try extraction; any failure means the payload did not survive.
    try:
        result = extract_from_dct(recompressed, seed)
        return result is not None and len(result) > 0
    except Exception:
        return False
def compress_data(data: str) -> str:
    """
    Compress a string into a prefixed, base64-encoded form for QR storage.

    zstd (level 19) is used when the ``zstandard`` package was importable;
    otherwise zlib (level 9) is used.

    Args:
        data: String to compress

    Returns:
        ``STEGASOO-ZS:`` (zstd) or ``STEGASOO-Z:`` (zlib) followed by the
        base64 text of the compressed UTF-8 bytes
    """
    raw = data.encode("utf-8")

    if not HAS_ZSTD:
        # zlib fallback when zstandard is unavailable
        packed = zlib.compress(raw, level=9)
        return COMPRESSION_PREFIX_ZLIB + base64.b64encode(packed).decode("ascii")

    packed = zstd.ZstdCompressor(level=19).compress(raw)
    return COMPRESSION_PREFIX_ZSTD + base64.b64encode(packed).decode("ascii")
def normalize_pem(pem_data: str) -> str:
    """
    Rebuild a PEM block with canonical layout.

    Cleans up the kinds of damage a QR round-trip can introduce: mixed line
    endings, stray whitespace, non-ASCII characters, header/footer dashes of
    the wrong length, and missing base64 padding.

    Args:
        pem_data: Raw PEM string from QR code

    Returns:
        ``header``, the base64 body wrapped at 64 characters, and ``footer``,
        each newline-terminated. If no BEGIN/END pair can be located, the
        cleaned (line-ending-normalized, stripped, ASCII-only) input is
        returned unchanged.
    """
    import re

    # Unify line endings, trim, then drop anything outside ASCII.
    text = pem_data.replace("\r\n", "\n").replace("\r", "\n").strip()
    text = text.encode("ascii", "ignore").decode("ascii")

    # Try the strict five-dash form first, then any run of dashes.
    candidates = (
        r"(-----BEGIN[^-]*-----)(.*?)(-----END[^-]*-----)",
        r"(-+BEGIN[^-]+-+)(.*?)(-+END[^-]+-+)",
    )
    found = None
    for candidate in candidates:
        found = re.search(candidate, text, re.DOTALL | re.IGNORECASE)
        if found:
            break
    if not found:
        return text

    def _tidy(marker: str) -> str:
        # Collapse internal whitespace and force exactly five dashes per side.
        collapsed = re.sub(r"\s+", " ", marker.strip())
        collapsed = re.sub(r"^-+", "-----", collapsed)
        return re.sub(r"-+$", "-----", collapsed)

    header = _tidy(found.group(1))
    footer = _tidy(found.group(3))

    # Keep only base64 alphabet characters, then pad to a multiple of four.
    body = re.sub(r"[^A-Za-z0-9+/=]", "", found.group(2))
    body += "=" * (-len(body) % 4)

    wrapped = "\n".join(body[start : start + 64] for start in range(0, len(body), 64))
    return f"{header}\n{wrapped}\n{footer}\n"
def _prepare_qr_payload(data: str, compress: bool) -> str:
    """Return the string to encode in the QR code, enforcing QR_MAX_BINARY."""
    qr_data = compress_data(data) if compress else data

    # Measure the UTF-8 size once and report that same number: the message
    # previously printed len(qr_data) (characters) labelled as bytes.
    encoded_size = len(qr_data.encode("utf-8"))
    if encoded_size > QR_MAX_BINARY:
        raise ValueError(
            f"Data too large for QR code ({encoded_size} bytes). "
            f"Maximum: {QR_MAX_BINARY} bytes"
        )
    return qr_data


def generate_qr_code(
    data: str,
    compress: bool = False,
    error_correction=None,
    output_format: str = "png",
) -> bytes:
    """
    Generate a QR code image from string data.

    Args:
        data: String data to encode
        compress: Whether to compress data first
        error_correction: QR error correction level. When None, level L is
            used for payloads longer than 1000 characters and level M
            otherwise.
        output_format: Image format - 'png' or 'jpg'/'jpeg' (any other value
            produces PNG)

    Returns:
        Image bytes in requested format

    Raises:
        RuntimeError: If qrcode library not available
        ValueError: If data too large for QR code
    """
    if not HAS_QRCODE_WRITE:
        raise RuntimeError("qrcode library not installed. Run: pip install qrcode[pil]")

    qr_data = _prepare_qr_payload(data, compress)

    # Use lower error correction for larger data
    if error_correction is None:
        error_correction = ERROR_CORRECT_L if len(qr_data) > 1000 else ERROR_CORRECT_M

    qr = qrcode.QRCode(
        version=None,
        error_correction=error_correction,
        box_size=10,
        border=4,
    )
    qr.add_data(qr_data)
    qr.make(fit=True)

    img = qr.make_image(fill_color="black", back_color="white")

    buf = io.BytesIO()
    if output_format.lower() in ("jpg", "jpeg"):
        # Convert to RGB for JPEG (no alpha channel)
        img = img.convert("RGB")
        img.save(buf, format="JPEG", quality=95)
    else:
        img.save(buf, format="PNG")
    # getvalue() returns the whole buffer regardless of the stream position.
    return buf.getvalue()


def generate_qr_ascii(
    data: str,
    compress: bool = False,
    invert: bool = False,
) -> str:
    """
    Generate an ASCII representation of a QR code.

    Uses Unicode block characters for compact display.

    Args:
        data: String data to encode
        compress: Whether to compress data first
        invert: Invert colors (white on black for dark terminals)

    Returns:
        ASCII string representation of QR code

    Raises:
        RuntimeError: If qrcode library not available
        ValueError: If data too large for QR code
    """
    if not HAS_QRCODE_WRITE:
        raise RuntimeError("qrcode library not installed. Run: pip install qrcode[pil]")

    qr_data = _prepare_qr_payload(data, compress)

    qr = qrcode.QRCode(
        version=None,
        error_correction=ERROR_CORRECT_L,
        box_size=1,
        border=2,
    )
    qr.add_data(qr_data)
    qr.make(fit=True)

    # Render straight into a private buffer. Temporarily replacing
    # sys.stdout would redirect output for every thread in the process
    # while the QR code is being drawn.
    rendered = io.StringIO()
    qr.print_ascii(out=rendered, invert=invert)
    return rendered.getvalue()
def detect_and_crop_qr(
    image_data: bytes,
    padding_percent: float = QR_CROP_PADDING_PERCENT,
    min_padding_px: int = QR_CROP_MIN_PADDING_PX,
) -> bytes | None:
    """
    Detect QR code in image and crop to it, handling rotation.

    Uses the QR code's corner coordinates to compute an axis-aligned
    bounding box, then adds padding to ensure rotated QR codes aren't clipped.
    Only the first QR code reported by the decoder is used.

    Args:
        image_data: Input image bytes (PNG, JPG, etc.)
        padding_percent: Padding as fraction of the QR code's larger side
            (defaults to QR_CROP_PADDING_PERCENT)
        min_padding_px: Minimum padding in pixels
            (defaults to QR_CROP_MIN_PADDING_PX)

    Returns:
        Cropped PNG image bytes, or None if no QR code found or any error
        occurs while processing (the error is printed to stderr)

    Raises:
        RuntimeError: If pyzbar library not available
    """
    if not HAS_QRCODE_READ:
        raise RuntimeError(
            "pyzbar library not installed. Run: pip install pyzbar\n"
            "Also requires system library: sudo apt-get install libzbar0"
        )

    try:
        img: Image.Image = Image.open(io.BytesIO(image_data))

        # Detection runs on an RGB/grayscale view; the crop below is taken
        # from the original image so its mode is preserved.
        detect_img = img if img.mode in ("RGB", "L") else img.convert("RGB")

        # Decode QR codes to get corner positions
        decoded = pyzbar_decode(detect_img, symbols=[ZBarSymbol.QRCODE])
        if not decoded:
            return None

        first = decoded[0]
        polygon = first.polygon

        if len(polygon) < 4:
            # Fallback to rect if polygon not available
            rect = first.rect
            min_x, min_y = rect.left, rect.top
            max_x, max_y = rect.left + rect.width, rect.top + rect.height
        else:
            # Axis-aligned bounds of the corner points - handles any rotation
            xs = [p.x for p in polygon]
            ys = [p.y for p in polygon]
            min_x, max_x = min(xs), max(xs)
            min_y, max_y = min(ys), max(ys)

        # Use larger dimension for padding calculation (handles rotation)
        qr_size = max(max_x - min_x, max_y - min_y)
        padding = max(int(qr_size * padding_percent), min_padding_px)

        # Calculate crop box with padding, clamped to image bounds
        img_width, img_height = img.size
        crop_box = (
            max(0, min_x - padding),
            max(0, min_y - padding),
            min(img_width, max_x + padding),
            min(img_height, max_y + padding),
        )

        cropped = img.crop(crop_box)

        # The former mode check had two identical branches; a single PNG
        # save is all it ever did.
        buf = io.BytesIO()
        cropped.save(buf, format="PNG")
        return buf.getvalue()

    except Exception as e:
        # Log for debugging but return None for clean API
        import sys

        print(f"QR crop error: {e}", file=sys.stderr)
        return None
def _xor_bytes(data: bytes, key: bytes) -> bytes:
    """XOR data with repeating key.

    Args:
        data: Bytes to transform (may be empty).
        key: Non-empty key; it is repeated cyclically to cover ``data``.

    Returns:
        Bytes of the same length as ``data``. Applying the function twice
        with the same key returns the original data.

    Raises:
        ValueError: If ``key`` is empty.
    """
    key_len = len(key)
    if key_len == 0:
        # An empty key previously failed with ZeroDivisionError from the
        # modulo below; fail with a message that names the actual problem.
        raise ValueError("XOR key must not be empty")
    return bytes(b ^ key[i % key_len] for i, b in enumerate(data))
+ + Args: + obfuscated: Obfuscated string from QR scan + + Returns: + Formatted recovery key, or None if invalid + """ + if not obfuscated.startswith("STEGO:"): + # Not obfuscated - try as plain key + try: + return format_recovery_key(obfuscated) + except ValueError: + return None + + try: + encoded = obfuscated[6:] # Strip "STEGO:" prefix + xored = base64.b64decode(encoded) + key_bytes = _xor_bytes(xored, RECOVERY_OBFUSCATION_KEY) + normalized = key_bytes.decode("utf-8") + return format_recovery_key(normalized) + except Exception: + return None + + +# ============================================================================= +# STEGO BACKUP - Hide recovery key in an image using Stegasoo itself +# ============================================================================= + +# Fixed credentials for recovery key stego (internal, not user-facing) +# These are hardcoded - security is in the obscurity of the stego image +_RECOVERY_STEGO_PASSPHRASE = "stegasoo-recovery-v1" +_RECOVERY_STEGO_PIN = "314159" # Pi digits - fixed, not secret + +# Size limits for carrier image +STEGO_BACKUP_MIN_SIZE = 50 * 1024 # 50 KB +STEGO_BACKUP_MAX_SIZE = 2 * 1024 * 1024 # 2 MB + + +def create_stego_backup( + recovery_key: str, + carrier_image: bytes, +) -> bytes: + """ + Hide recovery key in an image using Stegasoo steganography. + + Uses the same image as both carrier and reference for simplicity. + Fixed internal passphrase, no PIN required - obscurity is the security. 
+ + Args: + recovery_key: The recovery key to hide + carrier_image: JPEG image bytes (50KB-2MB, used as carrier AND reference) + + Returns: + PNG image with hidden recovery key + + Raises: + ValueError: If image size out of range or invalid format + """ + from .encode import encode + + # Validate image size + size = len(carrier_image) + if size < STEGO_BACKUP_MIN_SIZE: + raise ValueError(f"Image too small: {size // 1024}KB (min 50KB)") + if size > STEGO_BACKUP_MAX_SIZE: + raise ValueError(f"Image too large: {size // 1024}KB (max 2MB)") + + # Normalize key for embedding + formatted_key = format_recovery_key(recovery_key) + + # Encode using Stegasoo - same image as carrier and reference + result = encode( + message=formatted_key, + reference_photo=carrier_image, # Same image for simplicity + carrier_image=carrier_image, + passphrase=_RECOVERY_STEGO_PASSPHRASE, + pin=_RECOVERY_STEGO_PIN, + ) + + debug.print(f"Created stego backup: {len(result.stego_image)} bytes") + return result.stego_image + + +def extract_stego_backup( + stego_image: bytes, + reference_photo: bytes, +) -> str | None: + """ + Extract recovery key from a stego backup image. 
def normalize_recovery_key(key: str) -> str:
    """
    Normalize a recovery key for validation/hashing.

    Removes dashes and all whitespace, converts to uppercase.

    Args:
        key: Raw key input (may have dashes, whitespace, mixed case)

    Returns:
        Normalized key (RECOVERY_KEY_LENGTH uppercase alphanumeric chars)

    Raises:
        ValueError: If key has invalid length or characters

    Example:
        >>> normalize_recovery_key("abcd-1234-efgh-5678-ijkl-9012-mnop-3456")
        "ABCD1234EFGH5678IJKL9012MNOP3456"
    """
    # str.split() with no argument splits on every whitespace character, so
    # tabs and newlines from pasted input are dropped as well as spaces.
    # Previously only "-" and " " were removed, and such input failed the
    # length check below.
    clean = "".join(key.replace("-", "").split()).upper()

    # Validate length
    if len(clean) != RECOVERY_KEY_LENGTH:
        raise ValueError(
            f"Recovery key must be {RECOVERY_KEY_LENGTH} characters (got {len(clean)})"
        )

    # Validate characters
    if not all(c in RECOVERY_KEY_ALPHABET for c in clean):
        raise ValueError("Recovery key must contain only letters A-Z and digits 0-9")

    return clean
def generate_recovery_qr(key: str) -> bytes:
    """
    Generate a QR code image for the recovery key.

    The key is passed through ``obfuscate_key`` first, so scanning the QR
    shows the obfuscated string rather than the key itself.

    Args:
        key: Recovery key

    Returns:
        PNG image bytes

    Raises:
        ImportError: If qrcode library not available
        ValueError: If the key is not a valid recovery key (raised during
            obfuscation)

    Example:
        >>> key = generate_recovery_key()
        >>> png_bytes = generate_recovery_qr(key)
        >>> len(png_bytes) > 0
        True
    """
    try:
        import qrcode
    except ImportError as err:
        # Chain explicitly so the traceback shows the underlying import
        # failure as the direct cause.
        raise ImportError("qrcode library required: pip install qrcode[pil]") from err

    # Obfuscate so scanning shows gibberish, not the actual key
    obfuscated = obfuscate_key(key)

    qr = qrcode.QRCode(
        version=1,
        error_correction=qrcode.constants.ERROR_CORRECT_M,
        box_size=10,
        border=4,
    )
    qr.add_data(obfuscated)
    qr.make(fit=True)

    img = qr.make_image(fill_color="black", back_color="white")

    buffer = BytesIO()
    img.save(buffer, format="PNG")
    # Materialise the bytes once and reuse them for the log line and return.
    png_bytes = buffer.getvalue()

    debug.print(f"Generated recovery QR (obfuscated): {len(png_bytes)} bytes")
    return png_bytes
+ + Args: + image_data: PNG/JPEG image bytes containing QR code + + Returns: + Extracted and validated recovery key, or None if not found/invalid + + Example: + >>> key = generate_recovery_key() + >>> qr = generate_recovery_qr(key) + >>> extract_key_from_qr(qr) == format_recovery_key(key) + True + """ + try: + from PIL import Image + from pyzbar import pyzbar + except ImportError: + debug.print("pyzbar/PIL not available for QR reading") + return None + + try: + img = Image.open(BytesIO(image_data)) + decoded = pyzbar.decode(img) + + for obj in decoded: + data = obj.data.decode("utf-8").strip() + + # Try deobfuscation first (handles both obfuscated and plain) + result = deobfuscate_key(data) + if result: + debug.print(f"Extracted recovery key from QR: {get_recovery_fingerprint(result)}") + return result + + debug.print("No valid recovery key found in QR") + return None + + except Exception as e: + debug.print(f"QR extraction error: {e}") + return None diff --git a/src/soosef/stegasoo/spread_steganography.py b/src/soosef/stegasoo/spread_steganography.py new file mode 100644 index 0000000..5f8f08e --- /dev/null +++ b/src/soosef/stegasoo/spread_steganography.py @@ -0,0 +1,1089 @@ +""" +Spread Spectrum Audio Steganography Module (v4.4.0 — Per-Channel Hybrid) + +Hides data in audio by adding keyed pseudo-random noise (spread spectrum) +below the threshold of audibility. Designed to survive lossy compression +(MP3, AAC, Opus) better than LSB embedding, which requires lossless carriers. + +v4.4.0 changes: + - Per-channel independent embedding (preserves spatial stereo/surround mix) + - Adaptive chip length tiers (256/512/1024) for capacity vs robustness + - LFE channel skipping for 5.1+ layouts + - Round-robin bit distribution across embeddable channels + - v2 header format with backward-compatible v0 decode fallback + +How it works: + Each payload bit is "spread" over chip_length audio samples using a unique + ChaCha20-derived chip sequence. 
A '1' bit adds the chip pattern; a '0' bit + subtracts it. On extraction, correlating the stego audio against the same + chip sequence recovers each bit. + +v2 data layout in the carrier: + Header (20 bytes, channel 0 only, chip=1024 for robustness): + [4B magic AUDS] [1B version=0x02] [1B chip_tier] [1B num_channels] + [1B flags] [4B length x3 copies] + Payload (round-robin across embeddable channels, chip=tier-selected): + RS-encoded payload bits distributed: bit 0→ch0, bit 1→ch1, ... + + Legacy v0 layout (backward compat): + [4B magic AUDS] [4B length x3 copies] [RS-encoded payload] — all mono + +Error correction: + The raw payload is protected with Reed-Solomon coding (AUDIO_SS_RS_NSYM + parity symbols per 255-byte block) so that bit errors introduced by + compression or DAC/ADC round-trips can be corrected transparently. + +Requires: soundfile, numpy, cryptography, reedsolo (optional but recommended) +""" + +from __future__ import annotations + +import io +import struct + +import numpy as np +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms + +from .constants import ( + AUDIO_LFE_CHANNEL_INDEX, + AUDIO_LFE_MIN_CHANNELS, + AUDIO_MAGIC_SPREAD, + AUDIO_SS_AMPLITUDE, + AUDIO_SS_CHIP_LENGTH, + AUDIO_SS_CHIP_LENGTHS, + AUDIO_SS_DEFAULT_CHIP_TIER, + AUDIO_SS_HEADER_VERSION, + AUDIO_SS_RS_NSYM, + EMBED_MODE_AUDIO_SPREAD, +) +from .debug import debug +from .exceptions import AudioCapacityError, AudioError +from .models import AudioCapacityInfo, AudioEmbedStats + +# Lazy import for soundfile +try: + import soundfile as sf + + HAS_SOUNDFILE = True +except ImportError: + HAS_SOUNDFILE = False + sf = None # type: ignore[assignment] + +# Lazy import for reedsolo +try: + from reedsolo import ReedSolomonError, RSCodec + + HAS_REEDSOLO = True +except ImportError: + HAS_REEDSOLO = False + RSCodec = None # type: ignore[assignment,misc] + ReedSolomonError = None # type: ignore[assignment,misc] + + +# 
# Header sizes
_V0_HEADER_SIZE = 16  # Legacy: 4B magic + 3x4B length
_V2_HEADER_SIZE = 20  # v2: 4B magic + 1B ver + 1B tier + 1B nch + 1B flags + 3x4B length
_MAGIC_SIZE = 4
_LENGTH_COPIES = 3

# Header chip length — always 1024 for maximum robustness regardless of payload tier
_HEADER_CHIP_LENGTH = 1024

# v2 header flags
_FLAG_LFE_SKIPPED = 0x01

# Progress reporting interval (every N bits)
_PROGRESS_INTERVAL = 500


# =============================================================================
# PROGRESS REPORTING
# =============================================================================


def _write_progress(
    progress_file: str | None, current: int, total: int, phase: str = "embedding"
) -> None:
    """
    Write a JSON progress snapshot for frontend polling (best effort).

    The snapshot is written to a sibling ``<progress_file>.tmp`` and then
    moved over ``progress_file`` with ``os.replace`` so a reader polling the
    file never observes a half-written document.

    Args:
        progress_file: Destination path, or None to disable reporting.
        current: Units completed so far.
        total: Total units; ``percent`` is reported as 0 when ``total <= 0``.
        phase: Free-form phase label stored under ``"phase"``.
    """
    if progress_file is None:
        return

    import json
    import os

    tmp_path = f"{progress_file}.tmp"
    try:
        snapshot = {
            "current": current,
            "total": total,
            "percent": round((current / total) * 100, 1) if total > 0 else 0,
            "phase": phase,
        }
        with open(tmp_path, "w") as f:
            json.dump(snapshot, f)
        os.replace(tmp_path, progress_file)
    except Exception:
        # Don't let progress writing break encoding; just tidy up the
        # temporary file if it was created.
        try:
            os.remove(tmp_path)
        except OSError:
            pass


# =============================================================================
# REED-SOLOMON
# =============================================================================


def _rs_encode(data: bytes) -> bytes:
    """
    Wrap data in Reed-Solomon error correction.

    Uses ``RSCodec(AUDIO_SS_RS_NSYM)``. Returns ``data`` unchanged when
    reedsolo is unavailable or no parity symbols are configured, matching
    the size model used by ``_rs_encoded_size``.
    """
    if not HAS_REEDSOLO or AUDIO_SS_RS_NSYM <= 0:
        return data
    rs = RSCodec(AUDIO_SS_RS_NSYM)
    return bytes(rs.encode(data))


def _rs_decode(data: bytes) -> bytes | None:
    """
    Decode Reed-Solomon protected data.

    Returns:
        The corrected payload bytes, ``data`` unchanged when Reed-Solomon is
        disabled (reedsolo missing or ``AUDIO_SS_RS_NSYM <= 0``), or None if
        the codec raises ``ReedSolomonError``.
    """
    if not HAS_REEDSOLO or AUDIO_SS_RS_NSYM <= 0:
        return data
    rs = RSCodec(AUDIO_SS_RS_NSYM)
    try:
        decoded, _, errata_pos = rs.decode(data)
        if errata_pos:
            debug.print(f"RS corrected {len(errata_pos)} byte errors")
        return bytes(decoded)
    except ReedSolomonError as e:
        debug.print(f"RS decode failed (too many errors): {e}")
        return None


def _rs_encoded_size(data_length: int) -> int:
    """
    Calculate the RS-encoded size for a given data length.

    Models the codec as splitting the data into blocks of
    ``255 - AUDIO_SS_RS_NSYM`` bytes and appending ``AUDIO_SS_RS_NSYM``
    parity bytes to each block (the last block may be short).
    """
    if not HAS_REEDSOLO or AUDIO_SS_RS_NSYM <= 0:
        return data_length
    data_block_size = 255 - AUDIO_SS_RS_NSYM
    num_blocks = (data_length + data_block_size - 1) // data_block_size
    return data_length + num_blocks * AUDIO_SS_RS_NSYM


# =============================================================================
# CHANNEL MANAGEMENT
# =============================================================================


def _embeddable_channels(num_channels: int) -> list[int]:
    """
    Return the list of channel indices suitable for embedding.

    Layouts with fewer than ``AUDIO_LFE_MIN_CHANNELS`` channels use every
    channel. Larger layouts skip ``AUDIO_LFE_CHANNEL_INDEX`` (the LFE
    channel, which the original notes describe as bandlimited and therefore
    a poor spread spectrum carrier).
    """
    if num_channels < AUDIO_LFE_MIN_CHANNELS:
        return list(range(num_channels))
    return [i for i in range(num_channels) if i != AUDIO_LFE_CHANNEL_INDEX]


# =============================================================================
# CHIP SEQUENCE GENERATION (ChaCha20 CSPRNG)
# =============================================================================


def _normalize_seed(seed: bytes) -> bytes:
    """Return a 32-byte key: SHA-256 of short seeds, truncation of long ones."""
    if len(seed) < 32:
        import hashlib

        return hashlib.sha256(seed).digest()
    return seed[:32]


def _chips_from_keystream(key: bytes, nonce: bytes, length: int) -> np.ndarray:
    """Turn ``length`` ChaCha20 keystream bytes into a bipolar ±1 float64 array."""
    cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
    # Encrypting zeros yields the raw keystream.
    random_bytes = cipher.encryptor().update(b"\x00" * length)
    raw = np.frombuffer(random_bytes, dtype=np.uint8)
    # Bytes below 128 map to -1, the rest to +1.
    return np.where(raw < 128, np.float64(-1.0), np.float64(1.0))


def _generate_chip_sequence(
    seed: bytes, chip_index: int, length: int, channel_index: int = 0
) -> np.ndarray:
    """
    Generate a pseudo-random chip sequence for spread spectrum embedding.

    ChaCha20 is keyed by ``seed`` (normalised to 32 bytes) and given a
    16-byte nonce made of ``channel_index`` (4 bytes, big-endian) followed
    by ``chip_index`` (12 bytes, big-endian), so every (channel, bit) pair
    gets its own deterministic spreading code. With ``channel_index=0`` the
    nonce equals the legacy 16-byte big-endian ``chip_index`` encoding for
    any ``chip_index`` that fits in 12 bytes.

    Args:
        seed: Key material; hashed with SHA-256 if shorter than 32 bytes,
            truncated if longer.
        chip_index: Index of the bit being embedded.
        length: Number of samples in the chip.
        channel_index: Audio channel index (0 for legacy/header use).

    Returns:
        Float64 numpy array of ``length`` elements, each -1.0 or +1.0.
    """
    nonce = channel_index.to_bytes(4, byteorder="big") + chip_index.to_bytes(12, byteorder="big")
    return _chips_from_keystream(_normalize_seed(seed), nonce, length)


def _generate_chip_sequence_v0(seed: bytes, chip_index: int, length: int) -> np.ndarray:
    """Legacy v0 chip sequence generation — chip_index fills entire 16-byte nonce."""
    nonce = chip_index.to_bytes(16, byteorder="big")
    return _chips_from_keystream(_normalize_seed(seed), nonce, length)
def _bytes_to_bits(data: bytes) -> list[int]:
    """Expand each byte of ``data`` into eight 0/1 ints, most significant bit first."""
    return [(value >> shift) & 1 for value in data for shift in range(7, -1, -1)]


def _bits_to_bytes(bits: list[int]) -> bytes:
    """
    Pack 0/1 ints into bytes, most significant bit first.

    Only complete groups of eight bits are packed; a trailing partial group
    is dropped.
    """
    packed = bytearray()
    whole = len(bits) - len(bits) % 8
    for start in range(0, whole, 8):
        value = 0
        for bit in bits[start : start + 8]:
            value = (value << 1) | bit
        packed.append(value)
    return bytes(packed)


# =============================================================================
# MAJORITY VOTING
# =============================================================================


def _majority_vote_length(length_bytes: bytes) -> int | None:
    """
    Recover the payload length from three redundant big-endian uint32 copies.

    The first 12 bytes are read as three copies; a value shared by at least
    two copies is returned. Returns None when fewer than 12 bytes are given
    or when all three copies differ.
    """
    if len(length_bytes) < 12:
        return None

    copies = list(struct.unpack(">III", length_bytes[:12]))

    debug.print(f"Length copies for majority vote: {copies}")

    first, second, third = copies
    if first in (second, third):
        return first
    if second == third:
        return second

    debug.print("Majority vote failed: all three length copies disagree")
    return None


# =============================================================================
# HEADER CONSTRUCTION & PARSING
# =============================================================================


def _build_header_v0(data_length: int) -> bytes:
    """Build the legacy v0 header: magic followed by ``_LENGTH_COPIES`` uint32 lengths."""
    one_copy = struct.pack(">I", data_length)
    return AUDIO_MAGIC_SPREAD + one_copy * _LENGTH_COPIES


def _build_header_v2(
    data_length: int, chip_tier: int, num_embeddable_channels: int, lfe_skipped: bool
) -> bytes:
    """
    Build the v2 header.

    Layout:
        [magic] [1B version] [1B chip_tier] [1B num_channels] [1B flags]
        [big-endian uint32 length, repeated _LENGTH_COPIES times]

    ``flags`` carries ``_FLAG_LFE_SKIPPED`` when ``lfe_skipped`` is true.
    """
    flag_byte = _FLAG_LFE_SKIPPED if lfe_skipped else 0
    fields = struct.pack(
        "BBBB", AUDIO_SS_HEADER_VERSION, chip_tier, num_embeddable_channels, flag_byte
    )
    lengths = struct.pack(">I", data_length) * _LENGTH_COPIES
    return AUDIO_MAGIC_SPREAD + fields + lengths


def _parse_header(
    header_bytes: bytes,
) -> tuple[bool, int, int | None, int | None, int | None, bool]:
    """
    Parse and validate the spread spectrum header, dispatching on version.

    Byte 4 (right after the magic) selects the format: when it equals
    ``AUDIO_SS_HEADER_VERSION`` the header is read as v2, otherwise as the
    legacy v0 layout in which byte 4 already belongs to the first length copy.

    Returns:
        ``(magic_valid, version, payload_length, chip_tier, num_channels,
        lfe_skipped)``. ``version`` is 0 for legacy and 2 for v2.
        ``payload_length`` is None when the majority vote fails.
        ``chip_tier`` and ``num_channels`` are None for v0. A too-short or
        magic-mismatched input yields ``magic_valid=False``.
    """
    if len(header_bytes) < _V0_HEADER_SIZE:
        return False, 0, None, None, None, False

    magic = header_bytes[:_MAGIC_SIZE]
    if magic != AUDIO_MAGIC_SPREAD:
        debug.print(f"Magic mismatch: got {magic!r}, expected {AUDIO_MAGIC_SPREAD!r}")
        return False, 0, None, None, None, False

    if header_bytes[4] != AUDIO_SS_HEADER_VERSION:
        # v0 header — 16 bytes, byte 4 is part of the first length copy
        length = _majority_vote_length(header_bytes[_MAGIC_SIZE:_V0_HEADER_SIZE])
        debug.print(f"v0 header: length={length}")
        return True, 0, length, None, None, False

    # v2 header — 20 bytes
    if len(header_bytes) < _V2_HEADER_SIZE:
        return False, 2, None, None, None, False

    chip_tier, num_channels, flags = header_bytes[5], header_bytes[6], header_bytes[7]
    lfe_skipped = bool(flags & _FLAG_LFE_SKIPPED)

    length = _majority_vote_length(header_bytes[8:20])
    debug.print(
        f"v2 header: tier={chip_tier}, channels={num_channels}, "
        f"lfe_skip={lfe_skipped}, length={length}"
    )
    return True, 2, length, chip_tier, num_channels, lfe_skipped
def _extract_channel(
    channel_samples: np.ndarray,
    num_bits: int,
    seed: bytes,
    chip_length: int,
    channel_index: int,
    offset: int = 0,
) -> list[int]:
    """
    Recover up to ``num_bits`` bits from one channel by correlation.

    Bit ``i`` occupies samples ``[offset + i*chip_length, +chip_length)``.
    Each window is correlated against the chip sequence for
    ``(channel_index, i)``; a positive dot product decodes as 1, anything
    else as 0. Extraction stops early when a window would run past the end
    of ``channel_samples``.
    """
    recovered: list[int] = []
    for bit_index in range(num_bits):
        lo = offset + bit_index * chip_length
        hi = lo + chip_length
        if hi > len(channel_samples):
            break

        code = _generate_chip_sequence(seed, bit_index, chip_length, channel_index=channel_index)
        score = np.dot(channel_samples[lo:hi], code)
        recovered.append(1 if score > 0 else 0)

    return recovered


def _distribute_bits_round_robin(
    bits: list[int], num_channels: int
) -> list[list[int]]:
    """
    Deal bits out to channels in rotation.

    Bit ``k`` goes to channel ``k % num_channels``. Returns one list per
    channel, in channel order.
    """
    buckets: list[list[int]] = [[] for _ in range(num_channels)]
    for position, value in enumerate(bits):
        target = buckets[position % num_channels]
        target.append(value)
    return buckets


def _collect_bits_round_robin(per_channel_bits: list[list[int]]) -> list[int]:
    """
    Interleave per-channel bit lists back into a single stream.

    Inverse of ``_distribute_bits_round_robin``: takes the first bit of every
    channel in channel order, then the second, and so on, skipping channels
    that have run out.
    """
    if not per_channel_bits:
        return []

    rounds = max(len(channel) for channel in per_channel_bits)
    merged: list[int] = []
    for round_index in range(rounds):
        merged.extend(
            channel[round_index] for channel in per_channel_bits if round_index < len(channel)
        )
    return merged


# =============================================================================
# LEGACY MONO CORE (v0 compat)
# =============================================================================


def _embed_spread_spectrum_v0(
    samples: np.ndarray,
    bits: list[int],
    seed: bytes,
    amplitude: float,
    offset: int = 0,
    progress_file: str | None = None,
) -> np.ndarray:
    """
    Legacy v0 mono embedding using the original nonce layout.

    Modifies ``samples`` in place (and returns it): for bit ``i`` the scaled
    v0 chip sequence is added for a 1 and subtracted for a 0 over a window
    of ``AUDIO_SS_CHIP_LENGTH`` samples. Stops with a debug warning if the
    samples run out. Progress is reported every ``_PROGRESS_INTERVAL`` bits
    when ``progress_file`` is set.
    """
    bit_count = len(bits)
    for index, bit in enumerate(bits):
        lo = offset + index * AUDIO_SS_CHIP_LENGTH
        hi = lo + AUDIO_SS_CHIP_LENGTH

        if hi > len(samples):
            debug.print(f"Warning: ran out of samples at bit {index}/{bit_count}")
            break

        delta = amplitude * _generate_chip_sequence_v0(seed, index, AUDIO_SS_CHIP_LENGTH)
        if bit == 1:
            samples[lo:hi] += delta
        else:
            samples[lo:hi] -= delta

        if progress_file and index % _PROGRESS_INTERVAL == 0:
            _write_progress(progress_file, index, bit_count, "embedding")

    return samples
def _rs_max_data_bytes(encoded_budget: int, nsym: int) -> int:
    """
    Largest data length whose Reed-Solomon encoding fits in ``encoded_budget`` bytes.

    Exact inverse of the block model used by ``_rs_encoded_size``: each
    255-byte block carries ``255 - nsym`` data bytes, and a trailing partial
    block still costs a full ``nsym`` parity bytes.
    """
    block_data = 255 - nsym
    if encoded_budget <= 0 or block_data <= 0:
        return 0
    full_blocks, remainder = divmod(encoded_budget, 255)
    return full_blocks * block_data + max(0, remainder - nsym)


def calculate_audio_spread_capacity(
    audio_data: bytes, chip_tier: int = AUDIO_SS_DEFAULT_CHIP_TIER
) -> AudioCapacityInfo:
    """
    Calculate embedding capacity for spread spectrum audio steganography.

    The v2 header occupies the first ``_V2_HEADER_SIZE * 8 * _HEADER_CHIP_LENGTH``
    frames; the remaining frames of every embeddable channel carry one
    payload bit per ``chip_length`` samples. The byte capacity is then
    reduced by the Reed-Solomon parity overhead (when reedsolo is available
    and ``AUDIO_SS_RS_NSYM > 0``), computed exactly so that a payload of
    ``usable_capacity_bytes`` really fits after encoding.

    Args:
        audio_data: Raw bytes of an audio file readable by soundfile.
        chip_tier: Key into ``AUDIO_SS_CHIP_LENGTHS``; unknown tiers fall
            back to ``AUDIO_SS_CHIP_LENGTH``.

    Returns:
        AudioCapacityInfo with capacity details (capacity 0 when the audio
        is too short to hold the header).

    Raises:
        AudioError: If soundfile is missing or the audio cannot be read.
    """
    if not HAS_SOUNDFILE:
        raise AudioError("soundfile is required for audio spread spectrum steganography")

    try:
        info = sf.info(io.BytesIO(audio_data))
    except Exception as e:
        raise AudioError(f"Failed to read audio file: {e}") from e

    chip_length = AUDIO_SS_CHIP_LENGTHS.get(chip_tier, AUDIO_SS_CHIP_LENGTH)
    num_embed_ch = len(_embeddable_channels(info.channels))
    num_frames = info.frames
    duration = num_frames / info.samplerate

    # Fields shared by both the "too short" and the normal result.
    common = {
        "total_samples": num_frames * info.channels,
        "embed_mode": EMBED_MODE_AUDIO_SPREAD,
        "sample_rate": info.samplerate,
        "duration_seconds": duration,
        "chip_tier": chip_tier,
        "chip_length": chip_length,
        "embeddable_channels": num_embed_ch,
        "total_channels": info.channels,
    }

    # Header always uses _HEADER_CHIP_LENGTH samples per bit; the payload
    # starts at the same frame offset in every channel.
    header_samples = _V2_HEADER_SIZE * 8 * _HEADER_CHIP_LENGTH
    samples_per_channel_for_payload = num_frames - header_samples
    if samples_per_channel_for_payload <= 0:
        return AudioCapacityInfo(usable_capacity_bytes=0, **common)

    # Payload bit slots across all embeddable channels, in whole bytes.
    bits_per_channel = samples_per_channel_for_payload // chip_length
    total_payload_bytes = (bits_per_channel * num_embed_ch) // 8

    # Account for RS overhead (exact, including the trailing partial block).
    if HAS_REEDSOLO and AUDIO_SS_RS_NSYM > 0:
        usable_bytes = _rs_max_data_bytes(total_payload_bytes, AUDIO_SS_RS_NSYM)
    else:
        usable_bytes = total_payload_bytes

    debug.print(
        f"Spread capacity (tier {chip_tier}, chip={chip_length}): {usable_bytes} bytes "
        f"({num_frames} frames, {num_embed_ch}/{info.channels} ch, "
        f"{info.samplerate} Hz, {duration:.2f}s)"
    )

    return AudioCapacityInfo(usable_capacity_bytes=usable_bytes, **common)
+ """ + if not HAS_SOUNDFILE: + raise AudioError("soundfile is required for audio spread spectrum steganography") + + debug.print(f"Spread spectrum v2 embedding {len(data)} bytes, tier={chip_tier}") + + try: + # 1. Read carrier audio as float64 2D (frames x channels) + buf = io.BytesIO(carrier_audio) + samples, sample_rate = sf.read(buf, dtype="float64", always_2d=True) + num_frames, channels = samples.shape + duration = num_frames / sample_rate + + buf.seek(0) + carrier_info = sf.info(buf) + output_subtype = carrier_info.subtype if carrier_info.subtype else "PCM_16" + + # 2. Build embeddable channels list + embed_ch = _embeddable_channels(channels) + num_embed_ch = len(embed_ch) + lfe_skipped = len(embed_ch) < channels + chip_length = AUDIO_SS_CHIP_LENGTHS.get(chip_tier, AUDIO_SS_CHIP_LENGTH) + + debug.print( + f"Carrier: {sample_rate} Hz, {channels} ch ({num_embed_ch} embeddable), " + f"{num_frames} frames, {duration:.2f}s, chip={chip_length}" + ) + + # 3. RS-encode the payload + rs_data = _rs_encode(data) + debug.print(f"RS-encoded payload: {len(data)} -> {len(rs_data)} bytes") + + # 4. Build v2 header + header = _build_header_v2(len(data), chip_tier, num_embed_ch, lfe_skipped) + header_bits = _bytes_to_bits(header) + + # 5. Embed header in channel 0 only, chip=1024, starting at sample 0 + header_samples_needed = len(header_bits) * _HEADER_CHIP_LENGTH + if header_samples_needed > num_frames: + raise AudioCapacityError(len(header), num_frames // _HEADER_CHIP_LENGTH // 8) + + _embed_channel( + samples[:, embed_ch[0]], + header_bits, + seed, + AUDIO_SS_AMPLITUDE, + _HEADER_CHIP_LENGTH, + channel_index=0, + offset=0, + ) + + # 6. Calculate payload offset (in samples, same for all channels) + payload_offset = header_samples_needed + + # 7. 
Convert payload to bits and distribute round-robin + payload_bits = _bytes_to_bits(rs_data) + total_payload_bits = len(payload_bits) + + # Check capacity across all channels + payload_samples_available = num_frames - payload_offset + bits_per_channel = payload_samples_available // chip_length + total_capacity_bits = bits_per_channel * num_embed_ch + + if total_payload_bits > total_capacity_bits: + max_bytes = (total_capacity_bits // 8) + if HAS_REEDSOLO and AUDIO_SS_RS_NSYM > 0: + max_bytes = int(max_bytes * (255 - AUDIO_SS_RS_NSYM) / 255) + raise AudioCapacityError(len(data), max_bytes) + + per_channel_bits = _distribute_bits_round_robin(payload_bits, num_embed_ch) + + debug.print( + f"Distributing {total_payload_bits} payload bits across {num_embed_ch} channels " + f"(~{total_payload_bits // num_embed_ch} bits/ch)" + ) + + _write_progress(progress_file, 0, total_payload_bits, "embedding") + + # 8. Embed payload bits per channel + total_embedded = 0 + for ch_idx, ch in enumerate(embed_ch): + bits_for_ch = per_channel_bits[ch_idx] + if not bits_for_ch: + continue + count = _embed_channel( + samples[:, ch], + bits_for_ch, + seed, + AUDIO_SS_AMPLITUDE, + chip_length, + channel_index=ch, + offset=payload_offset, + ) + total_embedded += count + + if progress_file: + _write_progress(progress_file, total_embedded, total_payload_bits, "embedding") + + # 9. Clip to [-1.0, 1.0] + np.clip(samples, -1.0, 1.0, out=samples) + + _write_progress(progress_file, total_payload_bits, total_payload_bits, "saving") + + # 10. 
def extract_from_audio_spread(
    audio_data: bytes,
    seed: bytes,
    progress_file: str | None = None,
) -> bytes | None:
    """
    Extract hidden data from audio using spread spectrum correlation.

    The header is read from the first embeddable channel with
    ``_HEADER_CHIP_LENGTH`` and parsed by ``_parse_header``:
      - v2: per-channel extraction with round-robin reassembly
      - v0 (or no valid magic): legacy mono extraction paths

    Header fields come from the (possibly corrupted or foreign) audio, so
    the channel count is validated explicitly before it is used.

    Args:
        audio_data: Raw bytes of the stego audio file.
        seed: Key material (must match the seed used for embedding).
        progress_file: Optional path for frontend progress polling.

    Returns:
        Extracted payload bytes, or None if extraction fails for any reason
        (this function does not raise).
    """
    if not HAS_SOUNDFILE:
        debug.print("soundfile not available for spread spectrum extraction")
        return None

    debug.print(f"Spread spectrum extracting from {len(audio_data)} byte audio")

    try:
        # 1. Read stego audio as float64 2D
        samples, sample_rate = sf.read(io.BytesIO(audio_data), dtype="float64", always_2d=True)
        num_frames, channels = samples.shape

        debug.print(f"Stego audio: {sample_rate} Hz, {channels} ch, {num_frames} frames")

        # 2. Extract header from the first embeddable channel with chip=1024.
        #    We always pull a full v2 header's worth of bits (20 bytes).
        header_bits_needed = _V2_HEADER_SIZE * 8
        header_samples_needed = header_bits_needed * _HEADER_CHIP_LENGTH

        embed_ch = _embeddable_channels(channels)
        ch0_samples = samples[:, embed_ch[0]]

        if header_samples_needed > len(ch0_samples):
            debug.print("Audio too short to contain spread spectrum header")
            return None

        _write_progress(progress_file, 0, header_bits_needed, "extracting header")

        # Header chips are always generated with channel_index=0.
        header_bits = _extract_channel(
            ch0_samples,
            header_bits_needed,
            seed,
            _HEADER_CHIP_LENGTH,
            channel_index=0,
            offset=0,
        )

        if len(header_bits) < header_bits_needed:
            debug.print(f"Short header extraction: {len(header_bits)}/{header_bits_needed}")
            return None

        header_bytes = _bits_to_bytes(header_bits)

        # 3. Parse header (version-dispatching)
        magic_valid, version, data_length, chip_tier, num_ch, lfe_skipped = _parse_header(
            header_bytes
        )

        if not magic_valid:
            # Try legacy v0 with mono extraction
            debug.print("v2 header not found, trying legacy v0 mono extraction")
            return _extract_v0_fallback(samples, seed, progress_file)

        if data_length is None:
            debug.print("Could not determine payload length (majority vote failed)")
            return None

        if version == 0:
            # v0 header detected — use legacy mono path
            debug.print(f"v0 header detected, payload_length={data_length}")
            return _extract_v0_with_known_length(samples, seed, data_length, progress_file)

        # v2 extraction
        debug.print(
            f"v2 header: payload_length={data_length}, tier={chip_tier}, "
            f"channels={num_ch}, lfe_skip={lfe_skipped}"
        )

        if not num_ch:
            # A zero channel count would otherwise surface as a
            # ZeroDivisionError in the round-robin arithmetic below.
            debug.print("Invalid v2 header: channel count is zero")
            return None

        chip_length = AUDIO_SS_CHIP_LENGTHS.get(chip_tier, AUDIO_SS_CHIP_LENGTH)
        payload_offset = header_samples_needed

        # 4. Calculate RS-encoded size
        rs_encoded_size = _rs_encoded_size(data_length)
        total_payload_bits = rs_encoded_size * 8

        # Sanity check
        payload_samples_available = num_frames - payload_offset
        bits_per_channel = payload_samples_available // chip_length
        max_capacity_bits = bits_per_channel * num_ch

        if total_payload_bits > max_capacity_bits:
            debug.print(
                f"Payload too large for carrier: need {total_payload_bits} bits, "
                f"have {max_capacity_bits}"
            )
            return None

        # 5. Round-robin quota: every channel gets `base` bits and the first
        #    `extra` channels get one more.
        base, extra = divmod(total_payload_bits, num_ch)
        bits_per_ch_needed = [base + (1 if idx < extra else 0) for idx in range(num_ch)]

        debug.print(
            f"Extracting {total_payload_bits} payload bits from {num_ch} channels "
            f"(chip={chip_length})"
        )

        _write_progress(progress_file, 0, total_payload_bits, "extracting")

        # 6. Extract per-channel bits
        per_channel_bits: list[list[int]] = []
        total_extracted = 0

        for ch_idx in range(num_ch):
            # Map ch_idx back to actual channel index
            ch = embed_ch[ch_idx] if ch_idx < len(embed_ch) else ch_idx
            if ch >= channels:
                debug.print(
                    f"Header expects {num_ch} channels but audio has only {channels}"
                )
                return None

            ch_bits = _extract_channel(
                samples[:, ch],
                bits_per_ch_needed[ch_idx],
                seed,
                chip_length,
                channel_index=ch,
                offset=payload_offset,
            )
            per_channel_bits.append(ch_bits)
            total_extracted += len(ch_bits)

            if progress_file:
                _write_progress(progress_file, total_extracted, total_payload_bits, "extracting")

        # 7. Reassemble bits in round-robin order
        all_payload_bits = _collect_bits_round_robin(per_channel_bits)

        if len(all_payload_bits) < total_payload_bits:
            debug.print(
                f"Short extraction: {len(all_payload_bits)}/{total_payload_bits} bits"
            )
            return None

        _write_progress(progress_file, total_payload_bits, total_payload_bits, "decoding")

        # 8. Convert to bytes and RS-decode
        rs_payload = _bits_to_bytes(all_payload_bits[:total_payload_bits])

        if len(rs_payload) < rs_encoded_size:
            debug.print(f"RS payload too short: {len(rs_payload)}/{rs_encoded_size} bytes")
            return None

        rs_payload = rs_payload[:rs_encoded_size]
        decoded = _rs_decode(rs_payload)
        if decoded is None:
            debug.print("Reed-Solomon decoding failed — data too corrupted")
            return None

        if len(decoded) < data_length:
            debug.print(f"Decoded data shorter than expected: {len(decoded)}/{data_length}")
            return None

        payload = decoded[:data_length]
        debug.print(f"Spread spectrum v2 extraction successful: {len(payload)} bytes")
        return payload

    except Exception as e:
        # Extraction is best-effort: unreadable audio or any decoder failure
        # is logged and reported as "nothing found".
        debug.exception(e, "extract_from_audio_spread")
        return None
def _extract_v0_with_known_length(
    samples: np.ndarray, seed: bytes, data_length: int, progress_file: str | None = None
) -> bytes | None:
    """
    Legacy v0 extraction when the payload length is already known.

    Multi-channel input is averaged down to mono (a single channel is
    copied). The whole stream — header bits included, since v0 chip indices
    count from the start of the header — is then extracted, the header bytes
    are skipped, and the remainder is Reed-Solomon decoded.

    Returns:
        The first ``data_length`` decoded bytes, or None if the length is
        implausible for this carrier, samples run out, or decoding fails.
    """
    if samples.shape[1] > 1:
        mono = np.mean(samples, axis=1)
    else:
        mono = samples[:, 0].copy()

    total_samples = len(mono)

    # Sanity check: the claimed length must fit in the carrier after the header.
    max_payload = (total_samples // AUDIO_SS_CHIP_LENGTH) // 8 - _V0_HEADER_SIZE
    if not 1 <= data_length <= max_payload:
        debug.print(f"v0: invalid payload length {data_length} (max {max_payload})")
        return None

    rs_encoded_size = _rs_encoded_size(data_length)
    total_bits_needed = (_V0_HEADER_SIZE + rs_encoded_size) * 8
    total_samples_needed = total_bits_needed * AUDIO_SS_CHIP_LENGTH

    if total_samples_needed > total_samples:
        debug.print(f"v0: need {total_samples_needed} samples, have {total_samples}")
        return None

    debug.print(f"v0 extraction: {total_bits_needed} bits ({data_length}B payload)")

    _write_progress(progress_file, 0, total_bits_needed, "extracting")

    stream_bits = _extract_spread_spectrum_v0(
        mono, total_bits_needed, seed, offset=0, progress_file=progress_file
    )
    if len(stream_bits) < total_bits_needed:
        return None

    _write_progress(progress_file, total_bits_needed, total_bits_needed, "decoding")

    stream_bytes = _bits_to_bytes(stream_bits)
    payload_end = _V0_HEADER_SIZE + rs_encoded_size
    rs_payload = stream_bytes[_V0_HEADER_SIZE:payload_end]
    if len(rs_payload) < rs_encoded_size:
        return None

    decoded = _rs_decode(rs_payload)
    if decoded is None:
        debug.print("v0: Reed-Solomon decoding failed")
        return None

    if len(decoded) < data_length:
        return None

    payload = decoded[:data_length]
    debug.print(f"v0 extraction successful: {len(payload)} bytes")
    return payload
def chi_square_analysis(channel_data: np.ndarray) -> float:
    """Chi-square "pairs of values" test on the LSB distribution of one channel.

    Pixel values are grouped into the 128 pairs (2i, 2i+1). LSB embedding with
    random data pushes the two counts of every pair toward equality, so the
    statistic measures how far the observed counts are from an exact 50/50
    split (H0: "pairs are equalized", i.e. consistent with embedding).

    Args:
        channel_data: Array of non-negative integer pixel values (uint8 in
            practice). It is flattened before counting, and only values
            0-255 take part in the test.

    Returns:
        p-value of the chi-square statistic.
        HIGH p-value (close to 1.0) → pairs are equalized → suspicious.
        LOW p-value (close to 0.0) → pairs are not equalized → less suspicious.
        1.0 is returned when no pair contains any sample.
    """
    from scipy.stats import chi2

    # Occurrences of each value 0-255 (anything above 255 is ignored).
    histogram = np.bincount(channel_data.ravel(), minlength=256)[:256]

    even_counts = histogram[0::2].astype(np.float64)
    odd_counts = histogram[1::2].astype(np.float64)
    pair_totals = even_counts + odd_counts

    # Empty pairs carry no information and do not count as a degree of freedom.
    occupied = pair_totals > 0
    degrees_of_freedom = int(np.count_nonzero(occupied))
    if degrees_of_freedom == 0:
        return 1.0  # No data to analyze

    # With expected = total / 2 for both halves of a pair,
    #   (even - expected)^2 / expected + (odd - expected)^2 / expected
    # simplifies to (even - odd)^2 / total.
    imbalance = even_counts[occupied] - odd_counts[occupied]
    chi_sq = float(np.sum(imbalance**2 / pair_totals[occupied]))

    # Upper-tail probability. A small statistic (equalized pairs) gives a HIGH
    # p-value, which is the suspicious case. The survival function is used
    # instead of 1 - cdf because the subtraction cancels to exactly 0.0 for
    # strongly unbalanced histograms.
    return float(chi2.sf(chi_sq, degrees_of_freedom))
+ """ + data = channel_data.ravel().astype(np.int16) + n = len(data) + # Trim to multiple of block_size + n_blocks = n // block_size + if n_blocks < 10: + return 0.0 # Not enough data + + data = data[: n_blocks * block_size].reshape(n_blocks, block_size) + + def variation(block: np.ndarray) -> float: + """Sum of absolute differences between adjacent pixels.""" + return float(np.sum(np.abs(np.diff(block)))) + + def flip_positive(block: np.ndarray) -> np.ndarray: + """F1: flip LSB (0↔1, 2↔3, 4↔5, ...).""" + return block ^ 1 + + def flip_negative(block: np.ndarray) -> np.ndarray: + """F-1: flip LSB of (value - 1), i.e. -1↔0, 1↔2, 3↔4, ...""" + result = block.copy() + even_mask = (block % 2) == 0 + result[even_mask] -= 1 + result[~even_mask] += 1 + return result + + r_m = s_m = r_neg = s_neg = 0 + + for i in range(n_blocks): + block = data[i] + v_orig = variation(block) + + v_f1 = variation(flip_positive(block)) + if v_f1 > v_orig: + r_m += 1 + elif v_f1 < v_orig: + s_m += 1 + + v_fn1 = variation(flip_negative(block)) + if v_fn1 > v_orig: + r_neg += 1 + elif v_fn1 < v_orig: + s_neg += 1 + + # Estimate embedding rate using the RS quadratic formula + # d0 = R_m - S_m, d1 = R_{-m} - S_{-m} + # The embedding rate p satisfies: d(p/2) = d0, d(1 - p/2) = d1 + # Simplified estimator: p ≈ (R_m - S_m) / (R_{-m} - S_{-m}) divergence + d0 = r_m - s_m + d1 = r_neg - s_neg + + if n_blocks == 0: + return 0.0 + + # Use the simplified dual-statistic estimator + # In clean images: d0 ≈ d1 (both positive) + # In embedded images: d0 → 0 while d1 stays positive + if d1 == 0: + # Can't estimate — likely very embedded or degenerate + return 0.5 if d0 == 0 else 0.0 + + # Ratio-based estimate: how much has d0 dropped relative to d1 + ratio = d0 / d1 + if ratio >= 1.0: + return 0.0 # d0 ≥ d1 means no evidence of embedding + if ratio <= 0.0: + return 1.0 # d0 collapsed or inverted + + # Linear interpolation: ratio=1 → 0% embedded, ratio=0 → 100% embedded + estimate = 1.0 - ratio + return 
def check_image(image_data: bytes, mode: str = "lsb") -> dict:
    """Run steganalysis on an image and return a detectability assessment.

    The image is decoded, reduced to at most three channels (alpha is
    skipped), and each channel is passed through ``chi_square_analysis`` and
    ``rs_analysis``. ``assess_risk`` then maps the per-channel results to a
    single risk level.

    Args:
        image_data: Raw image bytes (PNG, BMP, etc.).
        mode: Analysis mode. "lsb" and "auto" are accepted; both run the same
            LSB analysis and the value is echoed back in the result.

    Returns:
        Dict with keys: risk, chi_square, rs, width, height, channels, mode.

    Raises:
        ValueError: If ``mode`` is neither "lsb" nor "auto".
    """
    if mode not in ("lsb", "auto"):
        raise ValueError(f"Unsupported steganalysis mode: {mode}. Use 'lsb' or 'auto'.")

    # The context manager closes the decoded image even if conversion or the
    # array copy fails. A converted copy is closed explicitly for the same
    # reason.
    with Image.open(io.BytesIO(image_data)) as source:
        width, height = source.size
        if source.mode in ("RGB", "RGBA", "L"):
            pixels = np.array(source)
        else:
            converted = source.convert("RGB")
            try:
                pixels = np.array(converted)
            finally:
                converted.close()

    if pixels.ndim == 2:
        # Grayscale: add a trailing channel axis so the loop below is uniform.
        channel_names = ["L"]
        pixels = pixels[:, :, np.newaxis]
    else:
        channel_names = ["R", "G", "B"]

    num_channels = min(pixels.shape[2], 3)  # Skip alpha

    chi_p_values = {}
    rs_estimates = {}

    for index in range(num_channels):
        name = channel_names[index]
        channel = pixels[:, :, index].ravel()
        chi_p_values[name] = chi_square_analysis(channel)
        rs_estimates[name] = rs_analysis(channel)

    result = SteganalysisResult(
        risk=assess_risk(chi_p_values, rs_estimates),
        chi_square=chi_p_values,
        rs=rs_estimates,
        width=width,
        height=height,
        channels=num_channels,
        mode=mode,
    )

    return {
        "risk": result.risk,
        "chi_square": result.chi_square,
        "rs": result.rs,
        "width": result.width,
        "height": result.height,
        "channels": result.channels,
        "mode": result.mode,
    }
+Without the key, you don't know where to look - security through obscurity +PLUS actual encryption of the payload. + +v3.0: Added DCT mode with scipy +v3.0.1: DCT output format options (PNG/JPEG, grayscale/color) +v3.2.0: Fixed overhead calculations after removing date field +""" + +import io +import struct +from typing import TYPE_CHECKING, Union + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms +from PIL import Image + +if TYPE_CHECKING: + from .dct_steganography import DCTEmbedStats + +from .constants import ( + EMBED_MODE_AUTO, + EMBED_MODE_DCT, + EMBED_MODE_LSB, + VALID_EMBED_MODES, +) +from .debug import debug +from .exceptions import CapacityError, EmbeddingError +from .models import EmbedStats, FilePayload + +# Progress reporting interval +PROGRESS_INTERVAL = 1000 # Write every N pixels for LSB + + +def _write_progress(progress_file: str | None, current: int, total: int, phase: str = "embedding"): + """Write progress to file for frontend polling.""" + if progress_file is None: + return + try: + import json + + with open(progress_file, "w") as f: + json.dump( + { + "current": current, + "total": total, + "percent": round((current / total) * 100, 1) if total > 0 else 0, + "phase": phase, + }, + f, + ) + except Exception: + pass # Don't let progress writing break encoding + + +# Lossless formats that preserve LSB data +LOSSLESS_FORMATS = {"PNG", "BMP", "TIFF"} + +# Format to extension mapping +FORMAT_TO_EXT = { + "PNG": "png", + "BMP": "bmp", + "TIFF": "tiff", +} + +# Extension to PIL format mapping +EXT_TO_FORMAT = { + "png": "PNG", + "bmp": "BMP", + "tiff": "TIFF", + "tif": "TIFF", +} + +# ============================================================================= +# OVERHEAD CONSTANTS +# ============================================================================= +# +# Every stego image has some overhead before the actual payload: +# +# The encrypted message format 
def _get_dct_module():
    """Return the ``dct_steganography`` module, importing it on first use.

    The import is deferred so that loading this module does not pull in the
    DCT implementation (and its dependencies) unless DCT mode is requested.
    The loaded module is cached in the module-level ``_dct_module``.
    """
    global _dct_module
    if _dct_module is not None:
        return _dct_module

    from . import dct_steganography as loaded

    _dct_module = loaded
    return _dct_module
def get_output_format(input_format: str | None) -> tuple[str, str]:
    """
    Pick the output format for a stego image from the carrier's format.

    Args:
        input_format: PIL format string of input image (e.g., 'JPEG', 'PNG')

    Returns:
        Tuple of (PIL format string, file extension) for output.
        A format listed in LOSSLESS_FORMATS (case-insensitive) is kept;
        anything else, including None or an empty string, becomes PNG.
    """
    debug.validate(
        input_format is None or isinstance(input_format, str), "Input format must be string or None"
    )

    normalized = input_format.upper() if input_format else None

    # Guard clause: no format, or a format that is not known to be lossless.
    if normalized not in LOSSLESS_FORMATS:
        debug.print(f"Input format {input_format} is lossy or unknown, defaulting to PNG")
        return "PNG", "png"

    extension = FORMAT_TO_EXT.get(normalized, "png")
    debug.print(f"Using lossless format: {normalized} -> .{extension}")
    return normalized, extension
+ + Args: + payload: Message string, raw bytes, FilePayload, or size in bytes + carrier_image: Carrier image bytes + bits_per_channel: Bits to use per color channel (1-2) + include_compression_estimate: Estimate compressed size + + Returns: + Dict with fits, capacity, usage info + """ + # Determine payload size + if isinstance(payload, int): + payload_size = payload + payload_data = None + elif isinstance(payload, str): + payload_data = payload.encode("utf-8") + payload_size = len(payload_data) + elif isinstance(payload, FilePayload): + payload_data = payload.data + filename_overhead = len(payload.filename.encode("utf-8")) if payload.filename else 0 + mime_overhead = len(payload.mime_type.encode("utf-8")) if payload.mime_type else 0 + payload_size = len(payload.data) + filename_overhead + mime_overhead + 5 + else: + payload_data = payload + payload_size = len(payload) + + capacity = calculate_capacity(carrier_image, bits_per_channel) + + # Estimate encrypted size with padding + # Padding adds 64-319 bytes, rounded up to 256-byte boundary + # Average case: ~190 bytes padding + estimated_padding = 190 + estimated_encrypted_size = payload_size + estimated_padding + ENCRYPTION_OVERHEAD + + compressed_estimate = None + if include_compression_estimate and payload_data is not None and len(payload_data) >= 64: + try: + import zlib + + compressed = zlib.compress(payload_data, level=6) + compressed_size = len(compressed) + 9 # Compression header + if compressed_size < payload_size: + compressed_estimate = compressed_size + estimated_encrypted_size = compressed_size + estimated_padding + ENCRYPTION_OVERHEAD + except Exception: + pass + + headroom = capacity - estimated_encrypted_size + fits = headroom >= 0 + usage_percent = (estimated_encrypted_size / capacity * 100) if capacity > 0 else 100.0 + + return { + "fits": fits, + "payload_size": payload_size, + "estimated_encrypted_size": estimated_encrypted_size, + "capacity": capacity, + "usage_percent": min(usage_percent, 
def calculate_capacity(image_data: bytes, bits_per_channel: int = 1) -> int:
    """
    Compute how many payload bytes an image can carry in LSB mode.

    The raw capacity is width * height * 3 channels * bits_per_channel bits,
    floored to whole bytes; ENCRYPTION_OVERHEAD is then subtracted and the
    result clamped at zero.

    Args:
        image_data: Image bytes
        bits_per_channel: Bits to use per color channel

    Returns:
        Maximum bytes that can be embedded (minus overhead)
    """
    debug.validate(
        bits_per_channel in (1, 2), f"bits_per_channel must be 1 or 2, got {bits_per_channel}"
    )

    # Only the dimensions are needed, so the image is closed straight away.
    with Image.open(io.BytesIO(image_data)) as carrier:
        width, height = carrier.size

    raw_bits = width * height * 3 * bits_per_channel
    usable = max(0, raw_bits // 8 - ENCRYPTION_OVERHEAD)

    debug.print(f"LSB capacity: {usable} bytes at {bits_per_channel} bit(s)/channel")
    return usable
def will_fit_by_mode(
    payload: str | bytes | FilePayload | int,
    carrier_image: bytes,
    embed_mode: str = EMBED_MODE_LSB,
    bits_per_channel: int = 1,
) -> dict:
    """
    Check if payload fits in specified mode.

    Any mode other than DCT is delegated to ``will_fit`` (LSB). For DCT the
    payload size is estimated the same way ``will_fit`` does it, so the two
    modes report comparable numbers for the same payload.

    Args:
        payload: Message, bytes, FilePayload, or size in bytes
        carrier_image: Carrier image bytes
        embed_mode: 'lsb' or 'dct'
        bits_per_channel: For LSB mode

    Returns:
        Dict with fits, capacity, usage info. In DCT mode the keys are
        fits, payload_size, estimated_encrypted_size, capacity,
        usage_percent, headroom and mode. If DCT support is missing, only
        fits (False), error and mode are returned.
    """
    if embed_mode != EMBED_MODE_DCT:
        return will_fit(payload, carrier_image, bits_per_channel)

    if not has_dct_support():
        return {"fits": False, "error": "scipy not available", "mode": EMBED_MODE_DCT}

    if isinstance(payload, int):
        payload_size = payload
    elif isinstance(payload, str):
        payload_size = len(payload.encode("utf-8"))
    elif isinstance(payload, FilePayload):
        # Mirror will_fit(): a file payload also carries its filename, its
        # MIME type and 5 bytes of framing, not just the raw data.
        filename_overhead = len(payload.filename.encode("utf-8")) if payload.filename else 0
        mime_overhead = len(payload.mime_type.encode("utf-8")) if payload.mime_type else 0
        payload_size = len(payload.data) + filename_overhead + mime_overhead + 5
    elif hasattr(payload, "data"):
        # Duck-typed payload objects keep the previous behaviour.
        payload_size = len(payload.data)
    else:
        payload_size = len(payload)

    estimated_size = payload_size + ENCRYPTION_OVERHEAD + 190  # padding estimate

    dct_mod = _get_dct_module()
    fits = dct_mod.will_fit_dct(estimated_size, carrier_image)
    capacity = dct_mod.calculate_dct_capacity(carrier_image).usable_capacity_bytes

    usage_percent = (estimated_size / capacity * 100) if capacity > 0 else 100.0

    return {
        "fits": fits,
        "payload_size": payload_size,
        # Same key as the LSB result, so callers can read it in either mode.
        "estimated_encrypted_size": estimated_size,
        "capacity": capacity,
        "usage_percent": min(usage_percent, 100.0),
        "headroom": capacity - estimated_size,
        "mode": EMBED_MODE_DCT,
    }
@debug.time
def generate_pixel_indices(key: bytes, num_pixels: int, num_needed: int) -> list[int]:
    """
    Generate pseudo-random pixel indices for embedding.

    This is the "where do we hide the bits?" function. A ChaCha20 keystream
    (fixed all-zero 16-byte nonce, so the output depends only on ``key`` and
    the two counts) drives the selection, which makes the sequence
    reproducible for anyone holding the same key.

    Two strategies, chosen by ``num_needed >= num_pixels // 2``:
    - Large request: Fisher-Yates shuffle of all indices, then the first
      ``num_needed`` entries are returned.
    - Small request: draw indices from the keystream and skip duplicates.

    The two strategies produce DIFFERENT sequences for the same key. The
    branch taken depends only on ``num_pixels`` and ``num_needed``, so a
    caller that wants the same indices back must pass the same values for both.

    Args:
        key: 32-byte pixel-selection key.
        num_pixels: Total number of pixels available.
        num_needed: Number of distinct indices to return.

    Returns:
        List of ``num_needed`` distinct indices in ``range(num_pixels)``, in
        selection order.

    NOTE(review): preconditions are checked with ``debug.validate``; whether a
    failed check raises is decided by the debug module — confirm before
    relying on it for input validation.
    """
    debug.validate(len(key) == 32, f"Pixel key must be 32 bytes, got {len(key)}")
    debug.validate(num_pixels > 0, f"Number of pixels must be positive, got {num_pixels}")
    debug.validate(num_needed > 0, f"Number needed must be positive, got {num_needed}")
    debug.validate(
        num_needed <= num_pixels, f"Cannot select {num_needed} pixels from {num_pixels} available"
    )

    debug.print(f"Generating {num_needed} pixel indices from {num_pixels} total pixels")

    # Strategy 1: Full shuffle when we need a lot of pixels
    # (rejection sampling would hit too many duplicates at this density)
    if num_needed >= num_pixels // 2:
        debug.print(f"Using full shuffle (needed {num_needed}/{num_pixels} pixels)")
        nonce = b"\x00" * 16
        cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
        encryptor = cipher.encryptor()

        indices = list(range(num_pixels))
        # Encrypting zeros yields the raw keystream: one 4-byte word per swap
        random_bytes = encryptor.update(b"\x00" * (num_pixels * 4))

        # Fisher-Yates shuffle, walking from the last slot down to slot 1 and
        # swapping each slot with a keystream-chosen slot at or below it.
        # The k-th swap consumes the k-th 4-byte word of the keystream.
        for i in range(num_pixels - 1, 0, -1):
            j_bytes = random_bytes[(num_pixels - 1 - i) * 4 : (num_pixels - i) * 4]
            j = int.from_bytes(j_bytes, "big") % (i + 1)
            indices[i], indices[j] = indices[j], indices[i]

        selected = indices[:num_needed]
        debug.print(f"Generated {len(selected)} indices via shuffle")
        return selected

    # Strategy 2: Direct sampling when we need fewer pixels
    # Draw keystream words until we have enough unique indices
    debug.print(f"Using optimized selection (needed {num_needed}/{num_pixels} pixels)")
    selected = []
    used = set()  # Track which pixels we've already picked

    nonce = b"\x00" * 16
    cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
    encryptor = cipher.encryptor()

    # Pre-generate 2x the words we need, leaving room for duplicate draws
    bytes_needed = (num_needed * 2) * 4
    random_bytes = encryptor.update(b"\x00" * bytes_needed)

    byte_offset = 0
    collisions = 0
    # The bound is strict, so the final pre-generated 4-byte word is never read.
    # NOTE(review): tightening this bound would presumably change the indices
    # produced for some inputs and break extraction of existing images —
    # confirm before touching.
    while len(selected) < num_needed and byte_offset < len(random_bytes) - 4:
        idx = int.from_bytes(random_bytes[byte_offset : byte_offset + 4], "big") % num_pixels
        byte_offset += 4

        if idx not in used:
            used.add(idx)
            selected.append(idx)
        else:
            collisions += 1  # Duplicate draw; skip it

    # Edge case: the pre-generated words did not yield enough unique indices.
    # Keep pulling from the SAME encryptor, i.e. the keystream continues right
    # after the pre-generated bytes. At most 2x the shortfall is attempted.
    if len(selected) < num_needed:
        debug.print(f"Need {num_needed - len(selected)} more indices, generating...")
        extra_needed = num_needed - len(selected)
        for _ in range(extra_needed * 2):
            extra_bytes = encryptor.update(b"\x00" * 4)
            idx = int.from_bytes(extra_bytes, "big") % num_pixels
            if idx not in used:
                used.add(idx)
                selected.append(idx)
                if len(selected) == num_needed:
                    break

    debug.print(f"Generated {len(selected)} indices with {collisions} collisions")
    # The fallback loop is bounded, so a shortfall is still possible here.
    debug.validate(
        len(selected) == num_needed,
        f"Failed to generate enough indices: {len(selected)}/{num_needed}",
    )
    return selected
@debug.time
def embed_in_image(
    data: bytes,
    image_data: bytes,
    pixel_key: bytes,
    bits_per_channel: int = 1,
    output_format: str | None = None,
    embed_mode: str = EMBED_MODE_LSB,
    dct_output_format: str = DCT_OUTPUT_PNG,
    dct_color_mode: str = "color",
    progress_file: str | None = None,
    quant_step: int | None = None,
    jpeg_quality: int | None = None,
    max_dimension: int | None = None,
) -> tuple[bytes, Union[EmbedStats, "DCTEmbedStats"], str]:
    """
    Embed data into an image using specified mode.

    The work is dispatched to the backend registered for ``embed_mode``; this
    function only validates options and selects the returned file extension.

    Args:
        data: Data to embed (encrypted payload)
        image_data: Carrier image bytes
        pixel_key: Key for pixel/coefficient selection
        bits_per_channel: Bits per channel (LSB mode only)
        output_format: Force output format (LSB mode only)
        embed_mode: 'lsb' (default) or 'dct'
        dct_output_format: For DCT mode - 'png' (default) or 'jpeg'. Any other
            value falls back to 'png'.
        dct_color_mode: For DCT mode - 'color' (default) or 'grayscale'. Any
            other value falls back to 'color'.
        progress_file: Optional progress path, forwarded to the backend in
            both modes.
        quant_step: Forwarded unchanged to the DCT backend (DCT mode only).
        jpeg_quality: Forwarded unchanged to the DCT backend (DCT mode only).
        max_dimension: Forwarded unchanged to the DCT backend (DCT mode only).

    Returns:
        Tuple of (stego image bytes, stats, file extension). In DCT mode the
        extension is 'jpg' for JPEG output and 'png' otherwise; in other
        modes it is the stats object's ``output_extension`` attribute, or
        'png' if the attribute is absent.

    Raises:
        CapacityError: If data won't fit
        EmbeddingError: If embedding fails
        ImportError: If the selected backend reports itself unavailable
    """
    debug.print(f"embed_in_image: mode={embed_mode}, data={len(data)} bytes")
    debug.validate(
        embed_mode in VALID_EMBED_MODES, f"Invalid embed_mode: {embed_mode}. Use 'lsb' or 'dct'"
    )

    # Dispatch via backend registry
    from .backends import registry

    backend = registry.get(embed_mode)
    if not backend.is_available():
        raise ImportError(
            f"Dependencies for '{embed_mode}' mode are not installed. "
            f"Install with: pip install stegasoo[dct]"
        )

    if embed_mode == EMBED_MODE_DCT:
        # Unknown DCT options are replaced by their defaults, not rejected.
        if dct_output_format not in (DCT_OUTPUT_PNG, DCT_OUTPUT_JPEG):
            debug.print(f"Invalid dct_output_format '{dct_output_format}', defaulting to PNG")
            dct_output_format = DCT_OUTPUT_PNG
        if dct_color_mode not in (DCT_COLOR_GRAYSCALE, DCT_COLOR_COLOR):
            debug.print(f"Invalid dct_color_mode '{dct_color_mode}', defaulting to color")
            dct_color_mode = DCT_COLOR_COLOR

        stego_bytes, dct_stats = backend.embed(
            data,
            image_data,
            pixel_key,
            progress_file=progress_file,
            dct_output_format=dct_output_format,
            dct_color_mode=dct_color_mode,
            quant_step=quant_step,
            jpeg_quality=jpeg_quality,
            max_dimension=max_dimension,
        )
        ext = "jpg" if dct_output_format == DCT_OUTPUT_JPEG else "png"
        debug.print(
            f"DCT embedding complete: {dct_output_format.upper()} output, "
            f"color_mode={dct_color_mode}, ext={ext}"
        )
        return stego_bytes, dct_stats, ext

    # LSB and other image backends
    stego_bytes, stats = backend.embed(
        data,
        image_data,
        pixel_key,
        progress_file=progress_file,
        bits_per_channel=bits_per_channel,
        output_format=output_format,
    )
    ext = getattr(stats, "output_extension", "png")
    return stego_bytes, stats, ext
+ """ + debug.print(f"LSB embedding {len(data)} bytes into image") + debug.data(pixel_key, "Pixel key for embedding") + debug.validate( + bits_per_channel in (1, 2), f"bits_per_channel must be 1 or 2, got {bits_per_channel}" + ) + debug.validate(len(pixel_key) == 32, f"Pixel key must be 32 bytes, got {len(pixel_key)}") + + img_file = None + img = None + stego_img = None + + try: + img_file = Image.open(io.BytesIO(image_data)) + input_format = img_file.format + + debug.print(f"Carrier image: {img_file.size[0]}x{img_file.size[1]}, format: {input_format}") + + img = img_file.convert("RGB") if img_file.mode != "RGB" else img_file.copy() + if img_file.mode != "RGB": + debug.print(f"Converting image from {img_file.mode} to RGB") + + pixels = list(img.getdata()) + num_pixels = len(pixels) + + bits_per_pixel = 3 * bits_per_channel + max_bytes = (num_pixels * bits_per_pixel) // 8 + + debug.print(f"Image capacity: {max_bytes} bytes at {bits_per_channel} bit(s)/channel") + + data_with_len = struct.pack(">I", len(data)) + data + + if len(data_with_len) > max_bytes: + debug.print(f"Capacity error: need {len(data_with_len)}, have {max_bytes}") + raise CapacityError(len(data_with_len), max_bytes) + + debug.print( + f"Total data to embed: {len(data_with_len)} bytes " + f"({len(data_with_len)/max_bytes*100:.1f}% of capacity)" + ) + + binary_data = "".join(format(b, "08b") for b in data_with_len) + pixels_needed = (len(binary_data) + bits_per_pixel - 1) // bits_per_pixel + + debug.print(f"Need {pixels_needed} pixels to embed {len(binary_data)} bits") + + selected_indices = generate_pixel_indices(pixel_key, num_pixels, pixels_needed) + + new_pixels = list(pixels) + clear_mask = 0xFF ^ ((1 << bits_per_channel) - 1) + + bit_idx = 0 + modified_pixels = 0 + total_pixels_to_process = len(selected_indices) + + # Initial progress write - signals prep is done, embedding starting + if progress_file: + _write_progress(progress_file, 5, 100, "embedding") + + for progress_idx, pixel_idx in 
enumerate(selected_indices): + if bit_idx >= len(binary_data): + break + + r, g, b = new_pixels[pixel_idx] + modified = False + + for channel_idx, channel_val in enumerate([r, g, b]): + if bit_idx >= len(binary_data): + break + bits = binary_data[bit_idx : bit_idx + bits_per_channel].ljust( + bits_per_channel, "0" + ) + new_val = (channel_val & clear_mask) | int(bits, 2) + + if channel_val != new_val: + modified = True + if channel_idx == 0: + r = new_val + elif channel_idx == 1: + g = new_val + else: + b = new_val + + bit_idx += bits_per_channel + + if modified: + new_pixels[pixel_idx] = (r, g, b) + modified_pixels += 1 + + # Report progress periodically + if progress_file and progress_idx % PROGRESS_INTERVAL == 0: + _write_progress(progress_file, progress_idx, total_pixels_to_process, "embedding") + + # Final progress before save + if progress_file: + _write_progress( + progress_file, total_pixels_to_process, total_pixels_to_process, "saving" + ) + + debug.print(f"Modified {modified_pixels} pixels (out of {len(selected_indices)} selected)") + + stego_img = Image.new("RGB", img.size) + stego_img.putdata(new_pixels) + + if output_format: + out_fmt = output_format.upper() + out_ext = FORMAT_TO_EXT.get(out_fmt, "png") + debug.print(f"Using forced output format: {out_fmt}") + else: + out_fmt, out_ext = get_output_format(input_format) + debug.print(f"Auto-selected output format: {out_fmt}") + + output = io.BytesIO() + stego_img.save(output, out_fmt) + output.seek(0) + + stats = EmbedStats( + pixels_modified=modified_pixels, + total_pixels=num_pixels, + capacity_used=len(data_with_len) / max_bytes, + bytes_embedded=len(data_with_len), + ) + + debug.print(f"LSB embedding complete: {out_fmt} image, {len(output.getvalue())} bytes") + return output.getvalue(), stats, out_ext + + except CapacityError: + raise + except Exception as e: + debug.exception(e, "embed_lsb") + raise EmbeddingError(f"Failed to embed data: {e}") from e + finally: + # Properly close all PIL Images to 
prevent memory leaks + if stego_img is not None: + stego_img.close() + if img is not None and img is not img_file: + img.close() + if img_file is not None: + img_file.close() + + +# ============================================================================= +# EXTRACTION FUNCTIONS +# ============================================================================= + + +@debug.time +def extract_from_image( + image_data: bytes, + pixel_key: bytes, + bits_per_channel: int = 1, + embed_mode: str = EMBED_MODE_AUTO, + progress_file: str | None = None, + quant_step: int | None = None, +) -> bytes | None: + """ + Extract hidden data from a stego image. + + Args: + image_data: Stego image bytes + pixel_key: Key for pixel/coefficient selection (must match encoding) + bits_per_channel: Bits per channel (LSB mode only) + embed_mode: 'auto' (try both), 'lsb', or 'dct' + progress_file: Optional path to write progress JSON for UI polling + + Returns: + Extracted data bytes, or None if extraction fails + """ + debug.print(f"extract_from_image: mode={embed_mode}") + + from .backends import registry + + # AUTO MODE: Try LSB first (cheaper), then other backends + if embed_mode == EMBED_MODE_AUTO: + auto_order = [EMBED_MODE_LSB] + [ + m for m in registry.available_modes(carrier_type="image") if m != EMBED_MODE_LSB + ] + for mode in auto_order: + backend = registry.get(mode) + debug.print(f"Auto-detect: trying {mode}") + result = backend.extract( + image_data, + pixel_key, + progress_file=progress_file, + bits_per_channel=bits_per_channel, + quant_step=quant_step, + ) + if result is not None: + debug.print(f"Auto-detect: {mode} extraction succeeded") + return result + debug.print("Auto-detect: All modes failed") + return None + + # EXPLICIT MODE + backend = registry.get(embed_mode) + if not backend.is_available(): + raise ImportError(f"Dependencies for '{embed_mode}' mode are not installed.") + return backend.extract( + image_data, + pixel_key, + progress_file=progress_file, + 
bits_per_channel=bits_per_channel, + quant_step=quant_step, + ) + + +def _extract_dct( + image_data: bytes, + pixel_key: bytes, + progress_file: str | None = None, +) -> bytes | None: + """Extract using DCT mode.""" + try: + dct_mod = _get_dct_module() + return dct_mod.extract_from_dct(image_data, pixel_key, progress_file) + except Exception as e: + debug.print(f"DCT extraction failed: {e}") + return None + + +def _extract_lsb(image_data: bytes, pixel_key: bytes, bits_per_channel: int = 1) -> bytes | None: + """ + Extract using LSB mode (internal implementation). + """ + debug.print(f"LSB extracting from {len(image_data)} byte image") + debug.data(pixel_key, "Pixel key for extraction") + debug.validate( + bits_per_channel in (1, 2), f"bits_per_channel must be 1 or 2, got {bits_per_channel}" + ) + + img_file = None + img = None + + try: + img_file = Image.open(io.BytesIO(image_data)) + debug.print(f"Image: {img_file.size[0]}x{img_file.size[1]}, format: {img_file.format}") + + img = img_file.convert("RGB") if img_file.mode != "RGB" else img_file.copy() + if img_file.mode != "RGB": + debug.print(f"Converting image from {img_file.mode} to RGB") + + pixels = list(img.getdata()) + num_pixels = len(pixels) + bits_per_pixel = 3 * bits_per_channel + + debug.print(f"Image has {num_pixels} pixels, {bits_per_pixel} bits/pixel") + + initial_pixels = (32 + bits_per_pixel - 1) // bits_per_pixel + 10 + debug.print(f"Extracting initial {initial_pixels} pixels to find length") + + initial_indices = generate_pixel_indices(pixel_key, num_pixels, initial_pixels) + + binary_data = "" + for pixel_idx in initial_indices: + r, g, b = pixels[pixel_idx] + for channel in [r, g, b]: + for bit_pos in range(bits_per_channel - 1, -1, -1): + binary_data += str((channel >> bit_pos) & 1) + + try: + length_bits = binary_data[:32] + if len(length_bits) < 32: + debug.print(f"Not enough bits for length: {len(length_bits)}/32") + return None + + data_length = struct.unpack(">I", int(length_bits, 
2).to_bytes(4, "big"))[0] + debug.print(f"Extracted length: {data_length} bytes") + except Exception as e: + debug.print(f"Failed to parse length: {e}") + return None + + max_possible = (num_pixels * bits_per_pixel) // 8 - 4 + if data_length > max_possible or data_length < 10: + debug.print(f"Invalid data length: {data_length} (max possible: {max_possible})") + return None + + total_bits = (4 + data_length) * 8 + pixels_needed = (total_bits + bits_per_pixel - 1) // bits_per_pixel + + debug.print(f"Need {pixels_needed} pixels to extract {data_length} bytes") + + selected_indices = generate_pixel_indices(pixel_key, num_pixels, pixels_needed) + + binary_data = "" + for pixel_idx in selected_indices: + r, g, b = pixels[pixel_idx] + for channel in [r, g, b]: + for bit_pos in range(bits_per_channel - 1, -1, -1): + binary_data += str((channel >> bit_pos) & 1) + + data_bits = binary_data[32 : 32 + (data_length * 8)] + + if len(data_bits) < data_length * 8: + debug.print(f"Insufficient bits: {len(data_bits)} < {data_length * 8}") + return None + + data_bytes = bytearray() + for i in range(0, len(data_bits), 8): + byte_bits = data_bits[i : i + 8] + if len(byte_bits) == 8: + data_bytes.append(int(byte_bits, 2)) + + debug.print(f"LSB successfully extracted {len(data_bytes)} bytes") + return bytes(data_bytes) + + except Exception as e: + debug.exception(e, "extract_lsb") + return None + finally: + # Properly close all PIL Images to prevent memory leaks + if img is not None and img is not img_file: + img.close() + if img_file is not None: + img_file.close() + + +# ============================================================================= +# UTILITY FUNCTIONS +# ============================================================================= + + +def get_image_dimensions(image_data: bytes) -> tuple[int, int]: + """Get image dimensions without loading full image.""" + debug.validate(len(image_data) > 0, "Image data cannot be empty") + img = Image.open(io.BytesIO(image_data)) + 
try: + dimensions = img.size + debug.print(f"Image dimensions: {dimensions[0]}x{dimensions[1]}") + return dimensions + finally: + img.close() + + +def get_image_format(image_data: bytes) -> str | None: + """Get image format (PIL format string like 'PNG', 'JPEG').""" + try: + img = Image.open(io.BytesIO(image_data)) + try: + format_str = img.format + debug.print(f"Image format: {format_str}") + return format_str + finally: + img.close() + except Exception as e: + debug.print(f"Failed to get image format: {e}") + return None + + +def is_lossless_format(image_data: bytes) -> bool: + """Check if image is in a lossless format suitable for steganography.""" + fmt = get_image_format(image_data) + is_lossless = fmt is not None and fmt.upper() in LOSSLESS_FORMATS + debug.print(f"Image is lossless: {is_lossless} (format: {fmt})") + return is_lossless + + +def peek_image(image_data: bytes) -> dict: + """ + Check if an image contains Stegasoo hidden data without decrypting. + + Attempts to detect LSB and DCT headers by extracting the first few bytes + and looking for Stegasoo magic markers. + + Args: + image_data: Raw image bytes + + Returns: + dict with: + - has_stegasoo: bool - True if header detected + - mode: str or None - 'lsb', 'dct', or None + - confidence: str - 'high', 'low', or None + + Example: + >>> result = peek_image(suspicious_image_bytes) + >>> if result['has_stegasoo']: + ... 
print(f"Found {result['mode']} data!") + """ + from .constants import EMBED_MODE_DCT, EMBED_MODE_LSB + + result = {"has_stegasoo": False, "mode": None, "confidence": None} + + # Try LSB extraction (look for header bytes) + try: + img = Image.open(io.BytesIO(image_data)) + pixels = list(img.getdata()) + img.close() + + # Extract first 32 bits (4 bytes) from LSB + extracted = [] + for i in range(32): + if i < len(pixels): + pixel = pixels[i] + if isinstance(pixel, tuple): + extracted.append(pixel[0] & 1) + else: + extracted.append(pixel & 1) + + # Convert bits to bytes + header_bytes = bytearray() + for i in range(0, len(extracted), 8): + byte = 0 + for j in range(8): + if i + j < len(extracted): + byte = (byte << 1) | extracted[i + j] + header_bytes.append(byte) + + # Check for LSB magic: \x89ST3 + if bytes(header_bytes[:4]) == b"\x89ST3": + result["has_stegasoo"] = True + result["mode"] = EMBED_MODE_LSB + result["confidence"] = "high" + return result + except Exception: + pass + + # Try DCT extraction (requires scipy/jpeglib) + try: + from .dct_steganography import HAS_JPEGLIB, HAS_SCIPY + + if HAS_SCIPY or HAS_JPEGLIB: + from .dct_steganography import extract_from_dct + + # Extract first few bytes to check header + extracted = extract_from_dct(image_data, seed=b"\x00" * 32, length=4) + if extracted == b"\x89DCT": + result["has_stegasoo"] = True + result["mode"] = EMBED_MODE_DCT + result["confidence"] = "high" + return result + except Exception: + pass + + return result diff --git a/src/soosef/stegasoo/utils.py b/src/soosef/stegasoo/utils.py new file mode 100644 index 0000000..046fd12 --- /dev/null +++ b/src/soosef/stegasoo/utils.py @@ -0,0 +1,532 @@ +""" +Stegasoo Utilities + +Secure deletion, filename generation, and other helpers. 
+""" + +import io +import os +import random +import secrets +import shutil +from datetime import date +from pathlib import Path + +from PIL import Image + +from .constants import DAY_NAMES +from .debug import debug + + +def read_image_exif(image_data: bytes) -> dict: + """ + Read EXIF metadata from an image. + + Args: + image_data: Raw image bytes + + Returns: + Dict with EXIF fields (tag names as keys) + + Example: + >>> exif = read_image_exif(photo_bytes) + >>> print(exif.get('Make')) # Camera manufacturer + """ + from PIL.ExifTags import GPSTAGS, TAGS + + result = {} + + try: + img = Image.open(io.BytesIO(image_data)) + exif_data = img._getexif() + + if exif_data: + for tag_id, value in exif_data.items(): + tag = TAGS.get(tag_id, str(tag_id)) + + # Handle GPS data specially + if tag == "GPSInfo" and isinstance(value, dict): + gps = {} + for gps_tag_id, gps_value in value.items(): + gps_tag = GPSTAGS.get(gps_tag_id, str(gps_tag_id)) + # Convert tuples/IFDRational to simple types + if hasattr(gps_value, "numerator"): + gps[gps_tag] = float(gps_value) + elif isinstance(gps_value, tuple): + gps[gps_tag] = [ + float(v) if hasattr(v, "numerator") else v for v in gps_value + ] + else: + gps[gps_tag] = gps_value + result[tag] = gps + # Convert IFDRational to float + elif hasattr(value, "numerator"): + result[tag] = float(value) + # Convert bytes to string if possible + elif isinstance(value, bytes): + try: + # Try to decode as ASCII/UTF-8 text + decoded = value.decode("utf-8", errors="strict").strip("\x00") + # Only keep if it looks like printable text + if decoded.isprintable() or all( + c.isspace() or c.isprintable() for c in decoded + ): + result[tag] = decoded + else: + result[tag] = f"<{len(value)} bytes binary>" + except (UnicodeDecodeError, Exception): + result[tag] = f"<{len(value)} bytes binary>" + # Handle tuples of IFDRational + elif isinstance(value, tuple) and value and hasattr(value[0], "numerator"): + result[tag] = [float(v) for v in value] + else: + 
result[tag] = value + + img.close() + except Exception as e: + debug.print(f"Error reading EXIF: {e}") + + return result + + +def write_image_exif(image_data: bytes, exif_updates: dict) -> bytes: + """ + Write/update EXIF metadata in a JPEG image. + + Args: + image_data: Raw JPEG image bytes + exif_updates: Dict of EXIF fields to update (tag names as keys) + Use None as value to delete a field + + Returns: + Image bytes with updated EXIF + + Raises: + ValueError: If image is not JPEG or piexif not available + + Example: + >>> updated = write_image_exif(jpeg_bytes, {"Artist": "John Doe"}) + """ + try: + import piexif + except ImportError: + raise ValueError("piexif required for EXIF editing: pip install piexif") + + # Verify it's a JPEG + if not image_data[:2] == b"\xff\xd8": + raise ValueError("EXIF editing only supported for JPEG images") + + debug.print(f"Writing EXIF updates: {list(exif_updates.keys())}") + + # Load existing EXIF + try: + exif_dict = piexif.load(image_data) + except Exception: + # No existing EXIF, start fresh + exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None} + + # Map common tag names to piexif IFD and tag IDs + tag_mapping = { + # 0th IFD (main image) + "Make": (piexif.ImageIFD.Make, "0th"), + "Model": (piexif.ImageIFD.Model, "0th"), + "Software": (piexif.ImageIFD.Software, "0th"), + "Artist": (piexif.ImageIFD.Artist, "0th"), + "Copyright": (piexif.ImageIFD.Copyright, "0th"), + "ImageDescription": (piexif.ImageIFD.ImageDescription, "0th"), + "DateTime": (piexif.ImageIFD.DateTime, "0th"), + "Orientation": (piexif.ImageIFD.Orientation, "0th"), + # Exif IFD + "DateTimeOriginal": (piexif.ExifIFD.DateTimeOriginal, "Exif"), + "DateTimeDigitized": (piexif.ExifIFD.DateTimeDigitized, "Exif"), + "UserComment": (piexif.ExifIFD.UserComment, "Exif"), + "ExposureTime": (piexif.ExifIFD.ExposureTime, "Exif"), + "FNumber": (piexif.ExifIFD.FNumber, "Exif"), + "ISOSpeedRatings": (piexif.ExifIFD.ISOSpeedRatings, "Exif"), + 
"FocalLength": (piexif.ExifIFD.FocalLength, "Exif"), + "LensMake": (piexif.ExifIFD.LensMake, "Exif"), + "LensModel": (piexif.ExifIFD.LensModel, "Exif"), + } + + for tag_name, value in exif_updates.items(): + if tag_name not in tag_mapping: + debug.print(f"Unknown EXIF tag: {tag_name}, skipping") + continue + + tag_id, ifd = tag_mapping[tag_name] + + if value is None: + # Delete the tag + if tag_id in exif_dict[ifd]: + del exif_dict[ifd][tag_id] + debug.print(f"Deleted EXIF tag: {tag_name}") + else: + # Set the tag (encode strings as bytes) + if isinstance(value, str): + value = value.encode("utf-8") + exif_dict[ifd][tag_id] = value + debug.print(f"Set EXIF tag: {tag_name}") + + # Serialize EXIF and insert into image + exif_bytes = piexif.dump(exif_dict) + output = io.BytesIO() + img = Image.open(io.BytesIO(image_data)) + img.save(output, "JPEG", exif=exif_bytes, quality=95) + output.seek(0) + + debug.print(f"EXIF updated: {len(image_data)} -> {len(output.getvalue())} bytes") + return output.getvalue() + + +def strip_image_metadata(image_data: bytes, output_format: str = "PNG") -> bytes: + """ + Remove all metadata (EXIF, ICC profiles, etc.) from an image. + + Creates a fresh image with only pixel data - no EXIF, GPS coordinates, + camera info, timestamps, or other potentially sensitive metadata. + + Args: + image_data: Raw image bytes + output_format: Output format ('PNG', 'BMP', 'TIFF') + + Returns: + Clean image bytes with no metadata + + Example: + >>> clean = strip_image_metadata(photo_bytes) + >>> # EXIF data is now removed + """ + debug.print(f"Stripping metadata, output format: {output_format}") + + img = Image.open(io.BytesIO(image_data)) + + # Convert to RGB if needed (handles RGBA, P, L, etc.) 
+ if img.mode not in ("RGB", "RGBA"): + img = img.convert("RGB") + + # Create fresh image - this discards all metadata + clean = Image.new(img.mode, img.size) + clean.putdata(list(img.getdata())) + + output = io.BytesIO() + clean.save(output, output_format.upper()) + output.seek(0) + + debug.print(f"Metadata stripped: {len(image_data)} -> {len(output.getvalue())} bytes") + return output.getvalue() + + +def generate_filename(date_str: str | None = None, prefix: str = "", extension: str = "png") -> str: + """ + Generate a filename for stego images. + + Format: {prefix}{random}_{YYYYMMDD}.{extension} + + Args: + date_str: Date string (YYYY-MM-DD), defaults to today + prefix: Optional prefix + extension: File extension without dot (default: 'png') + + Returns: + Filename string + + Example: + >>> generate_filename("2023-12-25", "secret_", "png") + "secret_a1b2c3d4_20231225.png" + """ + debug.validate( + bool(extension) and "." not in extension, + f"Extension must not contain dot, got '{extension}'", + ) + + if date_str is None: + date_str = date.today().isoformat() + + date_compact = date_str.replace("-", "") + random_hex = secrets.token_hex(4) + + # Ensure extension doesn't have a leading dot + extension = extension.lstrip(".") + + filename = f"{prefix}{random_hex}_{date_compact}.{extension}" + debug.print(f"Generated filename: {filename}") + return filename + + +def parse_date_from_filename(filename: str) -> str | None: + """ + Extract date from a stego filename. 
+ + Looks for patterns like _20251227 or _2025-12-27 + + Args: + filename: Filename to parse + + Returns: + Date string (YYYY-MM-DD) or None + + Example: + >>> parse_date_from_filename("secret_a1b2c3d4_20231225.png") + "2023-12-25" + """ + import re + + # Try YYYYMMDD format + match = re.search(r"_(\d{4})(\d{2})(\d{2})(?:\.|$)", filename) + if match: + year, month, day = match.groups() + date_str = f"{year}-{month}-{day}" + debug.print(f"Parsed date (compact): {date_str}") + return date_str + + # Try YYYY-MM-DD format + match = re.search(r"_(\d{4})-(\d{2})-(\d{2})(?:\.|$)", filename) + if match: + year, month, day = match.groups() + date_str = f"{year}-{month}-{day}" + debug.print(f"Parsed date (dashed): {date_str}") + return date_str + + debug.print(f"No date found in filename: {filename}") + return None + + +def get_day_from_date(date_str: str) -> str: + """ + Get day of week name from date string. + + Args: + date_str: Date string (YYYY-MM-DD) + + Returns: + Day name (e.g., "Monday") + + Example: + >>> get_day_from_date("2023-12-25") + "Monday" + """ + debug.validate( + len(date_str) == 10 and date_str[4] == "-" and date_str[7] == "-", + f"Invalid date format: {date_str}, expected YYYY-MM-DD", + ) + + try: + year, month, day = map(int, date_str.split("-")) + d = date(year, month, day) + day_name = DAY_NAMES[d.weekday()] + debug.print(f"Date {date_str} is {day_name}") + return day_name + except Exception as e: + debug.exception(e, f"get_day_from_date for {date_str}") + return "" + + +def get_today_date() -> str: + """ + Get today's date as YYYY-MM-DD. + + Returns: + Today's date string + + Example: + >>> get_today_date() + "2023-12-25" + """ + today = date.today().isoformat() + debug.print(f"Today's date: {today}") + return today + + +def get_today_day() -> str: + """ + Get today's day name. 
+ + Returns: + Today's day name + + Example: + >>> get_today_day() + "Monday" + """ + today_day = DAY_NAMES[date.today().weekday()] + debug.print(f"Today is {today_day}") + return today_day + + +class SecureDeleter: + """ + Securely delete files by overwriting with random data. + + Implements multi-pass overwriting before deletion. + + Example: + >>> deleter = SecureDeleter("secret.txt", passes=3) + >>> deleter.execute() + """ + + def __init__(self, path: str | Path, passes: int = 7): + """ + Initialize secure deleter. + + Args: + path: Path to file or directory + passes: Number of overwrite passes + """ + debug.validate(passes > 0, f"Passes must be positive, got {passes}") + + self.path = Path(path) + self.passes = passes + debug.print(f"SecureDeleter initialized for {self.path} with {passes} passes") + + def _overwrite_file(self, file_path: Path) -> None: + """Overwrite file with random data multiple times.""" + if not file_path.exists() or not file_path.is_file(): + debug.print(f"File does not exist or is not a file: {file_path}") + return + + length = file_path.stat().st_size + debug.print(f"Overwriting file {file_path} ({length} bytes)") + + if length == 0: + debug.print("File is empty, nothing to overwrite") + return + + patterns = [b"\x00", b"\xff", bytes([random.randint(0, 255)])] + + for pass_num in range(self.passes): + debug.print(f"Overwrite pass {pass_num + 1}/{self.passes}") + with open(file_path, "r+b") as f: + for pattern_idx, pattern in enumerate(patterns): + f.seek(0) + # Write pattern in chunks for large files + chunk_size = 1024 * 1024 # 1MB chunks + for offset in range(0, length, chunk_size): + chunk = min(chunk_size, length - offset) + f.write(pattern * (chunk // len(pattern))) + f.write(pattern[: chunk % len(pattern)]) + + # Final pass with random data + f.seek(0) + f.write(os.urandom(length)) + + debug.print(f"Completed {self.passes} overwrite passes") + + def delete_file(self) -> None: + """Securely delete a single file.""" + if 
self.path.is_file(): + debug.print(f"Securely deleting file: {self.path}") + self._overwrite_file(self.path) + self.path.unlink() + debug.print(f"File deleted: {self.path}") + else: + debug.print(f"Not a file: {self.path}") + + def delete_directory(self) -> None: + """Securely delete a directory and all contents.""" + if not self.path.is_dir(): + debug.print(f"Not a directory: {self.path}") + return + + debug.print(f"Securely deleting directory: {self.path}") + + # First, securely overwrite all files + file_count = 0 + for file_path in self.path.rglob("*"): + if file_path.is_file(): + self._overwrite_file(file_path) + file_count += 1 + + debug.print(f"Overwrote {file_count} files") + + # Then remove the directory tree + shutil.rmtree(self.path) + debug.print(f"Directory deleted: {self.path}") + + def execute(self) -> None: + """Securely delete the path (file or directory).""" + debug.print(f"Executing secure deletion: {self.path}") + if self.path.is_file(): + self.delete_file() + elif self.path.is_dir(): + self.delete_directory() + else: + debug.print(f"Path does not exist: {self.path}") + + +def secure_delete(path: str | Path, passes: int = 7) -> None: + """ + Convenience function for secure deletion. + + Args: + path: Path to file or directory + passes: Number of overwrite passes + + Example: + >>> secure_delete("secret.txt", passes=3) + """ + debug.print(f"secure_delete called: {path}, passes={passes}") + SecureDeleter(path, passes).execute() + + +def format_file_size(size_bytes: int) -> str: + """ + Format file size for display. 
+ + Args: + size_bytes: Size in bytes + + Returns: + Human-readable string (e.g., "1.5 MB") + + Example: + >>> format_file_size(1500000) + "1.5 MB" + """ + debug.validate(size_bytes >= 0, f"File size cannot be negative: {size_bytes}") + + size: float = float(size_bytes) + for unit in ["B", "KB", "MB", "GB"]: + if size < 1024: + if unit == "B": + return f"{int(size)} {unit}" + return f"{size:.1f} {unit}" + size /= 1024 + return f"{size:.1f} TB" + + +def format_number(n: int) -> str: + """ + Format number with commas. + + Args: + n: Integer to format + + Returns: + Formatted string + + Example: + >>> format_number(1234567) + "1,234,567" + """ + debug.validate(isinstance(n, int), f"Input must be integer, got {type(n)}") + return f"{n:,}" + + +def clamp(value: int, min_val: int, max_val: int) -> int: + """ + Clamp value to range. + + Args: + value: Value to clamp + min_val: Minimum allowed value + max_val: Maximum allowed value + + Returns: + Clamped value + + Example: + >>> clamp(15, 0, 10) + 10 + """ + debug.validate(min_val <= max_val, f"min_val ({min_val}) must be <= max_val ({max_val})") + return max(min_val, min(max_val, value)) diff --git a/src/soosef/stegasoo/validation.py b/src/soosef/stegasoo/validation.py new file mode 100644 index 0000000..18e7c2d --- /dev/null +++ b/src/soosef/stegasoo/validation.py @@ -0,0 +1,515 @@ +""" +Stegasoo Input Validation (v3.2.0) + +Validators for all user inputs with clear error messages. 
+ +Changes in v3.2.0: +- Renamed validate_phrase() → validate_passphrase() +- Added word count validation with warnings for passphrases +- Added validators for embed modes and DCT parameters +""" + +import io + +from PIL import Image + +from .debug import get_logger + +logger = get_logger(__name__) + +from .constants import ( + ALLOWED_AUDIO_EXTENSIONS, + ALLOWED_IMAGE_EXTENSIONS, + ALLOWED_KEY_EXTENSIONS, + EMBED_MODE_AUDIO_AUTO, + EMBED_MODE_AUTO, + EMBED_MODE_DCT, + EMBED_MODE_LSB, + MAX_FILE_PAYLOAD_SIZE, + MAX_FILE_SIZE, + MAX_IMAGE_PIXELS, + MAX_MESSAGE_SIZE, + MAX_PIN_LENGTH, + MIN_KEY_PASSWORD_LENGTH, + MIN_PASSPHRASE_WORDS, + MIN_PIN_LENGTH, + MIN_RSA_BITS, + RECOMMENDED_PASSPHRASE_WORDS, + VALID_AUDIO_EMBED_MODES, +) +from .exceptions import ( + AudioValidationError, + ImageValidationError, + KeyValidationError, + MessageValidationError, + PinValidationError, + SecurityFactorError, +) +from .keygen import load_rsa_key +from .models import FilePayload, ValidationResult + + +def validate_pin(pin: str, required: bool = False) -> ValidationResult: + """ + Validate PIN format. + + Rules: + - 6-9 digits only + - Cannot start with zero + - Empty is OK if not required + + Args: + pin: PIN string to validate + required: Whether PIN is required + + Returns: + ValidationResult + """ + if not pin: + if required: + return ValidationResult.error("PIN is required") + return ValidationResult.ok() + + if not pin.isdigit(): + return ValidationResult.error("PIN must contain only digits") + + if len(pin) < MIN_PIN_LENGTH or len(pin) > MAX_PIN_LENGTH: + return ValidationResult.error(f"PIN must be {MIN_PIN_LENGTH}-{MAX_PIN_LENGTH} digits") + + if pin[0] == "0": + return ValidationResult.error("PIN cannot start with zero") + + return ValidationResult.ok(length=len(pin)) + + +def validate_message(message: str) -> ValidationResult: + """ + Validate text message content and size. 
+ + Args: + message: Message text + + Returns: + ValidationResult + """ + if not message: + return ValidationResult.error("Message is required") + + if len(message) > MAX_MESSAGE_SIZE: + return ValidationResult.error( + f"Message too long ({len(message):,} chars). Maximum: {MAX_MESSAGE_SIZE:,} characters" + ) + + return ValidationResult.ok(length=len(message)) + + +def validate_payload(payload: str | bytes | FilePayload) -> ValidationResult: + """ + Validate a payload (text message, bytes, or file). + + Args: + payload: Text string, raw bytes, or FilePayload + + Returns: + ValidationResult + """ + if isinstance(payload, str): + return validate_message(payload) + + elif isinstance(payload, FilePayload): + if not payload.data: + return ValidationResult.error("File is empty") + + if len(payload.data) > MAX_FILE_PAYLOAD_SIZE: + return ValidationResult.error( + f"File too large ({len(payload.data):,} bytes). " + f"Maximum: {MAX_FILE_PAYLOAD_SIZE:,} bytes ({MAX_FILE_PAYLOAD_SIZE // 1024} KB)" + ) + + return ValidationResult.ok( + size=len(payload.data), filename=payload.filename, mime_type=payload.mime_type + ) + + elif isinstance(payload, bytes): + if not payload: + return ValidationResult.error("Payload is empty") + + if len(payload) > MAX_FILE_PAYLOAD_SIZE: + return ValidationResult.error( + f"Payload too large ({len(payload):,} bytes). " + f"Maximum: {MAX_FILE_PAYLOAD_SIZE:,} bytes ({MAX_FILE_PAYLOAD_SIZE // 1024} KB)" + ) + + return ValidationResult.ok(size=len(payload)) + + else: + return ValidationResult.error(f"Invalid payload type: {type(payload)}") + + +def validate_file_payload( + file_data: bytes, filename: str = "", max_size: int = MAX_FILE_PAYLOAD_SIZE +) -> ValidationResult: + """ + Validate a file for embedding. 
+ + Args: + file_data: Raw file bytes + filename: Original filename (for display in errors) + max_size: Maximum allowed size in bytes + + Returns: + ValidationResult + """ + if not file_data: + return ValidationResult.error("File is empty") + + if len(file_data) > max_size: + size_kb = len(file_data) / 1024 + max_kb = max_size / 1024 + return ValidationResult.error( + f"File '{filename or 'unnamed'}' too large ({size_kb:.1f} KB). " + f"Maximum: {max_kb:.0f} KB" + ) + + return ValidationResult.ok(size=len(file_data), filename=filename) + + +def validate_image( + image_data: bytes, name: str = "Image", check_size: bool = True +) -> ValidationResult: + """ + Validate image data and dimensions. + + Args: + image_data: Raw image bytes + name: Name for error messages + check_size: Whether to check pixel dimensions + + Returns: + ValidationResult with width, height, pixels + """ + if not image_data: + return ValidationResult.error(f"{name} is required") + + if len(image_data) > MAX_FILE_SIZE: + return ValidationResult.error( + f"{name} too large ({len(image_data):,} bytes). Maximum: {MAX_FILE_SIZE:,} bytes" + ) + + try: + img = Image.open(io.BytesIO(image_data)) + width, height = img.size + num_pixels = width * height + + if check_size and num_pixels > MAX_IMAGE_PIXELS: + max_dim = int(MAX_IMAGE_PIXELS**0.5) + return ValidationResult.error( + f"{name} too large ({width}×{height} = {num_pixels:,} pixels). " + f"Maximum: ~{MAX_IMAGE_PIXELS:,} pixels ({max_dim}×{max_dim})" + ) + + return ValidationResult.ok( + width=width, height=height, pixels=num_pixels, mode=img.mode, format=img.format + ) + + except Exception as e: + return ValidationResult.error(f"Could not read {name}: {e}") + + +def validate_rsa_key( + key_data: bytes, password: str | None = None, required: bool = False +) -> ValidationResult: + """ + Validate RSA private key. 
+ + Args: + key_data: PEM-encoded key bytes + password: Password if key is encrypted + required: Whether key is required + + Returns: + ValidationResult with key_size + """ + if not key_data: + if required: + return ValidationResult.error("RSA key is required") + return ValidationResult.ok() + + try: + private_key = load_rsa_key(key_data, password) + key_size = private_key.key_size + + if key_size < MIN_RSA_BITS: + return ValidationResult.error( + f"RSA key must be at least {MIN_RSA_BITS} bits (got {key_size})" + ) + + return ValidationResult.ok(key_size=key_size) + + except Exception as e: + return ValidationResult.error(str(e)) + + +def validate_security_factors(pin: str, rsa_key_data: bytes | None) -> ValidationResult: + """ + Validate that at least one security factor is provided. + + Args: + pin: PIN string (may be empty) + rsa_key_data: RSA key bytes (may be None/empty) + + Returns: + ValidationResult + """ + has_pin = bool(pin and pin.strip()) + has_key = bool(rsa_key_data and len(rsa_key_data) > 0) + + if not has_pin and not has_key: + return ValidationResult.error("You must provide at least a PIN or RSA Key") + + return ValidationResult.ok(has_pin=has_pin, has_key=has_key) + + +def validate_file_extension( + filename: str, allowed: set[str], file_type: str = "File" +) -> ValidationResult: + """ + Validate file extension. + + Args: + filename: Filename to check + allowed: Set of allowed extensions (lowercase, no dot) + file_type: Name for error messages + + Returns: + ValidationResult with extension + """ + if not filename or "." not in filename: + return ValidationResult.error(f"{file_type} must have a file extension") + + ext = filename.rsplit(".", 1)[1].lower() + + if ext not in allowed: + return ValidationResult.error( + f"Unsupported {file_type.lower()} type: .{ext}. " + f"Allowed: {', '.join(sorted('.' 
def validate_key_password(password: str) -> ValidationResult:
    """
    Check a password intended for key encryption.

    Args:
        password: Candidate password

    Returns:
        ValidationResult with ``length`` on success; an error result when
        the password is empty or shorter than MIN_KEY_PASSWORD_LENGTH.
    """
    if not password:
        return ValidationResult.error("Password is required")

    password_length = len(password)
    if password_length >= MIN_KEY_PASSWORD_LENGTH:
        return ValidationResult.ok(length=password_length)

    return ValidationResult.error(
        f"Password must be at least {MIN_KEY_PASSWORD_LENGTH} characters"
    )
def validate_embed_mode(mode: str) -> ValidationResult:
    """
    Check that ``mode`` is one of the supported image embedding modes.

    Args:
        mode: Embedding mode string

    Returns:
        ValidationResult with ``mode`` on success; otherwise an error
        result listing the accepted modes in sorted order.
    """
    accepted = {EMBED_MODE_LSB, EMBED_MODE_DCT, EMBED_MODE_AUTO}

    if mode in accepted:
        return ValidationResult.ok(mode=mode)

    options = ", ".join(sorted(accepted))
    return ValidationResult.error(f"Invalid embed_mode: '{mode}'. Valid options: {options}")
def validate_dct_color_mode(mode: str) -> ValidationResult:
    """
    Validate DCT color mode.

    Matching is case-insensitive. A non-string value (for example ``None``
    from a missing form field) is reported as an invalid mode rather than
    raising ``AttributeError``.

    Args:
        mode: Color mode ('grayscale' or 'color')

    Returns:
        ValidationResult with the lower-cased ``mode`` on success, or an
        error result listing the valid options.
    """
    valid_modes = {"grayscale", "color"}

    # Only strings can be normalised; anything else can never be valid.
    normalized = mode.lower() if isinstance(mode, str) else None

    if normalized not in valid_modes:
        return ValidationResult.error(
            f"Invalid DCT color mode: '{mode}'. Valid options: {', '.join(sorted(valid_modes))}"
        )

    return ValidationResult.ok(mode=normalized)
def validate_audio_embed_mode(mode: str) -> ValidationResult:
    """Check that ``mode`` is a supported audio embedding mode.

    The accepted set is VALID_AUDIO_EMBED_MODES plus EMBED_MODE_AUDIO_AUTO.
    Returns a ValidationResult with ``mode`` on success, or an error result
    listing the accepted modes in sorted order.
    """
    accepted = VALID_AUDIO_EMBED_MODES | {EMBED_MODE_AUDIO_AUTO}

    if mode in accepted:
        return ValidationResult.ok(mode=mode)

    options = ", ".join(sorted(accepted))
    return ValidationResult.error(
        f"Invalid audio embed_mode: '{mode}'. " f"Valid options: {options}"
    )
+""" + +import struct +import tempfile +from pathlib import Path + +from .constants import ( + EMBED_MODE_VIDEO_LSB, + VIDEO_MAGIC_LSB, + VIDEO_OUTPUT_CODEC, +) +from .debug import debug +from .exceptions import VideoCapacityError, VideoError +from .models import VideoEmbedStats +from .steganography import ENCRYPTION_OVERHEAD, _embed_lsb, _extract_lsb +from .video_utils import extract_frames, get_video_info, reassemble_video + +# Progress reporting interval — write every N frames +PROGRESS_INTERVAL = 5 + + +# ============================================================================= +# PROGRESS REPORTING +# ============================================================================= + + +def _write_progress(progress_file: str | None, current: int, total: int, phase: str = "embedding"): + """Write progress to file for frontend polling.""" + if progress_file is None: + return + try: + import json + + with open(progress_file, "w") as f: + json.dump( + { + "current": current, + "total": total, + "percent": round((current / total) * 100, 1) if total > 0 else 0, + "phase": phase, + }, + f, + ) + except Exception: + pass # Don't let progress writing break encoding + + +# ============================================================================= +# CAPACITY +# ============================================================================= + + +def calculate_video_lsb_capacity(video_data: bytes) -> int: + """ + Calculate the maximum bytes that can be embedded in a video via LSB. + + Calculates capacity based on I-frames (keyframes) only. Each I-frame + provides capacity proportional to its pixel count. + + Args: + video_data: Raw bytes of a video file. + + Returns: + Maximum embeddable payload size in bytes (after subtracting overhead). + + Raises: + VideoError: If the video cannot be read or is in an unsupported format. 
def generate_frame_indices(key: bytes, num_frames: int, num_needed: int) -> list[int]:
    """
    Generate pseudo-random frame indices using ChaCha20 as a CSPRNG.

    Produces a deterministic sequence of unique frame indices so that
    the same key always yields the same embedding locations.

    Two strategies are used:

    * When at least ``num_frames // 2`` indices are requested, the full
      index list is shuffled (Fisher-Yates) and its prefix is returned.
    * Otherwise indices are drawn one 32-bit word at a time from the
      keystream and duplicates are discarded. Drawing continues along the
      same keystream until enough unique indices have been found, so an
      unlucky run of collisions no longer leaves the result short.

    Args:
        key: 32-byte key for the ChaCha20 cipher.
        num_frames: Total number of frames available.
        num_needed: How many unique frame indices are required.

    Returns:
        List of ``num_needed`` unique indices in [0, num_frames).
    """
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms

    debug.validate(len(key) == 32, f"Frame key must be 32 bytes, got {len(key)}")
    debug.validate(num_frames > 0, f"Number of frames must be positive, got {num_frames}")
    debug.validate(num_needed > 0, f"Number needed must be positive, got {num_needed}")
    debug.validate(
        num_needed <= num_frames,
        f"Cannot select {num_needed} frames from {num_frames} available",
    )

    debug.print(f"Generating {num_needed} frame indices from {num_frames} total frames")

    # Use a different nonce offset for frame selection (vs pixel selection)
    nonce = b"\x01" + b"\x00" * 15  # Different from pixel selection nonce

    # Encrypting zero bytes yields the raw ChaCha20 keystream.
    cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
    encryptor = cipher.encryptor()

    if num_needed >= num_frames // 2:
        # Full Fisher-Yates shuffle, one 32-bit word per swap position
        indices = list(range(num_frames))
        random_bytes = encryptor.update(b"\x00" * (num_frames * 4))

        for i in range(num_frames - 1, 0, -1):
            offset = (num_frames - 1 - i) * 4
            j = int.from_bytes(random_bytes[offset : offset + 4], "big") % (i + 1)
            indices[i], indices[j] = indices[j], indices[i]

        return indices[:num_needed]

    # Direct sampling
    selected: list[int] = []
    used: set[int] = set()

    # The first batch is the same size as before, so the words drawn and
    # their order are unchanged. Each draw is new with probability > 1/2
    # here, so the batch cap is only a guard against a non-terminating loop.
    zero_batch = b"\x00" * ((num_needed * 2) * 4)
    max_batches = 64

    for _ in range(max_batches):
        random_bytes = encryptor.update(zero_batch)  # continues the keystream

        for byte_offset in range(0, len(random_bytes), 4):
            idx = int.from_bytes(random_bytes[byte_offset : byte_offset + 4], "big") % num_frames

            if idx not in used:
                used.add(idx)
                selected.append(idx)
                if len(selected) == num_needed:
                    return selected

    debug.validate(
        len(selected) == num_needed,
        f"Failed to generate enough indices: {len(selected)}/{num_needed}",
    )
    return selected
def _embed_chunk_in_frame(frame_path, chunk: bytes, pixel_key: bytes, frame_label: str) -> None:
    """Embed *chunk* into the PNG frame at *frame_path*, rewriting the file in place."""
    with open(frame_path, "rb") as f:
        frame_data = f.read()

    try:
        # Only the stego image is needed; per-frame stats and extension are unused.
        stego_frame, _stats, _ext = _embed_lsb(
            chunk,
            frame_data,
            pixel_key,
            bits_per_channel=1,
            output_format="PNG",
        )

        with open(frame_path, "wb") as f:
            f.write(stego_frame)

    except Exception as e:
        debug.print(f"Failed to embed in {frame_label}: {e}")
        raise VideoError(f"Failed to embed in {frame_label}: {e}") from e


@debug.time
def embed_in_video_lsb(
    data: bytes,
    carrier_video: bytes,
    pixel_key: bytes,
    progress_file: str | None = None,
) -> tuple[bytes, VideoEmbedStats]:
    """
    Embed data into video frames using LSB steganography.

    The payload is prepended with a 4-byte magic header and a 4-byte
    big-endian length prefix. If it fits in one I-frame it is written to
    the first extracted frame; otherwise it is split into consecutive
    chunks written to the first N extracted I-frames in order. Every frame
    is embedded with the same ``pixel_key``.

    The frames are then passed to ``reassemble_video`` at 1 fps to produce
    the output video.

    Args:
        data: Encrypted payload bytes to embed.
        carrier_video: Raw bytes of the carrier video file.
        pixel_key: 32-byte key for pixel selection.
        progress_file: Optional path for progress JSON (frontend polling).

    Returns:
        Tuple of (stego video bytes, VideoEmbedStats).

    Raises:
        VideoCapacityError: If the payload is too large for the carrier,
            including when the extracted I-frames cannot hold every chunk.
        VideoError: On any other embedding failure.
    """
    debug.print(f"Video LSB embedding {len(data)} bytes")
    debug.data(pixel_key, "Pixel key for embedding")
    debug.validate(len(pixel_key) == 32, f"Pixel key must be 32 bytes, got {len(pixel_key)}")

    try:
        # Get video info
        video_info = get_video_info(carrier_video)
        debug.print(
            f"Carrier video: {video_info.width}x{video_info.height}, "
            f"{video_info.fps:.2f} fps, {video_info.duration_seconds:.1f}s, "
            f"{video_info.i_frame_count} I-frames"
        )

        # Prepend magic + length prefix
        header = VIDEO_MAGIC_LSB + struct.pack(">I", len(data))
        payload = header + data
        debug.print(f"Payload with header: {len(payload)} bytes")

        # Calculate capacity and check fit
        capacity = calculate_video_lsb_capacity(carrier_video)
        if len(payload) > capacity + ENCRYPTION_OVERHEAD:
            raise VideoCapacityError(len(payload), capacity)

        # Extract I-frames to temp directory
        with tempfile.TemporaryDirectory(prefix="stegasoo_video_") as temp_dir_str:
            temp_dir = Path(temp_dir_str)

            _write_progress(progress_file, 5, 100, "extracting_frames")

            frames, _ = extract_frames(carrier_video, temp_dir, keyframes_only=True)
            num_frames = len(frames)

            debug.print(f"Extracted {num_frames} I-frames for embedding")

            if num_frames == 0:
                raise VideoError("No I-frames found in video")

            # Calculate bytes per frame (minus 4 byte length prefix used by _embed_lsb)
            pixels_per_frame = video_info.width * video_info.height
            bytes_per_frame = (pixels_per_frame * 3) // 8 - 4  # 3 bits per pixel, minus len prefix

            # Frames too small to hold anything would otherwise cause a
            # division by zero or an empty embedding loop below.
            if bytes_per_frame <= 0:
                raise VideoCapacityError(len(payload), 0)

            if len(payload) <= bytes_per_frame:
                # Single-frame case: the whole payload goes into the first
                # frame, which keeps extraction straightforward.
                debug.print(f"Payload fits in single frame ({len(payload)} <= {bytes_per_frame})")
                _embed_chunk_in_frame(frames[0], payload, pixel_key, "frame")
                modified_frames = 1
            else:
                # Larger payloads are split across sequential frames; each
                # frame stores one chunk (length-prefixed by _embed_lsb).
                debug.print("Splitting payload across multiple frames")

                frames_needed = (len(payload) + bytes_per_frame - 1) // bytes_per_frame

                # Refuse rather than silently drop the tail of the payload
                # when fewer frames were extracted than the payload needs.
                if frames_needed > num_frames:
                    raise VideoCapacityError(len(payload), num_frames * bytes_per_frame)

                debug.print(f"Using {frames_needed} frames to embed {len(payload)} bytes")

                modified_frames = 0
                for frame_idx in range(frames_needed):
                    payload_offset = frame_idx * bytes_per_frame
                    chunk = payload[payload_offset : payload_offset + bytes_per_frame]

                    _embed_chunk_in_frame(
                        frames[frame_idx], chunk, pixel_key, f"frame {frame_idx}"
                    )
                    modified_frames += 1

                    if progress_file and frame_idx % PROGRESS_INTERVAL == 0:
                        pct = 10 + int((frame_idx / frames_needed) * 70)
                        _write_progress(progress_file, pct, 100, "embedding")

            _write_progress(progress_file, 80, 100, "reassembling")

            # Reassemble video with modified frames
            stego_video = reassemble_video(
                frames,
                carrier_video,
                fps=1.0,  # I-frame only videos use 1 fps
            )

            _write_progress(progress_file, 100, 100, "complete")

            video_stats = VideoEmbedStats(
                frames_modified=modified_frames,
                total_frames=video_info.total_frames,
                capacity_used=len(payload) / (capacity + ENCRYPTION_OVERHEAD),
                bytes_embedded=len(payload),
                width=video_info.width,
                height=video_info.height,
                fps=video_info.fps,
                duration_seconds=video_info.duration_seconds,
                embed_mode=EMBED_MODE_VIDEO_LSB,
                codec=VIDEO_OUTPUT_CODEC,
            )

            debug.print(
                f"Video LSB embedding complete: {len(stego_video)} bytes, "
                f"{modified_frames} frames modified"
            )

            return stego_video, video_stats

    except VideoCapacityError:
        raise
    except VideoError:
        raise
    except Exception as e:
        debug.exception(e, "embed_in_video_lsb")
        raise VideoError(f"Failed to embed data in video: {e}") from e
+ """ + debug.print(f"Video LSB extracting from {len(video_data)} byte video") + debug.data(pixel_key, "Pixel key for extraction") + + try: + # Get video info + video_info = get_video_info(video_data) + debug.print( + f"Video: {video_info.width}x{video_info.height}, " + f"{video_info.i_frame_count} I-frames" + ) + + # Extract I-frames + with tempfile.TemporaryDirectory(prefix="stegasoo_video_extract_") as temp_dir_str: + temp_dir = Path(temp_dir_str) + + _write_progress(progress_file, 5, 100, "extracting_frames") + + frames, _ = extract_frames(video_data, temp_dir, keyframes_only=True) + num_frames = len(frames) + + if num_frames == 0: + debug.print("No I-frames found in video") + return None + + debug.print(f"Extracted {num_frames} I-frames for extraction") + + _write_progress(progress_file, 20, 100, "extracting_data") + + # First, try to extract from frame 0 to get magic and total length + frame_path = frames[0] + with open(frame_path, "rb") as f: + frame_data = f.read() + + first_chunk = _extract_lsb(frame_data, pixel_key, bits_per_channel=1) + if first_chunk is None or len(first_chunk) < 8: + debug.print("Failed to extract initial data from first frame") + return None + + # Check magic bytes + magic = first_chunk[:4] + if magic != VIDEO_MAGIC_LSB: + debug.print(f"Magic mismatch: got {magic!r}, expected {VIDEO_MAGIC_LSB!r}") + return None + + # Get total payload length + total_length = struct.unpack(">I", first_chunk[4:8])[0] + debug.print(f"Total payload length: {total_length} bytes") + + # Sanity check + pixels_per_frame = video_info.width * video_info.height + bytes_per_frame = (pixels_per_frame * 3) // 8 - 4 # minus length prefix + max_possible = bytes_per_frame * num_frames + + if total_length > max_possible or total_length < 1: + debug.print(f"Invalid payload length: {total_length}") + return None + + # If the entire payload fits in the first frame, return it directly + # This matches the simplified single-frame embedding approach + if len(first_chunk) >= 
8 + total_length: + debug.print("Payload fits in single frame, extracting directly") + payload = first_chunk[8 : 8 + total_length] + else: + # Multi-frame extraction + debug.print("Multi-frame extraction needed") + frames_needed = (total_length + 8 + bytes_per_frame - 1) // bytes_per_frame + frames_needed = min(frames_needed, num_frames) + + # Extract sequentially (matching the embedding approach) + extracted_chunks = [first_chunk] + for frame_idx in range(1, frames_needed): + frame_path = frames[frame_idx] + with open(frame_path, "rb") as f: + frame_data = f.read() + + chunk = _extract_lsb(frame_data, pixel_key, bits_per_channel=1) + if chunk: + extracted_chunks.append(chunk) + + if progress_file and frame_idx % PROGRESS_INTERVAL == 0: + pct = 20 + int((frame_idx / frames_needed) * 70) + _write_progress(progress_file, pct, 100, "extracting_data") + + # Combine chunks + combined = b"".join(extracted_chunks) + + if len(combined) < 8 + total_length: + debug.print( + f"Insufficient data: have {len(combined) - 8}, need {total_length}" + ) + return None + + payload = combined[8 : 8 + total_length] + + _write_progress(progress_file, 100, 100, "complete") + + debug.print(f"Video LSB successfully extracted {len(payload)} bytes") + return payload + + except Exception as e: + debug.exception(e, "extract_from_video_lsb") + return None diff --git a/src/soosef/stegasoo/video_utils.py b/src/soosef/stegasoo/video_utils.py new file mode 100644 index 0000000..2fa144e --- /dev/null +++ b/src/soosef/stegasoo/video_utils.py @@ -0,0 +1,732 @@ +""" +Stegasoo Video Utilities (v4.4.0) + +Video format detection, frame extraction, and transcoding for video steganography. 
def has_ffmpeg_support() -> bool:
    """Report whether an ``ffmpeg`` executable can be located on PATH.

    Returns:
        True if ffmpeg is found on PATH, False otherwise.
    """
    ffmpeg_location = shutil.which("ffmpeg")
    if ffmpeg_location is None:
        return False
    return True
def detect_video_format(video_data: bytes) -> str:
    """Detect video format from magic bytes.

    Examines the first bytes of video data to identify the container format.

    Magic byte signatures:
    - MP4/M4V: b"ftyp" at offset 4
    - MKV/WebM: b"\\x1a\\x45\\xdf\\xa3" (EBML header)
    - AVI: b"RIFF" at offset 0 + b"AVI " at offset 8
    - MOV: b"ftyp" with a QuickTime major brand, or b"moov"/b"mdat"/
      b"wide"/b"free" at offset 4

    Args:
        video_data: Raw video file bytes.

    Returns:
        Format string: "mp4", "mkv", "webm", "avi", "mov", or "unknown".
        Input shorter than 12 bytes is always "unknown".
    """
    if len(video_data) < 12:
        logger.debug("detect_video_format: data too short (%d bytes)", len(video_data))
        return "unknown"

    box_type = video_data[4:8]

    # MP4/M4V/MOV: "ftyp" atom at offset 4
    if box_type == b"ftyp":
        # The major brand is exactly four bytes, space-padded, so the
        # QuickTime brand is "qt" followed by two spaces.
        major_brand = video_data[8:12]
        if major_brand in (b"qt  ", b"mqt "):
            return "mov"
        # Every other ftyp brand (isom, mp42, avc1, M4V, ...) is treated as MP4.
        return "mp4"

    # MKV/WebM: EBML header
    if video_data[:4] == b"\x1a\x45\xdf\xa3":
        # WebM uses the "webm" doctype, MKV uses "matroska".
        # Simple heuristic: search for the doctype string in the first 64 bytes.
        if b"webm" in video_data[:64].lower():
            return "webm"
        return "mkv"

    # AVI: RIFF....AVI
    if video_data[:4] == b"RIFF" and video_data[8:12] == b"AVI ":
        return "avi"

    # MOV without ftyp (older format): check for moov/mdat atoms
    if box_type in (b"moov", b"mdat", b"wide", b"free"):
        return "mov"

    return "unknown"
VideoInfo: + """Extract video metadata from raw video bytes. + + Uses ffprobe to extract detailed video information including + resolution, frame rate, duration, codec, and I-frame count. + + Args: + video_data: Raw video file bytes. + + Returns: + VideoInfo dataclass with video metadata. + + Raises: + UnsupportedVideoFormatError: If the format cannot be detected. + VideoTranscodeError: If metadata extraction fails. + """ + _require_ffprobe() + + fmt = detect_video_format(video_data) + if fmt == "unknown": + raise UnsupportedVideoFormatError( + "Cannot detect video format. Supported: MP4, MKV, WebM, AVI, MOV." + ) + + # Write to temp file for ffprobe + with tempfile.NamedTemporaryFile(suffix=f".{fmt}", delete=False) as f: + f.write(video_data) + temp_path = f.name + + try: + # Get stream info + result = subprocess.run( + [ + "ffprobe", + "-v", + "quiet", + "-print_format", + "json", + "-show_format", + "-show_streams", + "-select_streams", + "v:0", + temp_path, + ], + capture_output=True, + text=True, + timeout=60, + ) + + if result.returncode != 0: + raise VideoTranscodeError(f"ffprobe failed: {result.stderr}") + + info = json.loads(result.stdout) + + # Extract video stream info + if not info.get("streams"): + raise VideoTranscodeError("No video stream found in file") + + stream = info["streams"][0] + format_info = info.get("format", {}) + + width = int(stream.get("width", 0)) + height = int(stream.get("height", 0)) + codec = stream.get("codec_name", "unknown") + + # Parse frame rate (can be "30/1" or "29.97") + fps_str = stream.get("r_frame_rate", "0/1") + if "/" in fps_str: + num, den = fps_str.split("/") + fps = float(num) / float(den) if float(den) > 0 else 0.0 + else: + fps = float(fps_str) + + # Get duration + duration = float(stream.get("duration", format_info.get("duration", 0))) + + # Get total frames + nb_frames = stream.get("nb_frames") + if nb_frames: + total_frames = int(nb_frames) + else: + # Estimate from duration and fps + total_frames = 
def _count_i_frames(video_path: str, timeout: int = 120) -> int:
    """Return how many frames of the first video stream ffprobe reports as type ``I``.

    ffprobe is asked to print each frame's ``pict_type`` as a bare CSV
    value, one per line; lines equal to ``I`` after stripping are counted.

    Args:
        video_path: Path to video file.
        timeout: Maximum time in seconds to wait for ffprobe.

    Returns:
        Number of I-frames, or 0 if ffprobe exits non-zero, times out, or
        any other error occurs (each case is logged as a warning).
    """
    probe_cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-select_streams",
        "v:0",
        "-show_entries",
        "frame=pict_type",
        "-of",
        "csv=p=0",
        video_path,
    ]

    try:
        probe = subprocess.run(
            probe_cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        if probe.returncode != 0:
            logger.warning("Failed to count I-frames: %s", probe.stderr)
            return 0

        # One picture type per output line; count the ones that are exactly 'I'
        picture_types = [entry.strip() for entry in probe.stdout.strip().split("\n")]
        return picture_types.count("I")

    except subprocess.TimeoutExpired:
        logger.warning("I-frame counting timed out")
        return 0
    except Exception as e:
        logger.warning("I-frame counting failed: %s", e)
        return 0
+ + Uses ffmpeg to extract frames from the video. By default extracts only + I-frames (keyframes) which are more robust to re-encoding. + + Args: + video_data: Raw video file bytes. + output_dir: Directory to save frames (temp dir if None). + keyframes_only: If True, only extract I-frames (keyframes). + + Returns: + Tuple of (list of frame paths sorted by frame number, VideoInfo). + + Raises: + VideoTranscodeError: If frame extraction fails. + """ + _require_ffmpeg() + + fmt = detect_video_format(video_data) + if fmt == "unknown": + raise UnsupportedVideoFormatError( + "Cannot detect video format. Supported: MP4, MKV, WebM, AVI, MOV." + ) + + # Get video info first + video_info = get_video_info(video_data) + + # Create output directory + if output_dir is None: + output_dir = Path(tempfile.mkdtemp(prefix="stegasoo_frames_")) + else: + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Write video to temp file + with tempfile.NamedTemporaryFile(suffix=f".{fmt}", delete=False) as f: + f.write(video_data) + video_path = f.name + + try: + # Build ffmpeg command + cmd = [ + "ffmpeg", + "-i", + video_path, + "-vsync", + "0", + ] + + if keyframes_only: + # Extract only I-frames + cmd.extend(["-vf", "select='eq(pict_type,I)'"]) + + # Output as PNG with frame number + output_pattern = str(output_dir / "frame_%06d.png") + cmd.extend(["-start_number", "0", output_pattern]) + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=600, # 10 minute timeout + ) + + if result.returncode != 0: + raise VideoTranscodeError(f"Frame extraction failed: {result.stderr}") + + # Collect extracted frames + frames = sorted(output_dir.glob("frame_*.png")) + + if not frames: + raise VideoTranscodeError("No frames were extracted from video") + + logger.info( + "Extracted %d %s from video", + len(frames), + "I-frames" if keyframes_only else "frames", + ) + + return frames, video_info + + except subprocess.TimeoutExpired: + raise 
def reassemble_video(
    frames: list[Path],
    original_video_data: bytes,
    output_path: Path | None = None,
    fps: float | None = None,
    audio_data: bytes | None = None,
) -> bytes:
    """Reassemble frames back into a video file.

    Creates a new video from the modified frames using the codec named by
    ``VIDEO_OUTPUT_CODEC`` in a ``VIDEO_OUTPUT_CONTAINER`` file (FFV1 lossless
    in MKV according to the encoder options used below), so the embedded data
    is not altered by re-encoding.

    Args:
        frames: List of frame image paths in order.
        original_video_data: Original video bytes. If it contains an audio
            stream, that stream is copied into the output unchanged.
        output_path: Optional output path. When None the video is written
            inside a temporary directory that is removed before returning.
        fps: Frame rate used to compute the per-frame duration. Defaults to
            1.0 when None, because I-frame-only sequences are sparse.
        audio_data: Currently ignored. Audio is always taken from
            ``original_video_data``; the parameter is kept so existing
            callers keep working.

    Returns:
        Video file bytes.

    Raises:
        VideoTranscodeError: If no frames are given, ``fps`` is not a
            positive number, ffmpeg fails, or ffmpeg times out.
    """
    _require_ffmpeg()

    if not frames:
        raise VideoTranscodeError("No frames provided for reassembly")

    # Get original video format
    fmt = detect_video_format(original_video_data)

    if fps is None:
        # Use a fixed low framerate for I-frame sequences
        # since I-frames are sparse (typically 1 per 30-60 frames)
        fps = 1.0

    # Reject zero, negative and NaN rates up front: they would otherwise
    # surface as ZeroDivisionError or as nonsensical concat durations.
    if not fps > 0:
        raise VideoTranscodeError(f"Invalid frame rate for reassembly: {fps}")
    frame_duration = 1.0 / fps

    # Create temp directory for work
    with tempfile.TemporaryDirectory(prefix="stegasoo_reassemble_") as temp_dir_str:
        temp_dir = Path(temp_dir_str)

        # Write original video for audio extraction
        original_path = temp_dir / f"original.{fmt}"
        original_path.write_bytes(original_video_data)

        # Create frame list file for ffmpeg (concat demuxer format)
        frame_list = temp_dir / "frames.txt"
        with open(frame_list, "w", encoding="utf-8") as f:
            for frame in frames:
                # Inside a single-quoted string ffmpeg cannot escape a quote,
                # so close the string, emit \' and reopen it.
                quoted = str(Path(frame).absolute()).replace("'", "'\\''")
                f.write(f"file '{quoted}'\n")
                f.write(f"duration {frame_duration}\n")

        # Output path
        if output_path is None:
            output_file = temp_dir / f"output.{VIDEO_OUTPUT_CONTAINER}"
        else:
            output_file = Path(output_path)

        # Build ffmpeg command
        cmd = [
            "ffmpeg",
            "-y",  # Overwrite output
            "-f",
            "concat",
            "-safe",
            "0",
            "-i",
            str(frame_list),
        ]

        # Add the original as a second input only if it carries audio
        has_audio = _video_has_audio(original_path)
        if has_audio:
            cmd.extend(["-i", str(original_path)])

        # Video encoding settings (FFV1 lossless)
        cmd.extend(
            [
                "-c:v",
                VIDEO_OUTPUT_CODEC,
                "-level",
                "3",  # FFV1 level 3 for better compression
                "-coder",
                "1",  # Range coder
                "-context",
                "1",  # Large context
                "-slicecrc",
                "1",  # Error detection
            ]
        )

        # Audio settings
        if has_audio:
            cmd.extend(
                [
                    "-map",
                    "0:v",  # Video from frames
                    "-map",
                    "1:a?",  # Audio from original (if exists)
                    "-c:a",
                    "copy",  # Copy audio without re-encoding
                ]
            )

        cmd.append(str(output_file))

        logger.debug("Running ffmpeg: %s", " ".join(cmd))

        # Report a timeout as VideoTranscodeError, matching extract_frames.
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=600,
            )
        except subprocess.TimeoutExpired as e:
            raise VideoTranscodeError("Video reassembly timed out") from e

        if result.returncode != 0:
            raise VideoTranscodeError(f"Video reassembly failed: {result.stderr}")

        # Read output before the temporary directory is removed
        return output_file.read_bytes()
Maximum: {max_gb:.0f} GB" + ) + + # Detect format + fmt = detect_video_format(video_data) + if fmt == "unknown": + return ValidationResult.error( + f"Could not detect {name} format. " "Supported formats: MP4, MKV, WebM, AVI, MOV." + ) + + # Check ffmpeg availability + if not has_ffmpeg_support(): + return ValidationResult.error( + "ffmpeg is required for video processing. Please install ffmpeg." + ) + + # Extract metadata for further validation + try: + info = get_video_info(video_data) + except (VideoTranscodeError, UnsupportedVideoFormatError) as e: + return ValidationResult.error(f"Could not read {name}: {e}") + except Exception as e: + return ValidationResult.error(f"Could not read {name}: {e}") + + # Check duration + if check_duration and info.duration_seconds > MAX_VIDEO_DURATION: + return ValidationResult.error( + f"{name} too long ({info.duration_seconds:.1f}s). " + f"Maximum: {MAX_VIDEO_DURATION}s ({MAX_VIDEO_DURATION // 60} minutes)" + ) + + # Check resolution + if info.width < MIN_VIDEO_RESOLUTION[0] or info.height < MIN_VIDEO_RESOLUTION[1]: + return ValidationResult.error( + f"{name} resolution too small ({info.width}x{info.height}). " + f"Minimum: {MIN_VIDEO_RESOLUTION[0]}x{MIN_VIDEO_RESOLUTION[1]}" + ) + + if info.width > MAX_VIDEO_RESOLUTION[0] or info.height > MAX_VIDEO_RESOLUTION[1]: + return ValidationResult.error( + f"{name} resolution too large ({info.width}x{info.height}). 
" + f"Maximum: {MAX_VIDEO_RESOLUTION[0]}x{MAX_VIDEO_RESOLUTION[1]}" + ) + + # Check I-frame count + if info.i_frame_count < 1: + return ValidationResult.error(f"{name} has no I-frames (keyframes) for embedding") + + return ValidationResult.ok( + width=info.width, + height=info.height, + fps=info.fps, + duration=info.duration_seconds, + total_frames=info.total_frames, + i_frame_count=info.i_frame_count, + format=info.format, + codec=info.codec, + bitrate=info.bitrate, + ) + + +def require_valid_video(video_data: bytes, name: str = "Video") -> None: + """Validate video, raising VideoValidationError on failure. + + Args: + video_data: Raw video file bytes. + name: Descriptive name for error messages. + + Raises: + VideoValidationError: If validation fails. + """ + result = validate_video(video_data, name) + if not result.is_valid: + raise VideoValidationError(result.error_message) + + +def validate_video_embed_mode(mode: str) -> ValidationResult: + """Validate video embedding mode string. + + Args: + mode: Embedding mode to validate. + + Returns: + ValidationResult with mode in details on success. + """ + valid_modes = VALID_VIDEO_EMBED_MODES | {EMBED_MODE_VIDEO_AUTO} + if mode not in valid_modes: + return ValidationResult.error( + f"Invalid video embed_mode: '{mode}'. " + f"Valid options: {', '.join(sorted(valid_modes))}" + ) + return ValidationResult.ok(mode=mode) + + +# ============================================================================= +# CAPACITY CALCULATION +# ============================================================================= + + +def calculate_video_capacity(video_data: bytes, embed_mode: str = EMBED_MODE_VIDEO_LSB) -> VideoCapacityInfo: + """Calculate steganographic capacity for a video file. + + Capacity is based on I-frames only (keyframes). Each I-frame provides + capacity similar to an image of the same dimensions. + + Args: + video_data: Raw video file bytes. + embed_mode: Embedding mode (currently only video_lsb). 
"""
Verisoo - Decentralized image provenance and attestation.

Part of the Soo Suite:
- Stegasoo: covert communication, hiding encrypted messages in images
- Verisoo: overt attestation, proving provenance and building decentralized reputation
"""

__version__ = "0.1.0"

# Names that exist regardless of whether the guarded imports below succeed.
# __all__ must only list names that are really defined, otherwise
# ``from soosef.verisoo import *`` raises AttributeError.
__all__ = [
    "__version__",
    "_AVAILABLE",
]

try:
    from .models import Attestation, AttestationRecord, Identity
    from .exceptions import VerisooError, AttestationError, VerificationError
except ImportError:
    # A submodule (or one of its dependencies) could not be imported; the
    # package still imports and reports this through _AVAILABLE.
    _AVAILABLE = False
else:
    _AVAILABLE = True
    __all__ += [
        "Attestation",
        "AttestationRecord",
        "Identity",
        "VerisooError",
        "AttestationError",
        "VerificationError",
    ]
app = FastAPI(
    title="Verisoo",
    description="Decentralized image provenance and attestation API",
    version="0.1.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# CORS for browser-based verification widgets.
#
# Allowed origins are read from VERISOO_CORS_ORIGINS (comma-separated) and
# default to "*" so any page can call the API. Blank or empty values fall
# back to "*" as well.
_CORS_ORIGINS = [
    origin.strip()
    for origin in os.environ.get("VERISOO_CORS_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_CORS_ORIGINS,
    # Credentialed CORS must not be combined with wildcard origins, methods
    # or headers. No endpoint in this module reads cookies or auth headers,
    # so credentials stay disabled.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
AttestorResponse + attested_at: datetime + captured_at: datetime | None = None + location: LocationResponse | None = None + device: DeviceResponse | None = None + caption: str | None = None + proof_url: str + match_type: str # "exact", "phash", "dhash", etc. + hash_distances: dict[str, int] + + +class VerifyResponse(BaseModel): + verified: bool + message: str + attestations: list[AttestationResponse] + image_sha256: str + checked_at: datetime + + +class StatusResponse(BaseModel): + status: str + record_count: int + merkle_root: str | None + log_size_bytes: int + + +class ProofResponse(BaseModel): + record_id: str + short_id: str + attestor: AttestorResponse + attested_at: datetime + captured_at: datetime | None = None + location: LocationResponse | None = None + device: DeviceResponse | None = None + caption: str | None = None + image_sha256: str + proof_url: str + full_proof_url: str + + +# --- Helper functions --- + + +def get_storage() -> LocalStorage: + """Get storage instance (could be cached/pooled in production).""" + return LocalStorage(DATA_DIR) + + +def record_to_attestation_response( + record, + identity, + match_type: str, + distances: dict[str, int], +) -> AttestationResponse: + """Convert internal record to API response.""" + cm = record.capture_metadata + proof = ProofLink(record.record_id, BASE_URL) + + location = None + if record.location: + loc = record.location + location = LocationResponse( + latitude=loc.latitude, + longitude=loc.longitude, + accuracy_meters=loc.accuracy_meters, + name=loc.location_name, + ) + + device = None + if cm and cm.device: + dev = cm.device + device = DeviceResponse( + make=dev.make, + model=dev.model, + software=dev.software, + ) + + return AttestationResponse( + record_id=record.record_id, + short_id=record.short_id, + attestor=AttestorResponse( + fingerprint=record.attestor_fingerprint, + name=identity.display_name if identity else None, + ), + attested_at=record.timestamp, + captured_at=record.captured_at, + 
@app.post("/verify", response_model=VerifyResponse)
async def verify_image(
    image: Annotated[UploadFile, File(description="Image to verify")],
    threshold: Annotated[int, Query(ge=0, le=50)] = 10,
):
    """
    Verify an image against known attestations.

    Upload an image (even a mangled social media version) and get back
    any matching attestations with provenance information.

    The threshold parameter controls how strict matching is:
    - 0: Exact perceptual match only
    - 5: Very strict
    - 10: Default - catches most social media compression
    - 15-20: Looser - may catch more heavily modified images

    Responds with HTTP 400 when the upload cannot be hashed as an image.
    Matches are returned oldest attestation first, and the summary message
    describes that oldest match.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    # Read and hash the uploaded image
    image_data = await image.read()

    try:
        image_hashes = hash_image(image_data, robust=True)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid image: {e}") from e

    storage = get_storage()
    matches: list[AttestationResponse] = []

    # Search for matching attestations
    for record in storage.iterate_records():
        is_match, match_type = is_same_image(
            image_hashes,
            record.image_hashes,
            perceptual_threshold=threshold,
        )
        if not is_match:
            continue

        # Get attestor identity for display name
        identity = storage.load_identity(record.attestor_fingerprint)

        # Compute distances for transparency
        distances = compute_all_distances(image_hashes, record.image_hashes)

        matches.append(
            record_to_attestation_response(record, identity, match_type, distances)
        )

    # Sort by attestation time (oldest first - establishes priority)
    matches.sort(key=lambda m: m.attested_at)

    if matches:
        first = matches[0]
        who = first.attestor.name or first.attestor.fingerprint[:8]
        when = first.attested_at.strftime("%Y-%m-%d")
        # The location clause is only present when the oldest match has one.
        where = (
            f" in {first.location.name or 'recorded location'}"
            if first.location
            else ""
        )
        message = f"Image attested by {who}{where} on {when}"
    else:
        message = "No matching attestations found"

    return VerifyResponse(
        verified=len(matches) > 0,
        message=message,
        attestations=matches,
        image_sha256=image_hashes.sha256,
        # Timezone-aware UTC: datetime.utcnow() is deprecated and returns a
        # naive value that serializes without any offset.
        checked_at=datetime.now(timezone.utc),
    )
async def _get_proof(id_prefix: str) -> ProofResponse:
    """Internal: build the proof response for the first record matching an ID.

    A record matches when its ``record_id`` starts with ``id_prefix`` or its
    ``short_id`` equals ``id_prefix``. Records are checked in the order
    produced by ``storage.iterate_records()`` and the first match wins.

    Raises:
        HTTPException: 404 when no record matches.
    """
    storage = get_storage()

    for candidate in storage.iterate_records():
        id_matches = (
            candidate.record_id.startswith(id_prefix)
            or candidate.short_id == id_prefix
        )
        if not id_matches:
            continue

        attestor_identity = storage.load_identity(candidate.attestor_fingerprint)
        capture = candidate.capture_metadata
        link = ProofLink(candidate.record_id, BASE_URL)

        # Optional location block
        geo = candidate.location
        location_payload = (
            LocationResponse(
                latitude=geo.latitude,
                longitude=geo.longitude,
                accuracy_meters=geo.accuracy_meters,
                name=geo.location_name,
            )
            if geo
            else None
        )

        # Optional device block (needs capture metadata with a device)
        hardware = capture.device if capture else None
        device_payload = (
            DeviceResponse(
                make=hardware.make,
                model=hardware.model,
                software=hardware.software,
            )
            if hardware
            else None
        )

        attestor_payload = AttestorResponse(
            fingerprint=candidate.attestor_fingerprint,
            name=attestor_identity.display_name if attestor_identity else None,
        )

        return ProofResponse(
            record_id=candidate.record_id,
            short_id=candidate.short_id,
            attestor=attestor_payload,
            attested_at=candidate.timestamp,
            captured_at=candidate.captured_at,
            location=location_payload,
            device=device_payload,
            caption=capture.caption if capture else None,
            image_sha256=candidate.image_hashes.sha256,
            proof_url=link.url,
            full_proof_url=link.full_url,
        )

    raise HTTPException(status_code=404, detail=f"Attestation not found: {id_prefix}")
@app.post("/attest", response_model=MobileAttestationResponse)
async def attest_from_mobile(
    image: Annotated[UploadFile, File(description="Original image from mobile")],
    attestation: Annotated[str, Form(description="JSON attestation data")],
):
    """
    Receive attestation from mobile app and finalize it.

    Mobile sends:
    - Original image bytes
    - Pre-signed attestation (SHA-256 + signature + metadata)

    Server:
    1. Verifies signature
    2. Computes perceptual hashes (pHash, dHash)
    3. Embeds proof link via DCT (JPEG only)
    4. Stores in log
    5. Returns confirmation + embedded image endpoint

    The mobile app can then download the embedded image.

    Error responses:
    - 400: attestation JSON is invalid, the image cannot be hashed, the
      SHA-256 does not match, or the key/signature cannot be processed
    - 401: the signature does not verify

    Embedding is best-effort: if it fails the attestation is still stored
    and ``embedded_image_available`` is False.
    """
    import json
    import logging
    import tempfile

    from .models import Identity

    # Parse attestation JSON
    try:
        req = MobileAttestationRequest(**json.loads(attestation))
    except Exception as e:
        raise HTTPException(
            status_code=400, detail=f"Invalid attestation data: {e}"
        ) from e

    # Read image
    image_data = await image.read()

    # Hash the upload; an undecodable image is a client error (same handling
    # as the /verify endpoint) rather than an unhandled server error.
    try:
        computed_hashes = hash_image(image_data, robust=True)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid image: {e}") from e

    # Verify SHA-256 matches
    if computed_hashes.sha256 != req.sha256:
        raise HTTPException(
            status_code=400,
            detail=f"SHA-256 mismatch: expected {req.sha256[:16]}..., got {computed_hashes.sha256[:16]}..."
        )

    # Verify signature
    try:
        public_key_bytes = bytes.fromhex(req.attestor_public_key)
        signature_bytes = bytes.fromhex(req.signature)

        # Reconstruct signing payload (must match mobile's format)
        signing_payload = f"{req.sha256}|{req.attestor_fingerprint}|{req.timestamp.isoformat()}"
        if req.metadata:
            signing_payload += f"|{json.dumps(req.metadata, sort_keys=True, separators=(',', ':'))}"

        is_valid = verify_signature(
            signing_payload.encode(),
            signature_bytes,
            public_key_bytes,
        )
    except Exception as e:
        raise HTTPException(
            status_code=400, detail=f"Signature verification failed: {e}"
        ) from e

    if not is_valid:
        raise HTTPException(status_code=401, detail="Invalid signature")

    # NOTE(review): nothing here checks that req.attestor_fingerprint is
    # actually derived from req.attestor_public_key, so a caller can sign with
    # their own key while claiming any fingerprint. Confirm whether
    # verify_signature or storage enforces that binding; if not, add it.

    # Create full attestation record with perceptual hashes
    record = AttestationRecord(
        image_hashes=computed_hashes,  # Includes pHash, dHash from server
        attestor_fingerprint=req.attestor_fingerprint,
        timestamp=req.timestamp,
        signature=signature_bytes,
        metadata=req.metadata or {},
    )

    # Store in log
    storage = get_storage()
    index = storage.append_record(record)

    # Update Merkle tree
    merkle_log = storage.load_merkle_log()
    storage.set_merkle_root(merkle_log.root_hash)

    # Try to embed proof link (JPEG only)
    embedded_available = False
    proof_link = f"{BASE_URL}/v/{record.short_id}"
    temp_path = None

    try:
        from .embed import get_embed_method

        # get_embed_method works on a path, so stage the upload in a
        # temporary file to check whether DCT embedding is possible.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
            temp_path = Path(f.name)
            f.write(image_data)

        if get_embed_method(temp_path) == "dct":
            from .embed import embed_proof_in_jpeg

            embedded_bytes, _stats = embed_proof_in_jpeg(image_data, proof_link)

            # Save embedded image for later retrieval
            embedded_path = storage.base_path / "embedded" / f"{record.short_id}.jpg"
            embedded_path.parent.mkdir(parents=True, exist_ok=True)
            embedded_path.write_bytes(embedded_bytes)
            embedded_available = True
    except Exception as e:
        # DCT embedding failed, continue without it
        logging.getLogger(__name__).warning("DCT embedding failed: %s", e)
    finally:
        # Always remove the staging file, including when embedding raised.
        if temp_path is not None:
            temp_path.unlink(missing_ok=True)

    # Register the attestor's identity if not known
    if not storage.load_identity(req.attestor_fingerprint):
        identity = Identity(
            public_key=public_key_bytes,
            fingerprint=req.attestor_fingerprint,
        )
        storage.save_identity(identity)

    return MobileAttestationResponse(
        success=True,
        record_id=record.record_id,
        short_id=record.short_id,
        proof_url=proof_link,
        embedded_image_available=embedded_available,
        message=f"Attestation stored at index {index}" + (
            ", embedded image available" if embedded_available else ""
        ),
    )
Either not a JPEG or embedding failed." + ) + + return FileResponse( + embedded_path, + media_type="image/jpeg", + filename=f"{short_id}_attested.jpg", + ) + + +# --- Health check --- + + +@app.get("/health") +async def health(): + """Health check for load balancers.""" + return {"status": "healthy"} + + +# --- Run directly --- + + +def serve(host: str = "0.0.0.0", port: int = 8000): + """Run the API server.""" + import uvicorn + + uvicorn.run(app, host=host, port=port) + + +if __name__ == "__main__": + serve() diff --git a/src/soosef/verisoo/attestation.py b/src/soosef/verisoo/attestation.py new file mode 100644 index 0000000..1ddd2b2 --- /dev/null +++ b/src/soosef/verisoo/attestation.py @@ -0,0 +1,665 @@ +""" +Attestation Creation Module for Verisoo. + +This module is the core of Verisoo's provenance system. An attestation is a +cryptographic proof that binds together: + + 1. AN IMAGE - identified by multiple hashes (SHA-256 + perceptual) + 2. AN IDENTITY - the attestor's Ed25519 public key fingerprint + 3. A TIMESTAMP - when the attestation was created + 4. METADATA - optional context (location, device, caption, etc.) + +The binding is secured by an Ed25519 digital signature over all components, +making it computationally infeasible to forge or modify after creation. 
+ +Architecture Overview: +---------------------- + + ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ + │ Image Data │ │ Private │ │ Metadata │ + │ (bytes) │ │ Key │ │ (optional) │ + └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ + │ │ │ + ▼ │ ▼ + ┌─────────────┐ │ ┌─────────────┐ + │ Compute │ │ │ Extract │ + │ Hashes │ │ │ EXIF │ + │ (SHA+pHash)│ │ │ (auto) │ + └──────┬──────┘ │ └──────┬──────┘ + │ │ │ + └───────────┬───────┴───────────┬───────┘ + │ │ + ▼ ▼ + ┌─────────────────────────────────┐ + │ Build Canonical Signing Payload │ + │ (deterministic byte string) │ + └───────────────┬─────────────────┘ + │ + ▼ + ┌───────────────┐ + │ Ed25519 Sign │ + └───────┬───────┘ + │ + ▼ + ┌───────────────┐ + │ Attestation │ + │ Record │ + └───────────────┘ + +Security Properties: +------------------- +- **Authenticity**: Only the private key holder can create valid signatures +- **Integrity**: Any modification invalidates the signature +- **Non-repudiation**: Attestor cannot deny creating the attestation +- **Timestamping**: Proves the image existed at attestation time + +Usage Example: +------------- + from .attestation import create_attestation + from .crypto import load_private_key + + # Load attestor's private key + private_key = load_private_key("~/.verisoo/private.pem") + + # Create attestation with auto EXIF extraction + attestation = create_attestation( + image_data=open("photo.jpg", "rb").read(), + private_key=private_key, + metadata={"caption": "Street scene in Kyiv"}, + ) + + # The attestation.record can now be stored in the append-only log +""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any + +from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey + +from .crypto import create_identity, sign +from .hashing import hash_image +from .models import ( + Attestation, + AttestationRecord, + CaptureDevice, + CaptureMetadata, + GeoLocation, + ImageHashes, +) + + +# 
============================================================================= +# EXIF METADATA EXTRACTION +# ============================================================================= +# +# EXIF (Exchangeable Image File Format) is metadata embedded in images by +# cameras and phones. It contains valuable provenance information: +# - When the photo was taken (DateTimeOriginal) +# - Where it was taken (GPS coordinates) +# - What device captured it (Make, Model) +# +# We extract this automatically to enrich attestations, but user-provided +# metadata always takes precedence (in case EXIF is wrong or missing). +# ============================================================================= + + +def extract_exif_metadata(image_data: bytes) -> dict[str, Any]: + """ + Extract EXIF metadata from image bytes for attestation enrichment. + + This function parses embedded EXIF data from JPEG/TIFF images to extract + provenance-relevant information. The extracted data provides evidence of + when, where, and how an image was captured. + + Extraction Priority: + ------------------- + 1. **Timestamp**: DateTimeOriginal > DateTimeDigitized > DateTime + - DateTimeOriginal: When shutter was pressed (most reliable) + - DateTimeDigitized: When image was digitized (scanner use case) + - DateTime: Last modification time (least reliable for capture) + + 2. **Location**: GPS coordinates with hemisphere references + - Converted from DMS (degrees/minutes/seconds) to decimal degrees + - Hemisphere (N/S, E/W) determines sign + + 3. **Device**: Camera/phone identification + - Make: Manufacturer (e.g., "Apple", "Canon") + - Model: Device model (e.g., "iPhone 15 Pro", "EOS R5") + - Software: Firmware/app version + + 4. **Dimensions**: Image width and height in pixels + + Args: + image_data: Raw image bytes (JPEG, TIFF, or other EXIF-capable format) + + Returns: + Dictionary with extracted fields. 
May contain: + - captured_at: datetime object (UTC) + - location: GeoLocation object + - device: CaptureDevice object + - width: int (pixels) + - height: int (pixels) + + Returns empty dict if: + - exifread library not installed + - Image has no EXIF data + - EXIF parsing fails + + Note: + Uses 'exifread' library (optional dependency). Gracefully degrades + to empty dict if unavailable, allowing attestations without EXIF. + + Example: + >>> exif = extract_exif_metadata(jpeg_bytes) + >>> if exif.get("location"): + ... print(f"Photo taken at {exif['location']}") + """ + # ------------------------------------------------------------------------- + # Import exifread lazily - it's an optional dependency + # If not installed, we simply skip EXIF extraction (graceful degradation) + # ------------------------------------------------------------------------- + try: + import io + + import exifread + except ImportError: + # exifread not installed - return empty dict, attestation still works + return {} + + # ------------------------------------------------------------------------- + # Parse EXIF tags from image bytes + # details=False skips thumbnail and MakerNote (faster, less memory) + # ------------------------------------------------------------------------- + try: + tags = exifread.process_file(io.BytesIO(image_data), details=False) + except Exception: + # Malformed EXIF or unsupported format - continue without EXIF + return {} + + if not tags: + return {} + + result: dict[str, Any] = {} + + # ------------------------------------------------------------------------- + # TIMESTAMP EXTRACTION + # Try multiple EXIF date fields in order of reliability for capture time + # EXIF stores dates as strings: "YYYY:MM:DD HH:MM:SS" (note colons in date) + # ------------------------------------------------------------------------- + for tag_name in ["EXIF DateTimeOriginal", "EXIF DateTimeDigitized", "Image DateTime"]: + if tag_name in tags: + try: + dt_str = str(tags[tag_name]) + # 
Parse EXIF date format (colons in date part, space separator) + dt = datetime.strptime(dt_str, "%Y:%m:%d %H:%M:%S") + # Assume UTC if no timezone info (EXIF doesn't store timezone) + result["captured_at"] = dt.replace(tzinfo=timezone.utc) + break # Use first valid timestamp found + except (ValueError, TypeError): + continue # Malformed date string, try next field + + # ------------------------------------------------------------------------- + # GPS COORDINATE EXTRACTION + # EXIF stores GPS as rational numbers in DMS (degrees, minutes, seconds) + # with separate reference tags for hemisphere (N/S for lat, E/W for lon) + # ------------------------------------------------------------------------- + gps_lat = tags.get("GPS GPSLatitude") + gps_lat_ref = tags.get("GPS GPSLatitudeRef") # "N" or "S" + gps_lon = tags.get("GPS GPSLongitude") + gps_lon_ref = tags.get("GPS GPSLongitudeRef") # "E" or "W" + + if gps_lat and gps_lon: + try: + # Convert DMS to decimal degrees + lat = _parse_gps_coord(gps_lat.values) + lon = _parse_gps_coord(gps_lon.values) + + # Apply hemisphere sign (South and West are negative) + if gps_lat_ref and str(gps_lat_ref) == "S": + lat = -lat + if gps_lon_ref and str(gps_lon_ref) == "W": + lon = -lon + + result["location"] = GeoLocation(latitude=lat, longitude=lon) + except (ValueError, TypeError, AttributeError): + # Malformed GPS data - skip location rather than fail + pass + + # ------------------------------------------------------------------------- + # DEVICE INFORMATION EXTRACTION + # Identifies the camera/phone that captured the image + # Useful for establishing authenticity ("taken with iPhone, not Photoshop") + # ------------------------------------------------------------------------- + make = tags.get("Image Make") # e.g., "Apple", "Canon", "Nikon" + model = tags.get("Image Model") # e.g., "iPhone 15 Pro", "EOS R5" + software = tags.get("Image Software") # e.g., "iOS 17.4", "Lightroom" + + if make or model or software: + result["device"] = 
CaptureDevice( + make=str(make).strip() if make else None, + model=str(model).strip() if model else None, + software=str(software).strip() if software else None, + ) + + # ------------------------------------------------------------------------- + # IMAGE DIMENSIONS + # May differ from actual pixel dimensions if image was resized after EXIF + # ------------------------------------------------------------------------- + width = tags.get("EXIF ExifImageWidth") or tags.get("Image ImageWidth") + height = tags.get("EXIF ExifImageLength") or tags.get("Image ImageLength") + + if width: + try: + result["width"] = int(str(width)) + except (ValueError, TypeError): + pass + if height: + try: + result["height"] = int(str(height)) + except (ValueError, TypeError): + pass + + return result + + +def _parse_gps_coord(coord_values: list) -> float: + """ + Convert EXIF GPS coordinate from DMS to decimal degrees. + + EXIF stores GPS coordinates as three rational numbers representing + degrees, minutes, and seconds. Each rational has a numerator and + denominator (to handle fractional seconds precisely). 
+ + Formula: decimal = degrees + (minutes / 60) + (seconds / 3600) + + Args: + coord_values: List of three IfdTag Ratio objects [degrees, minutes, seconds] + Each has .num (numerator) and .den (denominator) attributes + + Returns: + Decimal degrees as float (always positive; caller applies hemisphere sign) + + Example: + GPS coords "50° 27' 0.36"" stored as: + [Ratio(50/1), Ratio(27/1), Ratio(36/100)] + Returns: 50.45010 (50 + 27/60 + 0.36/3600) + """ + # Extract degrees, minutes, seconds as floats from rational values + d = float(coord_values[0].num) / float(coord_values[0].den) # Degrees + m = float(coord_values[1].num) / float(coord_values[1].den) # Minutes + s = float(coord_values[2].num) / float(coord_values[2].den) # Seconds + + # Convert to decimal: degrees + minutes/60 + seconds/3600 + return d + (m / 60.0) + (s / 3600.0) + + +# ============================================================================= +# ATTESTATION CREATION +# ============================================================================= +# +# The main entry point for creating attestations. This function orchestrates: +# 1. Metadata preparation (EXIF extraction + user overrides) +# 2. Image hashing (cryptographic + perceptual) +# 3. Payload construction (canonical, deterministic format) +# 4. Digital signing (Ed25519) +# 5. Record assembly +# +# The result is an Attestation object containing both the image data and the +# signed record, ready for storage in the append-only log. +# ============================================================================= + + +def create_attestation( + image_data: bytes, + private_key: Ed25519PrivateKey, + metadata: dict[str, Any] | CaptureMetadata | None = None, + timestamp: datetime | None = None, + auto_exif: bool = True, +) -> Attestation: + """ + Create a cryptographically signed attestation for an image. + + This is the primary function for creating image attestations. 
It binds + an image to an identity at a specific point in time, with optional + metadata providing additional context. + + The Attestation Process: + ----------------------- + + Input Image ──► Hash (SHA-256 + pHash + dHash) + │ + EXIF Data ───► Extract ─────┤ + │ + User Metadata ──────────────┤ + ▼ + ┌───────────────────┐ + │ Canonical Payload │ + │ (deterministic) │ + └─────────┬─────────┘ + │ + Private Key ──► Sign ─────┤ + ▼ + ┌───────────────────┐ + │ AttestationRecord │ + └───────────────────┘ + + Metadata Precedence: + ------------------- + When auto_exif=True, metadata is merged with the following precedence: + 1. User-provided metadata (highest priority - always wins) + 2. EXIF-extracted metadata (fills in gaps) + + This allows users to correct wrong EXIF data (e.g., wrong camera time) + while still benefiting from automatic extraction when available. + + Args: + image_data: + Raw image bytes. Supports any format PIL can read (JPEG, PNG, + TIFF, WebP, etc.). The bytes are hashed directly for SHA-256, + and decoded for perceptual hashing. + + private_key: + Ed25519 private key for signing. This establishes WHO is making + the attestation. The corresponding public key fingerprint is + embedded in the record. + + metadata: + Optional provenance metadata. Can be: + - dict: Raw key-value pairs (flexible schema) + - CaptureMetadata: Structured object with typed fields + - None: No user metadata (may still extract EXIF) + + Common fields: location, caption, device, captured_at, tags + + timestamp: + Override the attestation timestamp. Defaults to current UTC time. + Note: This is ATTESTATION time, not CAPTURE time. Capture time + should be in metadata (auto-extracted from EXIF or user-provided). + + auto_exif: + If True (default), automatically extract EXIF metadata from the + image and merge with user-provided metadata. 
Set to False to: + - Skip EXIF extraction for speed + - Avoid including potentially sensitive EXIF data + - When EXIF is known to be incorrect/missing + + Returns: + Attestation object containing: + - image_data: Original image bytes (for storage/verification) + - image_hashes: Computed hashes (SHA-256 + perceptual) + - record: Signed AttestationRecord ready for the log + + Raises: + ValueError: If image cannot be decoded for hashing + TypeError: If private_key is not an Ed25519PrivateKey + + Example: + >>> # Simple attestation with auto EXIF + >>> attestation = create_attestation(image_bytes, private_key) + + >>> # With manual location override + >>> attestation = create_attestation( + ... image_bytes, + ... private_key, + ... metadata={"location": {"lat": 50.45, "lon": 30.52, "name": "Kyiv"}}, + ... ) + + >>> # Disable EXIF extraction + >>> attestation = create_attestation( + ... image_bytes, + ... private_key, + ... auto_exif=False, + ... ) + + Security Notes: + - The signature covers ALL fields (hashes, fingerprint, timestamp, metadata) + - Changing any field invalidates the signature + - Timestamp is attestation time, not necessarily capture time + - Verify attestations using verisoo.verification module + """ + # ------------------------------------------------------------------------- + # STEP 1: Establish attestation timestamp + # Use provided timestamp or current UTC time + # ------------------------------------------------------------------------- + timestamp = timestamp or datetime.now(timezone.utc) + + # ------------------------------------------------------------------------- + # STEP 2: Normalize metadata to dict format + # Accept CaptureMetadata objects, dicts, or None + # ------------------------------------------------------------------------- + if isinstance(metadata, CaptureMetadata): + # Convert structured object to dict for JSON serialization + metadata_dict = metadata.to_dict() + elif metadata is None: + metadata_dict = {} + else: + # Make a 
copy to avoid mutating caller's dict + metadata_dict = dict(metadata) + + # ------------------------------------------------------------------------- + # STEP 3: Auto-extract and merge EXIF metadata (if enabled) + # EXIF provides defaults; user metadata takes precedence + # ------------------------------------------------------------------------- + if auto_exif: + exif_data = extract_exif_metadata(image_data) + if exif_data: + # Start with empty merged dict, add EXIF fields not in user metadata + merged: dict[str, Any] = {} + + # Convert EXIF objects to JSON-serializable dict format + # Only include if not already provided by user + if "captured_at" in exif_data and "captured_at" not in metadata_dict: + merged["captured_at"] = exif_data["captured_at"].isoformat() + + if "location" in exif_data and "location" not in metadata_dict: + merged["location"] = exif_data["location"].to_dict() + + if "device" in exif_data and "device" not in metadata_dict: + merged["device"] = exif_data["device"].to_dict() + + if "width" in exif_data and "width" not in metadata_dict: + merged["width"] = exif_data["width"] + + if "height" in exif_data and "height" not in metadata_dict: + merged["height"] = exif_data["height"] + + # User metadata overwrites EXIF (higher precedence) + merged.update(metadata_dict) + metadata_dict = merged + + metadata = metadata_dict + + # ------------------------------------------------------------------------- + # STEP 4: Compute image hashes + # SHA-256 for exact matching, pHash/dHash for perceptual matching + # These survive social media compression, resizing, format conversion + # ------------------------------------------------------------------------- + image_hashes = hash_image(image_data) + + # ------------------------------------------------------------------------- + # STEP 5: Get attestor identity from private key + # The fingerprint is SHA-256(public_key)[:16] in hex (32 chars) + # 
def create_attestation_from_hashes(
    image_hashes: ImageHashes,
    private_key: Ed25519PrivateKey,
    metadata: dict[str, Any] | None = None,
    timestamp: datetime | None = None,
) -> AttestationRecord:
    """
    Sign pre-computed image hashes and return the resulting record.

    Lower-level counterpart of create_attestation() for callers that hold
    the hashes but not the image bytes. No hashing and no EXIF extraction
    happens here; the metadata is signed exactly as supplied.

    Args:
        image_hashes: Pre-computed ImageHashes to attest.
        private_key: Ed25519 private key used to derive the attestor
            identity and to sign the payload.
        metadata: Optional metadata dict. A falsy value (None or an empty
            dict) is replaced by a fresh empty dict.
        timestamp: Attestation time. Defaults to the current UTC time.

    Returns:
        AttestationRecord carrying the hashes, signature, attestor
        fingerprint, timestamp and metadata (no image data attached).

    Example:
        >>> hashes = hash_image(image_bytes)
        >>> record = create_attestation_from_hashes(hashes, private_key)
    """
    # Fill in defaults for anything the caller left out.
    if not timestamp:
        timestamp = datetime.now(timezone.utc)
    if not metadata:
        metadata = {}

    # Derive the attestor identity, then sign the canonical payload.
    attestor = create_identity(private_key)
    payload_bytes = _build_signing_payload(
        image_hashes, attestor.fingerprint, timestamp, metadata
    )
    payload_signature = sign(private_key, payload_bytes)

    return AttestationRecord(
        image_hashes=image_hashes,
        signature=payload_signature,
        attestor_fingerprint=attestor.fingerprint,
        timestamp=timestamp,
        metadata=metadata,
    )
Unambiguous - different inputs never produce same payload +# 3. Complete - covers all attested data +# +# We achieve this with: +# - Newline-separated fields (unambiguous delimiter) +# - Sorted JSON keys (deterministic object serialization) +# - No whitespace in JSON (canonical form) +# - UTF-8 encoding (explicit byte representation) +# ============================================================================= + + +def _build_signing_payload( + image_hashes: ImageHashes, + attestor_fingerprint: str, + timestamp: datetime, + metadata: dict[str, Any], +) -> bytes: + """ + Build the canonical byte string for Ed25519 signing. + + This function constructs a deterministic representation of all attested + data. The same inputs MUST always produce the same output bytes, as this + is required for signature verification. + + Payload Format: + -------------- + Line 1: SHA-256 hash (64 hex characters) + Line 2: pHash (perceptual hash, typically 16 hex chars) + Line 3: dHash (difference hash, typically 16 hex chars) + Line 4: Attestor fingerprint (32 hex characters) + Line 5: Timestamp (ISO 8601 format with timezone) + Line 6: Metadata (JSON, sorted keys, no whitespace) + + Example payload: + a1b2c3d4... (SHA-256, 64 chars) + f8e7d6c5... (pHash, 16 chars) + 1a2b3c4d... (dHash, 16 chars) + 9f8e7d6c... 
(fingerprint, 32 chars) + 2024-01-15T10:30:00+00:00 + {"caption":"Test","location":{"lat":50.45,"lon":30.52}} + + Canonicalization Rules: + ---------------------- + - JSON keys are sorted alphabetically (Python's sort_keys=True) + - No whitespace in JSON (separators=(",", ":")) + - Timestamp includes timezone (isoformat()) + - All lines joined with single newline (no trailing newline) + - Final encoding is UTF-8 + + Args: + image_hashes: ImageHashes object with sha256, phash, dhash + attestor_fingerprint: Hex string identifying the attestor + timestamp: Datetime object (should have timezone info) + metadata: Dict to serialize as JSON + + Returns: + UTF-8 encoded bytes ready for signing + + Security Notes: + - Changing any field changes the payload (and invalidates signature) + - The format is designed to be unambiguous (no field can "bleed" into another) + - Verification must use IDENTICAL canonicalization + """ + import json + + # Canonical JSON: sorted keys, no whitespace (compact, deterministic) + metadata_json = json.dumps(metadata, sort_keys=True, separators=(",", ":")) + + # Join all fields with newlines (unambiguous separator) + payload = "\n".join([ + image_hashes.sha256, # Line 1: Cryptographic hash + image_hashes.phash, # Line 2: Perceptual hash (DCT-based) + image_hashes.dhash, # Line 3: Difference hash + attestor_fingerprint, # Line 4: Who is attesting + timestamp.isoformat(), # Line 5: When (ISO 8601) + metadata_json, # Line 6: Additional context + ]) + + # Encode as UTF-8 bytes for signing + return payload.encode("utf-8") diff --git a/src/soosef/verisoo/binlog.py b/src/soosef/verisoo/binlog.py new file mode 100644 index 0000000..83c9ab1 --- /dev/null +++ b/src/soosef/verisoo/binlog.py @@ -0,0 +1,285 @@ +""" +Binary append-only log for attestation records. 
@dataclass
class LogEntry:
    """
    One record as read back from the binary log.

    Attributes:
        offset: Byte offset in the file where the record header starts.
        index: Logical position of the record (0, 1, 2, ...).
        data: Record payload bytes.
        crc32: CRC32 value stored alongside the record.
    """

    offset: int
    index: int
    data: bytes
    crc32: int
+ """ + + def __init__(self, path: Path) -> None: + self.path = path + self._size = 0 # Number of records + self._file_size = 0 # Bytes written + + if path.exists(): + self._validate_and_count() + else: + self._initialize() + + def _initialize(self) -> None: + """Create new log file with header.""" + with open(self.path, "wb") as f: + f.write(MAGIC) + f.write(struct.pack(" None: + """Validate existing log and count records.""" + with open(self.path, "rb") as f: + # Check magic + magic = f.read(len(MAGIC)) + if magic != MAGIC: + raise LogCorruptionError(f"Invalid magic: {magic!r}", 0) + + # Check version + version_bytes = f.read(4) + if len(version_bytes) < 4: + raise LogCorruptionError("Truncated header", len(MAGIC)) + version = struct.unpack(" VERSION: + raise LogCorruptionError(f"Unknown version: {version}", len(MAGIC)) + + # Count records + count = 0 + while True: + offset = f.tell() + header = f.read(RECORD_HEADER_SIZE) + if not header: + break + if len(header) < RECORD_HEADER_SIZE: + raise LogCorruptionError("Truncated record header", offset) + + length, stored_crc = struct.unpack(" int: + """Number of records in the log.""" + return self._size + + @property + def file_size(self) -> int: + """Total bytes in the log file.""" + return self._file_size + + def append(self, data: bytes) -> int: + """ + Append a record to the log. + + Returns the index of the new record. 
+ """ + crc = zlib.crc32(data) & 0xFFFFFFFF + header = struct.pack(" bytes: + """Read record at index.""" + if index < 0 or index >= self._size: + raise IndexError(f"Index {index} out of range [0, {self._size})") + + for entry in self._iterate_from(0): + if entry.index == index: + return entry.data + + raise IndexError(f"Record {index} not found") # Should never happen + + def read_range(self, start: int, end: int) -> list[bytes]: + """Read records in range [start, end).""" + if start < 0: + start = 0 + if end > self._size: + end = self._size + + results = [] + for entry in self._iterate_from(0): + if entry.index >= end: + break + if entry.index >= start: + results.append(entry.data) + + return results + + def iterate(self) -> Iterator[LogEntry]: + """Iterate over all records.""" + yield from self._iterate_from(0) + + def _iterate_from(self, start_index: int) -> Iterator[LogEntry]: + """Iterate records starting from index.""" + with open(self.path, "rb") as f: + f.seek(HEADER_SIZE) + + index = 0 + while True: + offset = f.tell() + header = f.read(RECORD_HEADER_SIZE) + if not header: + break + if len(header) < RECORD_HEADER_SIZE: + break + + length, stored_crc = struct.unpack("= start_index: + yield LogEntry( + offset=offset, + index=index, + data=data, + crc32=stored_crc, + ) + + index += 1 + + def get_bytes_from_offset(self, offset: int) -> bytes: + """ + Get raw bytes from offset to end of file. + + Used for federation: "send me everything after byte N" + """ + if offset < HEADER_SIZE: + offset = HEADER_SIZE + + with open(self.path, "rb") as f: + f.seek(offset) + return f.read() + + def append_raw_bytes(self, data: bytes) -> int: + """ + Append raw bytes (pre-formatted records) from federation sync. + + Returns number of records appended. 
+ """ + # Validate the incoming data before appending + records_added = 0 + pos = 0 + + while pos < len(data): + if pos + RECORD_HEADER_SIZE > len(data): + raise LogCorruptionError("Truncated record in sync data", pos) + + length, stored_crc = struct.unpack(" len(data): + raise LogCorruptionError("Truncated record data in sync data", pos) + + record_data = data[pos : pos + length] + actual_crc = zlib.crc32(record_data) & 0xFFFFFFFF + + if actual_crc != stored_crc: + raise LogCorruptionError(f"CRC mismatch in sync data at offset {pos}", pos) + + pos += length + records_added += 1 + + # All validated, append in one write + with open(self.path, "ab") as f: + f.write(data) + + self._size += records_added + self._file_size += len(data) + + return records_added + + def truncate_to(self, size: int) -> None: + """ + Truncate log to first `size` records. + + DANGEROUS: Only use for recovery from corruption. + """ + if size >= self._size: + return + + # Find byte offset after record `size-1` + target_offset = HEADER_SIZE + for entry in self._iterate_from(0): + if entry.index >= size: + target_offset = entry.offset + break + target_offset = entry.offset + RECORD_HEADER_SIZE + len(entry.data) + + with open(self.path, "r+b") as f: + f.truncate(target_offset) + + self._size = size + self._file_size = target_offset diff --git a/src/soosef/verisoo/cli.py b/src/soosef/verisoo/cli.py new file mode 100644 index 0000000..681979f --- /dev/null +++ b/src/soosef/verisoo/cli.py @@ -0,0 +1,721 @@ +""" +Command-Line Interface (CLI) for Verisoo. + +This module provides the `verisoo` command-line tool for interacting with +the image provenance system. It wraps the core library functionality in +a user-friendly interface. 
+ +Command Structure: +----------------- + + verisoo + ├── identity # Manage attestor identity (Ed25519 keypair) + │ ├── generate # Create new identity + │ └── show # Display current identity fingerprint + │ + ├── attest # Create attestation for an image + │ ├── --location, -l # GPS coordinates + │ ├── --caption, -c # Photographer's notes + │ ├── --tag, -t # Metadata tags (repeatable) + │ └── --no-exif # Disable EXIF extraction + │ + ├── verify # Check image against known attestations + │ └── --exact # Require byte-exact match (no perceptual) + │ + ├── log # Query the attestation log + │ ├── status # Show log statistics + │ └── list # List recent attestations + │ + ├── peer # Manage federation peers + │ ├── add # Add a peer node + │ └── list # List configured peers + │ + └── serve # Run the API server + ├── --host # Bind address (default: 0.0.0.0) + └── --port # Listen port (default: 8000) + +Global Options: +-------------- + --data-dir PATH Override default data directory (~/.verisoo) + --json Output in JSON format (for scripting) + +Data Directory Structure: +------------------------ + ~/.verisoo/ + ├── private.pem # Ed25519 private key (PEM format) + ├── public.pem # Ed25519 public key (PEM format) + ├── identity.json # Identity metadata (name, created_at) + ├── attestations.binlog # Append-only binary log of attestation records + ├── index.lmdb/ # LMDB index for fast lookups + ├── merkle.json # Current Merkle tree root hash + └── peers.json # Federation peer list + +Usage Examples: +-------------- + # First-time setup + $ verisoo identity generate --name "Photographer Name" + + # Attest a photo with location + $ verisoo attest photo.jpg -l "50.45,30.52,10,Kyiv" -c "Morning scene" + + # Verify an image (even after social media compression) + $ verisoo verify downloaded_photo.jpg + + # Start API server for remote verification + $ verisoo serve --port 8000 + + # Check log status + $ verisoo log status + +Exit Codes: +---------- + 0: Success + 1: Verification 
@identity.command("generate")
@click.option("--name", help="Display name for this identity")
@click.option("--password", is_flag=True, help="Encrypt private key with password")
@click.option(
    "--force",
    is_flag=True,
    help="Overwrite an existing identity (the old private key is replaced)",
)
@click.pass_context
def identity_generate(
    ctx: click.Context, name: str | None, password: bool, force: bool = False
) -> None:
    """
    Generate a new attestor identity (Ed25519 keypair).

    Writes the private and public key to the storage paths, then saves the
    identity built from the private key. Prints the fingerprint, as JSON
    when the global --json flag is set.

    Args:
        ctx: Click context; ``ctx.obj`` supplies ``data_dir`` and ``json``.
        name: Optional display name stored in the identity metadata.
        password: If set, prompt (with confirmation) for a password and pass
            it to the private-key writer.
        force: If set, proceed even when an identity already exists.

    Raises:
        click.ClickException: If an identity already exists and --force was
            not given.
    """
    from .crypto import generate_keypair, create_identity, save_private_key, save_public_key
    from .storage import LocalStorage

    storage = LocalStorage(ctx.obj.get("data_dir"))

    # Refuse to replace an existing identity unless the user opted in with
    # --force; the message below points at exactly that flag.
    if storage.has_node_identity() and not force:
        raise click.ClickException("Identity already exists. Use --force to overwrite.")

    # Generate keypair
    private_key, public_key = generate_keypair()

    # Get password if requested
    key_password = None
    if password:
        key_password = click.prompt("Password", hide_input=True, confirmation_prompt=True).encode()

    # Save keys
    save_private_key(private_key, storage.private_key_path, key_password)
    save_public_key(public_key, storage.public_key_path)

    # Create and save identity
    metadata = {"name": name} if name else {}
    ident = create_identity(private_key, metadata)
    storage.save_identity(ident)

    if ctx.obj.get("json"):
        click.echo(json.dumps({"fingerprint": ident.fingerprint, "created": ident.created_at.isoformat()}))
    else:
        click.echo(f"Identity created: {ident.fingerprint}")
        click.echo(f"Public key: {storage.public_key_path}")
Creates a cryptographically signed attestation +# 5. Appends the record to the local log +# 6. Updates the Merkle tree root +# +# The result is a tamper-evident proof that the image existed at attestation +# time, attested by the configured identity. +# ============================================================================= + + +def _parse_location(location_str: str) -> dict[str, Any]: + """ + Parse a location string from CLI into a GeoLocation-compatible dict. + + This function provides a flexible, human-friendly way to specify GPS + coordinates on the command line. It supports multiple formats to balance + convenience with precision. + + Supported Formats: + ----------------- + 1. Basic coordinates: + "50.4501,30.5234" + → {"lat": 50.4501, "lon": 30.5234} + + 2. With accuracy (meters): + "50.4501,30.5234,10" + → {"lat": 50.4501, "lon": 30.5234, "accuracy": 10.0} + + 3. With accuracy and name: + "50.4501,30.5234,10,Kyiv, Ukraine" + → {"lat": 50.4501, "lon": 30.5234, "accuracy": 10.0, "name": "Kyiv, Ukraine"} + + 4. With name only (if 3rd part is not a number): + "50.4501,30.5234,Kyiv, Ukraine" + → {"lat": 50.4501, "lon": 30.5234, "name": "Kyiv, Ukraine"} + + Note: The name can contain commas (e.g., "City, Country") because we + use split(",", 3) to limit splitting to first 3 commas. + + Args: + location_str: Comma-separated location string from CLI + + Returns: + Dict with lat, lon, and optionally accuracy and name + + Raises: + click.ClickException: If format is invalid or coordinates out of range + + Validation: + - Latitude must be between -90 and 90 (poles) + - Longitude must be between -180 and 180 (date line) + - At minimum, lat and lon are required + """ + # Split into at most 4 parts: lat, lon, accuracy, name + # Using maxsplit=3 allows the name to contain commas + parts = location_str.split(",", 3) + + # Validate minimum required parts + if len(parts) < 2: + raise click.ClickException( + "Invalid location format. 
Use: lat,lon or lat,lon,accuracy or lat,lon,accuracy,name" + ) + + # Parse latitude and longitude + try: + lat = float(parts[0].strip()) + lon = float(parts[1].strip()) + except ValueError: + raise click.ClickException("Invalid latitude/longitude values") + + # Validate coordinate ranges + if not (-90 <= lat <= 90): + raise click.ClickException("Latitude must be between -90 and 90") + if not (-180 <= lon <= 180): + raise click.ClickException("Longitude must be between -180 and 180") + + # Build location dict with required fields + location: dict[str, Any] = {"lat": lat, "lon": lon} + + # Parse optional accuracy (3rd part) + if len(parts) >= 3: + try: + accuracy = float(parts[2].strip()) + location["accuracy"] = accuracy + except ValueError: + # 3rd part is not a number - treat everything from here as name + # This handles: "50.45,30.52,Kyiv, Ukraine" (no accuracy) + location["name"] = ",".join(parts[2:]).strip() + return location + + # Parse optional name (4th part, may contain commas) + if len(parts) >= 4: + location["name"] = parts[3].strip() + + return location + + +@main.command() +@click.argument("image", type=click.Path(exists=True, path_type=Path)) +@click.option("--password", is_flag=True, help="Private key is encrypted") +@click.option("--tag", "-t", multiple=True, help="Add metadata tags") +@click.option("--location", "-l", "location_str", help='GPS coords: "lat,lon" or "lat,lon,accuracy,name"') +@click.option("--caption", "-c", help="Photographer's notes") +@click.option("--no-exif", "no_exif", is_flag=True, help="Disable auto EXIF extraction") +@click.option("--embed", "-e", is_flag=True, help="Embed proof link in image (JPEG: DCT, other: XMP sidecar)") +@click.option("--base-url", default="https://verisoo.io", help="Base URL for proof links") +@click.pass_context +def attest( + ctx: click.Context, + image: Path, + password: bool, + tag: tuple[str, ...], + location_str: str | None, + caption: str | None, + no_exif: bool, + embed: bool, + base_url: str, 
+) -> None: + """ + Create a cryptographic attestation for an image. + + This command creates a signed record proving that YOU attested THIS IMAGE + at THIS TIME with THIS METADATA. The attestation is stored in your local + log and can be synced to federation peers. + + \b + METADATA SOURCES (in order of precedence): + 1. Command-line options (--location, --caption, --tag) + 2. EXIF data from the image (unless --no-exif) + + \b + PROOF EMBEDDING (--embed): + For JPEG: Uses DCT steganography (invisible, survives compression) + For PNG/RAW: Creates XMP sidecar file readable by Lightroom/Darktable + + \b + EXAMPLES: + # Basic attestation (auto-extracts EXIF) + verisoo attest photo.jpg + + # With proof link embedded in image + verisoo attest photo.jpg --embed + + # With manual location (overrides EXIF GPS) + verisoo attest photo.jpg -l "50.45,30.52,10,Kyiv" + + # With caption and tags + verisoo attest photo.jpg -c "Morning scene" -t news -t ukraine + + # Skip EXIF extraction + verisoo attest photo.jpg --no-exif + + \b + OUTPUT: + Attested: photo.jpg + SHA-256: a1b2c3d4e5f6... + Index: 42 + Root: 9f8e7d6c5b4a... + Embedded: photo_attested.jpg (DCT) + """ + from .attestation import create_attestation + from .crypto import load_private_key + from .storage import LocalStorage + + # ------------------------------------------------------------------------- + # Initialize storage and verify identity exists + # ------------------------------------------------------------------------- + storage = LocalStorage(ctx.obj.get("data_dir")) + + if not storage.has_node_identity(): + raise click.ClickException("No identity configured. 
Run: verisoo identity generate") + + # ------------------------------------------------------------------------- + # Load the attestor's private key + # Optionally decrypt if password-protected + # ------------------------------------------------------------------------- + key_password = None + if password: + key_password = click.prompt("Password", hide_input=True).encode() + + private_key = load_private_key(storage.private_key_path, key_password) + + # ------------------------------------------------------------------------- + # Read image file + # ------------------------------------------------------------------------- + image_data = image.read_bytes() + + # ------------------------------------------------------------------------- + # Build metadata from CLI options + # These will override any EXIF data with the same keys + # ------------------------------------------------------------------------- + metadata: dict[str, Any] = {} + + # Tags (can specify multiple: -t news -t ukraine) + if tag: + metadata["tags"] = list(tag) + + # Always record the original filename + metadata["filename"] = image.name + + # Parse and add location if provided via CLI + # This OVERRIDES any GPS data from EXIF + if location_str: + metadata["location"] = _parse_location(location_str) + + # Add caption (photographer's notes) + if caption: + metadata["caption"] = caption + + # ------------------------------------------------------------------------- + # Create the attestation + # This: computes hashes, extracts EXIF (if enabled), signs the record + # ------------------------------------------------------------------------- + attestation = create_attestation( + image_data, private_key, metadata, auto_exif=not no_exif + ) + + # ------------------------------------------------------------------------- + # Store the attestation record + # Appends to binary log and updates LMDB index + # ------------------------------------------------------------------------- + index = 
storage.append_record(attestation.record) + + # ------------------------------------------------------------------------- + # Update Merkle tree root + # This provides tamper-evidence for the entire log + # ------------------------------------------------------------------------- + merkle_log = storage.load_merkle_log() + storage.set_merkle_root(merkle_log.root_hash) + + # ------------------------------------------------------------------------- + # Embed proof link if requested + # ------------------------------------------------------------------------- + embed_result = None + if embed: + from .embed import embed_proof_link + + # Generate proof link from attestation record + proof_link = f"{base_url}/v/{attestation.record.short_id}" + + embed_result = embed_proof_link( + image_path=image, + proof_link=proof_link, + fingerprint=attestation.record.attestor_fingerprint, + attested_at=attestation.record.timestamp, + image_sha256=attestation.image_hashes.sha256, + caption=caption, + ) + + # ------------------------------------------------------------------------- + # Output results + # ------------------------------------------------------------------------- + if ctx.obj.get("json"): + result = { + "index": index, + "sha256": attestation.image_hashes.sha256, + "attestor": attestation.record.attestor_fingerprint, + "timestamp": attestation.record.timestamp.isoformat(), + "root": merkle_log.root_hash, + } + if embed_result: + result["embed"] = { + "success": embed_result.success, + "method": embed_result.method, + "output": str(embed_result.output_path) if embed_result.output_path else None, + "proof_link": embed_result.proof_link, + "message": embed_result.message, + } + click.echo(json.dumps(result)) + else: + click.echo(f"Attested: {image.name}") + click.echo(f" SHA-256: {attestation.image_hashes.sha256[:16]}...") + click.echo(f" Index: {index}") + click.echo(f" Root: {merkle_log.root_hash[:16]}...") + if embed_result: + if embed_result.success: + click.echo(f" 
Embedded: {embed_result.output_path.name if embed_result.output_path else 'N/A'} ({embed_result.method.upper()})") + click.echo(f" Proof: {embed_result.proof_link}") + else: + click.echo(f" Embed failed: {embed_result.message}") + + +@main.command() +@click.argument("image", type=click.Path(exists=True, path_type=Path)) +@click.option("--exact", is_flag=True, help="Require exact byte match (not perceptual)") +@click.pass_context +def verify(ctx: click.Context, image: Path, exact: bool) -> None: + """Verify an image against known attestations.""" + from .hashing import hash_image + from .verification import find_attestations_for_image + from .storage import LocalStorage + + storage = LocalStorage(ctx.obj.get("data_dir")) + + # Read image and compute hashes + image_data = image.read_bytes() + hashes = hash_image(image_data) + + # Find matching attestations + records = list(storage.iterate_records()) + matches = find_attestations_for_image( + image_data, records, perceptual_threshold=0 if exact else 10 + ) + + if ctx.obj.get("json"): + result = { + "image": str(image), + "sha256": hashes.sha256, + "matches": len(matches), + "attestations": [ + { + "attestor": m.attestor_fingerprint, + "timestamp": m.timestamp.isoformat(), + "exact": m.image_hashes.sha256 == hashes.sha256, + } + for m in matches + ], + } + click.echo(json.dumps(result)) + else: + if not matches: + click.echo(f"No attestations found for {image.name}") + click.echo(f" SHA-256: {hashes.sha256[:16]}...") + sys.exit(1) + + click.echo(f"Found {len(matches)} attestation(s) for {image.name}") + for m in matches: + match_type = "exact" if m.image_hashes.sha256 == hashes.sha256 else "perceptual" + click.echo(f" [{match_type}] {m.attestor_fingerprint[:16]}... 
@log.command("list")
@click.option("--limit", "-n", default=10, help="Number of records to show")
@click.pass_context
def log_list(ctx: click.Context, limit: int) -> None:
    """List recent attestations."""
    # Local imports keep CLI start-up cheap, matching the other commands.
    from collections import deque

    from .storage import LocalStorage

    # A negative count has no sensible meaning. The previous slice-based code
    # silently turned it into "skip the first N records".
    if limit < 0:
        raise click.BadParameter("must be zero or greater", param_hint="--limit")

    storage = LocalStorage(ctx.obj.get("data_dir"))

    # deque(maxlen=limit) retains only the newest `limit` items as it consumes
    # the iterable, and maxlen=0 correctly yields nothing (the old
    # `records[-0:]` returned the entire log). If iterate_records() is lazy
    # this also avoids holding every record in memory at once.
    recent = list(deque(storage.iterate_records(), maxlen=limit))

    if ctx.obj.get("json"):
        # Machine-readable output: one JSON array, oldest record first.
        click.echo(json.dumps([
            {
                "sha256": r.image_hashes.sha256,
                "attestor": r.attestor_fingerprint,
                "timestamp": r.timestamp.isoformat(),
            }
            for r in recent
        ]))
    else:
        # Human-readable output: timestamp, truncated fingerprint, truncated hash.
        for r in recent:
            click.echo(f"{r.timestamp.isoformat()} {r.attestor_fingerprint[:16]}... {r.image_hashes.sha256[:16]}...")
@main.command()
@click.option("--host", default="0.0.0.0", help="Host to bind to")
@click.option("--port", default=8000, type=int, help="Port to listen on")
def serve(host: str, port: int) -> None:
    """
    Run the Verisoo verification API server.

    Starts a FastAPI server that exposes verification endpoints. This allows
    remote clients to verify images against your local attestation log.

    \b
    ENDPOINTS:
        POST /verify        Upload image for verification
        GET  /v/{short_id}  Get proof by short ID
        GET  /status        Server status and statistics
        GET  /health        Health check for load balancers

    \b
    EXAMPLES:
        # Start on default port
        verisoo serve

        # Custom port
        verisoo serve --port 9000

        # Bind to localhost only (no external access)
        verisoo serve --host 127.0.0.1

    \b
    CLIENT USAGE:
        # Verify an image
        curl -X POST -F "image=@photo.jpg" http://localhost:8000/verify

        # Get proof details
        curl http://localhost:8000/v/a8f3c2d1e9b7

    \b
    ENVIRONMENT VARIABLES:
        VERISOO_DATA_DIR    Override data directory
        VERISOO_BASE_URL    Base URL for proof links (default: https://verisoo.io)

    \b
    SECURITY NOTES:
        - The server is READ-ONLY (cannot create attestations)
        - CORS is enabled by default (configure for production)
        - No authentication by default (add API keys for production)
    """
    # -------------------------------------------------------------------------
    # Import the API module (requires fastapi, uvicorn)
    # These are optional dependencies: pip install verisoo[api]
    # -------------------------------------------------------------------------
    try:
        from .api import serve as run_server
    except ImportError as exc:
        # Chain the original error and surface its text: the failing import is
        # not necessarily fastapi itself, and hiding the cause makes a broken
        # install hard to diagnose.
        raise click.ClickException(
            f"API server requires fastapi: pip install verisoo[api] ({exc})"
        ) from exc

    # -------------------------------------------------------------------------
    # Start the server (blocks until interrupted)
    # -------------------------------------------------------------------------
    click.echo(f"Starting Verisoo API server on {host}:{port}")
    click.echo("Press Ctrl+C to stop")
    run_server(host=host, port=port)
def save_private_key(
    private_key: Ed25519PrivateKey,
    path: Path,
    password: bytes | None = None,
) -> None:
    """
    Save a private key to ``path`` as a PKCS#8 PEM file.

    Args:
        private_key: Key to serialize.
        path: Destination file; an existing file is overwritten.
        password: If given (and non-empty), the PEM is encrypted with
            ``serialization.BestAvailableEncryption``. The concrete algorithm
            is chosen by the ``cryptography`` library, not by this function.
            Without a password the key is written unencrypted.

    Raises:
        OSError: If the file cannot be created or written.
    """
    import os

    if password:
        encryption = serialization.BestAvailableEncryption(password)
    else:
        encryption = serialization.NoEncryption()

    pem = private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=encryption,
    )

    # Create the file with owner-only permissions from the start. Writing
    # first and chmod-ing afterwards leaves a window in which the key is
    # readable under the process umask.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
    fd = os.open(path, flags, 0o600)
    with os.fdopen(fd, "wb") as key_file:
        key_file.write(pem)

    # The mode passed to os.open() only applies to newly created files, so
    # tighten a pre-existing file explicitly. Best effort: some platforms and
    # filesystems do not support POSIX permission bits.
    try:
        path.chmod(0o600)
    except OSError:
        pass
signature: bytes, public_key_bytes: bytes) -> bool: + """ + Verify an Ed25519 signature. + + Args: + data: The signed data + signature: 64-byte Ed25519 signature + public_key_bytes: 32-byte Ed25519 public key + + Returns: + True if valid, False otherwise + """ + try: + public_key = Ed25519PublicKey.from_public_bytes(public_key_bytes) + public_key.verify(signature, data) + return True + except Exception: + return False diff --git a/src/soosef/verisoo/embed.py b/src/soosef/verisoo/embed.py new file mode 100644 index 0000000..d076682 --- /dev/null +++ b/src/soosef/verisoo/embed.py @@ -0,0 +1,537 @@ +""" +Proof Link Embedding Module for Verisoo. + +This module handles embedding proof links into images after attestation. +Two strategies are used depending on the image format: + +1. JPEG: DCT steganography via stegasoo + - Embeds in frequency domain (survives recompression) + - Uses center region for robustness against cropping + - Invisible to human eye + +2. PNG/RAW: XMP sidecar file + - Standard metadata format used by Lightroom/Darktable/etc. + - Travels with the image file + - Not steganographic (plaintext XML) + +The proof link format: https://verisoo.io/v/{short_id} +""" + +from __future__ import annotations + +import io +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Any + +from PIL import Image + +# Stegasoo integration — imported as a pip dependency (no path hacks needed). +# Install stegasoo[dct] to enable DCT steganography for JPEG proof embedding. 
+try: + from soosef.stegasoo.dct_steganography import ( + embed_in_dct, + extract_from_dct, + has_dct_support, + has_jpegio_support, + calculate_dct_capacity, + ) + HAS_STEGASOO = True +except ImportError: + HAS_STEGASOO = False + has_dct_support = lambda: False + has_jpegio_support = lambda: False + + +# ============================================================================= +# CONSTANTS +# ============================================================================= + +# Fixed public seed for Verisoo proof links +# This is intentionally public - anyone should be able to extract the proof link +VERISOO_SEED = b"verisoo" + +# Base URL for proof links +DEFAULT_BASE_URL = "https://verisoo.io" + +# XMP namespace for Verisoo +XMP_NAMESPACE = "https://verisoo.io/ns/1.0/" + +# Supported formats for DCT embedding +DCT_FORMATS = {".jpg", ".jpeg"} + +# Formats that need XMP sidecar +XMP_FORMATS = {".png", ".tiff", ".tif", ".webp", ".heic", ".heif"} +RAW_FORMATS = {".cr2", ".cr3", ".nef", ".arw", ".raf", ".orf", ".rw2", ".dng", ".raw"} + + +# ============================================================================= +# DATA CLASSES +# ============================================================================= + +@dataclass +class EmbedResult: + """Result of a proof link embedding operation.""" + success: bool + method: str # "dct", "xmp", "none" + output_path: Path | None + proof_link: str + message: str + + +@dataclass +class ExtractResult: + """Result of extracting a proof link from an image.""" + success: bool + method: str # "dct", "xmp", "none" + proof_link: str | None + message: str + + +# ============================================================================= +# XMP SIDECAR GENERATION +# ============================================================================= + +def generate_xmp_sidecar( + proof_link: str, + fingerprint: str, + attested_at: datetime, + image_sha256: str, + caption: str | None = None, +) -> str: + """ + Generate XMP sidecar XML 
def read_xmp_sidecar(image_path: Path) -> dict[str, str] | None:
    """
    Read Verisoo metadata from an XMP sidecar file.

    The sidecar is looked up as ``<image name>.<ext>.xmp`` first and, failing
    that, as ``<image name>.xmp`` in the same directory.

    Args:
        image_path: Path to the image (sidecar path is derived)

    Returns:
        Dict containing whichever of ``proof_link``, ``fingerprint``,
        ``attested_at``, ``image_sha256`` and ``caption`` are present, with
        XML entities decoded; ``None`` if no sidecar exists, it cannot be
        read, or it holds none of those attributes.
    """
    import re
    from xml.sax.saxutils import unescape

    xmp_path = image_path.with_suffix(image_path.suffix + ".xmp")
    if not xmp_path.exists():
        # Try alternate naming
        xmp_path = image_path.with_suffix(".xmp")
        if not xmp_path.exists():
            return None

    try:
        content = xmp_path.read_text(encoding="utf-8")
    except (OSError, UnicodeError):
        # Unreadable or non-UTF-8 sidecar: treat as "no metadata".
        return None

    # Simple regex extraction (not full XML parsing for speed)
    patterns = {
        "proof_link": r'verisoo:ProofLink="([^"]*)"',
        "fingerprint": r'verisoo:Fingerprint="([^"]*)"',
        "attested_at": r'verisoo:AttestedAt="([^"]*)"',
        "image_sha256": r'verisoo:ImageSHA256="([^"]*)"',
        "caption": r'verisoo:Caption="([^"]*)"',
    }

    result: dict[str, str] = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, content)
        if match:
            # Attribute values are stored XML-escaped (& < > "), so undo that
            # here; otherwise a caption like 'A & B' comes back as 'A &amp; B'.
            result[key] = unescape(match.group(1), {"&quot;": '"'})

    return result if result else None
def get_embed_method(image_path: Path) -> str:
    """
    Pick the embedding strategy for an image based on its file extension.

    Args:
        image_path: Path to the image file (only the suffix is inspected)

    Returns:
        "dct" for a JPEG when stegasoo and its native JPEG support are usable,
        "xmp" for a JPEG without that support and for the sidecar/RAW
        extensions, "none" for anything else
    """
    ext = image_path.suffix.lower()

    if ext in DCT_FORMATS:
        # JPEG: steganography when the backend is present, sidecar otherwise.
        dct_ready = HAS_STEGASOO and has_jpegio_support()
        return "dct" if dct_ready else "xmp"

    wants_sidecar = ext in XMP_FORMATS or ext in RAW_FORMATS
    return "xmp" if wants_sidecar else "none"
def extract_proof_link(image_path: Path) -> ExtractResult:
    """
    Recover a proof link that was previously attached to an image.

    JPEG files are probed for a DCT-embedded link first (when stegasoo is
    importable); every format then falls back to the XMP sidecar.

    Args:
        image_path: Path to the image

    Returns:
        ExtractResult describing where the link was found, or a
        ``success=False`` / ``method="none"`` result when nothing was found
    """
    is_jpeg = image_path.suffix.lower() in DCT_FORMATS

    if is_jpeg and HAS_STEGASOO:
        try:
            dct_link = extract_proof_from_jpeg(image_path.read_bytes())
        except Exception:
            # Best effort: a failed read or decode just means "try XMP next".
            dct_link = None

        if dct_link:
            return ExtractResult(
                success=True,
                method="dct",
                proof_link=dct_link,
                message="Extracted from DCT steganography",
            )

    sidecar = read_xmp_sidecar(image_path)
    if not sidecar or "proof_link" not in sidecar:
        return ExtractResult(
            success=False,
            method="none",
            proof_link=None,
            message="No embedded proof link found",
        )

    return ExtractResult(
        success=True,
        method="xmp",
        proof_link=sidecar["proof_link"],
        message="Extracted from XMP sidecar",
    )
class VerisooError(Exception):
    """Common base class of every exception raised by Verisoo."""


class AttestationError(VerisooError):
    """Raised when creating or signing an attestation fails."""


class VerificationError(VerisooError):
    """Raised when verifying an attestation fails.

    Attributes:
        reason: Optional short code for the failure (subclasses set
            "hash_mismatch" and "invalid_signature"); None when not given.
    """

    def __init__(self, message: str, *, reason: str | None = None) -> None:
        self.reason = reason
        super().__init__(message)


class HashMismatchError(VerificationError):
    """Raised when an image hash differs from the attestation record.

    Attributes:
        expected: Hash value taken from the record.
        actual: Hash value computed from the image.
        hash_type: Name of the hash algorithm that was compared.
    """

    def __init__(
        self, message: str, *, expected: str, actual: str, hash_type: str
    ) -> None:
        self.expected, self.actual, self.hash_type = expected, actual, hash_type
        super().__init__(message, reason="hash_mismatch")


class SignatureError(VerificationError):
    """Raised when signature verification fails."""

    def __init__(self, message: str) -> None:
        super().__init__(message, reason="invalid_signature")


class IdentityError(VerisooError):
    """Raised for identity and key management problems."""


class MerkleError(VerisooError):
    """Raised for failures in merkle tree operations."""


class FederationError(VerisooError):
    """Raised for failures in peer communication and sync."""
@dataclass
class PeerInfo:
    """Information about a federation peer.

    GossipNode keeps one instance per peer URL and updates the status fields
    after each sync attempt.
    """

    # URL of the peer; GossipNode uses it as the key of its peers dict.
    url: str
    fingerprint: str  # Ed25519 fingerprint for peer authentication
    # Time (UTC) the peer last answered a status request; None until then.
    last_seen: datetime | None = None
    # Root hash and log size the peer reported in its last status reply.
    last_root: str | None = None
    last_size: int = 0
    # Set to False after 3 consecutive failed syncs; unhealthy peers are
    # skipped by gossip rounds and broadcasts.
    healthy: bool = True
    # Failed sync attempts since the last success (reset to 0 on success).
    consecutive_failures: int = 0
async def sync_with_peer(self, peer_url: str) -> SyncStatus:
    """
    Sync with a specific peer.

    1. Get their status (root, size)
    2. If they have more records, verify consistency and fetch them
    3. Append the records we do not already hold to our log

    Args:
        peer_url: URL of a peer registered in self.peers.

    Returns:
        SyncStatus describing the attempt. Failures (unknown peer, transport
        errors, failed consistency check) are reported through
        success=False and error; they are not raised. records_received
        counts the records actually appended and is never negative.
    """
    peer = self.peers.get(peer_url)
    if not peer:
        return SyncStatus(
            peer=peer_url,
            success=False,
            error="Unknown peer",
        )

    our_size_before = self.log.size
    their_size = 0  # stays 0 in the failure status if the peer never answered
    records_received = 0

    try:
        # Get peer status
        their_root, their_size = await self.transport.get_status(peer)

        # Update peer info
        peer.last_seen = datetime.now(timezone.utc)
        peer.last_root = their_root
        peer.last_size = their_size

        if (
            their_size == our_size_before
            and our_size_before > 0
            and their_root != self.log.root_hash
        ):
            # Same length but different root: the two logs have diverged.
            # Still reported as a successful (empty) sync, as before.
            logger.warning(
                f"Peer {peer_url} has the same log size but a different root"
            )

        # They have records we don't - fetch them
        if their_size > our_size_before:
            # First verify consistency
            if our_size_before > 0:
                proof = await self.transport.get_consistency_proof(peer, our_size_before)
                if not self._verify_consistency(proof):
                    raise FederationError(
                        f"Peer {peer_url} failed consistency check - possible fork"
                    )

            # Fetch new records
            wanted = their_size - our_size_before
            new_records = await self.transport.get_records(
                peer, our_size_before, wanted
            )

            # Gossip rounds sync with several peers concurrently, so another
            # sync may have extended our log while we were awaiting the
            # transport. Skip the prefix we now already hold, and never take
            # more than we asked for. There is no await below, so the
            # appends cannot interleave with another sync.
            already_have = self.log.size - our_size_before
            for record in new_records[already_have:wanted]:
                self.log.append(record)
                records_received += 1

        peer.healthy = True
        peer.consecutive_failures = 0

        return SyncStatus(
            peer=peer_url,
            success=True,
            records_received=records_received,
            our_size_before=our_size_before,
            our_size_after=self.log.size,
            their_size=their_size,
        )

    except Exception as e:
        peer.consecutive_failures += 1
        if peer.consecutive_failures >= 3:
            peer.healthy = False
            logger.warning(f"Peer {peer_url} marked unhealthy after 3 failures")

        return SyncStatus(
            peer=peer_url,
            success=False,
            error=str(e),
            records_received=records_received,
            our_size_before=our_size_before,
            our_size_after=self.log.size,
            their_size=their_size,
        )
peers. + + Returns dict of peer_url -> success. + """ + results = {} + for url, peer in self.peers.items(): + if not peer.healthy: + results[url] = False + continue + + try: + accepted = await self.transport.push_records(peer, [record]) + results[url] = accepted > 0 + except Exception as e: + logger.warning(f"Failed to push to {url}: {e}") + results[url] = False + + return results + + async def _gossip_loop(self, interval: float) -> None: + """Background task that periodically syncs with peers.""" + while self._running: + try: + await self._gossip_round() + except Exception as e: + logger.error(f"Gossip round failed: {e}") + + await asyncio.sleep(interval) + + async def _gossip_round(self) -> None: + """One round of gossip with all healthy peers.""" + healthy_peers = [p for p in self.peers.values() if p.healthy] + if not healthy_peers: + return + + # Sync with all healthy peers concurrently + tasks = [self.sync_with_peer(p.url) for p in healthy_peers] + results = await asyncio.gather(*tasks, return_exceptions=True) + + # Log summary + success_count = sum( + 1 for r in results if isinstance(r, SyncStatus) and r.success + ) + logger.debug(f"Gossip round: {success_count}/{len(healthy_peers)} peers synced") + + def _verify_consistency(self, proof: ConsistencyProof) -> bool: + """Verify a consistency proof from a peer.""" + # Simplified: trust the proof structure for now + # Full implementation would verify the merkle path + return proof.old_size <= self.log.size + + def _generate_node_id(self) -> str: + """Generate a random node ID.""" + import secrets + + return hashlib.sha256(secrets.token_bytes(32)).hexdigest()[:16] + + +# Placeholder for HTTP transport implementation +class HttpTransport: + """ + HTTP-based peer transport. + + Endpoints expected on peers: + - GET /status -> {"root": "...", "size": N} + - GET /records?start=N&count=M -> [records...] 
async def get_records(
    self, peer: PeerInfo, start_index: int, count: int
) -> list[AttestationRecord]:
    """Fetch records from peer (placeholder, not implemented).

    Raises:
        NotImplementedError: Always. This class is only a stub for the
            HTTP transport.
    """
    raise NotImplementedError("Install verisoo[federation] for HTTP transport")
def hash_image(image_data: bytes, *, robust: bool = True) -> ImageHashes:
    """
    Compute the full set of hashes for one image.

    Args:
        image_data: Raw image bytes
        robust: When True and imagehash is installed, also compute ahash,
                colorhash and the crop-resistant hash. Ignored by the
                fallback path used when imagehash is missing.

    Returns:
        ImageHashes with sha256, phash and dhash always set; ahash,
        colorhash and crop_resistant are None unless computed.
    """
    # Cryptographic digest of the exact bytes
    digest = hashlib.sha256(image_data).hexdigest()

    # Decoded image for the perceptual hashes
    picture = Image.open(BytesIO(image_data))

    optional: dict[str, str | None] = {
        "ahash": None,
        "colorhash": None,
        "crop_resistant": None,
    }

    if not IMAGEHASH_AVAILABLE:
        # Fallback without imagehash - less robust but functional
        phash, dhash = _fallback_perceptual_hashes(picture)
    else:
        phash = str(imagehash.phash(picture))
        dhash = str(imagehash.dhash(picture))
        if robust:
            optional["ahash"] = str(imagehash.average_hash(picture))
            optional["colorhash"] = str(imagehash.colorhash(picture))
            optional["crop_resistant"] = _compute_crop_resistant_hash(picture)

    return ImageHashes(sha256=digest, phash=phash, dhash=dhash, **optional)
def perceptual_distance(hash1: str, hash2: str) -> int:
    """
    Return the hamming distance between two hex-encoded perceptual hashes.

    Lower means more similar:
    0 = identical
    1-10 = likely same image, different encoding
    11-20 = possibly same image, significant changes
    >20 = likely different images

    Raises:
        ValueError: If either string is not valid hexadecimal.
    """
    width = max(len(hash1), len(hash2))
    if len(hash1) != len(hash2):
        # Left-pad the shorter hash with zeros so both have equal length
        hash1, hash2 = hash1.zfill(width), hash2.zfill(width)

    differing_bits = int(hash1, 16) ^ int(hash2, 16)
    return format(differing_bits, "b").count("1")
def compute_all_distances(hashes1: ImageHashes, hashes2: ImageHashes) -> dict[str, int]:
    """
    Return the hamming distance for every hash type both sides provide.

    "phash" and "dhash" are always present. "ahash", "colorhash" and
    "crop_resistant" appear only when both hash sets carry a non-empty value.
    Useful for debugging and understanding match quality.
    """
    distances = {
        "phash": perceptual_distance(hashes1.phash, hashes2.phash),
        "dhash": perceptual_distance(hashes1.dhash, hashes2.dhash),
    }

    for name in ("ahash", "colorhash", "crop_resistant"):
        if getattr(hashes1, name) and getattr(hashes2, name):
            distances[name] = perceptual_distance(
                getattr(hashes1, name), getattr(hashes2, name)
            )

    return distances
+ """ + from datetime import datetime + + from .models import CaptureDevice, GeoLocation + + img = Image.open(BytesIO(image_data)) + exif = img.getexif() if hasattr(img, "getexif") else {} + + result: dict = { + "width": img.width, + "height": img.height, + "mime_type": Image.MIME.get(img.format, f"image/{img.format.lower()}") if img.format else None, + } + + if not exif: + return result + + # EXIF tag IDs + MAKE = 271 + MODEL = 272 + SOFTWARE = 305 + DATETIME_ORIGINAL = 36867 + GPS_INFO = 34853 + + # Device info + make = exif.get(MAKE) + model = exif.get(MODEL) + software = exif.get(SOFTWARE) + if make or model: + result["device"] = CaptureDevice( + make=make, + model=model, + software=software, + ).to_dict() + + # Capture time + datetime_str = exif.get(DATETIME_ORIGINAL) + if datetime_str: + try: + # EXIF format: "2024:03:15 14:32:00" + captured = datetime.strptime(datetime_str, "%Y:%m:%d %H:%M:%S") + result["captured_at"] = captured.isoformat() + except ValueError: + pass + + # GPS coordinates (simplified - full parsing is complex) + gps_info = exif.get(GPS_INFO) + if gps_info and isinstance(gps_info, dict): + try: + lat = _parse_gps_coord(gps_info.get(2), gps_info.get(1)) # GPSLatitude, GPSLatitudeRef + lon = _parse_gps_coord(gps_info.get(4), gps_info.get(3)) # GPSLongitude, GPSLongitudeRef + if lat is not None and lon is not None: + result["location"] = GeoLocation( + latitude=lat, + longitude=lon, + ).to_dict() + except (TypeError, ValueError, KeyError): + pass + + return result + + +def _parse_gps_coord(coord_tuple: tuple | None, ref: str | None) -> float | None: + """Parse EXIF GPS coordinate tuple to decimal degrees.""" + if not coord_tuple or not ref: + return None + + try: + # coord_tuple is ((deg_num, deg_den), (min_num, min_den), (sec_num, sec_den)) + # or sometimes just (deg, min, sec) as floats + if isinstance(coord_tuple[0], tuple): + degrees = coord_tuple[0][0] / coord_tuple[0][1] + minutes = coord_tuple[1][0] / coord_tuple[1][1] + seconds = 
coord_tuple[2][0] / coord_tuple[2][1] + else: + degrees, minutes, seconds = coord_tuple + + decimal = degrees + minutes / 60 + seconds / 3600 + + if ref in ("S", "W"): + decimal = -decimal + + return decimal + except (TypeError, ZeroDivisionError, IndexError): + return None diff --git a/src/soosef/verisoo/lmdb_store.py b/src/soosef/verisoo/lmdb_store.py new file mode 100644 index 0000000..96322ac --- /dev/null +++ b/src/soosef/verisoo/lmdb_store.py @@ -0,0 +1,319 @@ +""" +LMDB-backed index and state storage for Verisoo. + +Provides fast lookups on top of the append-only binary log: +- record_id -> log offset +- attestor_fingerprint -> [record indices] +- image_sha256 -> [record indices] +- image_phash -> [record indices] (for perceptual search) +- merkle tree state + +LMDB is memory-mapped, so reads are zero-copy and very fast. +Single-writer model fits append-only perfectly. +""" + +from __future__ import annotations + +import json +import struct +from contextlib import contextmanager +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Iterator + +import lmdb + +from .exceptions import VerisooError +from .models import AttestationRecord, Identity, ImageHashes + + +class IndexError(VerisooError): + """Error in index operations.""" + + pass + + +# LMDB database names (sub-databases within the environment) +DB_RECORDS = b"records" # index (uint64) -> record_id +DB_BY_ATTESTOR = b"by_attestor" # fingerprint -> [indices] (JSON array) +DB_BY_SHA256 = b"by_sha256" # sha256 -> [indices] +DB_BY_PHASH = b"by_phash" # phash -> [indices] +DB_IDENTITIES = b"identities" # fingerprint -> identity JSON +DB_META = b"meta" # key -> value (merkle root, log size, etc.) 
def __init__(
    self,
    path: Path,
    *,
    map_size: int = DEFAULT_MAP_SIZE,
    readonly: bool = False,
) -> None:
    """Open (or create) the LMDB environment and its sub-databases.

    Args:
        path: Directory holding the LMDB environment.
        map_size: Maximum size of the memory map in bytes.
        readonly: Open the environment read-only. Nothing is created in
            this mode: the directory and all sub-databases must already
            exist, otherwise lmdb raises.
    """
    self.path = path
    if not readonly:
        # A read-only open must not create anything on disk.
        path.mkdir(parents=True, exist_ok=True)

    self._env = lmdb.open(
        str(path),
        map_size=map_size,
        max_dbs=10,
        readonly=readonly,
        # Allow multiple readers
        max_readers=126,
        # Don't sync on every write (we have CRC in binlog for durability)
        sync=False,
        # Memory-map is the whole point
        writemap=True,
    )

    sub_databases = (
        ("_db_records", DB_RECORDS),
        ("_db_by_attestor", DB_BY_ATTESTOR),
        ("_db_by_sha256", DB_BY_SHA256),
        ("_db_by_phash", DB_BY_PHASH),
        ("_db_identities", DB_IDENTITIES),
        ("_db_meta", DB_META),
    )

    if readonly:
        # A write transaction cannot be started on a read-only environment,
        # so let lmdb open each handle itself and forbid creation.
        for attr, name in sub_databases:
            setattr(self, attr, self._env.open_db(name, create=False))
    else:
        # Open (creating if needed) all sub-databases in one write transaction
        with self._env.begin(write=True) as txn:
            for attr, name in sub_databases:
                setattr(self, attr, self._env.open_db(name, txn=txn))
def find_similar_phash(self, phash: str, max_distance: int = 10) -> list[tuple[str, int]]:
    """
    Find stored perceptual hashes within a hamming distance of phash.

    Scans every key of the phash index, so the cost is O(n) in the number
    of unique phashes - for large scale, would need LSH.

    Args:
        phash: Hex-encoded hash to compare against.
        max_distance: Largest hamming distance (inclusive) to report.

    Returns:
        List of (phash, distance) tuples ordered by ascending distance.
        Stored keys that are not valid hex are skipped.

    Raises:
        ValueError: If phash itself is not valid hexadecimal.
    """
    wanted_bits = int(phash, 16)
    hits: list[tuple[str, int]] = []

    with self._env.begin() as txn:
        for raw_key, _value in txn.cursor(db=self._db_by_phash):
            stored = raw_key.decode()
            try:
                stored_bits = int(stored, 16)
            except ValueError:
                continue

            gap = bin(wanted_bits ^ stored_bits).count("1")
            if gap <= max_distance:
                hits.append((stored, gap))

    # Stable sort: equal distances keep cursor order
    hits.sort(key=lambda hit: hit[1])
    return hits
data.decode() if data else None + + def set_merkle_root(self, root: str) -> None: + """Store current merkle root.""" + self.set_meta("merkle_root", root) + + def get_merkle_root(self) -> str | None: + """Get stored merkle root.""" + return self.get_meta("merkle_root") + + def set_record_count(self, count: int) -> None: + """Store record count (for consistency checks).""" + self.set_meta("record_count", str(count)) + + def get_record_count(self) -> int: + """Get stored record count.""" + val = self.get_meta("record_count") + return int(val) if val else 0 + + # --- Stats --- + + def get_stats(self) -> IndexStats: + """Get index statistics.""" + with self._env.begin() as txn: + record_count = txn.stat(self._db_records)["entries"] + attestor_count = txn.stat(self._db_by_attestor)["entries"] + sha256_count = txn.stat(self._db_by_sha256)["entries"] + phash_count = txn.stat(self._db_by_phash)["entries"] + + return IndexStats( + record_count=record_count, + attestor_count=attestor_count, + unique_images_sha256=sha256_count, + unique_images_phash=phash_count, + merkle_root=self.get_merkle_root(), + log_file_size=0, # Filled by storage layer + ) + + # --- Internal helpers --- + + def _append_to_list(self, txn, db, key: bytes, value: int) -> None: + """Append an integer to a JSON list stored at key.""" + existing = txn.get(key, db=db) + if existing: + items = json.loads(existing.decode()) + else: + items = [] + + items.append(value) + txn.put(key, json.dumps(items).encode(), db=db) + + def _get_list(self, txn, db, key: bytes) -> list[int]: + """Get JSON list of integers from key.""" + data = txn.get(key, db=db) + if not data: + return [] + return json.loads(data.decode()) + + def sync(self) -> None: + """Force sync to disk.""" + self._env.sync() + + def compact(self) -> None: + """Compact the database (copy to remove free pages).""" + compact_path = self.path.parent / f"{self.path.name}.compact" + self._env.copy(str(compact_path), compact=True) + # Could swap files here, but 
@dataclass
class MerkleNode:
    """A node in the merkle tree.

    A node is a leaf when leaf_data is set; otherwise it may reference a
    left and a right child.
    """

    # Hash of this node. NOTE(review): presumably a hex digest like the
    # hashes MerkleLog produces - confirm against callers.
    hash: str
    # Child nodes; both default to None.
    left: MerkleNode | None = None
    right: MerkleNode | None = None
    leaf_data: bytes | None = None  # Only for leaf nodes

    @property
    def is_leaf(self) -> bool:
        """True when leaf_data is set (an empty bytes value still counts)."""
        return self.leaf_data is not None
def append(self, record: AttestationRecord) -> int:
    """
    Add a record as the newest leaf and refresh the merkle root.

    Returns:
        Zero-based index of the appended record.
    """
    # Serialize and hash first, so a failure leaves the log untouched.
    payload = self._serialize_record(record)
    digest = self._hash_leaf(payload)

    new_index = len(self._leaves)
    self._leaves.append(payload)
    self._leaf_hashes.append(digest)
    self._root = self._compute_root()

    return new_index
def verify_inclusion(self, proof: InclusionProof) -> bool:
    """Verify that an inclusion proof hashes up to its stated root.

    The tree promotes an unpaired last node to the next level unchanged, so
    a level contributes a sibling hash only when the node on the path has a
    partner. The walk uses proof.tree_size to follow exactly that shape.

    Args:
        proof: Proof carrying leaf hash, leaf index, tree size, sibling
            hashes and the root it claims to reach.

    Returns:
        True when the recomputed root equals proof.root_hash and the proof
        has exactly the number of sibling hashes the tree shape requires.
        False for an empty log, an out-of-range leaf index, or any mismatch.

    NOTE(review): the result is compared with proof.root_hash, not with this
    log's own root. Callers that must trust the root should compare
    proof.root_hash with a root they trust themselves.
    """
    if self._root is None:
        return False

    index = proof.leaf_index
    size = proof.tree_size
    if not 0 <= index < size:
        return False

    computed = proof.leaf_hash
    siblings = iter(proof.proof_hashes)

    try:
        while size > 1:
            if index % 2 == 1:
                # We're right child, sibling is left
                computed = self._hash_pair(next(siblings), computed)
            elif index + 1 < size:
                # We're left child, sibling is right
                computed = self._hash_pair(computed, next(siblings))
            # else: unpaired last node, promoted without hashing
            index //= 2
            size = (size + 1) // 2
    except StopIteration:
        # Proof is shorter than the tree shape requires
        return False

    if next(siblings, None) is not None:
        # Proof carries hashes the tree shape cannot account for
        return False

    return computed == proof.root_hash
child nodes (prefixed with 0x01).""" + combined = bytes.fromhex(left) + bytes.fromhex(right) + return hashlib.sha256(b"\x01" + combined).hexdigest() + + def _compute_root(self) -> str: + """Compute merkle root from current leaves.""" + if not self._leaf_hashes: + return hashlib.sha256(b"").hexdigest() + + # Build tree bottom-up + level = self._leaf_hashes.copy() + + while len(level) > 1: + next_level = [] + for i in range(0, len(level), 2): + if i + 1 < len(level): + next_level.append(self._hash_pair(level[i], level[i + 1])) + else: + # Odd node: promote to next level + next_level.append(level[i]) + level = next_level + + return level[0] + + def _build_inclusion_proof(self, index: int, size: int) -> list[str]: + """Build the sibling hashes for inclusion proof.""" + proof = [] + level = self._leaf_hashes.copy() + + while len(level) > 1: + # Find sibling + if index % 2 == 0: + if index + 1 < len(level): + proof.append(level[index + 1]) + else: + proof.append(level[index - 1]) + + # Move to parent level + next_level = [] + for i in range(0, len(level), 2): + if i + 1 < len(level): + next_level.append(self._hash_pair(level[i], level[i + 1])) + else: + next_level.append(level[i]) + level = next_level + index //= 2 + + return proof + + def _build_consistency_proof(self, old_size: int, new_size: int) -> list[str]: + """Build consistency proof hashes.""" + # Simplified: return subtree roots that prove consistency + # Full implementation would follow RFC 6962 algorithm + # For now, return empty - federation will implement full version + return [] + + def _serialize_record(self, record: AttestationRecord) -> bytes: + """Serialize record to bytes for storage/hashing.""" + # Use JSON for now - could use protobuf for efficiency + data = { + "sha256": record.image_hashes.sha256, + "phash": record.image_hashes.phash, + "dhash": record.image_hashes.dhash, + "signature": record.signature.hex(), + "attestor": record.attestor_fingerprint, + "timestamp": record.timestamp.isoformat(), 
def _proof_parent_hash(left: str, right: str) -> str:
    """Hash two hex-encoded child nodes into their parent (0x01 prefix)."""
    return hashlib.sha256(b"\x01" + bytes.fromhex(left) + bytes.fromhex(right)).hexdigest()


def _root_from_sized_proof(
    leaf_hash: str,
    index: int,
    tree_size: int,
    siblings: list[str],
) -> str | None:
    """Recompute the root for a tree of ``tree_size`` leaves.

    Returns None when the proof has too few or too many hashes for that
    tree shape, or when ``index`` lies outside the tree.
    """
    if not 0 <= index < tree_size:
        return None

    remaining = iter(siblings)
    node = leaf_hash
    width = tree_size
    while width > 1:
        is_right_child = index % 2 == 1
        if is_right_child or index + 1 < width:
            sibling = next(remaining, None)
            if sibling is None:
                return None  # proof ends before the root is reached
            if is_right_child:
                node = _proof_parent_hash(sibling, node)
            else:
                node = _proof_parent_hash(node, sibling)
        # Otherwise the node is the unpaired trailing node of this level and
        # is promoted unchanged, so no sibling hash is consumed.
        index //= 2
        width = (width + 1) // 2

    if next(remaining, None) is not None:
        return None  # surplus hashes do not belong to this tree shape
    return node


def verify_inclusion_proof(
    record_bytes: bytes,
    proof: InclusionProof,
    expected_root: str,
) -> bool:
    """
    Standalone verification of an inclusion proof.

    Anyone can verify without having the full log.

    The leaf hash is SHA-256 over ``0x00 || record_bytes``; parents are
    SHA-256 over ``0x01 || left || right``.

    Args:
        record_bytes: Serialized record whose inclusion is being checked.
        proof: Object providing ``leaf_hash``, ``leaf_index`` and
            ``proof_hashes`` (hex strings ordered leaf-to-root).
        expected_root: Hex-encoded root hash the proof must reproduce.

    Returns:
        True if the recomputed root equals ``expected_root``. False if the
        leaf hash differs, the proof does not fit the tree, or a hash is
        not valid hex.
    """
    leaf_hash = hashlib.sha256(b"\x00" + record_bytes).hexdigest()
    if leaf_hash != proof.leaf_hash:
        return False

    # An unpaired trailing node is promoted without a sibling hash, so the
    # position of each proof hash can only be recovered from the tree size.
    # NOTE(review): assumes InclusionProof exposes the size as ``tree_size``;
    # confirm against its definition. Without it the legacy walk is used.
    tree_size = getattr(proof, "tree_size", None)

    try:
        if tree_size is None:
            # Legacy walk: one level per hash, side chosen by index parity.
            # Only correct when no level on the path promoted the node.
            computed: str | None = leaf_hash
            index = proof.leaf_index
            for sibling_hash in proof.proof_hashes:
                if index % 2 == 0:
                    computed = _proof_parent_hash(computed, sibling_hash)
                else:
                    computed = _proof_parent_hash(sibling_hash, computed)
                index //= 2
        else:
            computed = _root_from_sized_proof(
                leaf_hash, proof.leaf_index, tree_size, list(proof.proof_hashes)
            )
    except ValueError:
        # bytes.fromhex() rejects malformed hashes; treat as a failed proof.
        return False

    return computed is not None and computed == expected_root
@dataclass(frozen=True)
class GeoLocation:
    """
    GPS position at which an image was captured.

    Latitude and longitude are mandatory; accuracy, altitude and a
    human-readable place name are optional.
    """

    latitude: float  # -90 to 90
    longitude: float  # -180 to 180
    accuracy_meters: float | None = None  # GPS accuracy
    altitude_meters: float | None = None
    location_name: str | None = None  # human readable, e.g. "Kyiv, Ukraine"

    def to_dict(self) -> dict[str, Any]:
        """Return the compact dict form, leaving out unset optional fields."""
        payload: dict[str, Any] = {"lat": self.latitude, "lon": self.longitude}
        numeric_extras = (
            ("accuracy", self.accuracy_meters),
            ("altitude", self.altitude_meters),
        )
        for key, value in numeric_extras:
            # 0.0 is a legitimate reading, so only None counts as "unset".
            if value is not None:
                payload[key] = value
        if self.location_name:
            payload["name"] = self.location_name
        return payload

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> GeoLocation:
        """Rebuild a location from the dict produced by :meth:`to_dict`."""
        optional = d.get
        return cls(
            d["lat"],
            d["lon"],
            optional("accuracy"),
            optional("altitude"),
            optional("name"),
        )

    def __str__(self) -> str:
        coordinates = f"({self.latitude:.4f}, {self.longitude:.4f})"
        if not self.location_name:
            return coordinates
        return f"{self.location_name} {coordinates}"
@dataclass(frozen=True)
class CaptureMetadata:
    """
    Rich metadata about image capture for provenance.

    This is what lets a photographer say "I took this in Kyiv, not San Francisco."
    """

    # When it was actually captured (device clock/EXIF, not attestation time)
    captured_at: datetime | None = None

    # Where it was captured
    location: GeoLocation | None = None

    # What device captured it
    device: CaptureDevice | None = None

    # Photographer's notes at capture time
    caption: str | None = None

    # Image technical details
    width: int | None = None
    height: int | None = None
    mime_type: str | None = None

    # Original filename
    filename: str | None = None

    # Free-form tags
    tags: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-friendly dict containing only the fields that are set.

        Falsy values (None, 0, empty strings, an empty tag list) are left out.
        """
        d: dict[str, Any] = {}
        if self.captured_at:
            d["captured_at"] = self.captured_at.isoformat()
        if self.location:
            d["location"] = self.location.to_dict()
        if self.device:
            d["device"] = self.device.to_dict()
        if self.caption:
            d["caption"] = self.caption
        if self.width:
            d["width"] = self.width
        if self.height:
            d["height"] = self.height
        if self.mime_type:
            d["mime_type"] = self.mime_type
        if self.filename:
            d["filename"] = self.filename
        if self.tags:
            # Hand out a copy so callers editing the dict cannot mutate the
            # tag list held by this frozen instance.
            d["tags"] = list(self.tags)
        return d

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> CaptureMetadata:
        """Rebuild capture metadata from the dict produced by :meth:`to_dict`.

        Raises:
            ValueError: If ``captured_at`` is not a valid ISO 8601 string.
        """
        raw_tags = d.get("tags", [])
        # Copy list input so later changes to the caller's list do not leak
        # into this instance; other values are passed through untouched.
        tags = list(raw_tags) if isinstance(raw_tags, list) else raw_tags
        return cls(
            captured_at=datetime.fromisoformat(d["captured_at"]) if d.get("captured_at") else None,
            location=GeoLocation.from_dict(d["location"]) if d.get("location") else None,
            device=CaptureDevice.from_dict(d["device"]) if d.get("device") else None,
            caption=d.get("caption"),
            width=d.get("width"),
            height=d.get("height"),
            mime_type=d.get("mime_type"),
            filename=d.get("filename"),
            tags=tags,
        )
@dataclass(frozen=True)
class AttestationRecord:
    """
    The core attestation record stored in the append-only log.

    This is the cryptographic proof that a specific image existed
    at a specific time, attested by a specific identity, with specific metadata.

    Once in the log, it cannot be modified or deleted.
    """

    image_hashes: ImageHashes
    signature: bytes
    attestor_fingerprint: str
    timestamp: datetime  # When attestation was created
    metadata: dict[str, Any] = field(default_factory=dict)  # CaptureMetadata.to_dict()

    @property
    def record_id(self) -> str:
        """
        Unique identifier for this record.

        First 32 hex characters of SHA-256 over
        "sha256|attestor_fingerprint|timestamp_iso". Deterministic and URL-safe.
        """
        content = f"{self.image_hashes.sha256}|{self.attestor_fingerprint}|{self.timestamp.isoformat()}"
        return hashlib.sha256(content.encode()).hexdigest()[:32]

    @property
    def short_id(self) -> str:
        """Short ID for display/URLs (first 12 chars)."""
        return self.record_id[:12]

    @property
    def capture_metadata(self) -> CaptureMetadata | None:
        """Parse metadata as CaptureMetadata, or None if absent or malformed."""
        if not self.metadata:
            return None
        try:
            return CaptureMetadata.from_dict(self.metadata)
        except (KeyError, TypeError, ValueError, AttributeError):
            # Metadata is free-form and may come from other nodes. A bad ISO
            # timestamp raises ValueError and a non-dict nested value raises
            # AttributeError; both mean "not parseable", like a missing key.
            return None

    @property
    def location(self) -> GeoLocation | None:
        """Shortcut to capture location."""
        cm = self.capture_metadata
        return cm.location if cm else None

    @property
    def captured_at(self) -> datetime | None:
        """Shortcut to capture time (may differ from attestation time)."""
        cm = self.capture_metadata
        return cm.captured_at if cm else None

    def to_bytes(self) -> bytes:
        """Serialize hashes, attestor, timestamp and metadata as canonical JSON.

        The signature is not part of the output.
        """
        data = {
            "hashes": self.image_hashes.to_dict(),
            "attestor": self.attestor_fingerprint,
            "timestamp": self.timestamp.isoformat(),
            "metadata": self.metadata,
        }
        return json.dumps(data, sort_keys=True, separators=(",", ":")).encode()

    @classmethod
    def from_bytes(cls, data: bytes, signature: bytes) -> AttestationRecord:
        """Rebuild a record from :meth:`to_bytes` output plus its signature."""
        obj = json.loads(data.decode())
        return cls(
            image_hashes=ImageHashes.from_dict(obj["hashes"]),
            signature=signature,
            attestor_fingerprint=obj["attestor"],
            timestamp=datetime.fromisoformat(obj["timestamp"]),
            metadata=obj.get("metadata", {}),
        )
parts.append(f"in {self.location}") + if self.match_type and self.match_type != "exact": + parts.append(f"({self.match_type} match)") + + return " ".join(parts) if parts else "Verified" + + +@dataclass(frozen=True) +class ProofLink: + """ + A shareable link to an attestation proof. + + Photographers can share these to prove provenance: + "Here's proof I took this photo: verisoo.io/v/a8f3c2d1e9b7" + """ + + record_id: str + base_url: str = "https://verisoo.io" + + @property + def short_id(self) -> str: + return self.record_id[:12] + + @property + def url(self) -> str: + return f"{self.base_url}/v/{self.short_id}" + + @property + def full_url(self) -> str: + return f"{self.base_url}/verify/{self.record_id}" + + def __str__(self) -> str: + return self.url + + +def _hamming_distance(hash1: str, hash2: str) -> int: + """Compute hamming distance between two hex-encoded hashes.""" + # Handle different length hashes by padding shorter one + if len(hash1) != len(hash2): + max_len = max(len(hash1), len(hash2)) + hash1 = hash1.zfill(max_len) + hash2 = hash2.zfill(max_len) + + # Convert hex to int, XOR, count bits + val1 = int(hash1, 16) + val2 = int(hash2, 16) + return bin(val1 ^ val2).count("1") diff --git a/src/soosef/verisoo/storage.py b/src/soosef/verisoo/storage.py new file mode 100644 index 0000000..b38cbc4 --- /dev/null +++ b/src/soosef/verisoo/storage.py @@ -0,0 +1,337 @@ +""" +Local storage for Verisoo. + +Two-layer storage architecture: +1. Binary append-only log (binlog.py) - stores raw records, federation-friendly +2. LMDB index (lmdb_store.py) - fast lookups by attestor, image hash, etc. 
+ +Directory structure: + ~/.verisoo/ + ├── config.json # Node configuration + ├── identity/ + │ ├── private.pem # Node's private key (encrypted) + │ └── public.pem # Node's public key + ├── log.bin # Append-only binary record log + ├── index/ # LMDB index database + │ ├── data.mdb + │ └── lock.mdb + └── peers.json # Federation peer list +""" + +from __future__ import annotations + +import hashlib +import json +from datetime import datetime +from pathlib import Path +from typing import Iterator + +from .binlog import BinaryLog +from .exceptions import VerisooError +from .lmdb_store import LMDBIndex, IndexStats +from .merkle import MerkleLog +from .models import AttestationRecord, Identity, ImageHashes + + +class StorageError(VerisooError): + """Error in storage operations.""" + + pass + + +def _serialize_record(record: AttestationRecord) -> bytes: + """Serialize a record to bytes for storage.""" + data = { + "sha256": record.image_hashes.sha256, + "phash": record.image_hashes.phash, + "dhash": record.image_hashes.dhash, + "signature": record.signature.hex(), + "attestor": record.attestor_fingerprint, + "timestamp": record.timestamp.isoformat(), + "metadata": record.metadata, + } + return json.dumps(data, sort_keys=True, separators=(",", ":")).encode("utf-8") + + +def _deserialize_record(data: bytes) -> AttestationRecord: + """Deserialize a record from bytes.""" + obj = json.loads(data.decode("utf-8")) + return AttestationRecord( + image_hashes=ImageHashes( + sha256=obj["sha256"], + phash=obj["phash"], + dhash=obj["dhash"], + ), + signature=bytes.fromhex(obj["signature"]), + attestor_fingerprint=obj["attestor"], + timestamp=datetime.fromisoformat(obj["timestamp"]), + metadata=obj.get("metadata", {}), + ) + + +def _compute_record_id(record: AttestationRecord) -> str: + """Compute unique ID for a record.""" + # Hash of (sha256 || attestor || timestamp) + content = f"{record.image_hashes.sha256}|{record.attestor_fingerprint}|{record.timestamp.isoformat()}" + return 
hashlib.sha256(content.encode()).hexdigest()[:32] + + +class LocalStorage: + """ + High-level storage interface combining binary log and LMDB index. + + Thread-safe for reads, single-writer for appends. + """ + + DEFAULT_PATH = Path.home() / ".verisoo" + + def __init__(self, base_path: Path | None = None) -> None: + self.base_path = base_path or self.DEFAULT_PATH + self._ensure_directories() + + # Initialize storage layers + self._binlog = BinaryLog(self.base_path / "log.bin") + self._index = LMDBIndex(self.base_path / "index") + + # Verify consistency between log and index + self._verify_consistency() + + def _ensure_directories(self) -> None: + """Create directory structure if needed.""" + dirs = [ + self.base_path, + self.base_path / "identity", + ] + for d in dirs: + d.mkdir(parents=True, exist_ok=True) + + # Restrict permissions on sensitive directories + try: + self.base_path.chmod(0o700) + (self.base_path / "identity").chmod(0o700) + except OSError: + pass + + def _verify_consistency(self) -> None: + """Verify log and index are in sync.""" + log_size = self._binlog.size + index_size = self._index.get_record_count() + + if log_size > index_size: + # Index is behind - rebuild missing entries + self._rebuild_index_from(index_size) + elif index_size > log_size: + # Index is ahead (shouldn't happen) - reset index count + self._index.set_record_count(log_size) + + def _rebuild_index_from(self, start_index: int) -> None: + """Rebuild index entries from log starting at index.""" + for entry in self._binlog.iterate(): + if entry.index >= start_index: + record = _deserialize_record(entry.data) + record_id = _compute_record_id(record) + self._index.index_record(entry.index, record, record_id) + + self._index.set_record_count(self._binlog.size) + + def close(self) -> None: + """Close storage (flushes LMDB).""" + self._index.close() + + def __enter__(self) -> LocalStorage: + return self + + def __exit__(self, *args) -> None: + self.close() + + # --- Record operations --- 
def find_similar_images(
    self, phash: str, max_distance: int = 10
) -> list[tuple[AttestationRecord, int]]:
    """
    Look up records whose perceptual hash is close to ``phash``.

    The index is asked for similar hashes within ``max_distance``; every
    record stored under each of those hashes is then loaded.

    Returns list of (record, hamming_distance) tuples.
    """
    index = self._index
    return [
        (self.get_record(position), distance)
        for matched_phash, distance in index.find_similar_phash(phash, max_distance)
        for position in index.get_indices_by_phash(matched_phash)
    ]
def load_config(self) -> dict:
    """Load the node configuration from ``config.json`` under ``base_path``.

    Returns:
        The parsed JSON document, or an empty dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    path = self.base_path / "config.json"
    if not path.exists():
        return {}
    # JSON is UTF-8 by specification. Decoding with the platform's locale
    # encoding would garble non-ASCII values in a hand-edited config on
    # systems whose default is not UTF-8.
    return json.loads(path.read_text(encoding="utf-8"))
-> IndexStats: + """Get storage statistics.""" + stats = self._index.get_stats() + stats.log_file_size = self._binlog.file_size + return stats diff --git a/src/soosef/verisoo/verification.py b/src/soosef/verisoo/verification.py new file mode 100644 index 0000000..206d3d5 --- /dev/null +++ b/src/soosef/verisoo/verification.py @@ -0,0 +1,489 @@ +""" +Attestation Verification Module for Verisoo. + +This module answers the fundamental question: + "Was this image attested by this identity at this time?" + +Verification is the counterpart to attestation - it validates that an image +matches a known attestation record and that the record's signature is valid. + +Verification Architecture: +------------------------- + + ┌─────────────────────────────────────────────────────────────────────┐ + │ VERIFICATION PIPELINE │ + └─────────────────────────────────────────────────────────────────────┘ + + Input Image Attestation Record + │ │ + ▼ │ + ┌─────────────────┐ │ + │ Compute Hashes │ │ + │ (SHA + pHash) │ │ + └────────┬────────┘ │ + │ │ + ▼ ▼ + ┌─────────────────────────────────────────────────┐ + │ STEP 1: HASH COMPARISON │ + │ ┌─────────────┐ ┌─────────────┐ │ + │ │ Image Hash │ ─────► │ Record Hash │ │ + │ └─────────────┘ ?? 
└─────────────┘ │ + │ │ + │ Exact match: SHA-256 identical │ + │ Perceptual match: pHash/dHash within threshold │ + └───────────────────────┬─────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────┐ + │ STEP 2: IDENTITY LOOKUP │ + │ │ + │ Fingerprint ─────► Identity Database │ + │ (resolve public key) │ + └───────────────────────┬─────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────┐ + │ STEP 3: SIGNATURE VERIFICATION │ + │ │ + │ ┌────────────┐ ┌───────────────┐ │ + │ │ Public Key │ ─► │ Verify Ed25519│ ◄─ Record │ + │ └────────────┘ │ Signature │ Signature│ + │ └───────┬───────┘ │ + │ │ │ + │ Valid / Invalid │ + └───────────────────────┬─────────────────────────┘ + │ + ▼ + ┌─────────────────┐ + │ Verification │ + │ Result │ + └─────────────────┘ + +Match Types: +----------- +Verisoo uses multiple matching strategies to handle real-world image sharing: + +1. **Exact Match** (SHA-256) + - Byte-for-byte identical images + - Fails after ANY modification (even metadata changes) + - Use case: Verifying untouched originals + +2. **Perceptual Match** (pHash, dHash) + - Based on visual content, not bytes + - Survives: JPEG recompression, resizing, format conversion + - Use case: Verifying social media copies + +Perceptual Threshold: +------------------- +The threshold parameter controls how similar images must be for perceptual matching: + + threshold=0 : Exact perceptual match (rare for different encodings) + threshold=5 : Very strict - minor differences only + threshold=10 : Default - handles most social media compression + threshold=15 : Looser - catches more heavily modified images + threshold=20+: Very loose - may have false positives + +The threshold is measured in Hamming distance (bit differences between hashes). +Lower = more strict, higher = more tolerant. 
def verify_attestation(
    image_data: bytes,
    record: AttestationRecord,
    lookup_identity: IdentityLookup,
    *,
    perceptual_threshold: int = 10,
    require_exact_match: bool = False,
) -> VerificationResult:
    """
    Check that an image matches an attestation record and that the record's
    signature is valid.

    Pipeline:
        1. Hash the supplied image with ``hash_image``.
        2. Compare those hashes with the record's hashes via ``is_same_image``.
        3. Resolve the attestor's identity through ``lookup_identity`` and
           confirm its public key yields the record's fingerprint.
        4. Rebuild the signing payload and verify the signature.

    Args:
        image_data: Raw bytes of the image to verify.
        record: Attestation record holding the expected hashes, the
            signature and the attestor fingerprint.
        lookup_identity: Callable mapping a fingerprint to an Identity, or
            None when the fingerprint is unknown (typically
            ``storage.load_identity``).
        perceptual_threshold: Maximum Hamming distance accepted for a
            perceptual match; higher values tolerate more modification.
        require_exact_match: When True, any match other than "exact" is
            rejected.

    Returns:
        A VerificationResult. ``is_valid`` is True only if every step passed.
        On failure ``error`` describes the first step that failed:
        image mismatch, exact match required, unknown attestor, fingerprint
        mismatch, or the message of the SignatureError. ``match_type`` is
        "exact" for the internal "exact_match" label and otherwise whatever
        ``is_same_image`` reported.

    Example:
        >>> result = verify_attestation(
        ...     image_data=suspect_image,
        ...     record=claimed_attestation,
        ...     lookup_identity=storage.load_identity,
        ... )
        >>> if result.is_valid:
        ...     print(f"Verified by {result.attestor.display_name}")
        ... else:
        ...     print(f"Verification failed: {result.error}")
    """

    def rejected(reason, how=None, who=None, **details):
        # Every failure carries the record; only match type/attestor vary.
        return VerificationResult(
            is_valid=False,
            match_type=how,
            attestor=who,
            record=record,
            error=reason,
            **details,
        )

    # --- Steps 1 + 2: hash the image and compare with the record ------------
    candidate_hashes = hash_image(image_data)
    matched, raw_match_type = is_same_image(
        candidate_hashes,
        record.image_hashes,
        perceptual_threshold=perceptual_threshold,
    )
    if not matched:
        return rejected(
            "Image does not match attestation",
            hash_distances={
                "provided_sha256": candidate_hashes.sha256,
                "record_sha256": record.image_hashes.sha256,
            },
        )

    # Internal "exact_match" is exposed to callers as "exact".
    match_label = "exact" if raw_match_type == "exact_match" else raw_match_type

    if require_exact_match and match_label != "exact":
        return rejected(
            "Exact match required but only perceptual match found",
            how=match_label,
        )

    # --- Step 3: resolve the attestor ---------------------------------------
    identity = lookup_identity(record.attestor_fingerprint)
    if identity is None:
        # Not in the identity database (e.g. a peer not synced yet).
        return rejected(
            f"Unknown attestor: {record.attestor_fingerprint}",
            how=match_label,
        )

    # The stored public key must hash to the fingerprint named by the record;
    # otherwise the identity entry is mismatched or corrupted.
    if fingerprint_from_pubkey(identity.public_key) != record.attestor_fingerprint:
        return rejected("Identity fingerprint mismatch", how=match_label, who=identity)

    # --- Step 4: signature check --------------------------------------------
    from .attestation import _build_signing_payload

    payload = _build_signing_payload(
        record.image_hashes,
        record.attestor_fingerprint,
        record.timestamp,
        record.metadata,
    )
    try:
        verify(identity.public_key, record.signature, payload)
    except SignatureError as exc:
        # Invalid signature - the record may have been tampered with.
        return rejected(str(exc), how=match_label, who=identity)

    # --- All checks passed --------------------------------------------------
    return VerificationResult(
        is_valid=True,
        match_type=match_label,
        attestor=identity,
        record=record,
        error=None,
    )
if identity and not verify_record_signature(record, identity): + ... print(f"TAMPERED: {record.record_id}") + """ + from .attestation import _build_signing_payload + + # Rebuild the canonical payload that was originally signed + signing_payload = _build_signing_payload( + record.image_hashes, + record.attestor_fingerprint, + record.timestamp, + record.metadata, + ) + + try: + # Verify Ed25519 signature + verify(identity.public_key, record.signature, signing_payload) + return True + except SignatureError: + # Signature invalid - record was tampered or key mismatch + return False + + +# ============================================================================= +# ATTESTATION SEARCH +# ============================================================================= + + +def find_attestations_for_image( + image_data: bytes, + records: list[AttestationRecord], + *, + perceptual_threshold: int = 10, +) -> list[AttestationRecord]: + """ + Find all attestation records that match an image. + + This function searches through a list of attestation records and returns + all that match the provided image (either exactly or perceptually). + + The returned records are sorted by timestamp (oldest first), which + establishes the **provenance chain** - showing the history of who + attested this image and when. + + Matching Strategy: + ----------------- + For each record, we check: + 1. Exact SHA-256 match (byte-identical) + 2. Perceptual hash match within threshold (visually same) + + If either matches, the record is included in results. + + Use Cases: + --------- + - **Fact-checking**: Find who first attested this viral image + - **Provenance chain**: See all attestations for an image over time + - **Duplicate detection**: Find if image was previously attested + + Args: + image_data: + Raw bytes of the image to search for. + + records: + List of AttestationRecords to search through. 
+ Typically: list(storage.iterate_records()) + + perceptual_threshold: + Maximum Hamming distance for perceptual matching. + Default 10 catches most social media compression. + + Returns: + List of matching AttestationRecords, sorted by timestamp + (oldest first). Empty list if no matches found. + + Performance Note: + This performs a linear scan of all records. For large databases, + consider using indexed lookups (by SHA-256 or perceptual hash) + in the storage layer. + + Example: + >>> # Find all attestations for a downloaded image + >>> image = open("suspect_photo.jpg", "rb").read() + >>> records = list(storage.iterate_records()) + >>> matches = find_attestations_for_image(image, records) + >>> + >>> if matches: + ... first = matches[0] + ... print(f"First attested: {first.timestamp}") + ... print(f"By: {first.attestor_fingerprint[:16]}...") + ... if first.location: + ... print(f"Location: {first.location}") + ... else: + ... print("No attestations found for this image") + """ + # Compute hashes of the query image once (expensive operation) + image_hashes = hash_image(image_data) + matches = [] + + # Linear scan through all records + for record in records: + is_same, _ = is_same_image( + image_hashes, + record.image_hashes, + perceptual_threshold=perceptual_threshold, + ) + if is_same: + matches.append(record) + + # Sort by timestamp (oldest first) to show provenance chain + # The first attestation is the most significant for establishing provenance + return sorted(matches, key=lambda r: r.timestamp) diff --git a/test_data/1mb-jpg-example-file.jpg b/test_data/1mb-jpg-example-file.jpg new file mode 100644 index 0000000..0e3ac45 Binary files /dev/null and b/test_data/1mb-jpg-example-file.jpg differ diff --git a/test_data/2mb-jpg-example-file.jpg b/test_data/2mb-jpg-example-file.jpg new file mode 100644 index 0000000..5bf279a Binary files /dev/null and b/test_data/2mb-jpg-example-file.jpg differ diff --git a/test_data/3072_RSA_QR.png b/test_data/3072_RSA_QR.png 
new file mode 100644 index 0000000..8ff6ebe Binary files /dev/null and b/test_data/3072_RSA_QR.png differ diff --git a/test_data/3d340f81_20251229.png b/test_data/3d340f81_20251229.png new file mode 100644 index 0000000..0a20ba5 Binary files /dev/null and b/test_data/3d340f81_20251229.png differ diff --git a/test_data/6375ec7b_20251230.png b/test_data/6375ec7b_20251230.png new file mode 100644 index 0000000..0c60980 Binary files /dev/null and b/test_data/6375ec7b_20251230.png differ diff --git a/test_data/admin_recovery_qr.png b/test_data/admin_recovery_qr.png new file mode 100644 index 0000000..f4a3b93 Binary files /dev/null and b/test_data/admin_recovery_qr.png differ diff --git a/test_data/carrier.jpg b/test_data/carrier.jpg new file mode 100644 index 0000000..aae33f9 Binary files /dev/null and b/test_data/carrier.jpg differ diff --git a/test_data/carrier2.jpg b/test_data/carrier2.jpg new file mode 100644 index 0000000..d10c5fd Binary files /dev/null and b/test_data/carrier2.jpg differ diff --git a/test_data/carrier3.JPG b/test_data/carrier3.JPG new file mode 100755 index 0000000..290126f Binary files /dev/null and b/test_data/carrier3.JPG differ diff --git a/test_data/edd29696_20251229.png b/test_data/edd29696_20251229.png new file mode 100644 index 0000000..7e62310 Binary files /dev/null and b/test_data/edd29696_20251229.png differ diff --git a/test_data/justPIN_20251229.png b/test_data/justPIN_20251229.png new file mode 100644 index 0000000..2f2c91d Binary files /dev/null and b/test_data/justPIN_20251229.png differ diff --git a/test_data/key_and_all_20251229.png b/test_data/key_and_all_20251229.png new file mode 100644 index 0000000..d1067ff Binary files /dev/null and b/test_data/key_and_all_20251229.png differ diff --git a/test_data/phonebooth.pem b/test_data/phonebooth.pem new file mode 100644 index 0000000..cf8a94e --- /dev/null +++ b/test_data/phonebooth.pem @@ -0,0 +1,30 @@ +-----BEGIN ENCRYPTED PRIVATE KEY----- 
+MIIFJTBfBgkqhkiG9w0BBQ0wUjAxBgkqhkiG9w0BBQwwJAQQZA5S460JEEzHr4Gv +6SHaxwICCAAwDAYIKoZIhvcNAgkFADAdBglghkgBZQMEASoEEIUT3kxmLKusysd+ +g2eLYzwEggTAjVjjUGenOSvsc9jyPzq+bvpkml1OXxbPh/014rge+wpSd8Q937eK +6CCfkhp7gGpcK2/Myt9RzATHRFj3Y0t2HNrLXHhBsuQrhO6Nd4RIMhRLWbZL7eyV +hjrACXDTNOJIMHaMj17qu2bWDhoQK9khtYFKTiGnXJgw/qheaq+XoV/dcDXIC3/m +3wlveYLxRB+907u9Ddjqjhyz+58IWZozxaEjCcX7UIdJLul0RvBhAT0RSBGzA1Zr +kvuIya/rx37vtHu4VDBijZyxlieMAXp7oEsi4vC6rEWMBO+mupf9scTuxiO6UJJp ++kh1aH0zBep5X5pseHfsZmtjF+ExfXQDEDDBKIXJteoyozaT3cwXw+0f3+ba2fGl +4gI+SiZeprhOLRAuh6z1HSshSe3+SHubfVQiaZWrrusQOlE/CbxXF7MC6p7YBuw7 +UIl4shjqERe9mSj4bRtCw7DBqnKbCxQjqgAN2P1ELuiH6f+z8kd//AFBMp0IBtwR +AlmIl0yT8x209Kd8ztpqRpoO87FJNOVfmTKIIZqVQls5jglPoeL6xgNdruTydMr8 +4fTqW+O7V69F7hASe4Zxu6VZYDqb9Qg2DEwbIsgERL9t/7bO6Lhpfsk7J4YLgaqu +Tq+BcP62J73aq9lo4VJlA7NaSOzH3Sqi78JCYq4ZrttGbmOqSAKVxDsXq7sI6sJA +va97f5pxhU+g4o0iu1rkaygGA08Ajs/8AzJ9Oyj65zxNONOfBRDWYvfbia1xKBMl +QGnHuyBFAvOvSFwq2qJ7+yUB7PMkXar/Gx2dQrW7a/2ahqjhO4+ssUKbeOpup4K7 +BIXob8guks3s1i3dl0wap8GtwCgPLduEXSvQ2ORiU/avpYdCAA8iqUaxXalZ/lhe +nfTy8Uz/BBXpunTpHJ4A1ruDrdigfoYiI3vnVB1DglX37XillmysO/gu5gwYECHz +OTZSUevcWw88rVVRbUelIs3FwmywCT+NWXJDtfgm1PCXchlJmQx2zjJMBwez3syn +u+SY84ntrB0hyAWmwaHtGbwe4Z9u1FnZ7j+0Y8vTAD4LeWJls34RkboXhzNlJYn5 +s4zp619MY+l+YPgQubhFEsCr6yzPOXQEdg1pk/liZFO9sh2tFR1teg3bM4JKn0w1 +8qpdUmeY3tTU/+Vk9UUZSqhMk8No59a/8//26KN9AOOUUv7j8yLrjsonUkuvkadX +EnsJHVlOnwe0dt+4ll23Hf5+Ka8KjNYAjdeyMrtS5XVnz0zOC6KLnWori+DbuB4n +jezwLC1cHU5KbVDRCnssEN7di0i1UlFFi3oujvC8DOD0k57+rmwpK26gj61tCiwn +TcIvzIvtSNeFgCjrIVldFt2rd36nvgVK6I6NyK4EAdLdVjqV0gVZ5WVhV9x50ZNi +ADoaidbHoxVTBt3ZkKMXjxJss4YtTDerUS3xD1bHMMtSQKMYhe1u/n1ecwkyGaAv +9s9ldUUwmGU6wbHpIixXTlDeRT/w3DVHLlEjHRnqv1o88wJV4kALZxUCfgLaaiQo +SpBl6v1Q70MXd22N+ywJTPS/mScEMb4NiemlNFSVGpT6EioY0lofHB7YNaB4UZES +mOcTA23IguMFuU/jGYp04cGT+gE4X+7CzA== +-----END ENCRYPTED PRIVATE KEY----- diff --git a/test_data/pinandpem_20251229.png b/test_data/pinandpem_20251229.png new file mode 100644 index 0000000..29d07b2 Binary files 
/dev/null and b/test_data/pinandpem_20251229.png differ diff --git a/test_data/qr_scan.jpg b/test_data/qr_scan.jpg new file mode 100644 index 0000000..80474ce Binary files /dev/null and b/test_data/qr_scan.jpg differ diff --git a/test_data/ref.jpg b/test_data/ref.jpg new file mode 100644 index 0000000..faeaceb Binary files /dev/null and b/test_data/ref.jpg differ diff --git a/test_data/ref2.jpg b/test_data/ref2.jpg new file mode 100644 index 0000000..5e7afb6 Binary files /dev/null and b/test_data/ref2.jpg differ diff --git a/test_data/rpi_20260102.jpg b/test_data/rpi_20260102.jpg new file mode 100644 index 0000000..33560d6 Binary files /dev/null and b/test_data/rpi_20260102.jpg differ diff --git a/test_data/scandal.txt.gz b/test_data/scandal.txt.gz new file mode 100644 index 0000000..bc9d70a Binary files /dev/null and b/test_data/scandal.txt.gz differ diff --git a/test_data/scandal.txt.gz.b64 b/test_data/scandal.txt.gz.b64 new file mode 100644 index 0000000..5b9922b --- /dev/null +++ b/test_data/scandal.txt.gz.b64 @@ -0,0 +1,353 @@ +H4sICKvmUGkCA3NjYW5kYWwudHh0AJV927LjRpLke38Fqh727JqRZ6x3bczaWg9lpUtLp1u3VdVO +WT+CJEhCBwQ4AHgo6o/2O/bH1t0jIjPBU5qxadsdSVUkCCQy4+Lh4fGnP/3pT9V/+L/31Yev3v/4 +9fvvq6cfqy9/+u6bH57e/2ffqd6P8/EyVl8Nfd1XXw+3rvmT/+8/+WL1sd50TTXsq+3Qz00/T//p +N/S/r471eW7G6s//tY//z//ax//Xn+7+V3zku/c/f/zml+op/fHHofpwbMZu2D5X3w3dqZmq6dhU +7VTV3bW+TdWM/7oOp7p/rJ6qY/3SVFPT7YZTdWzqcVcd25Nd6IRlaIcefzxWl36H/1v3t2qY+d99 +fWrwdfwlrtvc/DeabdeeJ/xH3e/sGuexwZXbvp4b/+HjYOvMq0zNb7jIXF3rqeqHGR+oZ/xFtW+6 +mT9m12hOg+6jfm77ah6qbsAt74exehqbvqne77pmfKzed118clrxBuxqQ9/4ndTj3G4vXT12t1V1 +bcamqjfHYcQ1Zl6VD7Idut2K97xtp6baXHATu1M7Ymv4rWzqru63za7CI+0eq+8a3voKyzjXz1ji +eaVnPA3TXJ2bcd9s52ps6mno2/6QF2XYTM34wj861dtji2fQrdp7GTu8AqzH1DT9yu4B66mHHit9 +4tL5dfTuzl3NG8Jbw1vcV1iiutrXHe7/PEwtl0P32Tf8/nQenrX6/K1p2GNzxepMk63cxIte2/mI +6xzaTaOlrKupb7jKH4/NzRfPV4Y3j0fx88LXwmvbIzbjet38tm26jovMv9uN9ZUPzs+8NG1X7Uds +POy0h6niy3tp/EL61a3e5mP15WVOV57HGiu283XFM+Hd8V7marpsj3j+ebzoUew6+G97t8O1r3ZN 
+126xFXX1PS7T3fDdXy/TjAvOzQnvrOau14ach3SFcdhdtvhWtWsn/D5uC4+wxz9xT9dji589tYcj +3+A4XPmx4bKZq8uZmxbbEj+fT1Td4danSzfjub4dcdt6Y1PT82Vh6dseP3HhJ1cVLl9XW/zgMz+F +p8Wr81fvT3TE767PA95IhTXG0VvZBtFx2nAr4lXxri/jxta91s/hkfrD8njpPnBQL/iGlhJrgJ/B +ycJi3Rrtz1EbXruSdyMrkhYKW7A4ePZ3Wki8tY6rXpzXFTfh7rJph0thLf790ky8F22rE25svD26 +VaOl2ulQVF07z2ZD3Lzx4t3tsfrhhgM1jm19aPTp3dju+Wb5E7B8vkO54RqcO7Nk+hZXcjuczl0z +85vnM7bGlKwI/mg4NestXgmsWdoUWIxpnvz9jzQYl3NVj8PFv6THPw7YZyPMAXbbbkqn9FRP6ejR +FvrbxAXx6O10tNevczZd9vt227qVqjfTMG60q07Yu/NsNnpll8K9YGFsVVb68W6ocS+7iqf/xjN0 +4u9NAy443+yY88dlle0aXw7H5tTi3ifsoxX26snOGzcgXGs37A487vzvL2H0xuoDVqWZaarGFp+r +T2ln6bGwFzfD8OyrWXd4bmwyHSC+iWvTPPPB9M9NM1/5hrfDlj+aN0Z92siUmX3d4ZRNekVhynbj +5ZBf175tRhxXbLbxcPP19bXGIj/zF2yfhwXHeWg7PCz+jQu1gqFozp3Wl6cdT7W5mcmcL7tbem3b +sT019rPDdns5t7LDWJrTiWeRFgLnvDV/WDW/4WLDuIMzxKvQmZ3Spcxe1nEQ90PXDTKUw4UHb8DV +tt2l8VXcdvDU/FvsOP2lm5cbdxV/zzYlz8CGT1tv8K2Bb5Fnejg3HdfOn2nQ/qo7dwQDTCQW5m98 +OTOejy9H/3zy+GDCYahe6sMF72e7xXafYwvvBu6Mvy629XQ5YUPQmlY/7fCrNR+PP7utp+SJPo7N +GbdRnS7jzm1DemXls4bjwn/SkdMXHJrdLf78/YwIAU6h2ow621j32WMiXAEHvO4af184jjhD6Zun +Vu4v1s3MB58ONgHHseFT25XChXB34DowlFs81f7Ci4WLGpv20JuLOLX2IziTiBx2j37Gmttge3Xi +0xz6KZaLvuWlnRGhHLFBtBftnp5gD0f+6nSskxUyNw1bYL9a7/jMcSRq/jRimUkBynPfXAvLeTJj +EDZoj13D08m9hWeue4YNbnl/wkHs6d7W69Y842BvEGcVxqfBLeCCP9Tj9riq/vyXv/xlvX7Sx8YG +Z4zrUBjeuvoVVqRHEEFjhrBjlmX77wrntOo9PKh9kz55qLZYj9ibcr3b5n9wUbCt8RAw63B8iMca +Od/L4biwSnBeE67L6KbRevu6ISJZw7I1pw0WFY5iGNJCnxANRKTM6Geisaxns4D4RUZ+Wvg4c/B0 +3Pe2r/RGbPlHOu1tu2MyES/lA+0HL/RhiziUVlNLZZeamvb3Zhexl8wUTkw76gjC84W7qw8wjm7t +BrxYLNiR/7/Jl0Io0w03bkHF55+xPDB9+JtxGE6T+ZnNCBMIuz9j12CfrFIYtqJN7Gk5nmD9ET/C +oZ5541N91eUR0sB2TgiwaXoPjB+44NgfeFd4WL+SLcnUdsfh0swMwvgck8W98Ho5oMbXtxEk8g6r +6QpHjrDdHw6H3oL4cF8wTDyK/bPFXArlcbZmrZE+UNP7brt6OsfZ2TTHVn97emTKRENOh6lzokWT +xzwNgx2LY71heK8zOs8t3mOTA1W4eotEO22ydpSjmWbGL/FMComwAHpq/Sl3O+MGXyBa+jCkcGfr +LQ40t90O/zyZC7nKfHtkKUvIoMQ8+okx/jUOyoAI6sT8bqx9ITfY8+ki05E3SO8xmDE+1jwK2W+k 
+aP7EpTYvgm3LJIpbHt8O88Bt5M8faVyz3yMEf2mU23luib/7QnGjbdPq0NU7JU54C8/+Yn2bM7P8 +pFcLd0M7y4XbWe7iKVGcEbxBfAD558qu+xJv1/wWY93xhIdrR0YOI15tvR2HabIt4h5o2x7q0T0r +Lijzjpfuh/LcMkjXZy0TOiDvODCQDUc2wFRZasRcma99oN/dM/ROJmfPc8wL+BHCDSqja227Jpem +bAO7dKtkYF9Px8IUv/3U7JTXT5cWRuU2XFZvK5l+xL246mP1Nla0+lTPkwVM2Hn4pGeLF/eI2ECT +nWs91LFGVHpm7Kp72TZ+vvHFx7fx6x/4hTdv8Vd1P9Fo7NKNPQEfaPRCsbUuSmFf+PS0yDPTV/M8 +TEceq7/LxMJ7t3v/M3zRlwnp9S1u3lIPbjs3/GH6niK/fKz+iSfbtZbvzNziJ1/w9NwM1XF3MpaH +wRPCemT4mB+NL08+F75A36JZfZf++kkbs5W9xqWYDLZwLt/Z55/MCC9XWkfmAEOXHCD+alT0L8ty +RUZliUuRNKVv1wYiIOY7TbccB8NrWOymaBFH/9COXb5NpDI7REuRAbzFK8TSPK2qt7PiUoZhMHVI +7mzhLFXcNuOMZcVB4k+HdcTdb+SEVzJUvLMOW5KbZY9zRMNzUaxZHwYddF59vDiW8ZSNCDJYBol8 +5Lp75sb7CIxu2tUWQW2RcSvBsgSU1m6HcArvkYGL4x9PBeCB49wfeIJuWB7GeXRM27p/wC2c6gPz +Br5H3nDxqt4rBEWU4uH73+seu86BMQaHyBsBd3QeH9Ktt/tGSMyhfWkMBcMmwy7UXcUma7h507bc +I+gKMxY3IasPEwL7nvbbd3yMy/a5s20ZaSF/ebxsNp5HdAPDChiXF2bJ5sNm2B7lrWlrauWnlpHq +FpEjforXipd/bL6o3p5uhtH5AQlkLFsn+GEEKjSGNyV5zX7mQcajERxhtGW2TGasE9YBCKF9xjUd +9LLrIFYXPMg7wpJa4jS1vyGgMlCsHhEsNF21vRD/+GnzQgCgUyLyavtt68vkV8D2EN5wHPRGdIDi +LDAmBkZyJiYUybcHCrtDM2W0q9MWG5hicNFhNAkkbkeDf06XnYWoOthNz/eMxYjQDP+FVSSwg8to +XylXK06t51p04/iZF2biV1uOVT7Ai8/XC1ASXhQr2/NUI2Ge1xNtJsMgegMkYL0/iV3o+4EZXTV1 +WLNbbG+9vDCWq6rd013hoCIWEBbTPU9m/05+Ciz+m07YF0o2gR8Ou4FufxU+dtMRfqJ/4V/3LULJ +WTsFkdxL5A8p7Bq1N+j6kGEd/NF5lUsHUMajFvtQ7LiU6c3DeS0cdFB84rvu2Dgcinho9oOBF8a0 +d4tMlqdOIfvGF2Z3YTTauj/CEjzRRNM9ICrqsUFgEHDe+CsCNy3lEtzECKhc41PDYKDjF/eNEsR0 +gJ9o1BxmOzbdGaYcnk4orwW0DQMGLWHKKZvfgNUKS0mPjCtvHcZIewpO/BNDCcu3tVlogezlGupJ +0+4mMVy/bLxjsZG+AMjCFbQzLB7a8Bhg+Xfcczp4vr1pPZLltifDDzGBxNq1DPlkU9yh8xmFoHn2 +275kMzITH0+GJIPfiBlOgM/3e5o8uAPYST6Zr4k+vViSDDs+MXRtmxe/v7BlzDZonBWgT+Zcsz// +3wiQ+LAWH0W4sqpku7REFvYxEQn/Oxri4sbYdxPjZB2ZIpbEG6L3lFGgg5KrsT2W4pKPsYkIvsK/ +GLIzGZwBeAX5bvNbzWVfZa+/HxtAoErBhHQa5NScpwTvYdE7JjsI3WWshE4KAWAg7xldXoa/pQvm +P2PQQuS/z3HDp0YZHJ3wERYUK2VmE1bnLkaSVzuxDlSbRxibd4sr8+/e6czRITMuev1S3ij40BMX 
+a7bLrzwtCFeBa1nLydEXpaz7jKQblvrH4bpaBGCGUvPuFN/OXEetId5VI5fiVS/3NIggbLEF46Vb ++fK2xpXWOEcrD4h5U7xsQL+GCAjDcednMa4nX55QLL+r3dr0OkTENrDn+nZLSIbYvp+A+To4RKMI +mXsSx4TAHi412XY51UzKXt0L4Pq3cF7ha5jwKMuoGek0s1m3dov0ABD383rm92XEmvUZHnS8Aw79 +uQQkwNb2OfEkTt08KvpQAOdYIhLsmXWnOYcf+NAvTcSCLaGV4bJLewJ7Sj+vnBBbj6lXRlLoS5tW +EQVRMqtNsCiy2xHXeix2JisTADBwN/g/uk0u0zysBV2taLJq1Bfgb837N/JBw8OW+dTqbbo93jVs +6cJvIuowIEamFAVilm90fSvx4G3M853vUIhCGJkwxGkgoK+YmzZRSbtKgFu7pFV2HO+4wePgyacm +o8PfXEYsvx8Jpe4pjuCuitCIm2Kq940Sdgbj2hgFVqXbDHRY32TMibD5PIxmuO2vEE1H8r2h36oP +cOV8NY/x1gRVJvBXYW4TNowwH16CL/akB27tUCkCy7Y+YIeZlqWO+i/+2raA21SvrcKBAKWlR5dr +eWlRMIsjc6Wn5HuYnkt7ZTmPxQL8a+HjNzrPMmX+xF/1tC8yCN1JG9amgQMpk0E3XUC05poGK/If +OJX63LLEh1vVbRu+MoxA9gISSBVphjW8Auvoqv9tiBX3/CDuQRsDEBprRwhR9J9M+/16MATZ59Ij +2P7R36SP6otWPp3joFl6AIQlP3Ug20qWPAZ+V0Q62CkGcNNjKYBRqRq4SMI9+QdmQXQkbBsZXOxv +CJ+GzymPbKqQXUe7McP+WB09seQumJbPshsWryxgQLzWGpmRigP4VIsaMENT5oWBYT9MEU/AkQHA +YFnTbnOby+heKt0YVmFMB+EhrL3ysDFO3z7n14x4/AKolDF7WUiVuZ/b/T5vwZ/9k+v17D7MCt9M +hpGIhJW0fP0xkjneEJbmm/7A4kMU6HnXtGJdR+ABEVo7FwieApsiPCUmgvDHbqpmURpJBkLwt9+8 +jch+OvGYvj3gLmrcKv+hWF0f9Af49v7TM/5geGk8KJIrALx8GZMB041mwKo4W6c6uAc1theCNuJg +/ujltiC3JBVmCOavdNZYU/+C1p/HGjWl4YBiPQqUSrHykv/IxfNl4tUevn3IqLw9xMP8UDFQRZTD +usPDt3DhXTfBGu3n1UMZa/kL+7YZuVf14a+0t26PD/nMw86itsSUFNyh0VgLeCHPTQC7+hq+8fDz +6sE4RkSCVoub+BmWA8/xwIgmVZIevjngT75v3BBdhJvSTjN/N6ITzqCxCr6tf2c0y7UgvgCk59nC +15qJxMuNVbGru/SXAcCSn3UlVMhhXrQDvzl0jKj4D+R6v6/X5loVUfGPUV33YxDmB9e39VkjUSVH +5xCADwpGfdSTV9rVe5hp/ehXOD3Tpg6H8vCLzrbq/uQXNAH/E+BOm2HHdJr/8YkgAu1e1EH24QgI +i/Z4tHGwtZqmtfE0ogqr7blGWa6bsLLf1US3lNpvBlYUjlEx/Pye/S6xnZC5E72xn+eN0qeTAFAd +iNsjbb5EGDa2l9P5yLx+y9gnr3rKQBbb3wOxmrD6Tg4zlvCtQNk2o64/G1OJ3If3d6wDs6nZ6k/p +NT2CGKdn418kGCsZNcU3iB1sFwd4wkAa+269fvi87y/wuf8gAHgAqw8pSb898jaxx3+5IHus+5xM +F9dxj6GlTwBjPox8SO7cARab52lu+MqN7JNCsM2kbw4EN43JMK0sQ9gLdaZzaYEjvCj0NQt9rftU +8Y9LFb/J28IGkJtLXAm3ztxfuLRiLK9aW1CSODKM3qIsh43Z2IsLoAN+iwG+QAukyzwxHkj0K4Os 
+gGJF5sIlpg2QYcxp2vtJ21FEr8zY0UtESHcmraNXtADuG7ziA/457O1swKBmtBroS9NNiyLdFiSi +lXMTbIVqv+j5wuLSnApNj27Xky2dEEvnffseC8aSTLAqeEuLjOGfQq+1IjRwFx40Wb6gRAQPSoEI +AjykmbxszyKB52EblYOxjObV+It87E1TX8jJwMpHspvd9x51xlt1uMCmsmQHi7y1XH5sRFBj4doT +LZWEcpml3ReIsDtxYTFYxOaxiB1VnilAcpJemDwc4KzdGS59WaXC49cDrdgjKsgI8g0Z8/D/0TZL +R3w1kibYsy+HifGTG1gzD63CIWwpixGLBBJ3akxMHUQCY8gQZ1XpLerOscXbLw3ygKXoWLxfr/P9 +imR4SnXVJ6UkPFD2BSJlHpJYtnIUBafY/H67H9o5oBdbqlzGs4WwqwgXuxj4g5tmVSkYUfk8ACMl +KU/HjG6QYMDqniFj1JZcV0VBZJy8IljYSPgTFJ+RPRs0TaLPrjU2CJZdiKYc1TBEQTAfQS8oygVo +S17wrkbGrHyKuT6n4/GVVUPeLKPD9Dy0RI3e204WaesFN+VrufakOpH4fgTg90z/8S/htVkfV/Gx +sfw4hUi5cN61p41woJrvCN6hcdaAkm8/2uR5cCU9SuS/i5hl66vtRDRWwazIBNhbiUXA5Dn4nk7u +RUyAtcA74sZQ1KIQiXdB6uUzn8d4cXDuJOZ4QTcl3hM4Pi/u7OGKnHxh9CSC9+sNQUla+C24cSvn +zFl40ZwLvw1nXT/Hc8iEKhkwHq5iJj6cbD3+tlNelHPtfVeTMzjgdfM9AUt9to0PN8s/cGPZNwDX +A2kHdrd1vNhvAa641b3qLbBIjHvlpWlZkDnfYGm/RLEg1huhuNU6mb+QG3M5ZyoeoJGXYJD5Y6ms +M2obxx0BhZ/sjvReFTRWe6TkcUtGk7RoA8jBaMi4TGs94v+1W2BF2CwZThDj4AIMYTLHmm/J2OCG +UzOuFS9iSw6nEnb8eL3DC7MblH/uE59jwXk06nRT7AY8On2yGAu+AehwV75GXEBZGJzpXMzj7m+e +N7DzXCevfby0v9M40I+v7hhiZ5JehNcGjbgo8ziBxLice09cjomCQcohspd2shpgq0InywR2tN0S +/i1Q3k4k33gc1eLqrR7cls+uIsut4Gq/yE1hPMVkHFNdx3FAFlO5FiDuR63Gyo48bcLIyFD3o2Xv +kOYKl1cwclGUeL7oKEYq2vQHi9MBp8Kz1NvE4n37Ty99wUEx/Hxn8LwyQb8vnULc7XRElqKav/HP +9TBRsDAgIMIyRKOC1+hdmRgL+TMH6xVuZoPKitz0MEYNHkSANN7ZgNu+TKKCIgxjtUjFcX/v4pov +scefRxwzYT1kcdfzq5w+YChS7kqaHVIYkih9G38NhxGxhIe24JTWPFoiKt4hx6o5Wc2ceEc9Jdf5 +6cjsgl7gybnLPb1jvvMMZP3TgWT/C17PQ8WvFN3/G17tP5Bkk9adY91e8La6RzwrJWKiVDYQeNx8 +gk9X5p3t0VeWiPgG9VtLxVGG4siHWl+Dk8cPAjJjeyygViNJkN7WBO0jY5m4vz03WUFKOQn/UYRu +SO7pdOmtI0CXT1gYQOq+KdCUkQRc8Ues4uNMvG0tyCjj30gO3Hn6keBfwYYUvQh8XV5EspyGFQhR +/Xv85jIK9vczCexnxhLWpVxgNspYqGl79FZ8S5W/Bdhub3Y6jjzLVk2Vlc3u7NErPamuCvuQb8u3 +61s88KY1M8ofVBnF+ekwCo3Va7fOiUVB48ZK5BepOqoUpLCVxjRW/qr3Kyx/IzvTD4uX+n4WRMh0 +uw20LOrHxm0R/MmHNwxXR2lvLQ1Xg/1Z0aw9ViL+3e/h9XH1ophtkHstdyMeXxHCewy9+mx89pah 
+9tPj4pDpadAIw+KTXp58yduc3SiRo8VFzTyQbXsPvh8vh4v1FE2DpaHG8NSZvZKhbJS2+uB9Alg6 +0CF7lQqU7Wc+p58q/DaL2kKS6KpTCxLIjZ02dBCPtUzigdCOJobdzfqNnPL3G7wLOQzGDSwWS6Hv +1YsN7Z15RCh5624L+GPbItQ8WR15iiKFYSrGvd46YcoImmzWqs0LHYumHdY/mDbz6VkBVVzF6rHe +/4FtOokAEHT9CWWPXd1le4QSjdNTGPXYS1/UahY879a4JrZxxCkVHubwbSeG1arc40bZSZ1x9ozf +sfLD6/w0ngzg8miJMQGSBTzys/ov8BE3ya9Wu0N2lZac7xWLDmr9SYFndIdMSDu7ot4d+VYOziLn +8paDYUp84lve7j/lUoxjk+7JVU2OAAmnbzyrR8Y3GT6JLDtiOATJFoU4fTiBv71TfjxLCxgYO+Hc +qi/DyFMpi0g+gcxr8hJym5hYXXIXbAuBl9n6aWn79M5K0IJZo1AjFj7dUhrxIJM48QaNwK5eEX3k +AEwPh3obOQT/NnO1vHb1Q/0r4qhbRCeg8jQEOvU4k4oYll/XMjIFwXOV+YjHyNQdP7BWPHr5F65z +SdyMWst0Fhs444/p7Z7VQ0j4UmW3a5+517EjiLKRm4o+BqG0B2WuQ295bgorGSD6K8RDsaHO/HlY +stlZsQaLpVtRLKsc/TKSIdIWJOeDaF3uD+twgiIROUjEdOELrkptsfI/BGd8Ot5imZ4EDJzOsxdw +t030uxSVLhQrbsEcevfqxNjv53dnvQqz05KjuPeU02I7gRGH8g8szOI+/wQWfNOdqm+HeZ5gRmCe +PoB+OJ3YFvIy+JInE4BGQa7N1xcDo79iO0O3/hugpQyBFwbiH8blWvRzvYJvVOTNoVvY5c84IhIg +jImmnTEkR6nmQts+UzIOSnBSksz+RDqSWR27DYn6qK77z38+fEzQJ8J6VVbsmKu5yLz45uLdXxb9 +pqqGuUeU2a1ZMcUStWDi1Py5oDnxh+gKCd2wuDqEn3TrEIgaiNNagnB7loeqlyK1UW8Nt9kO8Apx +Ie1w5Ahsl4qqzhnGcJii25xkAg+j7vnI59EcNT+ydJykrvodJcMkNy6GdelPrXhcW4dHszeu5dT8 +tfrAu92LWSaWWM3gdndx1NqSOE+3tBH4Ej7hg7UIN6pN8GHqLXB+RLbB+1IYzP4aRR++mrsX0YWt +F6ns2k71PsURbt3Nndajhy55Vf5hfH9aX+tKP/sW4KH9LWGDr85uvEVP+WDQF65MbCyRmmwlIrvf +DWd1AlQTiQPqnNwNLAQnkJwYPDmtQM7Px8ksi1qe2OrrhXF6a5KrUjzK/bhr2XVHGklsFK0Cfuqy ++ZVt42r99XivbEzLuzYCtz2brKJMjFAW5E03zE8FUk1yspB/rhvabnXHTBL6Hdp0jkYg8v7LIi53 +9G9DJhG6vTdt0VkvSGoGpW/NRKpW3TzctRdvggHu5drzMVt15vlrJMzYggxf055FsZNBLShZb+4z +6e8upzewZ6Ng4R9xR3/H+gQib9sYDWj/+hfPhL9SNbYDB2h95De/r9l21bHkx//8GbASW7X7vq6e +2PqNHsjqJ3qsZDttu6/X2CRvql/w3kfHDriH6Nm2XIADymLHGtdv1dOPuzFy7nr97yS8eThhrLfk +P1bWUFUYYNHTTsJ/auJ8qZmsFcAdW872xMoLfKJG8SCnCFVgjkKCaGmZ1FMnBhNLZPFw3pvg3aX+ +FWWrBX8hCoz4EeORoH5VAP6fDC8YDfxg8p46sN8VVYwil8Z/4bcOZNWyZjZxoxNjafcWCv/R1zwl +DYK9FaEW0ZTy/bxacXgsAkjcUnFcjmqXtwemURbOh+qgkaBHR4LcTE/WF2JV8nRccSkxUtnsRSSf 
+VQfS79+9XZLSvGjpZJliZYcBWf8Z//cNzQ/4VUWK+QPzmvaFzipz84o+HKBcTb/4uPrNENQUuYDR +YXbLq6J7zA5iEQ+I9/JYkEG9IZApvR+tz33vp+NK3SZvEj9TCRkA/OVOV17mHCwaChoGYmBhsDLq +c5/IwMeQQgAb1Szz6fC2npPtcmtNStnur6US8FcCs38eW0t4GymgXGvfMV5I24gt1Y4zIcqCvQq0 +xgERFrQZ4ZSL+8npb7NB1yw+YD+Vn1isia50rm8qusVlN3dv4sPRSY2KM1nMu9sCK3uITPelN/dI +dypKQXTW8LXqx0RTD/K0MYIn3AN7BCcSnOQcPP9jGooGDYH4VJDA4872gaoDdESJBfXgTibJ8UKN +j138xmSk/aKXA7WIjswFq6VqS0RGZ2oYCwNBsqfBBfk4vXdkqbsJKMutNKEBgbWzDjjtRZleulAS +LG533NwCZSuj4tp2sO+hgIZPzMFyR7KnF/gTXAwkeDHj0mX+jfxC3+1W278aA8oVcvCds2OJYJik +ols+X4X9wNJc+JKaZex+XDamscC9kUqA8IyTFU+KnTJEeKoa52N5/a/QP9UC9IPDmkXJQOpRfah/ +a9Y/KETiDoNZZ1KyE9SZkdd/pBgIf/CByAmA/heSgwK1dEp2o06hLVE7JuUoHk0h/2Pt8Y+UK3Jj +wNjLDnLiyVGGwrJJA39YtwfpYCcWe9Jaqa3L4ZbqZmymiGr2GGwiTwiMzY98Oy+GQLscmJZWXG24 +RsecGmPZnO6Not70FMdVPROPzkovgqn7D/wz9w1YFzfLMzQ/fn7oVO3pPYYAu6vTchnWFTWgEgw3 +esPe1uzK4lMmaLIwn95Z+obXcUwWgCbxF+vhUuk4Q+fYV2zYk2v0JHcQvJ8J9/3gaYMWy0LWKSSL +9JgHxGKL0xs5PQGiJq/S0TuJA+YFx/Zuz/Pzy9d3PRbO90un+sfFdNzzS9AN6W220QhX7YxYUXTF +IRymkARiFRFTUT0GV/8mRimM2Un2TlI3Dlb3qERUP2Dv1belmxS/OQjSpOXr1yY+1h0e6ijKrb72 +wn/rzLQLBZf5QjUtjx91RWseEMweFG8az8zqlmjWUFYhBOfWCUp/XHrsq9RQlkTFWw5sl1JPfom8 +9l9FP6k3mXJ1KH+j+pJl5t8jwiU1x8i285L0GdH7ogT1KhKc1PYCKZizEWFUgdeTegNJEka4yuwe +xkXDr8p2OHyMa+NihIPVJP8b1XHuk3E3MeQAvftMNEKuGwNJMiseP+O1yv2rPsxXVUqxWQp4mSFm +KxTcukGI+7KVPukt1dET9bnw7L3RIlOxhC0kFIh6V6KC/zDuNNLpIImSkj+w99Q7OTe1A4bOi1YL +E4kRCSWnXy+OkXWGLINgA8K47Z1xFa3m2FIHVmyNGaE2W/9A2yv0naI1g36koD6GsB2gv82mU5Iu +luE5ZLdyw8vRShQgfug5peVAjkRq+TAMeVG/+QEBE5eBUc3DlCunqVqdId2p+hLhArCD76GMhKPy +oRnPjVi51Xs80IV/hPP19+FINvonlHHvQxdff/E0FfTg5FN0xbKQ0MRaVAY97/JUJL19NSCgLLew +lgIc7jczy8nRGnNbpqWqEFnN1YS8Gj8e06Aiy0vKaE/e0ATdCcujCNbIFZaX96q2rd1uJ85scffO +M1y0vyTqHuHmpkCkJ1d1cSQ9LK8ntHfNVdYWtwYiJlU2it31fATZIG5G7/5JTi7auKxvY5tq2SFe +4KVnkUW9XMx1/QNBxqzI+P7+B88prX4KbZB7La1UY3ac2SUEmjmFogaekVQFKY2bIz8hhWM6ivyu +ermNCHCJTjU87qiaBddk2QoVGR9WrTdSILadk/HAsHOGW9a0KIWKIpqV4kLQhwGDt45TntKHxTFJ 
+NQErym2ayMbqjiIAt1DhWja5iU3RI7xnoL0fxmhKTcfZ2EajigbGUu0LowoC+8k7NQzk3jdqK5sK +SpSjVln5Jz0ZXaxUv6ZldTTumInhqC1uimK5KhXNa/ssfhVhGWqnHX9lsoJK2CyrHUFuhKKAc9JS +CCKPmQrpxhHps3bnWvyg1Bm6+NHw/i+0Jwf7KZc8SswUbhgdcuJRmbrIo24UA3sBQcJhOxW5KSN0 +dYKiNl+84hPaO88SsrwryLFxqGy0MKo7UzAkCLUFg9G1LNE1o9wlHJaiCiGBFOGIECE+NjYHqU2A +UnVs2Su+m3JF+yhFxAyzRSQcC0Q+CVINaRCGohqi7qEsSFwVfbm0JNYUtDd9wTuiS2ZDJDLI+9Di +BYxpvmVCmkFOXPYtO8ZV6RABbNmnz7pJUkK0Lc6CbGPObm/01xCcEf20srJlcLqgKNSjTLVmGJgy +twMLfHg1BOtVHLOG1R3aXnF3z9YxbRGpkSRqPnqU6hJnB5nwRV4/9DhWkhrQcXVqlbqSq/d5/bR8 +12XWZnuQ7vZU/85lMaWqsEheF8cPonIsWM740x7cmon1hdcJjZKcLZfzuEoQ3kGpY+gN0QfvklWC +vopJv6WngC6HrRjTBOPegXc0HswwqZSCFAvRXrx/gOm7NTvxHBIaGyn71CYzIqeH6Oex+rko5ZjM +R8iWhigfqw7TqnIXOAuwZ9Iv99SYHKPYrokcmPSGDBhJ+whQq0vVyYH7DT8uW8FxSIFqvMnV1ZRE +SuJoe1T5u7i89QlYY38K2IS7IWk62L6GPRO8zC7809mLlh0BgWmVxJREjVp0j/GALeKZh0B3SPjZ +X/r+5sidzIZqi5ZwytOg8Ea9J5iGJyfNZAZLeDnRr64WizfhNVRwLGHEhcaMHCMopgJzXgv/XOt5 +ewzYQdphU8HAAZoLldEpu2UlWz+QaF9Wx141zn/h4CTLI4gLLS+1Bjgs/WW6AADmfmdUFDlG8EfM +8fKvlxFBFmf6XDRgbKWhkPHLLyr7odpMSUiY0TgH0ie+35UMhxGghIcfN/ImjjfnaDcgN8AlEF44 +KSwYVNURFvFlmRG5vGxi6ek5leaZBmLq/pfQkYkP8DUNZiPx1TJUX7SxscsCyoSUbWGBKMgMAMyi +Mzn2Lt/A5tJ2cyi96NgVnISiKZPEupWChsla0B4h6Q19n8rWdiiaBb7PTZdebV/bkt5LpqysLdar +fslCK6CyTph4JJPc8V/Zd0N0TVjB50zNTXo3Qp3eauoXoOzYTJrMQneCsUcrugoPF90LX49E9HKk +EF0vY+rnM0XpO4fojRTxe9vgsoyUFYkam7PRF+HLlpDr2tHpp/AzpvvTzq6z6k3K7WxOkuZOor90 +w2FQWzOVL21zzZJyfKPUeJJY9y337yjW8ij0sWzlYZsFSwevcxRvS2Rjkzk75t9bqRvkjVqQRbDd +r5N1UyKkD+Y8vPbksbe1hl/V9eWhrHZnouFKY1s8ZjZbsaBvXAxck5pSib9hNSvr/lq5W7L+vMT1 +AVVRIlvcumeTQqqtm1KRS+1SVfwX29579lOKZXmsD0H02bBL0AOQyZiZRXnaVsXevMtcGo4tM2gi +0VkWsL6lrZXuAI/zO1WRrFgHnlaXNPl6WjFcbTwyJ4zHck5xyBT6ZxlvzkWS4dtBhyQK5W0zlUqd +LD2p4GFubVKb4nAPR1qNfIEq008t4AOig4FReoUh9FxNFJ2R37TshWKMbyhwW+TRO5Iw2kZZu+Ry +TbEfRSVoMzo6Q8YKd9escNY4sv71DFusuRNXps8qmvcHtZezpwZut6EOpzpRlLYa4yE5Nyhu83N8 +jfVsEZHRNHf1LfYaH1PfNcDHGgilDd9SRTI6wEw28jDky9mLVmzHLo5tc55zDUp3BAZflPqSygJF 
+fzOTSXpyhhSgdtlZIH5S2wk7hOrRWiJ2jI0K2a4ddRhNjI0xBaGFqWiwEg+jzg8peR3TP41Le/fX +DzD13w47OL0bGr3HmYmSH+cnaWh+ZIyCb31onF6zo85f7UJmdcgAbkT8Vve5yEs1MdyPJqK2s9fQ +ixkpZb04KhYX5+aHVy+dNy8FVOGgG4swT6TSmSAV8QRtd9v6+FTKcG8RjM+KIKnadJA4vMn+ldzC +nkSN0hXfccqStq31SgpwYamys7YBsCTQzHkotSSwwMtV1SFvXqS+K6nmBIfPIdwfQIdQnceky2fW +uKuvN+MncaM6lMBBFvA8rp0RbbVQoTODdseeiXDlGk5vcGaP5X5wv6bvqr05vas+1VYizLn/KnNx +o30i5A84h4CIpHEeGldqDVuyU6Ezj60g1DGh6WH0bLdADD2FRZJ+NrhnH3r9ahbyTKlLOS3RMSL7 +P9nqwVZfPDxT0dWwSvJzLY7G+dSpXUJswXs39WSK4s53KCprC7ATC5F7D2WCveLtDw9TyMikTDHO +kGfKdab+ye87LWaOIHLXRGEXwvHdztF2x5gYZey5Xx2q3wyeYwQmFJjarkF+2blkpcNCRTVCwrap +Dp3oX633e99Kaus8LEmRTYZVHpdVsKwTr7THop3V5xVai64zG2NyVxUt5a2lx0NDCDtIa8M6y0tq +j+ANLl+XK+OmlhRnGlPyMp+pFBMGqOF2tlIZ0WxvjUVnIcdVOHHaZkWEmCOSDnNQw6Ms6m6d295K +3PzJu6Y4BVbGsAfDmNAh9+OcmtYWdlU2q/V9shmtk4d9gv7TrfXdOjE8IS65+VOGu/XUKAkHyF4M +gradL46HT+/Hl8m3sKdmZoEt3iqEkSJu2jLLRGAG21bKRdHu+3U8I8jK+Tm1WIXMdmGaI+KCxTbZ +sS2xi5Cohb5yoBToDaB+p9Xx43a4xz1Kc3v6sxWf2JecsJJV5SCU6YqLRIz+G+v5lDs14ObRpAxS +6ft8Nop6SFbXG+EhlB5Q/UeFJOXdmVZuwElRqzLevkFAjUSAFc88fE2P6dC/yIlIBKHVItUE7RN8 +xgZ5BN74rRpN/xsiDgBrtwe9uV8acYYDxU+Qid8ycr9xq8ZIVIQ8HqD+W5qJ8A0Ok1jjvyBxZDCj +MNfkCMJKmCAjv0D5/eiHNBTnzUOKQK+1i6pem7B4ZgFM0FUJufWMLQ20IuPdkHXyC7admA+yDZo7 +0VimsjXias+z5eaNRYn6srpL23QQQrKdzdcWqXF7r2E05yHhlVKVb5ui5oh3tej3jSAZkVHq73ZR +Z28PBx3vOatFWAIvxd1olHziydoR0vEthIdyyHCy9z6XX7ZyM8+5JVVq2Y1L2ZN4O2AdJzJ4Lu7/ +oxU4GB7XNNgpLKIzKhx9ENBq6gcyIlYm2bXy9sluPHy831m+qVaqOWITTxnEe4i0TdQSUtgQRzmu +Y9tLGfhihyWI8GERb/HuMwyniFpIbBbFeCx3XHI5qeq33HbIdJUEtLODcXd/D8iMzH8X7Q8Kim20 +8Fd0VNqNMQqiLBl+1JiY1opPsgI2nyAEf2zkEdORDXLtveSOzJX/Cvme4EdreyQatkV0j9VDatT7 +7It4SDrcn13/+6XPONvd+kdAY3++LgFnE4ujiFNkIHHwSOCIME7io1EivUYQW7iKx4IrutVCyPEL +EHpMqp4Wlj8ptSs+EOyldOp9CMu1CanQWEDG0B/NhqcqWLxKD6vaMRl+UzswwMIudg92m/u1HIX8 +N4kNcuxHW6hBW6usOozkahKsv9U7C8Ay0AHnXtl9T0a1aLJAZxF0JEEc71QHbZgzdDSmBwOI7Egf +JQp+ioAkcBZCcJfO5iHFo58WY9Zk6Fg8VjToNbia4OacgI3FYqCYWBvO68iUG2vVbiE30Lmfy8P8 
+WoITKZYyXULwas6xSHVaog8XFPCtj1GdzNHWZ32Ndp91Iaege5EhS+LcNmqDS3WXsenkwgzYrDyZ +81DmmfKRmwc4yB37XksLCB/My5nHlq3DoQQ350E2G6grTO8b+7/Jij188sSsf/fwCpehzBslv/nu +tvpXWXh7Qj9zMlStnOmD+LvitWczadZP7g1j7w6HRfiilfEIM+pUyrxN28e+G0MRtHLeeXS6nDad +QaxoPeunReVaUkvnZkzTapq6VB+HWqJVJYy7FX2kUcrW73sA5wI7RIGlP8AOr2m+rwKYmkk13/KA +psWQN0zP6M0w4LHvcI+YqIjwviMCHtmaXpmN11DZkfs+B1OFnlVKNxK43D9b602g5ryMbftkZDgt +qs9yD4X+Sxo3ZWeWTEoe16F3nQUdsnSbec5jgaMHuGxzF6skG+GmcvEe7f10GDIQ+WhcNx7H9ChV +5nfq4lyLhH1qsty4nKsVOmZZmOkVvpzaZyfrTxMEqzK0E4cFCNNk9XaSPV5JS+GnMBOwqQfqxWrj +hVpk6Ah6zX+L6Wz6Tcysm+9F8PnwiMduhfqLzKy9KHB2d40VlSyYt9RDUHAX4ZIVYPxAmZ+XN0Mj +hDvh2lShBGN+jMsapYJBaxCZ3BFHmEy9V5tJV4/ObNOoKaQWaxYo9WZtPOGiOBGCHa8laJ1Zfumj +CmDoBu9wv6fUVJrVgTbWEjR6JdL9lHaR8LBpQVnvVDWjtdV2iGxHUiaZLsbM1OdVSrakzzpWBG6w +xy6BwaFrDQpFbFpVV3JRQMFLccJA7qc+GXNj8gUznPr9XDhZm7S1qEiuLE2Z+MOm/uyBmiSO6lwn +M1TH76voxJFoy7WPloWHTA0Vn/KSgH5KNiZYmnLTrJdafOwbgW/AUTgVSV3EzMTB+sHH18g5W9jD +sZK8PPGcho3uQYoYm20xifZJWRgn7dmReW68b5XRCgstTAwKjt6nEGh+V9D41eSlMlmzD8AlSjL4 +s/Fe6p7pXVsMf3pcyhfLIjB0RzftrQBb99hgwy7um8JyQvwcR2HrbZNCM9WZfViWscW+dB0UIuGh +1BYvJOnARWi6HqI/vIS1Ehd3Z5NG7npoPAYVTkUpr+d4RkC27xbKeWV9p2wvGXOUoWEyxqDHm4Cp +eHUFLxaITf6K1G1/XGnEmk+5fXPPU8YKimBJG3TXk5Q5709+IDXhz7TLl00f18SEcPBxOpZ96/il +H6jc8pElpNHaa0bzH74KM/nOTg04uaC2pQNxjkTepJz+2yDbeuDldSkwc0Ge65L/9KER+9Tlnt5s +4iQ6Ek3kjm3frijDhq+LNAXd+cI91i6RlbS1Q3pbei7tKar1qh2woNvqU1drckU8TnCM2UFqpirH +2yQV2donbGSqhYI40H1rxnijj2uJIpW5ntjvZlCicIUqQP6xO/zaotyGHMzxM1XBpV1Xvwyvg7cR +hG7T7blnF7qx2KXdQOl4cKrGJMvu5IeogEWdO8ch+qFWqnLeqWPxw1yMtd1rppGa2U0zpXYmfwal +75oxzDb0zYX9tosOJmtvcXUimPcXb/oOuf6oU9hokpMrNIcwgLiAsBqa1UXZWiY9vw7mgFsPxqSt +00fAa9LBKDBgLE4xrHfIqCrbvXEe8doXCbTYL5Zb3COlUSgQV0G/Z+yH6F1x1Y7WDq71Lxk9b/au +b36+vEph0Uq9z+KCBmRyU9qxiB4TVotc6WNhJBbXeR8Y/pMJzHFhiLav1xMacBJx2sZVL0h6zoLy +GouPnbHHGKkLbZMZNCvXiUjCW1o7Pe1k8OnWAiNy0Hxr8skMLXIs8VT4tm96dlYvejbnUkI1zS5G +nZOMkmSgVoxgXK+A/BcJKq/hQNKURxLI7+Hg1CMoDeKYiglBbNSQHjgNCVET3js/ugqi8zxnhTpo 
+/GsCjg2mcNmWsKl8+euYMuNtNdCzfE4NbqrX9lF4oLSxzLcAoNqVW8ZyBa0IVxLdTdmIcOUt9Tn2 +lxhfZEyI3IdnzUeswfp7XopvReScIhXJEjY6ZTY+r2IxO0Ao2iTOokgCti8JXtk1kdvIsby2EabA +HHZCOSECRysH52LFKm9/0tcs1CkwBU0C6dKeTHq8BsatEuL+2e1YkPC8ymOlt7kY5ZiKJYYcvm6G +v+tFCsG1xrdx8uKnRf+6mewUKDym4ZdBB1y5WFkQt9pg8j9FzN3YjFVvvGK2Pg5dU1QbkRLcyiFz +cLWpEBaU0qCxFnQMgwtssF/Yw7bwn0uyX89JHCYCJL4ww4A14zFcBk373OA8qxpf6BnkY54Vampz +pqxJaSq7JcAjFPqjfaZugWmDClxum1UmwjpzUNnGhObRZgFTGA9YR8CKG4augLPBZmnfQnCXLRP0 +W4xXt0n3JGYQL0fRRagbvec2T7IE3bK2DuenObO2JNBtY5n1okIW0CcUWp0DAB99uaTjfwu51FVJ +oyfxw545d7xmsPCF5kmmx4e44xJHEwL1Po/Ur3A5RVuIZClME5oKG5ylbsF4zPKdthon5B+hhuYl +pCpCMsunLCt98CKPRtHVpIBwf6gT4o6WnofsqGBK6eGrdz8qpVr2uTjwoUwillXq64XJqbLzHtO1 +LAk2Z/tiuzWTWbzrRt1XBa7kTSWX6XlVAEKns4PKVqZoTAxVeUeSFWMH4SuFrBKEXlbDo+MlDuqW +xJHcLa2uDc6LD9KOV3tTrST2pMHLVDqTlk07l9Qd+B6xSG3PPajzgHPGSKbDizlbE0bwhfFRh3xy +XTwp5YnIFIIT3vYbQEViethsC2+6SfNAzEYWoyBEEVvy7xKLJBMAsIrgUc/NrkTd69SVYEiiSjGa +QcdDI9yrl3eOo51AMP2yGe9VQnVgq4E7TWskLSZNE6VHdZwZb/FwYcDH66pC36gUDemHgOODHNxz +o605bHWVehVpcCR0FLdnsiOLiyVhu9BhTrtHd2OTh93UEpUBWnGcFmK5HDL3Num/JfWnesqbUu3s +4szeF0YKLYeYARvyLEEbt8GhexIcvXfYtmSmCMU2kT67N9zD4HO45o4QGTtQlIl9NFp40m28a7fe +JDtVwaKOFh3zLNiZ0ww2BDSfY8iR1Jf05qs8L1Jft9OVgptbkwrCUhxQ6vBj6gxyglI7rdefMkRa +24wTlrrbfveHkgyfbNxrSMQto1U57UsfcEk8vKlZK9s1uNX2YTMu+OfetonX8bu0Iwcf18OzBnN8 +C9E6AkV+ndrKxQ+ux/6Y7IKI7xkGtiZfyUCd5bVFoDUmh9TRpMxRql4SPTUxjyGriaQxquO8zEoL +hRFmxPUtB6kxHDbGUE+q1TXmOLdCjl8vy/1qL7Pl79ST3NOJkAvHam6i6iW9HTUsalJr0XRl+kWM +RZMGfpcDcWJfvYX0eI5B8l2jd9CpAGLyRu1LGhfZ3Jb9oruhnP1un76TITR2nqsOvIQSxS1xVck1 +f6dX6MUudeEGwCljBQVmUFHPmUWQBcFIvIwqnqTSDYbkOp0HrIBmmSat29R1mZAiZfG1T/ByK5qk +9gShf6kG0IkAiY9LLRQbdjGeRCsqXCxR7Li+Hl9S+GChemMFLzsqoNNrKsTobVC6FUv0Aw+yL7X9 +He67QMva2QjVJnjjo6wPCzWen+Fdrm8cy/X54UkxwFvKXumuWHKkUdj46zLBSTI9y+99eg33+GcP +hg6lYbf3Ki9JK8RqLX8gCkToydCA2N3HSKZG1gkbK++xwzoAtARnbd0TmHW00+izQzQjecp67mLI +/sGAGYDQKJRlghm6BxU++eSI+BUv7y4HNCMWf0k9sfVdvMYIY7yjFEVbRM0/83Jqzgyt73F/1+Xz 
+XrY3cXxWpcQClAz2Sa2vHANgieDORkzACBMYEtx29rGs+q1E1jvriiSUKTYJgJsTCLydgHTkbmM0 +ARYNNklMRTdh0bZp5znhMEKGPLxMOonRW00dHLjtcdso4kYSwoP8rIrHKg9eSLZutP5SFypfBD/2 +05Ih0Pm2n8zLVIj0JlW5GN1zF2WtEtXRMiYc5iFLuRa0EP0Sn2FDNMioqRg65aH5ovabKrdpiZac +wEBp015bpQot5wL4mMvYSCJOEA3otMzl6D5qone+HZyyoXtiXRKXNdKmSDr2CPdcFSgZzlOMAdw+ +T49J5bpeNpJyrKC205kiftv8hF+E1Z1doWUy9V/ripJd9PJlLVRD4aRTNfzappa7uts7WOVhl+oe +7LnTbOdrX06imv0/jFvn0XiKkLU/5nByptrAeep9k+tdOa/yzbIYn7OouJeQlUkYp0DaEKx8IAQG +5dEa3HUXDpJOFWdHAZUTCxle2RL7WFwflxCLvICAxC5Pi9f2vyrVsvEGJZPBGpZbpTwxZzbVH1dG +PnZiUfBtOHT4i+Sn0QE4lENWUsAT9DPYik0SwL7MFpuU47eyWQ0+YqbS3g0ZKOCrQqhEYS3tViZq +q76CG+dwu+mVuIl3xVC0963RalLmo9kcOVX5ceCAP9cMeJjEazCxgBNlPJ1r7Rbv0R1q82CY9iEF +QE6FCXliOUmb1Y52LE1WLeZYS5kFRk9D5xqiylFF99DYFPRDZts00OpMBHkIvoYockZwJurY7i2A +2Bmtp+l9eMuSl4VjGCyZOrBm0uo4xIFnBefpyFr2w6QypSHZC0FBjbrhb7Axul0KnPxAMarGpcuM +z0020ngqKsIXYeb0dukz8eqL4sWiERe5EhWkGDME92FfP1ZO6ISBVcWlSUXMDyaCblQA7JjemNw0 +rxu6SPvJRenLEvN255X3YniW6b9h70QTvdX+7DzF5O1QPtoiwxI/Sdb9lPo2Gf7dFmPfPiYYJxFd +unbO0AfZUrs7HRV9CqSsa5+VeUsWu9ttM7+d5igNAYNcSIR7ShzpFDgGZdXXZ2qQynnNgP2CF58B +H2OOjQ2cJ1c6Q2GO7xPKFQ/LSKh5wPmTN6Fhy8/Gno+u/sivj0Zc2r+mGhm646UhjoU1ckBIxjFI +INbkF3LbU7RNUov63I7RMm/+JqTbn6nObNmCnroYZGV0Z+kgpJVc2NqY/N4WI+js5VGYMk1oMk84 +io2Q1JP0Io0qwkVKvcNazXJeUzKQPhm7qLKO1gTEOhkXMzrQJCvRLEpCBQZn9HR850Ihdpi19+qg +r42n4wSulQ9P9ZbTX01g29LMAn3gWwY4/CIE0zJzm8Qc3wjbudSO4q6mPg98znKLRjmHL5m24TSk +qbS5JyUVpVSV8TFjHE3z3rpcPBL1oVrm2zVTPqLgOIPvy/qMhakWuuUbsHJMUoi3qk5Z0KGcCt1/ +Emfw9b6rT3oNzko6UDwdqRP90eZ/7RyVV8t/eFbn8Evt4aJwqPab1/Axi8UIN0qaAyD55C32EY4k +ZIRCKWv3nYwQvdE6IMGRnZprLhzAJdbPoqzi5Yk4CvCjz8ubR7Bos2GNMsfdxtzIxGkylT2qNvm5 +orOQ7yPexdNn+xD4KvOEjWnhuNwUgYbsYnw2eAvf8l2mnRhZPMoMIah5Ws5Jt7QN8SPdKfAr+J4P +mCt2tk8XlHyGAyrTa+2fonZIF5SgvT+qx6UEoSwRGLr869A6HrpPSpdJ2+hBTUuyham/zAuQSsiS +2UhkkQsFXkd2k8XjSWxgumJIqDtFkg1bG3WXFFXKIpr0UEz5McJXK7f5JIWUiBpsHhxGM1pdU2e+ +c9keZFdSj9CCGGU6fCoScRBqdysU8YshIo8iNx19ULNHRoVftGg8zbQnBrEYLp5FD/8IJ31KHnFs 
+rKi55CYQ8ODdMs4T4Ir98G6BcBATEeLvkXea65bw5LtOSIsi2pDyHJ/Lv3eRVbKmUq2+dnGnWzHb +KlUUHmNAfQyQSNrr+0ZzjqzqqSL7Uiizdi+tWp3TmAtViHQpG1s3aARQc0/4h4hqfTDgwUBtp9cV +jyRZmyYpK9zr+4WQacUKeYELWEkpJcPUOCTrjO2NbT8vir6I1E5uOGmxaVjcHzgeslLUbX2DMF+1 +slD/9ELXSWMXrRbrdUSE8hrXbDZ32yV6FykZAj7bUoZexXONNYogbY+AergXBRWRNKZHygmZeIGF +q959cHTyL/9S8BxB19Qs4yDtbninkMRq5kIrysbJciioq3sVPCGnWk2XKUpo5OJqhpsaxL2nK2Yx +hKhUatFL/cSYbhgANYNSXdi3iiJsI7rX4fCf3Mn7xuG03KI7X5trIVxx58WTuI62iLFQVM0pMhY/ +t1537iS9ei81yR28Tt32Tm/xjMyA9ikuMHrlU5S1kp+hrpSe/QZb2xYxpGzQHgz/UI6942t7qYFt +WyUmN/kp8cknl7i7hMVUFUFc3k1NLL7tDZNRq8tJN8a4SVoMIdP51Gc1PzfkX4OeiCuzoQX6c3iC +WcPJoiZpE8a8HqpRk03ZEqM97hbjPRiIiGkQsWDezxwJWwDyFqPZVrelhQ3eaKVMtnVz+6LShJ/4 +FI2Rxc9TFvDlZ39tEO5AHfU3Q4IXvWIuwuf0Tu+fIIt5vdMAvV1KgqIQm7uXtZCSuozJ7KEqfXVd +0dp6uVSVi/K4H1lvCrR37W0mrZfLFVsEo8dHK53aMXeWs3/EeRCKpYwy7kGHDkKWCUW29Ewuzfhi +hUMT2c7FNGuzkSZY5EnMupc1Uo+hTYR2mqJPMUrRXbszfKoHSmvA3sbHR7iyEwnaa2LU2WK5lFVf +jEooWmE+H+XZEFmrx6jtqNH7VB++aXi4pbMdcZkXeniL2M20JEA56lRkZkYd893qosfPeXLJNUQU +Vdq0oPRajdfMqQ3o1GDNzMkixGEtPDcfl0ilF5yac3nppP6EnWwzLiL3TsFjMe0rWpSW78sNShZU +Sx3KaU/7NKckYOSvJSm7nVgdpLRsdzPinwg7U+3DRZnuxvD1cLe0PWudCGJZKsJx/AJV/utyugNd +T0+Z/9d6QRdXBK5U3yafeSuYHWGU5MCymrX0bxMAnPX6kyRusDlnJ9p5G+rWGahGAObfSvKkWXIQ +GaD1n0ecErMuwhfD7jeXmMbppN9SFCzp+BjLYDra0ig49LIsB733oTleSO+n0mDUM2tmxO20N7J3 +KJyEtrl4iBIrbOfMCUmjqlR3/AyTNlX+uKzFVIrZBfPuZXPvy3QsPAXSlBSVrY6eGs/YKkkFkqLQ +WhDareVmFb17BZWDK6WepXo5o9uJaFEsFQBkDcjUI3wMOvi1HdMM5LxNAltHF0bBKfwUlV6rSJRs +Lr80ix9WkPDk1PTtTN6iZBiElISLW6bd8ty4Dj+zKZHYfboco/S/xsJ/WyhM/0BJovGeElWOPxx9 +pk2g164WNkRj0EtjxIqYgHpq7lqZD3xCwTCFrohOiMxSTRuvRg+bmUPHK0xoWbGKCsEmDwx5eojJ +HD40fLCaNXHwVyPf0P9XyrghFjt1iYUwZ3VsdnFIVj7N5DJRi0sIChTMRqZ8/5GKdZaxhmHpqLl1 +p1ZtV/RXIYvShI+1LEbAVGoOmYfaRyRv0Q/VNHcnJ2cuPsUwBhiGh6HJyWCTEO5XeanJl6KuQIuz +UDGVVDEvfLdZ0lR6tzUxDTlN+5ReCR5nNEHqIdfNyoahW8kXjkGLVnhAtD2VGXL+0/vcjdvKFU1p +JGLIqIW/Xi5ZDB7SKT56VbDelL1OK/VEhJy6UP67QVSeOcUYC+eD3RIb7C3PvM8n9R736ENlP9vd 
+NEge4kX5PT1wLqZVYbua3WtqW1YgsE+8UdzyruxzwG0tJmx8acNvCFMvek9wCEPX0jhCGjKxc1bZ +a7pFZtJQk0MFnyXGQDvKF7YkM72aQpI+93ocSYK3JzG5F8Ng+a6lNMXA9iLhbjCDoYfvs8zzplcU +znucLIe6TBsFTwt6lZ7h1Xy01Ov4x59bZRFVTWeqbeT0LVRRkopXahZKMUS6xoMJDd43dAB4bxLW +DwIegt43KqtOx0QiEgRlsIOIVNZY8yaG20sNBRGQhwtFaY8gIs76d6bIdp6yesEJvuIWQN1CdjxI +3EJI+gzXKXC26ZKv5jk8FvMzPsc4sdTuHIN3OGmIlqS7eZ5mhWAaxbIFA7XiCP1N3fmShsFMcA4U +EiG30YmfC+pDnlfj5zyfofHx3tCZMh8qji/NO3cvUzEGS/ucX/NPv00NpRpcj8ACzF1SMsMmer5p +t2oXCZJl0HSDaUqrQejhUP9e/KC4lG+qH25J1M7AvlOhq2xc2tSHyhDMZSlBKY9VzELFqbQeRyNK +hv/6+Od/ZSek0WRPUGsxh/oV6xvLqTZfSaKOfRoLMhc22L3nINX+oBUygWDrW7C6H9RQR4UDQVlx +vQz46K8HaxxRr3k590g0eRquRZ/dj43zBa1jY2l0LIjhwEO8UyUM2YMegWGraQUfpTeZRPlf+A8f +79HAq/D4nO8Ez7yyYe/ysmCyRGNkfc3gWJJD8UX/R+wDq0Za6iz5Ytm4HBVu5XHoYzbete3aocEu +cWUkNLXgHGkr4WZE/5fkTeqx4/1YXSY636vUjuohWFdOvVMlI2n7BJlHdItE4YlS2Jz6qZWur2yg +tYqPiRNv6X7kSRdTUpSFRpeZ8cF9kIUNWnBumpF6yyQ1QwjOwbuHHRxFKp1rDFCz5dNxUCGrmEVh +F/MWZO4aTtGxkPbtK3PxzfTvGma/8QaO3KqxVUeMxKje8vz6DAktSGs9jxEPZhUbcoZTwQiP0cxB +hdZYXGXzrI63u751+QsfMrUVIcACTbcrfUEwMAbDXwsBIw6nrD5jAv9ahIseKZalE9b+rHtO9V8T +/4iUiufo/8hLqF+0VLopMKnA2V1HCEAwZoOSsvel5RQB0KeZvDEm7sldILu1mzgxCzFVAaiP6YNq +7RsLplDq5eakKk5iw1jnxcdlXA31yZP8kjJ+HnydinqmNBgzvbgP0pgfuXwfjpR/4SDxWcaw/yQy +VPRRuFzAylvmdgLiWfRmK0Uy9t4BFALu36hjSZX1p9SLwDJCrOtlyrIYIu6MBaebQn6C9UzeaydB +nOc8CK8rZJr0flZpeNiqWkgl0G1YwUW1G7mqqDGaYfuBUsPe8FX2r7JRL3ENXBtYJagCT847ieS8 +nTpxW1Ni43oqW9ccL4m6LSCru4ZJx+VqdYjCOVFdxFIhmyiiOibNdZrWYUrcTvtz1I4rHJEiwrBg +I0qBUBohBKXuhUnc8HvLsHbHLsnOT4O98lL1IBl/P4cE3ZNKbkhyp2pw02EQtSzEZ0517oHvqbTm +QQeymfFiuqErxZl+e+WAqjKlWdBxTHEkyi+sbFk4kR/chuSGVI95Bh+peCEzlzbNHImfeHLNLuYb +pzS4JhqbtbG4IRA7TfMXXLHbYoCedX3iBwhuOkqSpiEs512J9+wLlx2GD/vyRhqBj4318gHV2DZJ +zlBJgdzPSf+uG669MlxZIYdYqaPsMiEnG6h3LXsSdeuBJBESN50fG6EtlXtRdxX3bJEccA9gsDW7 +JzU/E6rEaa6TCXANQljFSk3m0euNmnQ3Kgv2tiQP8pPHqrsrxy46JJzAG0TuCsAzWaiQx3F4wjKy +Qmzf0u3SFy+/HDAqO1BgKb5wLNx6oG1ycpYEv/e6xTRZJk14cAkLrBbJdHRH9f/v/76aGOKJkjKO 
+9Zqjpq/Fn7wJNudncJZV9Fhm8ag7722KL+eWRSkGspQtSg0ohnigjKQhTDnyTVNGXfkSetep2srv +phROh8DLOY1lelC5npI/Mp96Zi9jbjDzNM50hzo4FEC0VZqKPCZdA/uZyVPMFBgqs5V2lo8EMqBF +xdWseZREfuwXkomLvHk5yZNaPYq6jZ6AQ3kr8O6iKCLv4l0cTvi8S6VTE45ns8EA8WFTYu4BhwGh +fSGl86px8+QxEYYtrO53AHSn+gUpZFPCOvmXHhdcRKV+5F3ZFKx2IIfoM8WxeHVXlafqfeOhOfEQ +r3LF0KI7zAVpkJXteeMLvITjC6bhHqKBklWvkR98kxsPJBSwaOKntqiFdK5/eZPv62OXqtFDnxcp +lq8D8+6ZFk2kMlmERCXjmpu3p4UYjVJvGhPRCUD2e4w42nkNLlP0glsROcDjZ4eEHxej1xKvwHrJ +VOsuJJSPLWWlY1xmsRNfw6MbA8w0crWd79TK8nt782pSKL2kxecuMc1JXYLOlzjfw/IyhShjHg6b +6k1Cf8rbeDIpvzvwKhgeCZmyOr0O6i4TbaKn3ilMmad0HHppcQz2gzYzVgG/YoEo3OhVq0+nLEsy +5VYay/0S/swoy3nIk9A/vqOce+dWlXJolw0ctWlejcmCGUXX8JWb882zsH2sruX6LttKIxua5kEs +mH1atXcF7/fRXBKzYyPQTMa+djpWil1MVY7jqF77JTusG/1EQsxzH+q1Nbn3rA1IobNGjWUvTi3c +0naOoQ5oA6pz00thFq0mwn1mitf4tBmXVJJTFxoEwHIK7KtkHRshCasq9t772TQqpgiHQnFDcUGa +Rhx909wxZpnbuUsUD+st+FP6nz68Tv+LM0O8kwOhreApDSy3RZD4m9bt9DYrJvaMyqnuxJ3+Y6hv +25+0Pvqj1wTg3SpUJLiZFOB2Np9tdP/pv6HZejmW1a1IvJq1wl17OfHZT2IlVYMNaOwuyrA3NgWJ +U9rauVAKWN7PiYLMjFY3NtGQtUtTfTR9AOYIHHEAs42gl3PSMnAaS6PwWAOB5gAzOUmRdBFmQdX7 +D189PWmLfvfxh+/1dzWF8mJ6CU7dPJ//+i//Mvk+xWSo7vTY+D0QecUBGAWe8nfSoAr9eopNkMFK +sNPaY/OAIoOh5tf3bUXqREjYckqtEKoHSXjQDVf/6/HPsUf+P1sFQeDtyAAA diff --git a/test_data/stegasoo-recovery-qr.png b/test_data/stegasoo-recovery-qr.png new file mode 100644 index 0000000..622c4b9 Binary files /dev/null and b/test_data/stegasoo-recovery-qr.png differ diff --git a/test_data/stegasoo_key_4096_5e663335.pem b/test_data/stegasoo_key_4096_5e663335.pem new file mode 100644 index 0000000..6ce75b1 --- /dev/null +++ b/test_data/stegasoo_key_4096_5e663335.pem @@ -0,0 +1,54 @@ +-----BEGIN ENCRYPTED PRIVATE KEY----- +MIIJtTBfBgkqhkiG9w0BBQ0wUjAxBgkqhkiG9w0BBQwwJAQQJdD0f2FnF8tXObq2 +HeQj8QICCAAwDAYIKoZIhvcNAgkFADAdBglghkgBZQMEASoEEFKD/tvo6am/xKKS +fiNtbagEgglQBJdTsd1JIjihIK+tcV+SbNJggJ0i7R0sh82GxZ21Oca2Ij4FndPU +rwjhyv8977dibIwt1F6oJOkWgt/DLCFVMinQvJaKdKY2Jowgj42MfiRQlFnzXJhY 
+GI1LHPg4/PWBNUIWKrOYOlVB+Nq4SffjQFlpmQGSxCjLwCNLZCG0ckxWBFrHg1g1 +R1LPnQikBEJ1xvtyMHELlyQia2JPDwvn29vhGtT5Jr9y4762R86RgqbelbB7H5wn +4WG4b9agZERx9vwnF7NQEFpOOhe6CMjEsWdfSswAsUoz/zaHmVz2alCOlQYj1yJj +vDPbHR9NZc1UtuH7g0pbEijUIto/PZcYhXPEvb1knwOA/JY7DuCmvW1t1rNsTSqk +2L8kmjDlr2FDDcNvD2XLHVZzqp3F5jYLtXfkWpOH7rqkrvdqHeu+ve5jxCnesZ0D +rDpcmpbEwqWx/W3slpZEqAdTrSgLcXXDi6OjIzAYDEzCHO/u6djDDKzYF1ziZNxq +bq4ZogP4SfzaGehArnCbPIBIObQp8t2BuXk6veDmEHk4aPSSBbbjKhWXVSbposz4 +ZvespTu2Z4aIT+xb7Rj32fAjiy+IPEI7Mt/KtsV+W2F5CM+QQxWTOdUkt+3OuAJe +VlgnZk4a7yHYLXbyqc/wpHPdD4EEKyCCBuT2lPwu+L/3XNNy8dWL/1y74PbUOyAW +r5wfIalJZ43Zabvgl+LXxCUXrVRFMG1hASXupCY88uU1evvdBjd+anWTd/IpNHBC +g6pvwnHQDeuf9KhzKIRvb2HqMeYM80yir6PMBcayZj6icKSZa9i2KKs6W4IVhS1p +ZDZBbuP01GlwU3pAX+bX7HIBt9wPYYoabUjYDahsvLCKToK8rhLbHd//3qKOuIh2 +7T+DtouVTFu7ipuxaq+VqSAExU5gNXi9xh9fSbJwAf7E//LA9s6UBMTRfJOmC1Wv +gyapSNqeATkvwFNmucTIXbaFTTlR+6WisgEO7eqT7F99k+tDoj/m8HoX84mcesqz +t3zeR0A6L0bq0GAICxdkNMRMXZWuan34T7IvxjdtIsaUm3ReIDf68oW51107Wlts +ZX2IE4P+vrAq4gR0Ra4L2NaDWDawZMIyEFAMRHxNE96TqZzvaNVZW3dOfn0YjRJH +fuvRThuoDGKKM5NzVDuWQJM+PP3dR2I+wamiL4QEeP+czP5FQXxR2C5iwY03Ntcj +ByAp1ZiLoGePEu3PGFIAocntyIy+UTKVMLfvqn1tX3VW19uF4J8eQnp0W4oqOAcZ +DTV6gamXNHrJzI5qtlB5yBf1YZb1bxniLKCiihOyx1O3fY/y178gIePMXX1ZVpQZ +PWdYlyDlw07tk5WnQxxAj4E6iNodlkhm9lfBFf+8GPgFe3esgPyID79KbS5UqN6D +gpnJcV57vsbU8KkjZ1hYEHUCuyR3AWIQOGAjP0Ai/nJADtEF54UZbP6fnOPT6yJR +olek4GiaEFV9SiSReIwKeTHiCZvpN1rMDnGLTn9p2bphOBM8mjBhKfE8Wy1LOYr9 +5HjJleAgtppgDh0dnKPc6kV0e+yHeQXDp0o1RC2J0awW4Oeqr65dJOoynARQ95n6 +UVlahI07BKqWZNRKcmJVvrWaQisDDLfWrvCaGYocTfOBEb9mpJzLZ3NrtE6UBxSj +/caJH0y0dRBaDLJvH44RXK9hXVW0iRp09lpABID9AvUyAFc/G+aKTbxbHkhc6AwB +pITCXPC+EMQ7Z4TcoRykU6+6EMsYNjvZ0l5xpsh5Pe7zsNeBtmBa8z//71ZkjsFi +Ioy3dmD0ruWgkq0dlU2L0BfNr55tsCZUzfd9/u2/hE6Ye4edtsKKQJD6aqoMi8Nk +qDI4t2GS1RHiCZ8hr4Ux5NXvKCFxD5913n5OY70BtMXKg/H/TwoTBqwzSH6fv1JZ +mWUSdtS9hN3fcezkqDwfR8Dzgz6Aq8ewa2HBoqcZ9T551hEGwvyN9QnT0DzkaZNK +VNwvTAHQ5Xs3lbS0X+Giu75nvHJMpKL70Z/aNX5IwobmfAi89jXaUMuGetcVbO98 
+SL96j5AxFO6K0PczCgE8CHXJY62Sh/eGYF+Uc7DbRZROxgM035MYBQqa5U17W0/G +h2Mf+qvfrH0jsvTwod9BRbYusnxp0E04+1Y7SdcQfbcbpafc2MAjnQGxU51KQiWf +yZ4D6COBoT1j7eGc/fg6uFKClEH34I97vod89CMj0uJblCieYj+5+pz0aGCgL3yP +6WZb5ogZQkq23p7lMZptmjW+OZGNt5bNEqNTAIhRB5jN1PnvJs81vzQo1rNmoJG6 +rokC3A6Mqic7MssU0B9nUXUA92LEB/YhimO/sccRshbBD2/TuY+KhQdApbU7NtJ3 +giyj/5JEwUmj4ecGXfxhxWYfPrnLG87hO1mogfp1ndCC0efbLR6u8Qb5vlz56luQ +hSvE45gWcVjxo6hJasZHpoqq4aD4CqVLgCi0zSEgXhPS+vgo2CYpW3u5N5Kw3nJG +WmcQOfGUXIoCLsFoiSoLNt5H5uPXi4+rcgi65pio2QwXpYfxlCZpHbEgyvzr+U85 +fiBNPwSvYnQx3DYqx/2mkIZPJO1pSGfDKy68OAnvOMUhQ7jASgmMjK0HeRSpT1E7 +n3+cUk1zJgDbu68laxj0xzU+iyJZr4hk05mmqVfux60WSv7NqurLgLQ3++CZ5XPu +SSuYY89gBlbbl9GLlF8EcmsbqXfqYa1F+6A2bqFBe96jbVo7WEdNXJDuZZxwU2GU +FgDo9tyLxnkGfv3XfSBmZDydltOQm2sgGIZ0EXczbso0F4BDeamolCgL6jhgVs0B +rhJ2kooSEA8/MJMhzUVgRjqNUV6iCW+iFRtX5nD4rW/vODYpFKs/zlSQo5qq8P3/ +eKw7VFlcc/i2V7ZxA48WIvM9HsNsKs3sHCxEHUZHmT/8KTcHuY1LlUA8aE3UMyAB +iqrpMQwn3x6G5UqLa/3IoxGYH9dYvoDjESVKm9CTZjbQdiCpYENsNiZ+TkjBBUwU +m50oRjC8YWhqAHdJxDcbAiiH0zyDYrMgvozLbDpUMjye8wOV94ga4Pb681Qld1vW +rFfytkJPYFCIP0uVrlEuAnfrcvymLAB/tMEbMeiEoFuoRfy2ra7taOeH6tpQcb1N +18QSzGTAcerjkvrpJLxG/aGyzKQDFvnpbObvsH3XJQScTgjhoY3yXPI= +-----END ENCRYPTED PRIVATE KEY----- diff --git a/test_data/stegasoo_rsa_key_3072.png b/test_data/stegasoo_rsa_key_3072.png new file mode 100644 index 0000000..a35b3ee Binary files /dev/null and b/test_data/stegasoo_rsa_key_3072.png differ diff --git a/test_data/stego.pem b/test_data/stego.pem new file mode 100644 index 0000000..6128509 --- /dev/null +++ b/test_data/stego.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQDWrgNqtHWzP493 +1QrWiv1Kpo2o+8QUG8tvUDduEWcLCoFOo3BwzFZdslYflXsDRx4hOyOOFF1dqTGs +tGmG1My38tdgFjGrSYaqOrDt169GTz5M4wXuIpr3LRDIrE0WGT9zpC25xLjbnVqw +H6wGQg/Gzdk2zvTdlrbiQZ+nnO3oYuSFNfxXBQw72ld9Ck4VjrGfVzen0ZfoAqx6 +y129xIKXFojUIyLMK1nDScUvzriSqdgEIcTm8UaCS7H3Obs7QFuVNhSn3Pa7GwtQ 
+gN+Grmfrixfrwjwm4/4CiK4BW3D50xzHGJ8jLqzQJPhBeI6poaS5L5q/Ow0/j52m +aEIJvm9nG0ozdzvvj0jfRpTd1i6svMDGQn15M40ELfIwSxhaxEyybd2Y0D0zaUaO +7QlnZrIevpKAuuSrEDgMrQZH0B8lxkayE+vx+RvMH4OlZl3pgu2cEMnVrwkjshRu +Jpo4MhAJpT9hoVQL2o458fKO3Mq3NgTxX2K9u/WBKV5FgoeEvFvPBGoeX85VE1u7 +iQiqEV/4Tj+kbyUCbNKia4YW+sT6enrMzVPs1xkeFYGfY4a/I9gPYSvjYSYRNrh6 +fRYupqb7fRvtI0BACXnXER4oQzk4/QJAeNDhkJmrW0lD3QMYNz3iK/E8JSYrsdDf +IE5wH/pLFkx0TpYD/NROSGg+4kSpWwIDAQABAoICAGKNhBgKPDZLqYszjEQtjlYk +28eFvOAWWnABfVDUQF+7NN1q/oVp6QQeiJb7QnugvF45Dft9sdb7g0hrkj5Jlqmj +m+gKiEa//EhNpUeqylYWrt0RI3nM/SMX1yqonsLUWGK4Lh2oB80aEbXk9E3zKdb0 +TRFyNMs9EzwPMEH8MuOJ8qqMvAke0hzwAVXv0yTmWvhxS6WI/JmQEKpWjfQ+ME04 +FRx9/M2J4S1bwmBOIEGEIxzSe9ghT2MlaM+7+w4aX3DCYP0m3UXMBoewge0MR7zE +H/klNAQOil+Ta35IAjdOCVoFHZlDJiZvsmAr/iI1iK9TAVFkxJI3VgI8voRGnlD5 +ZxNUkwhgkqAHS7L+lpRm/NKNNWJ8GhMRCp4vVAvig8RryW6Ho0CroOsw0qBCN6zx +919MviMRxODjfqBqBne83qCS6MjnSF1XJDUCSstUOl0bZ7yM0PLIIEXZYQT4MjXm +Bq+KXqiR7jB0dpsmkiP0mQYUZ+PgktWH/4dDL38O3tD5Oj4BG5mPA8dn1DalXYeF +a2AB4jYC+YAJ0TD72rFrcYw60rBMGRnTJ/Zw+TQXjPYqaDQqdQRIf/xJu/UhUOGM +BOJTTZxVAmLdxEKggFMO2bbhWjhAj/jcTk/7cVarMyB6GSVv/nJUxks3mj1VGi6r +Zy5FTv7g6GV9Xrai57oRAoIBAQD6nUm3ks7kY+oMqt4/OZFqeFkOPJij6qLr6i9b +AOEt/5MK9EPLj3b0E8OtoAdu1l+NKqOxzb0u67CBzIot32Iug7n2OFOxVzoA9oFv +0v1SiT+mXRbvCHXd8SjMjmCBrG2hbbQZk8DRo8PeRwEONKiibvkKVnYDlBSd+HAy +AQZiwOVTiSAmAuotnOxq/E8pY1m+YvfeSZxfbS3xLuRzG2Wl5rc5AInLUiYbqJRI +0W8ZU8GG0N/Yel9ZTdwoGAb0dpa7hv831H3iWUy8EUXHpUBd5hUeawZ8U7z1il+s +EuvQ9SmLB+S97AjVkJ5dBaeNR7myyxiRv67yIYWsB9GPIQuPAoIBAQDbSwl5FZvH +PNVIyj2VWCjNopdbbduqDu1Sl6QjykEmqrrh7yZHL7dT8u+VRLWLQDXlUOmcWZ+3 +vbXmpcm+tmXQGs2gVhtLCqDK/1RFx5PbhsbdUoVcaxQHZ1e3jVh8rcFrD8NFKJIY +s7YgaFHPFfEcvdqsxoEidsfSRKv0fHfLhh5K9ygp0XQH44wNoRZcm918a4KScBWz +aO9erRRu9IjH0uZrcOLpY6LMnbjzd9FnlCIWnNnir2pXxdPfZargRftfsPRz8F1m +1gDK2wTaNNP0e3AP726D9amtR2OsmIwjN+StbD82C+D5Rzae/WLllCiuYqB3/Tdq +G1aPGdqVMw91AoIBAEIi5gRS8tRk5awoH3SR7aUqU14lcaa4UfxuFccvST2JV7Wp +VobOunaZDKeyNqPyYM7xNLfEcquCOVdXshN/AvsBej7XCIchXMZKR0hKZt20KJ7v 
+rBCyq+BBNyIcZK1z4o6aU8/4JQdt70+mb0nfba1IGeK3hHdXScWSfZFEofnHq/1K +j7hYRpJ4+g8VvJJQjKtolL8Nr/2i41/lnxYdYcrRQwMA+Fw2tqXP7Cn3Uxlc+BJq +5tfLG4SGvpW6GYPEE+0zKAEEy5/DLzPmXwLbwkvFX0diuRhwRMqLlMnlSGQhfrAZ +Ci1wr7gkbRZhWqE3D3N2k2pLKpDtdcg7W/sonlMCggEAEXd0lbRm6AOsaVPSe+a9 +uUNOEvY5HN57gKybYk5lyhKnN/+4aMsgKwuakaUbYLAImu9K7C0nT5dIucpSu2ID +0gOQhUjWe6+pBwI3UCeJW17s0AwWmzncKorMrc4QUj30vfZPc1675EPMKeClQ31X +6xYkKMbW2kvjtpfJqnuPfyJ7nzT4efw45bf2G8/M3Ct+BL77c3aCA/xDtKLMkdng +WyotODmRmU1aVjnp1Vqhz0I5RNvRbwDR05PqQEz0w1BPqVuK3QE0L1C0V3QCeu7i +QwLh9iL4BoK2tRqOkdzErLly+W05y7A+hxxo+VjtW6PlaZ5pcaU1mLn8twE0K9YZ +UQKCAQEAvuV8orKNfC7QsQoy6gaYbESqEwIR7X3BmCpOZIW2lWtKFNEna6BOpu+U +8UpWvabZ/g4APO3NXMFiF9GE0iwfnVhWtejMp9cBZb/s1KzOuG7RXjNDsGQIlFsP +I0Ea4JmThKnVy3OsGnAIfb+wkzfyzyoUsa3/i50Ln4TxOIqvnV7eBFITi+IsD7v8 +74IUqeN+OwWELAEbwCQQgEQk/50LopS3bdd0PW8K8z1lB05Xwej8wDVcOttxWWIu +/71pLOyDXpGTKL2P55XT9+XaNG1oHkS29wlxZ864fYFodB2+B6Ka7Gvht0sykZob +qLuMBXlezXwx2ayC9XWuX5Ld7uOSGA== +-----END PRIVATE KEY----- diff --git a/tests/test_key_rotation.py b/tests/test_key_rotation.py index eef90ca..3386096 100644 --- a/tests/test_key_rotation.py +++ b/tests/test_key_rotation.py @@ -182,7 +182,7 @@ class TestRotateChannelKey: assert oct(key_file.stat().st_mode & 0o777) == oct(0o600) def test_archived_key_matches_old_fingerprint(self, tmp_path: Path): - from stegasoo.crypto import get_channel_fingerprint + from soosef.stegasoo.crypto import get_channel_fingerprint ks = _make_manager(tmp_path) ks.generate_channel_key() @@ -197,7 +197,7 @@ class TestRotateChannelKey: assert old_fp == result.old_fingerprint def test_new_channel_key_active_after_rotation(self, tmp_path: Path): - from stegasoo.crypto import get_channel_fingerprint + from soosef.stegasoo.crypto import get_channel_fingerprint ks = _make_manager(tmp_path) ks.generate_channel_key() diff --git a/tests/test_stegasoo.py b/tests/test_stegasoo.py new file mode 100644 index 0000000..9137192 --- /dev/null +++ b/tests/test_stegasoo.py @@ -0,0 +1,681 @@ +""" 
+Stegasoo Library Unit Tests + +Tests core functionality: encode/decode, LSB/DCT modes, channel keys, validation. +""" + +import io +from pathlib import Path + +import pytest +from PIL import Image + +import soosef.stegasoo as stegasoo +from soosef.stegasoo import ( + decode, + decode_text, + encode, + generate_channel_key, + generate_passphrase, + generate_pin, + has_dct_support, + validate_image, + validate_message, + validate_passphrase, + validate_pin, +) + +# Test data paths +TEST_DATA = Path(__file__).parent.parent / "test_data" +CARRIER_PATH = TEST_DATA / "carrier.jpg" +REF_PATH = TEST_DATA / "ref.jpg" + +# Test credentials +TEST_PASSPHRASE = "tower booty sunny windy toasty spicy" +TEST_PIN = "727643678" +TEST_MESSAGE = "Hello, Stegasoo!" + + +@pytest.fixture +def carrier_bytes(): + """Load carrier image as bytes.""" + return CARRIER_PATH.read_bytes() + + +@pytest.fixture +def ref_bytes(): + """Load reference image as bytes.""" + return REF_PATH.read_bytes() + + +@pytest.fixture +def small_image(): + """Create a small test image in memory.""" + img = Image.new("RGB", (200, 200), color="blue") + buf = io.BytesIO() + img.save(buf, format="PNG") + return buf.getvalue() + + +class TestVersion: + """Test version info.""" + + def test_version_exists(self): + assert hasattr(stegasoo, "__version__") + assert stegasoo.__version__ + + def test_version_format(self): + parts = stegasoo.__version__.split(".") + assert len(parts) >= 2 + assert all(p.isdigit() for p in parts[:2]) + + +class TestGeneration: + """Test credential generation.""" + + def test_generate_passphrase_default(self): + passphrase = generate_passphrase() + words = passphrase.split() + assert len(words) == stegasoo.DEFAULT_PASSPHRASE_WORDS + + def test_generate_passphrase_custom_length(self): + passphrase = generate_passphrase(words=8) + words = passphrase.split() + assert len(words) == 8 + + def test_generate_pin_default(self): + pin = generate_pin() + assert pin.isdigit() + assert len(pin) == 6 # 
Default is 6 digits + + def test_generate_pin_custom_length(self): + pin = generate_pin(length=9) + assert pin.isdigit() + assert len(pin) == 9 + + def test_generate_channel_key(self): + key = generate_channel_key() + # Format: XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX (39 chars) + assert len(key) == 39 + assert key.count("-") == 7 + + +class TestValidation: + """Test validation functions.""" + + def test_validate_passphrase_valid(self): + result = validate_passphrase(TEST_PASSPHRASE) + assert result.is_valid + + def test_validate_passphrase_too_short(self): + result = validate_passphrase("one two") + assert not result.is_valid + + def test_validate_pin_valid(self): + result = validate_pin(TEST_PIN) + assert result.is_valid + + def test_validate_pin_too_short(self): + result = validate_pin("123") + assert not result.is_valid + + def test_validate_pin_non_numeric(self): + result = validate_pin("abc123") + assert not result.is_valid + + def test_validate_message_valid(self): + result = validate_message("Hello world") + assert result.is_valid + + def test_validate_message_empty(self): + result = validate_message("") + assert not result.is_valid + + def test_validate_image_valid(self, carrier_bytes): + result = validate_image(carrier_bytes) + assert result.is_valid + + def test_validate_image_invalid(self): + result = validate_image(b"not an image") + assert not result.is_valid + + +class TestLSBMode: + """Test LSB (Least Significant Bit) encoding/decoding.""" + + def test_encode_decode_roundtrip(self, carrier_bytes, ref_bytes): + """Basic encode/decode roundtrip.""" + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert result.stego_image + assert len(result.stego_image) > 0 + + decoded = decode( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert 
decoded.message == TEST_MESSAGE + + def test_decode_text_helper(self, carrier_bytes, ref_bytes): + """Test decode_text convenience function.""" + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + text = decode_text( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert text == TEST_MESSAGE + + def test_wrong_passphrase_fails(self, carrier_bytes, ref_bytes): + """Decoding with wrong passphrase should fail.""" + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + with pytest.raises(Exception): + decode( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase="wrong passphrase words here now", + pin=TEST_PIN, + embed_mode="lsb", + ) + + def test_wrong_pin_fails(self, carrier_bytes, ref_bytes): + """Decoding with wrong PIN should fail.""" + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + with pytest.raises(Exception): + decode( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin="999999999", + embed_mode="lsb", + ) + + def test_wrong_reference_fails(self, carrier_bytes, ref_bytes, small_image): + """Decoding with wrong reference should fail.""" + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + with pytest.raises(Exception): + decode( + stego_image=result.stego_image, + reference_photo=small_image, # Wrong reference + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + +class TestDCTMode: + """Test DCT 
(Discrete Cosine Transform) encoding/decoding.""" + + @pytest.fixture(autouse=True) + def check_dct_support(self): + """Skip DCT tests if not supported.""" + if not has_dct_support(): + pytest.skip("DCT support not available") + + def test_encode_decode_roundtrip(self, carrier_bytes, ref_bytes): + """Basic DCT encode/decode roundtrip.""" + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="dct", + ) + + assert result.stego_image + + decoded = decode( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="dct", + ) + + assert decoded.message == TEST_MESSAGE + + def test_dct_jpeg_output(self, carrier_bytes, ref_bytes): + """Test DCT mode with JPEG output.""" + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="dct", + dct_output_format="jpeg", + ) + + assert result.stego_image + # Verify it's JPEG by checking magic bytes + assert result.stego_image[:2] == b"\xff\xd8" + + +class TestChannelKey: + """Test channel key functionality.""" + + def test_encode_with_channel_key(self, carrier_bytes, ref_bytes): + """Encode with channel key.""" + channel_key = generate_channel_key() + + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + channel_key=channel_key, + embed_mode="lsb", + ) + + assert result.stego_image + + # Decode with same channel key + decoded = decode( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + channel_key=channel_key, + embed_mode="lsb", + ) + + assert decoded.message == TEST_MESSAGE + + def test_wrong_channel_key_fails(self, carrier_bytes, ref_bytes): + """Decoding with wrong channel key should fail.""" + 
channel_key = generate_channel_key() + wrong_key = generate_channel_key() + + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + channel_key=channel_key, + embed_mode="lsb", + ) + + with pytest.raises(Exception): + decode( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + channel_key=wrong_key, + embed_mode="lsb", + ) + + +class TestCompression: + """Test message compression.""" + + def test_long_message_compresses(self, carrier_bytes, ref_bytes): + """Long messages should be compressed.""" + long_message = "A" * 1000 + + result = encode( + message=long_message, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert result.stego_image + + decoded = decode( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert decoded.message == long_message + + +class TestEdgeCases: + """Test edge cases and error handling.""" + + def test_unicode_message(self, carrier_bytes, ref_bytes): + """Test encoding Unicode messages.""" + unicode_msg = "Hello 🦖 Stegasoo! 
日本語 émojis" + + result = encode( + message=unicode_msg, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert result.stego_image + + decoded = decode( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert decoded.message == unicode_msg + + def test_minimum_passphrase(self, carrier_bytes, ref_bytes): + """Test with minimum valid passphrase.""" + min_passphrase = "one two three four" # 4 words minimum + + result = encode( + message=TEST_MESSAGE, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=min_passphrase, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert result.stego_image + + def test_special_characters_in_message(self, carrier_bytes, ref_bytes): + """Test special characters in message.""" + special_msg = "Line1\nLine2\tTab\r\nCRLF" + + result = encode( + message=special_msg, + reference_photo=ref_bytes, + carrier_image=carrier_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert result.stego_image + + decoded = decode( + stego_image=result.stego_image, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + embed_mode="lsb", + ) + + assert decoded.message == special_msg + + +# ============================================================================= +# VIDEO STEGANOGRAPHY TESTS (v4.4.0) +# ============================================================================= + + +@pytest.fixture +def test_video_bytes(): + """Create a minimal test video using ffmpeg. + + Creates a 2-second test video with solid color frames. + Returns None if ffmpeg is not available. 
+ """ + import shutil + import subprocess + import tempfile + + if not shutil.which("ffmpeg"): + return None + + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f: + output_path = f.name + + try: + # Create a simple 2-second video with colored frames + # Using lavfi (libavfilter) to generate test pattern + result = subprocess.run( + [ + "ffmpeg", + "-y", + "-f", + "lavfi", + "-i", + "color=c=blue:s=320x240:d=2:r=10", + "-c:v", + "libx264", + "-pix_fmt", + "yuv420p", + "-g", + "5", # GOP size - creates I-frames every 5 frames + output_path, + ], + capture_output=True, + timeout=30, + ) + + if result.returncode != 0: + return None + + with open(output_path, "rb") as f: + video_data = f.read() + + return video_data + except Exception: + return None + finally: + import os + + try: + os.unlink(output_path) + except OSError: + pass + + +class TestVideoSupport: + """Test video steganography support detection.""" + + def test_video_support_flag_exists(self): + """HAS_VIDEO_SUPPORT flag should exist.""" + assert hasattr(stegasoo, "HAS_VIDEO_SUPPORT") + assert isinstance(stegasoo.HAS_VIDEO_SUPPORT, bool) + + def test_video_constants_exist(self): + """Video-related constants should exist.""" + assert hasattr(stegasoo, "EMBED_MODE_VIDEO_LSB") + assert hasattr(stegasoo, "EMBED_MODE_VIDEO_AUTO") + + +@pytest.mark.skipif( + not stegasoo.HAS_VIDEO_SUPPORT, + reason="Video support not available (ffmpeg or dependencies missing)", +) +class TestVideoFormatDetection: + """Test video format detection.""" + + def test_detect_video_format_mp4(self, test_video_bytes): + """Should detect MP4 format from magic bytes.""" + if test_video_bytes is None: + pytest.skip("Could not create test video") + + from soosef.stegasoo import detect_video_format + + fmt = detect_video_format(test_video_bytes) + assert fmt in ("mp4", "mov") + + def test_detect_video_format_unknown(self): + """Should return 'unknown' for non-video data.""" + from soosef.stegasoo import detect_video_format + + 
fmt = detect_video_format(b"not a video") + assert fmt == "unknown" + + +@pytest.mark.skipif( + not stegasoo.HAS_VIDEO_SUPPORT, + reason="Video support not available (ffmpeg or dependencies missing)", +) +class TestVideoInfo: + """Test video metadata extraction.""" + + def test_get_video_info(self, test_video_bytes): + """Should extract video metadata.""" + if test_video_bytes is None: + pytest.skip("Could not create test video") + + from soosef.stegasoo import get_video_info + + info = get_video_info(test_video_bytes) + + assert info.width == 320 + assert info.height == 240 + assert info.fps > 0 + assert info.duration_seconds > 0 + assert info.total_frames > 0 + assert info.format in ("mp4", "mov") + + def test_validate_video(self, test_video_bytes): + """Should validate video data.""" + if test_video_bytes is None: + pytest.skip("Could not create test video") + + from soosef.stegasoo import validate_video + + result = validate_video(test_video_bytes, check_duration=False) + + assert result.is_valid + assert result.details.get("format") in ("mp4", "mov") + + +@pytest.mark.skipif( + not stegasoo.HAS_VIDEO_SUPPORT, + reason="Video support not available (ffmpeg or dependencies missing)", +) +class TestVideoCapacity: + """Test video capacity calculation.""" + + def test_calculate_video_capacity(self, test_video_bytes): + """Should calculate steganographic capacity.""" + if test_video_bytes is None: + pytest.skip("Could not create test video") + + from soosef.stegasoo import calculate_video_capacity + + capacity_info = calculate_video_capacity(test_video_bytes) + + assert capacity_info.total_frames > 0 + assert capacity_info.i_frames > 0 + assert capacity_info.usable_capacity_bytes > 0 + assert capacity_info.embed_mode == "video_lsb" + assert capacity_info.resolution == (320, 240) + + +@pytest.mark.skipif( + not stegasoo.HAS_VIDEO_SUPPORT, + reason="Video support not available (ffmpeg or dependencies missing)", +) +class TestVideoEncodeDecode: + """Test video 
steganography round-trip.""" + + def test_video_roundtrip(self, test_video_bytes, ref_bytes): + """Test encoding and decoding a message in video.""" + if test_video_bytes is None: + pytest.skip("Could not create test video") + + from soosef.stegasoo import decode_video, encode_video + + message = "Secret video message!" + + # Encode + stego_video, stats = encode_video( + message=message, + reference_photo=ref_bytes, + carrier_video=test_video_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + ) + + assert stego_video + assert len(stego_video) > 0 + assert stats.frames_modified > 0 + assert stats.codec == "ffv1" # Should use lossless codec + + # Decode + result = decode_video( + stego_video=stego_video, + reference_photo=ref_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + ) + + assert result.is_text + assert result.message == message + + def test_video_wrong_passphrase_fails(self, test_video_bytes, ref_bytes): + """Decoding with wrong passphrase should fail.""" + if test_video_bytes is None: + pytest.skip("Could not create test video") + + from soosef.stegasoo import decode_video, encode_video + + message = "Secret video message!" + + stego_video, _ = encode_video( + message=message, + reference_photo=ref_bytes, + carrier_video=test_video_bytes, + passphrase=TEST_PASSPHRASE, + pin=TEST_PIN, + ) + + with pytest.raises(Exception): + decode_video( + stego_video=stego_video, + reference_photo=ref_bytes, + passphrase="wrong passphrase words here", + pin=TEST_PIN, + ) diff --git a/tests/test_stegasoo_audio.py b/tests/test_stegasoo_audio.py new file mode 100644 index 0000000..221f15e --- /dev/null +++ b/tests/test_stegasoo_audio.py @@ -0,0 +1,862 @@ +""" +Tests for Stegasoo audio steganography. 
+ +Tests cover: +- Audio LSB roundtrip (encode + decode) +- Audio spread spectrum roundtrip (v0 legacy + v2 per-channel) +- Wrong credentials fail to decode +- Capacity calculations (per-tier) +- Format detection +- Audio validation +- Per-channel stereo/multichannel embedding (v4.4.0) +- Chip tier roundtrips (v4.4.0) +- LFE channel skipping (v4.4.0) +- Backward compat: v0 decode from v2 code +- Header v2 build/parse roundtrip +- Round-robin bit distribution +""" + +import io +from pathlib import Path + +import numpy as np +import pytest +import soundfile as sf + +from soosef.stegasoo.constants import AUDIO_ENABLED, EMBED_MODE_AUDIO_LSB, EMBED_MODE_AUDIO_SPREAD +from soosef.stegasoo.models import AudioCapacityInfo, AudioEmbedStats, AudioInfo + +pytestmark = pytest.mark.skipif(not AUDIO_ENABLED, reason="Audio support disabled (STEGASOO_AUDIO)") + +# Path to real test data files +_TEST_DATA = Path(__file__).parent.parent / "test_data" +_REFERENCE_PNG = _TEST_DATA / "reference.png" +_SPEECH_WAV = _TEST_DATA / "stupid_elitist_speech.wav" + + +# ============================================================================= +# FIXTURES +# ============================================================================= + + +@pytest.fixture +def carrier_wav() -> bytes: + """Generate a small test WAV file (1 second, 44100 Hz, mono, 16-bit).""" + sample_rate = 44100 + duration = 1.0 + num_samples = int(sample_rate * duration) + t = np.linspace(0, duration, num_samples, endpoint=False) + samples = (np.sin(2 * np.pi * 440 * t) * 16000).astype(np.int16) + + buf = io.BytesIO() + sf.write(buf, samples, sample_rate, format="WAV", subtype="PCM_16") + buf.seek(0) + return buf.read() + + +@pytest.fixture +def carrier_wav_stereo() -> bytes: + """Generate a stereo test WAV file (5 seconds for spread spectrum capacity).""" + sample_rate = 44100 + duration = 5.0 + num_samples = int(sample_rate * duration) + t = np.linspace(0, duration, num_samples, endpoint=False) + left = (np.sin(2 * np.pi 
* 440 * t) * 16000).astype(np.int16) + right = (np.sin(2 * np.pi * 880 * t) * 16000).astype(np.int16) + samples = np.column_stack([left, right]) + + buf = io.BytesIO() + sf.write(buf, samples, sample_rate, format="WAV", subtype="PCM_16") + buf.seek(0) + return buf.read() + + +@pytest.fixture +def carrier_wav_long() -> bytes: + """Generate a longer WAV (15 seconds) for spread spectrum tests.""" + sample_rate = 44100 + duration = 15.0 + num_samples = int(sample_rate * duration) + t = np.linspace(0, duration, num_samples, endpoint=False) + samples = ( + (np.sin(2 * np.pi * 440 * t) + np.sin(2 * np.pi * 880 * t) + np.sin(2 * np.pi * 1320 * t)) + * 5000 + ).astype(np.int16) + + buf = io.BytesIO() + sf.write(buf, samples, sample_rate, format="WAV", subtype="PCM_16") + buf.seek(0) + return buf.read() + + +@pytest.fixture +def carrier_wav_stereo_long() -> bytes: + """Generate a stereo WAV (15 seconds) for per-channel spread tests.""" + sample_rate = 48000 + duration = 15.0 + num_samples = int(sample_rate * duration) + t = np.linspace(0, duration, num_samples, endpoint=False) + left = (np.sin(2 * np.pi * 440 * t) * 10000).astype(np.float64) / 32768.0 + right = (np.sin(2 * np.pi * 660 * t) * 10000).astype(np.float64) / 32768.0 + samples = np.column_stack([left, right]) + + buf = io.BytesIO() + sf.write(buf, samples, sample_rate, format="WAV", subtype="PCM_16") + buf.seek(0) + return buf.read() + + +@pytest.fixture +def carrier_wav_5_1() -> bytes: + """Generate a 6-channel (5.1) WAV for LFE skip tests.""" + sample_rate = 48000 + duration = 15.0 + num_samples = int(sample_rate * duration) + t = np.linspace(0, duration, num_samples, endpoint=False) + + # 6 channels with different frequencies + freqs = [440, 554, 660, 80, 880, 1100] # ch3 = LFE (low freq) + channels = [] + for freq in freqs: + ch = (np.sin(2 * np.pi * freq * t) * 8000).astype(np.float64) / 32768.0 + channels.append(ch) + samples = np.column_stack(channels) + + buf = io.BytesIO() + sf.write(buf, samples, 
sample_rate, format="WAV", subtype="PCM_16") + buf.seek(0) + return buf.read() + + +@pytest.fixture +def carrier_wav_spread_integration() -> bytes: + """Generate a very long WAV (150 seconds) for spread spectrum integration tests.""" + sample_rate = 44100 + duration = 150.0 + num_samples = int(sample_rate * duration) + t = np.linspace(0, duration, num_samples, endpoint=False) + samples = ( + (np.sin(2 * np.pi * 440 * t) + np.sin(2 * np.pi * 880 * t) + np.sin(2 * np.pi * 1320 * t)) + * 5000 + ).astype(np.int16) + + buf = io.BytesIO() + sf.write(buf, samples, sample_rate, format="WAV", subtype="PCM_16") + buf.seek(0) + return buf.read() + + +@pytest.fixture +def reference_photo() -> bytes: + """Load real reference photo from test_data, or generate a small one.""" + if _REFERENCE_PNG.exists(): + return _REFERENCE_PNG.read_bytes() + from PIL import Image + + img = Image.new("RGB", (100, 100), color=(128, 64, 32)) + buf = io.BytesIO() + img.save(buf, "PNG") + buf.seek(0) + return buf.read() + + +@pytest.fixture +def speech_wav() -> bytes: + """Load real speech WAV from test_data (48kHz mono, ~68s).""" + if not _SPEECH_WAV.exists(): + pytest.skip("test_data/stupid_elitist_speech.wav not found") + return _SPEECH_WAV.read_bytes() + + +# ============================================================================= +# AUDIO LSB TESTS +# ============================================================================= + + +class TestAudioLSB: + """Tests for audio LSB steganography.""" + + def test_calculate_capacity(self, carrier_wav): + from soosef.stegasoo.audio_steganography import calculate_audio_lsb_capacity + + capacity = calculate_audio_lsb_capacity(carrier_wav) + assert capacity > 0 + # 1 second at 44100 Hz mono should give ~5KB capacity at 1 bit/sample + assert capacity > 4000 + + def test_embed_extract_roundtrip(self, carrier_wav): + """Test basic LSB embed/extract roundtrip.""" + from soosef.stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb 
+ + payload = b"Hello, audio steganography!" + key = b"\x42" * 32 + + stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav, key) + + assert isinstance(stats, AudioEmbedStats) + assert stats.embed_mode == EMBED_MODE_AUDIO_LSB + assert stats.bytes_embedded > 0 + assert stats.samples_modified > 0 + assert 0 < stats.capacity_used <= 1.0 + + extracted = extract_from_audio_lsb(stego_audio, key) + assert extracted is not None + assert extracted == payload + + def test_embed_extract_stereo(self, carrier_wav_stereo): + """Test LSB roundtrip with stereo audio.""" + from soosef.stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb + + payload = b"Stereo test message" + key = b"\xAB" * 32 + + stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav_stereo, key) + assert stats.channels == 2 + + extracted = extract_from_audio_lsb(stego_audio, key) + assert extracted == payload + + def test_wrong_key_fails(self, carrier_wav): + """Test that wrong key produces no valid extraction.""" + from soosef.stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb + + payload = b"Secret message" + correct_key = b"\x42" * 32 + wrong_key = b"\xFF" * 32 + + stego_audio, _ = embed_in_audio_lsb(payload, carrier_wav, correct_key) + + extracted = extract_from_audio_lsb(stego_audio, wrong_key) + assert extracted is None or extracted != payload + + def test_two_bits_per_sample(self, carrier_wav): + """Test embedding with 2 bits per sample.""" + from soosef.stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb + + payload = b"Two bits per sample test" + key = b"\x55" * 32 + + stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav, key, bits_per_sample=2) + + extracted = extract_from_audio_lsb(stego_audio, key, bits_per_sample=2) + assert extracted == payload + + def test_generate_sample_indices(self): + """Test deterministic sample index generation.""" + from soosef.stegasoo.audio_steganography import 
generate_sample_indices + + key = b"\x42" * 32 + indices1 = generate_sample_indices(key, 10000, 100) + indices2 = generate_sample_indices(key, 10000, 100) + + assert indices1 == indices2 + assert all(0 <= i < 10000 for i in indices1) + assert len(set(indices1)) == len(indices1) + + +# ============================================================================= +# AUDIO SPREAD SPECTRUM TESTS (v2 per-channel) +# ============================================================================= + + +class TestAudioSpread: + """Tests for audio spread spectrum steganography (v2 per-channel).""" + + def test_calculate_capacity_default_tier(self, carrier_wav_long): + from soosef.stegasoo.spread_steganography import calculate_audio_spread_capacity + + capacity = calculate_audio_spread_capacity(carrier_wav_long) + assert isinstance(capacity, AudioCapacityInfo) + assert capacity.usable_capacity_bytes > 0 + assert capacity.embed_mode == EMBED_MODE_AUDIO_SPREAD + assert capacity.chip_tier == 2 # default + assert capacity.chip_length == 1024 + + def test_calculate_capacity_per_tier(self, carrier_wav_long): + """Capacity should increase as chip length decreases.""" + from soosef.stegasoo.spread_steganography import calculate_audio_spread_capacity + + cap_lossless = calculate_audio_spread_capacity(carrier_wav_long, chip_tier=0) + cap_high = calculate_audio_spread_capacity(carrier_wav_long, chip_tier=1) + cap_low = calculate_audio_spread_capacity(carrier_wav_long, chip_tier=2) + + assert cap_lossless.chip_length == 256 + assert cap_high.chip_length == 512 + assert cap_low.chip_length == 1024 + + # Smaller chip = more capacity + assert cap_lossless.usable_capacity_bytes > cap_high.usable_capacity_bytes + assert cap_high.usable_capacity_bytes > cap_low.usable_capacity_bytes + + def test_spread_roundtrip_default_tier(self, carrier_wav_long): + """Test spread spectrum embed/extract roundtrip (default tier 2).""" + from soosef.stegasoo.spread_steganography import ( + embed_in_audio_spread, 
+ extract_from_audio_spread, + ) + + payload = b"Spread test v2" + seed = b"\x42" * 32 + + stego_audio, stats = embed_in_audio_spread(payload, carrier_wav_long, seed) + + assert isinstance(stats, AudioEmbedStats) + assert stats.embed_mode == EMBED_MODE_AUDIO_SPREAD + assert stats.chip_tier == 2 + assert stats.chip_length == 1024 + + extracted = extract_from_audio_spread(stego_audio, seed) + assert extracted is not None + assert extracted == payload + + def test_spread_roundtrip_tier_0(self, carrier_wav_long): + """Test spread spectrum at tier 0 (chip=256, lossless).""" + from soosef.stegasoo.spread_steganography import ( + embed_in_audio_spread, + extract_from_audio_spread, + ) + + payload = b"Lossless tier test with more data to embed for coverage" + seed = b"\x42" * 32 + + stego_audio, stats = embed_in_audio_spread(payload, carrier_wav_long, seed, chip_tier=0) + assert stats.chip_tier == 0 + assert stats.chip_length == 256 + + extracted = extract_from_audio_spread(stego_audio, seed) + assert extracted is not None + assert extracted == payload + + def test_spread_roundtrip_tier_1(self, carrier_wav_long): + """Test spread spectrum at tier 1 (chip=512, high lossy).""" + from soosef.stegasoo.spread_steganography import ( + embed_in_audio_spread, + extract_from_audio_spread, + ) + + payload = b"High lossy tier test" + seed = b"\x42" * 32 + + stego_audio, stats = embed_in_audio_spread(payload, carrier_wav_long, seed, chip_tier=1) + assert stats.chip_tier == 1 + assert stats.chip_length == 512 + + extracted = extract_from_audio_spread(stego_audio, seed) + assert extracted is not None + assert extracted == payload + + def test_wrong_seed_fails(self, carrier_wav_long): + """Test that wrong seed produces no valid extraction.""" + from soosef.stegasoo.spread_steganography import ( + embed_in_audio_spread, + extract_from_audio_spread, + ) + + payload = b"Secret spread" + correct_seed = b"\x42" * 32 + wrong_seed = b"\xFF" * 32 + + stego_audio, _ = 
embed_in_audio_spread(payload, carrier_wav_long, correct_seed) + + extracted = extract_from_audio_spread(stego_audio, wrong_seed) + assert extracted is None or extracted != payload + + def test_per_channel_stereo_roundtrip(self, carrier_wav_stereo_long): + """Test that stereo per-channel embedding/extraction works.""" + from soosef.stegasoo.spread_steganography import ( + embed_in_audio_spread, + extract_from_audio_spread, + ) + + payload = b"Stereo per-channel test" + seed = b"\xAB" * 32 + + stego_audio, stats = embed_in_audio_spread( + payload, carrier_wav_stereo_long, seed, chip_tier=0 + ) + assert stats.channels == 2 + assert stats.embeddable_channels == 2 + + extracted = extract_from_audio_spread(stego_audio, seed) + assert extracted is not None + assert extracted == payload + + def test_per_channel_preserves_spatial_mix(self, carrier_wav_stereo_long): + """Verify that per-channel embedding doesn't destroy the spatial mix. + + The difference between left and right channels should be preserved + (not zeroed out as the old mono-broadcast approach would do). + """ + from soosef.stegasoo.spread_steganography import embed_in_audio_spread + + payload = b"Spatial preservation test" + seed = b"\xCD" * 32 + + # Read original + orig_samples, _ = sf.read(io.BytesIO(carrier_wav_stereo_long), dtype="float64", always_2d=True) + orig_diff = orig_samples[:, 0] - orig_samples[:, 1] + + # Embed + stego_bytes, _ = embed_in_audio_spread( + payload, carrier_wav_stereo_long, seed, chip_tier=0 + ) + + # Read stego + stego_samples, _ = sf.read(io.BytesIO(stego_bytes), dtype="float64", always_2d=True) + stego_diff = stego_samples[:, 0] - stego_samples[:, 1] + + # The channel difference should not be identical (embedding adds different + # noise per channel), but should be very close (embedding is subtle) + # With the old mono-broadcast approach, stego_diff would equal orig_diff + # exactly in unmodified regions but differ where data was embedded. 
+ # With per-channel, both channels get independent modifications. + correlation = np.corrcoef(orig_diff, stego_diff)[0, 1] + assert correlation > 0.95, f"Spatial mix correlation too low: {correlation}" + + def test_capacity_scales_with_channels(self, carrier_wav_long, carrier_wav_stereo_long): + """Stereo should have roughly double the capacity of mono.""" + from soosef.stegasoo.spread_steganography import calculate_audio_spread_capacity + + mono_cap = calculate_audio_spread_capacity(carrier_wav_long, chip_tier=0) + stereo_cap = calculate_audio_spread_capacity(carrier_wav_stereo_long, chip_tier=0) + + # Stereo should be ~1.5-2.2x mono (not exact because header is ch0 only + # and the files have slightly different durations/sample rates) + ratio = stereo_cap.usable_capacity_bytes / mono_cap.usable_capacity_bytes + assert ratio > 1.3, f"Stereo/mono capacity ratio too low: {ratio}" + + def test_lfe_skip_5_1(self, carrier_wav_5_1): + """LFE channel (index 3) should be unmodified in 6-channel audio.""" + from soosef.stegasoo.spread_steganography import embed_in_audio_spread + + payload = b"LFE skip test" + seed = b"\xEE" * 32 + + # Read original LFE channel + orig_samples, _ = sf.read(io.BytesIO(carrier_wav_5_1), dtype="float64", always_2d=True) + orig_lfe = orig_samples[:, 3].copy() + + stego_bytes, stats = embed_in_audio_spread( + payload, carrier_wav_5_1, seed, chip_tier=0 + ) + assert stats.embeddable_channels == 5 # 6 channels - 1 LFE = 5 + + stego_samples, _ = sf.read(io.BytesIO(stego_bytes), dtype="float64", always_2d=True) + stego_lfe = stego_samples[:, 3] + + # LFE channel should be completely unmodified + np.testing.assert_array_equal(orig_lfe, stego_lfe) + + def test_lfe_skip_roundtrip(self, carrier_wav_5_1): + """5.1 audio embed/extract roundtrip with LFE skipping.""" + from soosef.stegasoo.spread_steganography import ( + embed_in_audio_spread, + extract_from_audio_spread, + ) + + payload = b"5.1 surround test" + seed = b"\xEE" * 32 + + stego_bytes, stats = 
embed_in_audio_spread( + payload, carrier_wav_5_1, seed, chip_tier=0 + ) + assert stats.channels == 6 + assert stats.embeddable_channels == 5 + + extracted = extract_from_audio_spread(stego_bytes, seed) + assert extracted is not None + assert extracted == payload + + +# ============================================================================= +# HEADER V2 TESTS +# ============================================================================= + + +class TestHeaderV2: + """Tests for v2 header construction and parsing.""" + + def test_header_v2_build_parse_roundtrip(self): + from soosef.stegasoo.spread_steganography import _build_header_v2, _parse_header + + data_length = 12345 + chip_tier = 1 + num_ch = 2 + lfe_skipped = False + + header = _build_header_v2(data_length, chip_tier, num_ch, lfe_skipped) + assert len(header) == 20 + + magic_valid, version, length, tier, nch, lfe = _parse_header(header) + assert magic_valid + assert version == 2 + assert length == data_length + assert tier == chip_tier + assert nch == num_ch + assert lfe is False + + def test_header_v2_with_lfe_flag(self): + from soosef.stegasoo.spread_steganography import _build_header_v2, _parse_header + + header = _build_header_v2(999, 0, 5, lfe_skipped=True) + magic_valid, version, length, tier, nch, lfe = _parse_header(header) + assert magic_valid + assert version == 2 + assert length == 999 + assert tier == 0 + assert nch == 5 + assert lfe is True + + def test_header_v0_build_parse(self): + from soosef.stegasoo.spread_steganography import _build_header_v0, _parse_header + + header = _build_header_v0(4567) + assert len(header) == 16 + + magic_valid, version, length, tier, nch, lfe = _parse_header(header) + assert magic_valid + assert version == 0 + assert length == 4567 + assert tier is None + assert nch is None + + def test_header_bad_magic(self): + from soosef.stegasoo.spread_steganography import _parse_header + + bad_header = b"XXXX" + b"\x00" * 16 + magic_valid, version, length, tier, nch, lfe 
= _parse_header(bad_header) + assert not magic_valid + + +# ============================================================================= +# ROUND-ROBIN BIT DISTRIBUTION TESTS +# ============================================================================= + + +class TestRoundRobin: + """Tests for round-robin bit distribution.""" + + def test_distribute_and_collect_identity(self): + from soosef.stegasoo.spread_steganography import ( + _collect_bits_round_robin, + _distribute_bits_round_robin, + ) + + bits = [1, 0, 1, 1, 0, 0, 1, 0, 1, 1] + for num_ch in [1, 2, 3, 4, 5]: + per_ch = _distribute_bits_round_robin(bits, num_ch) + assert len(per_ch) == num_ch + reassembled = _collect_bits_round_robin(per_ch) + assert reassembled == bits, f"Failed for {num_ch} channels" + + def test_distribute_round_robin_ordering(self): + from soosef.stegasoo.spread_steganography import _distribute_bits_round_robin + + bits = [0, 1, 2, 3, 4, 5] # using ints for clarity + per_ch = _distribute_bits_round_robin(bits, 3) + # ch0: bits 0, 3 ch1: bits 1, 4 ch2: bits 2, 5 + assert per_ch[0] == [0, 3] + assert per_ch[1] == [1, 4] + assert per_ch[2] == [2, 5] + + def test_distribute_uneven(self): + from soosef.stegasoo.spread_steganography import ( + _collect_bits_round_robin, + _distribute_bits_round_robin, + ) + + bits = [0, 1, 2, 3, 4] # 5 bits across 3 channels + per_ch = _distribute_bits_round_robin(bits, 3) + assert per_ch[0] == [0, 3] + assert per_ch[1] == [1, 4] + assert per_ch[2] == [2] + + reassembled = _collect_bits_round_robin(per_ch) + assert reassembled == bits + + +# ============================================================================= +# CHANNEL MANAGEMENT TESTS +# ============================================================================= + + +class TestChannelManagement: + """Tests for embeddable channel selection.""" + + def test_mono(self): + from soosef.stegasoo.spread_steganography import _embeddable_channels + + assert _embeddable_channels(1) == [0] + + def 
test_stereo(self): + from soosef.stegasoo.spread_steganography import _embeddable_channels + + assert _embeddable_channels(2) == [0, 1] + + def test_5_1_skips_lfe(self): + from soosef.stegasoo.spread_steganography import _embeddable_channels + + channels = _embeddable_channels(6) + assert channels == [0, 1, 2, 4, 5] + assert 3 not in channels # LFE skipped + + def test_7_1_skips_lfe(self): + from soosef.stegasoo.spread_steganography import _embeddable_channels + + channels = _embeddable_channels(8) + assert 3 not in channels + assert len(channels) == 7 + + def test_quad_no_skip(self): + from soosef.stegasoo.spread_steganography import _embeddable_channels + + # 4 channels < 6, so no LFE skip + assert _embeddable_channels(4) == [0, 1, 2, 3] + + +# ============================================================================= +# FORMAT DETECTION TESTS +# ============================================================================= + + +class TestFormatDetection: + """Tests for audio format detection.""" + + def test_detect_wav(self, carrier_wav): + from soosef.stegasoo.audio_utils import detect_audio_format + + assert detect_audio_format(carrier_wav) == "wav" + + def test_detect_unknown(self): + from soosef.stegasoo.audio_utils import detect_audio_format + + assert detect_audio_format(b"not audio data") == "unknown" + + def test_detect_empty(self): + from soosef.stegasoo.audio_utils import detect_audio_format + + assert detect_audio_format(b"") == "unknown" + + +# ============================================================================= +# AUDIO INFO TESTS +# ============================================================================= + + +class TestAudioInfo: + """Tests for audio info extraction.""" + + def test_get_wav_info(self, carrier_wav): + from soosef.stegasoo.audio_utils import get_audio_info + + info = get_audio_info(carrier_wav) + assert isinstance(info, AudioInfo) + assert info.sample_rate == 44100 + assert info.channels == 1 + assert info.format == 
"wav" + assert abs(info.duration_seconds - 1.0) < 0.1 + + def test_get_stereo_info(self, carrier_wav_stereo): + from soosef.stegasoo.audio_utils import get_audio_info + + info = get_audio_info(carrier_wav_stereo) + assert info.channels == 2 + + +# ============================================================================= +# VALIDATION TESTS +# ============================================================================= + + +class TestAudioValidation: + """Tests for audio validation.""" + + def test_validate_valid_audio(self, carrier_wav): + from soosef.stegasoo.audio_utils import validate_audio + + result = validate_audio(carrier_wav) + assert result.is_valid + + def test_validate_empty_audio(self): + from soosef.stegasoo.audio_utils import validate_audio + + result = validate_audio(b"") + assert not result.is_valid + + def test_validate_invalid_audio(self): + from soosef.stegasoo.audio_utils import validate_audio + + result = validate_audio(b"not audio data at all") + assert not result.is_valid + + def test_validate_audio_embed_mode(self): + from soosef.stegasoo.validation import validate_audio_embed_mode + + assert validate_audio_embed_mode("audio_lsb").is_valid + assert validate_audio_embed_mode("audio_spread").is_valid + assert validate_audio_embed_mode("audio_auto").is_valid + assert not validate_audio_embed_mode("invalid").is_valid + + +# ============================================================================= +# INTEGRATION TESTS +# ============================================================================= + + +class TestIntegration: + """End-to-end integration tests using encode_audio/decode_audio.""" + + def test_lsb_encode_decode(self, carrier_wav, reference_photo): + from soosef.stegasoo.decode import decode_audio + from soosef.stegasoo.encode import encode_audio + + stego_audio, stats = encode_audio( + message="Hello from audio steganography!", + reference_photo=reference_photo, + carrier_audio=carrier_wav, + passphrase="test words here 
now", + pin="123456", + embed_mode="audio_lsb", + ) + + assert len(stego_audio) > 0 + + result = decode_audio( + stego_audio=stego_audio, + reference_photo=reference_photo, + passphrase="test words here now", + pin="123456", + embed_mode="audio_lsb", + ) + + assert result.is_text + assert result.message == "Hello from audio steganography!" + + def test_lsb_wrong_credentials(self, carrier_wav, reference_photo): + from soosef.stegasoo.decode import decode_audio + from soosef.stegasoo.encode import encode_audio + + stego_audio, _ = encode_audio( + message="Secret", + reference_photo=reference_photo, + carrier_audio=carrier_wav, + passphrase="correct horse battery staple", + pin="123456", + embed_mode="audio_lsb", + ) + + with pytest.raises(Exception): + decode_audio( + stego_audio=stego_audio, + reference_photo=reference_photo, + passphrase="wrong passphrase words here", + pin="654321", + embed_mode="audio_lsb", + ) + + def test_spread_encode_decode(self, carrier_wav_spread_integration, reference_photo): + """Test full spread spectrum encode/decode pipeline.""" + from soosef.stegasoo.decode import decode_audio + from soosef.stegasoo.encode import encode_audio + + stego_audio, stats = encode_audio( + message="Spread integration test", + reference_photo=reference_photo, + carrier_audio=carrier_wav_spread_integration, + passphrase="test words here now", + pin="123456", + embed_mode="audio_spread", + ) + + result = decode_audio( + stego_audio=stego_audio, + reference_photo=reference_photo, + passphrase="test words here now", + pin="123456", + embed_mode="audio_spread", + ) + + assert result.message == "Spread integration test" + + def test_spread_encode_decode_with_chip_tier( + self, carrier_wav_spread_integration, reference_photo + ): + """Test spread spectrum with explicit chip tier.""" + from soosef.stegasoo.decode import decode_audio + from soosef.stegasoo.encode import encode_audio + + stego_audio, stats = encode_audio( + message="Tier 0 integration", + 
reference_photo=reference_photo, + carrier_audio=carrier_wav_spread_integration, + passphrase="test words here now", + pin="123456", + embed_mode="audio_spread", + chip_tier=0, + ) + + assert stats.chip_tier == 0 + assert stats.chip_length == 256 + + result = decode_audio( + stego_audio=stego_audio, + reference_photo=reference_photo, + passphrase="test words here now", + pin="123456", + embed_mode="audio_spread", + ) + + assert result.message == "Tier 0 integration" + + def test_auto_detect_lsb(self, carrier_wav, reference_photo): + """Test auto-detection finds LSB encoded audio.""" + from soosef.stegasoo.decode import decode_audio + from soosef.stegasoo.encode import encode_audio + + stego_audio, _ = encode_audio( + message="Auto-detect test", + reference_photo=reference_photo, + carrier_audio=carrier_wav, + passphrase="test words here now", + pin="123456", + embed_mode="audio_lsb", + ) + + result = decode_audio( + stego_audio=stego_audio, + reference_photo=reference_photo, + passphrase="test words here now", + pin="123456", + embed_mode="audio_auto", + ) + + assert result.message == "Auto-detect test" + + def test_spread_with_real_speech(self, speech_wav, reference_photo): + """Test spread spectrum with real speech audio from test_data.""" + from soosef.stegasoo.decode import decode_audio + from soosef.stegasoo.encode import encode_audio + + message = "Hidden in a speech about elitism" + + stego_audio, stats = encode_audio( + message=message, + reference_photo=reference_photo, + carrier_audio=speech_wav, + passphrase="test words here now", + pin="123456", + embed_mode="audio_spread", + chip_tier=0, # lossless tier for max capacity + ) + + assert stats.chip_tier == 0 + + result = decode_audio( + stego_audio=stego_audio, + reference_photo=reference_photo, + passphrase="test words here now", + pin="123456", + embed_mode="audio_spread", + ) + + assert result.message == message diff --git a/tests/test_verisoo_hashing.py b/tests/test_verisoo_hashing.py new file mode 
100644 index 0000000..974ce5f --- /dev/null +++ b/tests/test_verisoo_hashing.py @@ -0,0 +1,85 @@ +"""Basic tests for image hashing.""" + +from io import BytesIO + +import pytest +from PIL import Image + +from soosef.verisoo.hashing import hash_image, perceptual_distance, is_same_image + + +def create_test_image(width: int = 100, height: int = 100, color: tuple = (255, 0, 0)) -> bytes: + """Create a simple test image.""" + img = Image.new("RGB", (width, height), color) + buffer = BytesIO() + img.save(buffer, format="PNG") + return buffer.getvalue() + + +class TestHashImage: + """Tests for hash_image function.""" + + def test_hash_returns_all_components(self): + """Hash should return sha256, phash, and dhash.""" + image_data = create_test_image() + hashes = hash_image(image_data) + + assert hashes.sha256 + assert hashes.phash + assert hashes.dhash + assert len(hashes.sha256) == 64 # SHA-256 hex + + def test_identical_images_same_hash(self): + """Identical bytes should produce identical hashes.""" + image_data = create_test_image() + hash1 = hash_image(image_data) + hash2 = hash_image(image_data) + + assert hash1.sha256 == hash2.sha256 + assert hash1.phash == hash2.phash + assert hash1.dhash == hash2.dhash + + def test_different_images_different_hash(self): + """Different images should produce different SHA-256.""" + red = create_test_image(color=(255, 0, 0)) + blue = create_test_image(color=(0, 0, 255)) + + hash_red = hash_image(red) + hash_blue = hash_image(blue) + + assert hash_red.sha256 != hash_blue.sha256 + + +class TestPerceptualDistance: + """Tests for perceptual distance calculation.""" + + def test_identical_hashes_zero_distance(self): + """Identical hashes should have zero distance.""" + h = "0123456789abcdef" + assert perceptual_distance(h, h) == 0 + + def test_different_hashes_nonzero_distance(self): + """Different hashes should have positive distance.""" + h1 = "0000000000000000" + h2 = "0000000000000001" + assert perceptual_distance(h1, h2) == 1 + + def 
test_completely_different_max_distance(self): + """Completely different hashes should have max distance.""" + h1 = "0000000000000000" + h2 = "ffffffffffffffff" + assert perceptual_distance(h1, h2) == 64 # 16 hex chars = 64 bits + + +class TestIsSameImage: + """Tests for image comparison.""" + + def test_exact_match(self): + """Identical bytes should be exact match.""" + image_data = create_test_image() + hash1 = hash_image(image_data) + hash2 = hash_image(image_data) + + is_same, reason = is_same_image(hash1, hash2) + assert is_same + assert reason == "exact"